100字范文 > python 阿里云平台合成语音（TTS）

python 阿里云平台合成语音（TTS）

时间：2020-08-27 00:41:36

1. 安装阿里云 Python SDK core：

pip3 install aliyun-python-sdk-core-v3

2. 安装ali_speech python SDK, 从github上下载

https://github.com/aliyun/alibabacloud-nls-python-sdk

解压之后，安装

cd alibabacloud-nls-python-sdk

sudo python3 setup.py install

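3. 增加阿里云 speech 配置文件，命名为 ali_wav_config，与脚本放在同一目录下。文件每行为 key=value 形式，以 # 开头的行为注释；可配置的项有 speaker、volume、speech_rate、pitch_rate，例如：speaker=xiaoyun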

4. 生成语音

执行脚本：

./ali_wav.py ./words.txt

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Batch text-to-speech with the Alibaba Cloud NLS speech SDK.

Usage: ``./ali_wav.py ./words.txt``

Every non-empty line of the words file is synthesized into its own WAV file,
written next to the words file.  Voice settings are read from a file named
``ali_wav_config`` located in the same directory as this script.
"""
import sys
import os
import subprocess
import threading
import ali_speech
import logging
import time
import json
import base64
from ali_speech.callbacks import SpeechSynthesizerCallback
from ali_speech.constant import TTSFormat
from ali_speech.constant import TTSSampleRate
from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.request import CommonRequest

words_file = ""
config_file_name = "/ali_wav_config"
speaker = "xiaoyun"
volume = 50
speech_rate = 0
pitch_rate = 0
thread_list = []
MAX_THREAD = 10


class MyCallback(SpeechSynthesizerCallback):
    """Synthesizer callback that streams the received audio into a file."""

    def __init__(self, name):
        """Open ``name`` for binary writing; it receives the audio data."""
        self._name = name
        self._fout = open(name, 'wb')

    def on_binary_data_received(self, raw):
        """Append one chunk of audio bytes to the output file."""
        self._fout.write(raw)

    def on_completed(self, message):
        """Close the finished file, trim its silence and report completion."""
        self._fout.close()
        self._trim_silence()
        print(self._name + " Done!!!")

    def on_task_failed(self, message):
        """Close the output file and report the failure."""
        self._fout.close()
        # Report the failure instead of dropping it silently, so a missing
        # or truncated WAV file can be traced back to its cause.
        print(self._name + " failed: " + str(message))

    def on_channel_closed(self):
        print('MyCallback.OnRecognitionChannelClosed')

    def on_metainfo(self, message):
        print('MyCallback.on_metainfo: %s' % message)

    def _trim_silence(self):
        """Cut the silent parts out of the WAV file with ffmpeg (optional).

        Best effort: when ffmpeg is missing or fails, the untrimmed file is
        kept as it is.
        """
        # ffmpeg refuses to write an output that is the same file as its
        # input, so the result goes to a sibling file that replaces the
        # original only on success.
        trimmed = self._name + ".trimmed.wav"
        # An argument list (no shell) keeps file names derived from the
        # input text from being interpreted as shell syntax.
        command = [
            "ffmpeg", "-i", self._name,
            "-af",
            "silenceremove=start_periods=1:"
            "start_duration=0:start_threshold=-100dB:"
            "stop_periods=1:stop_duration=2:stop_threshold=-100dB",
            "-y", "-ac", "1", "-ar", "16000",
            trimmed,
        ]
        try:
            result = subprocess.run(
                command,
                stdin=subprocess.DEVNULL,  # keep ffmpeg away from the terminal
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except OSError:
            # ffmpeg is not installed or not executable.
            return
        if result.returncode == 0:
            os.replace(trimmed, self._name)
        elif os.path.exists(trimmed):
            os.remove(trimmed)


def process(client, appkey, token, text, audio_name):
    """Synthesize ``text`` into the WAV file ``audio_name``.

    Uses the module-level ``speaker``, ``volume``, ``speech_rate`` and
    ``pitch_rate`` settings.  Returns the negative status code when the
    synthesizer fails to start, otherwise ``None``.  Exceptions raised while
    synthesizing are printed, not propagated.
    """
    callback = MyCallback(audio_name)
    synthesizer = client.create_synthesizer(callback)
    synthesizer.set_appkey(appkey)
    synthesizer.set_token(token)
    synthesizer.set_voice(speaker)
    synthesizer.set_text(text)
    synthesizer.set_format(TTSFormat.WAV)
    synthesizer.set_sample_rate(TTSSampleRate.SAMPLE_RATE_16K)
    synthesizer.set_volume(volume)
    synthesizer.set_speech_rate(speech_rate)
    synthesizer.set_pitch_rate(pitch_rate)
    try:
        ret = synthesizer.start()
        if ret < 0:
            return ret
        synthesizer.wait_completed()
    except Exception as e:
        print(e)
    finally:
        synthesizer.close()


def process_multithread(client, appkey, token, text, audio_name):
    """Run :func:`process` in a new thread recorded in ``thread_list``."""
    global thread_list
    thread = threading.Thread(
        target=process, args=(client, appkey, token, text, audio_name))
    thread_list.append(thread)
    thread.start()


def get_token():
    """Request an access token from the NLS meta service and return its id."""
    # NOTE(review): the access key id/secret below are placeholders; supply
    # real credentials (preferably from the environment, not the source).
    client = AcsClient(
        "xxxxxxxxxxxxxxxx",
        base64.b64decode(b'xxxxxxxxxxxxxxxxxxxxx').decode(),
        "cn-shanghai")
    # Create the request and set its parameters.
    request = CommonRequest()
    request.set_method('POST')
    # Endpoint and API version of the CreateToken action for cn-shanghai.
    request.set_domain('nls-meta.cn-shanghai.aliyuncs.com')
    request.set_version('2019-02-28')
    request.set_action_name('CreateToken')
    response = client.do_action_with_exception(request)
    content = json.loads(response.decode())
    token = content['Token']['Id']
    return token


def _report_error(message):
    """Print ``message`` followed by the line number of the caller."""
    print(message)
    print(sys._getframe(1).f_lineno)


def _load_config(path):
    """Read the voice settings from ``path`` into the module-level globals.

    Lines have the form ``key=value``; blank lines, lines starting with
    ``#`` and lines without ``=`` are ignored.  Raises ``ValueError`` when a
    numeric setting is not an integer.
    """
    global speaker, volume, speech_rate, pitch_rate
    with open(path, 'r', encoding='utf-8') as config_f:
        for line in config_f:
            value = line.strip()
            for unwanted in ("\n", "\r", "\t", '"'):
                value = value.replace(unwanted, "")
            if not value or value[0] == "#":
                continue
            parts = value.split("=")
            if len(parts) < 2:
                # Not a key=value line; there is no value to read.
                continue
            key = parts[0]
            setting = parts[-1].strip()
            if "speaker" in key:
                speaker = setting.lower()
            elif "volume" in key:
                volume = int(setting)
            elif "speech_rate" in key:
                speech_rate = int(setting)
            elif "pitch_rate" in key:
                pitch_rate = int(setting)


def _audio_file_name(wav_dir, index, text):
    """Return the path ``<wav_dir>/<NNN>-<text>.wav`` for one input line."""
    # A path separator inside the text would point into a directory that
    # does not exist, so it is replaced just like the spaces are.
    stem = text.replace(" ", "-").replace(os.sep, "-")
    return os.path.join(wav_dir, "%03d-%s.wav" % (index, stem))


def main():
    """Synthesize every non-empty line of the words file given in argv[1]."""
    global words_file, config_file_name, thread_list
    if len(sys.argv) < 2 or not sys.argv[1]:
        _report_error("ali tts arg error")
        return
    words_file = sys.argv[1]
    if not os.path.exists(words_file):
        _report_error("file " + words_file + " not exist")
        return

    # The config file lives next to this script.  abspath() keeps the lookup
    # from resolving to the filesystem root when the script is started
    # without a directory part (e.g. ``python3 ali_wav.py``).
    script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    config_file_name = os.path.join(
        script_dir, os.path.basename(config_file_name))
    if not os.path.exists(config_file_name):
        _report_error("config file " + config_file_name + " not exist")
        return
    _load_config(config_file_name)

    client = ali_speech.NlsClient()
    # Log level of the SDK: DEBUG, INFO, WARNING or ERROR.
    client.set_log_level('ERROR')
    appkey = 'xxxxxxxxxxxx'
    token = get_token()

    # Audio files go next to the words file; a bare file name means the
    # current directory, not the filesystem root.
    wav_dir = os.path.dirname(words_file) or "."
    with open(words_file, 'r', encoding='utf-8') as r_file:
        r_lines = r_file.readlines()

    word_idx = 0
    for line in r_lines:
        # Drop the line ending and collapse every whitespace run to a space.
        text = " ".join(line.split())
        if not text:
            continue
        audio_name = _audio_file_name(wav_dir, word_idx, text)
        process_multithread(client, appkey, token, text, audio_name)
        word_idx += 1
        # Limit concurrency: wait for the batch after every MAX_THREAD jobs.
        if word_idx % MAX_THREAD == 0:
            for thread in thread_list:
                thread.join()
            thread_list = []

    for thread in thread_list:
        thread.join()
    # NOTE(review): presumably resets terminal settings disturbed by the
    # ffmpeg child processes - confirm it is still needed.
    os.system("stty sane")


if __name__ == "__main__":
    if len(sys.argv) < 2:
        _report_error("ali tts arg error")
        # A usage error must not look like success to the calling shell.
        sys.exit(1)
    main()

本内容不代表本网观点和政治立场，如有侵犯你的权益请联系我们处理。

网友评论

网友评论仅供其表达个人看法，并不表明网站立场。