pyttsx3
# Install first: pip install pyttsx3
import pyttsx3


def say_text(engine, words, voice):
    """Speak ``words`` on ``engine`` with the requested ``voice``.

    Args:
        engine: A pyttsx3 engine, as returned by ``pyttsx3.init()``.
        words: The text to speak.
        voice: Value assigned to the engine's ``'voice'`` property.
    """
    # Select the voice before queueing any text.
    engine.setProperty('voice', voice)
    engine.say(words)
    engine.runAndWait()
    engine.stop()


def main():
    """Create an engine, set its rate and volume, and speak a sample sentence."""
    tts_engine = pyttsx3.init()

    # (property, value) pairs applied in order.
    settings = (
        ('rate', 100),    # speech rate
        ('volume', 1.0),  # volume: 0 is the minimum, 1 is the maximum
    )
    for prop, value in settings:
        tts_engine.setProperty(prop, value)

    # To read the text interactively, replace this literal with an input() call.
    sample_text = '这是一个小练习'
    say_text(tts_engine, sample_text, 'zh')


if __name__ == '__main__':
    main()
gTTS
pip install gTTS
import os

from gtts import gTTS

language = 'zh'  # or 'zh-tw'
# File the synthesized speech is written to and then played from.
filename = 'output.mp3'

# slow=False selects the normal speaking speed.
output = gTTS(text="这是中文", lang=language, slow=False)
output.save(filename)

# Play the converted file. `start` is a Windows shell command, so this
# playback step only works on Windows.
os.system(f"start {filename}")
win32com
python -m pip install pyHook
python -m pip install pywin32
import win32com.client

# COM ProgID of the speech object that is dispatched below.
SAPI_PROG_ID = "SAPI.SpVoice"

# Create the COM speech object and have it read one sentence aloud.
speaker = win32com.client.Dispatch(SAPI_PROG_ID)
speaker.Speak("你好,这是中文!")
ttskit
python -m pip install -U ttskit pyworld
python -m pip install ffmpeg
zhtts
python3 -m pip install -U ffmpeg pyworld ttskit # 会安装tensorflow-cpu版本,但使用时会报CUDA的错
python3 -m pip install tensorflow-gpu
import zhtts
import sounddevice as sd

# Sample rate used for playback. The original author obtained this value by
# converting the output to a .wav file with another package and reading the
# rate back from that file.
PLAYBACK_RATE = 24000

tts = zhtts.TTS()  # uses fastspeech2 by default
text1 = ",这是一个开源的端到端中文语音合成系统"

# text2mel / mel2audio are functions shipped with zhtts (the author found them
# by browsing the package in PyCharm). The resulting audio is a numpy array
# that can be handed straight to a player.
mel = tts.text2mel(text1)
print(mel.shape, type(mel))

audio = tts.mel2audio(mel)
print(audio, type(audio))

# For playback details, see the "Playing audio with sounddevice" section.
sd.play(audio, samplerate=PLAYBACK_RATE)
sd.wait()
sounddevice播放音频
python -m pip install soundfile sounddevice
import soundfile
import sounddevice as sd

# Audio file to play; point this at your own file.
path = "test.wav"

# Optional device number (taken from the listing printed below) to play
# through. None leaves the default output device untouched.
output_device = None

# List every sound device on the system. In the printed table, ">" marks the
# default input device and "<" marks the default output device.
devs = sd.query_devices()
print(devs)

# How to read a row such as
#   "1 Microphone Array (Synaptics Audio), MME (2 in, 0 out)":
#   1 is the device number, "Microphone Array (Synaptics Audio)" is the device
#   name, and "MME (2 in, 0 out)" means the MME driver with 2 input channels.
#
# Sample listing (device names translated from a Chinese-locale system):
#    0 Microsoft Sound Mapper - Input, MME (2 in, 0 out)
# >  1 Microphone Array (Synaptics Audio), MME (2 in, 0 out)
#    2 Microsoft Sound Mapper - Output, MME (0 in, 2 out)
# <  3 Speakers (Synaptics Audio), MME (0 in, 2 out)
#    4 Primary Sound Capture Driver, Windows DirectSound (2 in, 0 out)
#    5 Microphone Array (Synaptics Audio), Windows DirectSound (2 in, 0 out)
#    6 Primary Sound Driver, Windows DirectSound (0 in, 2 out)
#    7 Speakers (Synaptics Audio), Windows DirectSound (0 in, 2 out)
#    8 Speakers (Synaptics Audio), Windows WASAPI (0 in, 2 out)
#    9 Microphone Array (Synaptics Audio), Windows WASAPI (2 in, 0 out)
#   10 Microphone Array 1 (Synaptics Audio capture), Windows WDM-KS (2 in, 0 out)
#   11 Microphone Array 2 (Synaptics Audio capture), Windows WDM-KS (4 in, 0 out)
#   12 Microphone Array 3 (Synaptics Audio capture), Windows WDM-KS (4 in, 0 out)
#   13 Output 1 (Synaptics Audio output), Windows WDM-KS (0 in, 2 out)
#   14 Output 2 (Synaptics Audio output), Windows WDM-KS (0 in, 8 out)
#   15 Input (Synaptics Audio output), Windows WDM-KS (2 in, 0 out)
#   16 Headset (@System32\drivers\bthhfenum.sys,#2;%1 Hands-Free AG Audio%0;(iGene-U2)), Windows WDM-KS (0 in, 1 out)
#   17 Headset (@System32\drivers\bthhfenum.sys,#2;%1 Hands-Free AG Audio%0;(iGene-U2)), Windows WDM-KS (1 in, 0 out)
#   18 Headset (), Windows WDM-KS (0 in, 2 out)

# soundfile.read returns the samples (a numpy array) and the file's sample rate.
data, sampler = soundfile.read(path)

try:
    if output_device is not None:
        # Index 1 of the default device pair is the playback (output) device.
        sd.default.device[1] = output_device
    sd.play(data, samplerate=sampler)
except Exception as e:
    # Best-effort playback: report the failure together with its cause.
    print("播放失败", e)
else:
    # Block until playback has finished.
    sd.wait()
Python文字转语音
import io
import sys

import pyttsx3

# Re-wrap stdout so printed text is encoded as UTF-8. line_buffering=True
# makes each print() appear immediately instead of sitting in the wrapper's
# buffer while the engine is speaking.
sys.stdout = io.TextIOWrapper(
    sys.stdout.buffer, encoding='utf-8', line_buffering=True
)

# One engine is created and reused for every step below.
engine = pyttsx3.init()
engine.setProperty('voice', 'zh')

# Ask for the file path aloud, then read it from the console.
engine.say('请输入您要播放的文件路径')
engine.runAndWait()
path_dir = input('请输入您要播放的文件路径后回车:')

# Read all lines up front so the file is closed before speech starts.
with open(path_dir, 'r') as f:
    lines = f.readlines()
print('要说的话:', lines)

# Queue every line, then speak the whole queue.
for line in lines:
    engine.say(line)
engine.runAndWait()

# Save speech to a file.
# On Linux make sure that 'espeak' and 'ffmpeg' are installed.
engine.save_to_file('迪迦奥特曼(dijia.top)', 'test.mp3')
engine.runAndWait()
engine.stop()
speech 实现语音识别
import speech

# Canned replies, keyed by the exact phrase that was recognised.
REPLIES = {
    "你好": "How are you?",
    "天气": "今天天气晴!",
}

while True:
    heard = speech.input()  # receive spoken input
    speech.say("you said:" + heard)  # echo it back

    # Answer only when the phrase has a canned reply.
    reply = REPLIES.get(heard)
    if reply is not None:
        speech.say(reply)
SpeechRecognition
pip install SpeechRecognition