实时中文语音识别,离线Python最便捷方式
废话不多说,直接上代码,先安装环境
需要安装的包:
json (Python 标准库自带, 无需单独安装)
pyaudio
numpy
vosk
新建一个py文件写入:
import json
import pyaudio
import numpy as np
from vosk import Model, KaldiRecognizer, SetLogLevel
def _record_until_silence(fmt, rate, chunk, quiet_limit, quiet_level):
    """Capture microphone audio until enough quiet chunks have been seen.

    A counter is incremented for every chunk whose mean absolute amplitude
    is below ``quiet_level`` and decremented (never below zero) for every
    louder chunk. Recording stops once the counter reaches ``quiet_limit``.
    Every chunk read, quiet or not, is kept.

    Returns:
        A list of raw byte chunks as returned by ``stream.read``.
    """
    audio = pyaudio.PyAudio()
    frames = []
    try:
        stream = audio.open(format=fmt,
                            channels=1,
                            rate=rate,
                            input=True,
                            frames_per_buffer=chunk)
        try:
            print("开始录音...")
            count = 0
            while count < quiet_limit:
                data = stream.read(chunk, exception_on_overflow=False)
                # Widen to int32 before abs(): in int16, abs(-32768) wraps
                # around to -32768 and would drag the mean down.
                samples = np.frombuffer(data, dtype=np.int16).astype(np.int32)
                frame_energy = np.mean(np.abs(samples))
                if frame_energy < quiet_level:
                    count += 1
                elif count > 0:
                    count -= 1
                frames.append(data)
            print("停止录音!")
            stream.stop_stream()
        finally:
            # Always close the stream so the input device is released even
            # when reading fails part-way through.
            stream.close()
    finally:
        audio.terminate()
    return frames


def _transcribe(model, frames, rate):
    """Feed recorded chunks to a Vosk recognizer and return the joined text."""
    rec = KaldiRecognizer(model, rate)
    rec.SetWords(True)
    pieces = []
    for data in frames:
        # AcceptWaveform returning true means a result is ready to be read.
        if rec.AcceptWaveform(data):
            pieces.append(json.loads(rec.Result()).get("text", ""))
    # Collect whatever the recognizer still holds after the last chunk.
    pieces.append(json.loads(rec.FinalResult()).get("text", ""))
    # Drop all whitespace from the combined transcript.
    return "".join("".join(pieces).split())


def SaveWave(model):
    """Record one utterance from the microphone and transcribe it with Vosk.

    Audio is captured as mono 16-bit samples until the silence counter
    reaches ``THRESHOLDNUM`` (see ``_record_until_silence``), then the whole
    recording is passed through a ``KaldiRecognizer`` built from ``model``.

    Args:
        model: The model object handed to ``KaldiRecognizer``.

    Returns:
        The recognized text with all whitespace removed; an empty string
        when nothing was recognized.
    """
    # Audio parameters
    FORMAT = pyaudio.paInt16  # sample format of the audio stream
    RATE = 44100  # sampling rate in Hz
    CHUNK = 4000  # frames read per iteration
    THRESHOLDNUM = 30  # silence counter value at which recording stops
    THRESHOLD = 100  # mean absolute amplitude below which a chunk is quiet

    frames = _record_until_silence(FORMAT, RATE, CHUNK, THRESHOLDNUM, THRESHOLD)
    return _transcribe(model, frames, RATE)
if __name__ == "__main__":
    # Set the log level before loading the model; setting it afterwards
    # leaves the model-loading log output unsuppressed.
    SetLogLevel(-1)
    model = Model("vosk-model-small-cn-0.22")
    # Keep listening: each pass records one utterance and prints its
    # transcript, skipping passes where nothing was recognized.
    while True:
        res = SaveWave(model)
        if res:
            print(res)
下载
https://alphacephei.com/vosk/models/vosk-model-small-cn-0.22.zip
解压到与上述 py 文件相同的目录下 (解压后的文件夹名应为 vosk-model-small-cn-0.22, 与代码中 Model(...) 的路径一致), 然后运行该 py 文件即可!
运行效果:
开始录音...
停止录音!
我说一句话
方便救急,比配其他环境快得多,5分钟就能跑起来!!!
给出一个比较大的模型:
https://alphacephei.com/vosk/models/vosk-model-cn-0.15.zip
下载并解压后, 把代码中 Model("vosk-model-small-cn-0.22") 的路径替换为 vosk-model-cn-0.15 即可
作者:迟钝皮纳德