1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236
| import json import os import pvporcupine import pyaudio import struct import wave import time import subprocess import tempfile import threading import queue import signal from aip import AipSpeech import appbuilder
# Load project configuration (API keys, wake-word settings, prompt audio
# paths) from config.json located next to this script.
with open(os.path.join(os.path.dirname(__file__), "config.json"), "r", encoding="utf-8") as f:
    CFG = json.load(f)

# Wake-word engine: Picovoice Porcupine, configured entirely from the
# "porcupine" section of config.json (access key, keyword paths, etc.).
porc = pvporcupine.create(**CFG["porcupine"])

# Baidu speech client, used for both ASR (speech -> text) and TTS
# (text -> speech) in this script.
baidu_cfg = CFG["baidu_asr_tts"]
client = AipSpeech(baidu_cfg["APP_ID"], baidu_cfg["API_KEY"], baidu_cfg["SECRET_KEY"])

# Baidu Qianfan AppBuilder agent: the SDK reads its token from the
# environment; one long-lived conversation is reused for every query.
os.environ["APPBUILDER_TOKEN"] = CFG["qianfan"]["APPBUILDER_TOKEN"]
_qianfan_client = appbuilder.AppBuilderClient(CFG["qianfan"]["APP_ID"])
_qianfan_conversation_id = _qianfan_client.create_conversation()
def ask_qianfan(query: str) -> str:
    """Send *query* to the Qianfan agent and return its text answer.

    Any failure (network, API error) is caught and logged, and a canned
    fallback sentence is returned so the caller never has to handle
    request errors itself.
    """
    try:
        print("千帆请求中...")
        response = _qianfan_client.run(_qianfan_conversation_id, query)
        answer = response.content.answer
        print("千帆返回:", answer)
        return answer
    except Exception as exc:  # degrade gracefully on any request failure
        print("千帆请求失败:", exc)
        return "网络开小差了,稍后再试"
# Audio capture parameters: 16 kHz mono with 512-sample frames (the frame
# length fed to Porcupine).  An utterance is considered finished after
# ~SILENCE_SEC seconds below the SILENCE_RMS loudness threshold.
SAMPLE_RATE = 16000
FRAME_LEN = 512
SILENCE_SEC = 1.0
SILENCE_RMS = 150
WAV_FILE = "temp.wav"          # scratch file the recorder writes for ASR

# Long-lived microphone stream used only by the wake-word loop below.
pa = pyaudio.PyAudio()
kw_stream = pa.open(format=pyaudio.paInt16, channels=1, rate=SAMPLE_RATE,
                    input=True, frames_per_buffer=FRAME_LEN)
def play_prompt(audio_file):
    """Synchronously play *audio_file* through ``aplay``.

    Missing files are skipped with a warning instead of raising, so a
    misconfigured prompt path never crashes the main loop.
    """
    if os.path.exists(audio_file):
        # Argument-list form instead of os.system(f"aplay -q {path}"):
        # no shell involved, so paths with spaces or shell metacharacters
        # are passed through safely.  check=False keeps the original
        # best-effort behavior (playback failure is not fatal).
        subprocess.run(["aplay", "-q", audio_file], check=False)
    else:
        print(f"警告:音频文件 {audio_file} 不存在,跳过播放")
# Cross-thread coordination state:
#   stop_play     - set by the wake-word loop to abort in-progress playback
#   text_queue    - recognized user text handed to the playback worker
#                   (None is the shutdown sentinel)
#   preload_queue - hands pre-synthesized WAV paths between the TTS preload
#                   helper threads and the playback loop (one at a time)
stop_play = threading.Event()
text_queue = queue.Queue()
preload_queue = queue.Queue(maxsize=1)
_play_thread = None
def _play_worker():
    """Background pipeline: question text -> Qianfan answer -> Baidu TTS -> aplay.

    Consumes recognized text from ``text_queue`` (``None`` is the shutdown
    sentinel).  The answer is split into <=512-byte (GBK-encoded) segments,
    then segment i+1 is synthesized in a helper thread while segment i is
    playing.  ``stop_play`` aborts the current answer at every stage so a
    new wake word interrupts playback promptly.
    """
    while True:
        text = text_queue.get()
        if text is None:  # shutdown sentinel from the main thread
            break

        # Drain any WAV path left over from an answer that was aborted
        # mid-playback; otherwise the next conversation could receive a
        # stale segment from preload_queue.
        while True:
            try:
                stale = preload_queue.get_nowait()
            except queue.Empty:
                break
            if stale:
                try:
                    os.unlink(stale)
                except OSError:
                    pass

        stop_play.clear()
        answer = ask_qianfan(text)
        if stop_play.is_set():
            continue

        # Split the answer into segments of at most 512 GBK bytes each
        # (Baidu TTS limits the request text size).
        seg_list, cur, cur_len = [], '', 0
        for ch in answer:
            ch_len = len(ch.encode('gbk', errors='ignore'))
            if cur_len + ch_len > 512 and cur:
                seg_list.append(cur)
                cur, cur_len = ch, ch_len
            else:
                cur += ch
                cur_len += ch_len
        if cur:
            seg_list.append(cur)
        if not seg_list or stop_play.is_set():
            continue

        # Private scratch directory instead of the deprecated, race-prone
        # tempfile.mktemp() name prefix.
        tmp_dir = tempfile.mkdtemp(prefix='tts_')
        tmp_base = os.path.join(tmp_dir, 'seg')
        n = len(seg_list)

        def _preload_one(idx):
            """Synthesize segment *idx* and convert it to 16 kHz mono WAV.

            Returns the playable file path, or None on any failure
            (TTS error, abort request, or index out of range).
            """
            if idx >= n or stop_play.is_set():
                return None
            seg = seg_list[idx]
            try:
                print(f"TTS 合成段落 {idx}: {seg}")
                wav_bytes = client.synthesis(seg, 'zh', 1, {'spd': 5, 'pit': 5, 'vol': 9, 'per': 0})
                # Baidu returns an error dict (not bytes) on synthesis failure.
                if not isinstance(wav_bytes, bytes):
                    return None
            except Exception as e:
                print('TTS 异常:', e)
                return None
            raw = tmp_base + f'_{idx}_raw.wav'
            with open(raw, 'wb') as f:
                f.write(wav_bytes)
            # aplay wants plain 16 kHz / 16-bit / mono PCM; let ffmpeg convert.
            target = tmp_base + f'_{idx}_16k.wav'
            subprocess.run(['ffmpeg', '-y', '-i', raw, '-ar', '16000', '-ac', '1',
                            '-sample_fmt', 's16', '-b:a', '256k', target],
                           stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
            os.unlink(raw)
            return target

        next_wav = _preload_one(0)
        if not next_wav:
            continue
        for i in range(n):
            if stop_play.is_set():
                try:
                    os.unlink(next_wav)
                except OSError:
                    pass
                break
            print("播放段落", i)
            player = subprocess.Popen(['aplay', '-q', next_wav])
            if i + 1 < n:
                # Overlap synthesis of the next segment with this playback.
                threading.Thread(target=lambda idx: preload_queue.put(_preload_one(idx)),
                                 args=(i + 1,), daemon=True).start()
            while player.poll() is None:
                if stop_play.is_set():
                    player.send_signal(signal.SIGTERM)
                    try:
                        player.wait(timeout=0.5)
                    except subprocess.TimeoutExpired:
                        # aplay ignored SIGTERM within the grace period;
                        # force-kill so the worker thread never hangs here.
                        player.kill()
                    break
                time.sleep(0.05)
            try:
                os.unlink(next_wav)
            except OSError:
                pass
            if stop_play.is_set():
                break
            if i + 1 < n:
                try:
                    next_wav = preload_queue.get(timeout=10)
                except queue.Empty:
                    next_wav = None
                if not next_wav:
                    break

        # Best-effort scratch-dir cleanup (may be non-empty if an abort
        # left a preloaded segment behind; that is cleaned next round).
        try:
            os.rmdir(tmp_dir)
        except OSError:
            pass
# Start the answer/TTS/playback pipeline in the background; daemon=True so
# it never blocks interpreter exit if shutdown is abrupt.
_play_thread = threading.Thread(target=_play_worker, daemon=True)
_play_thread.start()
def record_audio(filename=WAV_FILE, silence_sec=SILENCE_SEC, threshold=SILENCE_RMS):
    """Record 16 kHz mono audio until ~*silence_sec* of silence, save as WAV.

    Opens a fresh PyAudio input stream, reads FRAME_LEN-sample chunks and
    tracks RMS loudness; recording stops once RMS stays below *threshold*
    for more than *silence_sec* seconds, or after a hard cap of 10x that
    window.  Returns the written *filename*.
    """
    print("开始录音...")
    max_silent = int(silence_sec * SAMPLE_RATE / FRAME_LEN)
    fmt = f"{FRAME_LEN}h"  # hoisted: the unpack format is loop-invariant
    rec_stream = pa.open(format=pyaudio.paInt16, channels=1, rate=SAMPLE_RATE,
                         input=True, frames_per_buffer=FRAME_LEN * 4)
    frames, silent_frames = [], 0
    try:
        # Hard cap on total recording length: 10x the silence window.
        for _ in range(max_silent * 10):
            try:
                data = rec_stream.read(FRAME_LEN, exception_on_overflow=False)
            except OSError:
                # Transient input glitch (e.g. overflow) — skip this frame.
                continue
            frames.append(data)
            pcm = struct.unpack(fmt, data)
            rms = (sum(x * x for x in pcm) / FRAME_LEN) ** 0.5
            print(f"\rRMS={rms:.0f} ", end="", flush=True)
            if rms < threshold:
                silent_frames += 1
            else:
                silent_frames = 0
            if silent_frames > max_silent:
                break
    finally:
        # Always release the PortAudio stream, even if a read raised —
        # the original leaked the stream on an unexpected exception.
        rec_stream.stop_stream()
        rec_stream.close()
    print("\n录音结束")
    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(pa.get_sample_size(pyaudio.paInt16))
        wf.setframerate(SAMPLE_RATE)
        wf.writeframes(b''.join(frames))
    return filename
def asr(filename):
    """Run Baidu short-speech recognition on a 16 kHz WAV file.

    Returns the top transcript string, or None when the API reports an
    error (non-zero ``err_no``).
    """
    with open(filename, 'rb') as audio_file:
        audio_bytes = audio_file.read()
    response = client.asr(audio_bytes, 'wav', 16000, {'dev_pid': 1537})
    if response.get('err_no') == 0:
        return response['result'][0]
    return None
# --- Main wake-word loop ----------------------------------------------------
# Blocks on the microphone, feeding 512-sample frames to Porcupine.  On a
# wake-word hit it interrupts any ongoing answer playback, records the user's
# utterance, runs ASR, and hands the text to the playback worker thread.
try:
    print("等待唤醒词“小派”...")
    while True:
        pcm = kw_stream.read(FRAME_LEN, exception_on_overflow=False)
        pcm = struct.unpack("h" * FRAME_LEN, pcm)
        # process() returns the detected keyword index (>= 0), or a negative
        # value when no keyword is present in this frame.
        if porc.process(pcm) >= 0:
            print("\n【唤醒】检测到“小派”!")
            stop_play.set()  # abort any answer still playing
            play_prompt(CFG["audio"]["prompt_wav"])
            wav = record_audio()
            play_prompt(CFG["audio"]["confirm_wav"])
            text = asr(wav)
            if text:
                print("识别结果:", text)
                text_queue.put(text)
            else:
                # ASR failed — have the assistant speak a "didn't catch that".
                text_queue.put("我没听清,请再说一遍")
            print("等待下次唤醒...")
except KeyboardInterrupt:
    print("\n程序退出")
finally:
    # Orderly shutdown: stop playback, unblock the worker with the None
    # sentinel, then release audio and wake-word engine resources.
    stop_play.set()
    text_queue.put(None)
    _play_thread.join(timeout=2)
    kw_stream.close()
    pa.terminate()
    porc.delete()
|