安装依赖:
pip install azure-cognitiveservices-speech
代码如下(请将 speech_key 和 service_region 替换为自己的 Azure 语音服务密钥和区域):
import os
import re
import time
from xml.sax.saxutils import escape

import azure.cognitiveservices.speech as speechsdk
def start_voice_interpreter():
    """Run a continuous, bidirectional voice interpreter on the default microphone.

    Behavior:
      * Foreign speech (en/ja/ko/fr/es/de/ru/it/pt) is translated into Chinese
        and spoken aloud.
      * Chinese speech is translated into the most recently heard foreign
        language (English by default) and spoken aloud.
    Runs until interrupted with Ctrl+C. Requires an Azure Speech resource;
    credentials are read from the AZURE_SPEECH_KEY / AZURE_SPEECH_REGION
    environment variables when set.
    """
    # SECURITY: do not commit real subscription keys to source control.
    # Prefer environment variables; the inline fallbacks only preserve the
    # original script's out-of-the-box behavior.
    speech_key = os.environ.get(
        "AZURE_SPEECH_KEY",
        "4ahKWCGXqoob2qwwel9HPTebKD0Mk1WMg43EDoaieIPJ6V33XyWGJQQJ99BLACYeBjFXJ3w3AAAYACOGLK1w",
    )
    service_region = os.environ.get("AZURE_SPEECH_REGION", "eastus")

    # === 1. Supported-language table ===
    # Azure auto language detection accepts at most 10 candidates; here the 9
    # most common foreign languages plus Chinese are configured.
    # Tuple format: (recognition locale, translation target code, TTS voice).
    langs_setup = [
        ("zh-CN", "zh-Hans", "zh-CN-XiaoxiaoNeural"),  # Chinese
        ("en-US", "en", "en-US-AvaNeural"),            # English
        ("ja-JP", "ja", "ja-JP-NanamiNeural"),         # Japanese
        ("ko-KR", "ko", "ko-KR-SunHiNeural"),          # Korean
        ("fr-FR", "fr", "fr-FR-DeniseNeural"),         # French
        ("es-ES", "es", "es-ES-ElviraNeural"),         # Spanish
        ("de-DE", "de", "de-DE-KatjaNeural"),          # German
        ("ru-RU", "ru", "ru-RU-SvetlanaNeural"),       # Russian
        ("it-IT", "it", "it-IT-ElsaNeural"),           # Italian
        ("pt-BR", "pt", "pt-BR-FranciscaNeural"),      # Portuguese
    ]
    # Candidate list for auto-detection (Azure limit: 10 entries).
    detect_candidates = [x[0] for x in langs_setup]
    # translation code -> TTS voice, e.g. 'ja' -> 'ja-JP-NanamiNeural'
    voice_map = {x[1]: x[2] for x in langs_setup}

    # === 2. SDK configuration ===
    translation_config = speechsdk.translation.SpeechTranslationConfig(
        subscription=speech_key, region=service_region)
    # Register every language as a translation target, because Chinese input
    # may need to be translated into any one of the foreign languages.
    for _, target_code, _ in langs_setup:
        translation_config.add_target_language(target_code)
    # FIX: continuous language identification must be requested on the config
    # BEFORE the recognizer is constructed. The original code set this on
    # recognizer.properties after creation, which has no effect.
    translation_config.set_property(
        property_id=speechsdk.PropertyId.SpeechServiceConnection_LanguageIdMode,
        value='Continuous'
    )

    # Auto language detection over the candidate locales.
    auto_detect_config = speechsdk.languageconfig.AutoDetectSourceLanguageConfig(
        languages=detect_candidates
    )

    # Speech synthesis (TTS) — voice is selected per utterance via SSML.
    tts_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
    speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=tts_config)

    # Translation recognizer bound to the default microphone.
    audio_config = speechsdk.audio.AudioConfig(use_default_microphone=True)
    recognizer = speechsdk.translation.TranslationRecognizer(
        translation_config=translation_config,
        audio_config=audio_config,
        auto_detect_source_language_config=auto_detect_config
    )

    # === Conversation state ===
    # Remembers which foreign language was heard last (default English), so
    # subsequent Chinese speech is translated into that language. A dict is
    # used so the nested callback can mutate it without `nonlocal`.
    state = {"last_foreign_target": "en"}

    def play_translation(text, language_code):
        """Speak `text` aloud using the voice registered for `language_code`."""
        voice_name = voice_map.get(language_code, "en-US-AvaNeural")
        # Escape XML special characters so the SSML stays well-formed.
        safe_text = escape(text)
        ssml_string = f"""
        <speak version='1.0' xml:lang='{language_code}'>
            <voice name='{voice_name}'>{safe_text}</voice>
        </speak>
        """
        try:
            # Fire-and-forget on purpose: blocking here would stall the
            # recognition callback thread while audio plays.
            speech_synthesizer.speak_ssml_async(ssml_string)
        except Exception as e:
            print(f"[播放错误]: {e}")

    def result_callback(evt):
        """Handle a final recognition result: route and speak the translation."""
        if evt.result.reason != speechsdk.ResultReason.TranslatedSpeech:
            return
        # Detected source locale reported by the service, e.g. "ja-JP".
        detected_src_lang = evt.result.properties.get(
            speechsdk.PropertyId.SpeechServiceConnection_AutoDetectSourceLanguageResult
        )
        text = evt.result.text
        # FIX: the property can be absent (None) when detection fails; the
        # original code then crashed on the substring test below.
        if not detected_src_lang:
            print(f">> [忽略]: 无法识别的语言代码 {detected_src_lang}")
            return
        # Map the (possibly decorated) locale string to a short target code;
        # substring match because the returned value is not always bare "ja-JP".
        current_lang_code = None
        for src_full, target_short, _ in langs_setup:
            if src_full in detected_src_lang:
                current_lang_code = target_short
                break
        if not current_lang_code:
            print(f">> [忽略]: 无法识别的语言代码 {detected_src_lang}")
            return
        print(f"\n[识别语言]: {detected_src_lang} ({current_lang_code}) | [原文]: {text}")

        if current_lang_code == "zh-Hans":
            # --- Case A: own (Chinese) speech ---
            # Heuristic against misdetection: a "Chinese" result that is pure
            # Latin text with no CJK characters is likely mislabeled English.
            is_latin_text = bool(re.search(r"[a-zA-Z]", text))
            has_chinese_char = bool(re.search(r"[\u4e00-\u9fa5]", text))
            if is_latin_text and not has_chinese_char:
                print(">> [系统警告]: 检测到中文模式,但内容似英文,Azure 可能误判,跳过翻译。")
                return
            # Normal Chinese -> translate into the last-heard foreign language.
            target_lang = state["last_foreign_target"]
            trans_text = evt.result.translations.get(target_lang, "")
            print(f"[中文 -> {target_lang}]: {trans_text}")
        else:
            # --- Case B: foreign speech ---
            # Remember this language so later Chinese replies target it.
            state["last_foreign_target"] = current_lang_code
            target_lang = "zh-Hans"
            trans_text = evt.result.translations.get(target_lang, "")
            print(f"[{current_lang_code} -> 中文]: {trans_text}")
        # FIX: skip synthesis when the service returned no translation text.
        if trans_text:
            play_translation(trans_text, target_lang)

    def canceled_callback(evt):
        """Surface cancellation (auth/region/network) errors instead of failing silently."""
        print(f">> [取消]: {evt}")

    recognizer.recognized.connect(result_callback)
    recognizer.canceled.connect(canceled_callback)

    # Start listening and block until Ctrl+C.
    print("--------------------------------------------------")
    print("万能翻译官已启动。")
    print("1. 听到外语(英/日/韩/法/德/西/俄/意/葡) -> 翻译成中文")
    print("2. 听到中文 -> 翻译成刚才那门外语 (默认英语)")
    print("--------------------------------------------------")
    recognizer.start_continuous_recognition()
    try:
        while True:
            time.sleep(0.5)
    except KeyboardInterrupt:
        recognizer.stop_continuous_recognition()
# Script entry point: start the interpreter loop (blocks until Ctrl+C).
if __name__ == "__main__":
    start_voice_interpreter()