Skip to content

Commit b4bcbbb

Browse files
Add support for multiple languages for Azure transcriber (vocodedev#295)
* Add support for multiple languages for Azure transcriber
* cleanup

---------

Co-authored-by: Kian <kianhooshmand@berkeley.edu>
1 parent a4e112d commit b4bcbbb

File tree

2 files changed

+27
-7
lines changed

2 files changed

+27
-7
lines changed

vocode/streaming/models/transcriber.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,8 @@
1313
from .audio_encoding import AudioEncoding
1414
from .model import TypedModel
1515

16+
AZURE_DEFAULT_LANGUAGE = "en-US"
17+
1618

1719
class TranscriberType(str, Enum):
1820
BASE = "transcriber_base"
@@ -119,7 +121,8 @@ class GoogleTranscriberConfig(TranscriberConfig, type=TranscriberType.GOOGLE.val
119121

120122

121123
class AzureTranscriberConfig(TranscriberConfig, type=TranscriberType.AZURE.value):
122-
pass
124+
language: str = AZURE_DEFAULT_LANGUAGE
125+
candidate_languages: Optional[List[str]] = None
123126

124127

125128
class AssemblyAITranscriberConfig(

vocode/streaming/transcriber/azure_transcriber.py

Lines changed: 23 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,8 @@
1-
import asyncio
21
import logging
32
import queue
43
from typing import Optional
5-
import threading
64

75
from azure.cognitiveservices.speech.audio import (
8-
AudioInputStream,
96
PushAudioInputStream,
107
AudioStreamFormat,
118
AudioStreamWaveFormat,
@@ -54,9 +51,29 @@ def __init__(
5451
region=getenv("AZURE_SPEECH_REGION"),
5552
)
5653

57-
self.speech = speechsdk.SpeechRecognizer(
58-
speech_config=speech_config, audio_config=config
59-
)
54+
speech_params = {
55+
"speech_config": speech_config,
56+
"audio_config": config,
57+
}
58+
59+
if self.transcriber_config.candidate_languages:
60+
speech_config.set_property(
61+
property_id=speechsdk.PropertyId.SpeechServiceConnection_LanguageIdMode,
62+
value="Continuous",
63+
)
64+
auto_detect_source_language_config = (
65+
speechsdk.languageconfig.AutoDetectSourceLanguageConfig(
66+
languages=self.transcriber_config.candidate_languages
67+
)
68+
)
69+
70+
speech_params[
71+
"auto_detect_source_language_config"
72+
] = auto_detect_source_language_config
73+
else:
74+
speech_params["language"] = self.transcriber_config.language
75+
76+
self.speech = speechsdk.SpeechRecognizer(**speech_params)
6077

6178
self._ended = False
6279
self.is_ready = False

0 commit comments

Comments
 (0)