Language Identification from Blob Triggered Azure Function
See original GitHub issue. Describe the bug: The goal is to identify the language from a blob stream inside an Azure Function using the SourceLanguageRecognizer. I have tried two approaches, neither of which worked.
- Using recognize_once() with a SAS URI to the blob
- Using start_continuous_recognition() with a PushAudioInputStream
To Reproduce Steps to reproduce the behavior:
- Recognizing with Blob Sas URI
def language_identification(locale, blobname):
    """Detect the spoken language of an audio file with a single recognition.

    Builds a ``SourceLanguageRecognizer`` over the module-level candidate
    ``languages`` plus ``locale`` and runs one ``recognize_once()`` call.

    Args:
        locale: Fallback language code; also added to the candidate languages
            for this call.
        blobname: Value passed to ``AudioConfig(filename=...)``.

    Returns:
        The detected language reported by the service when speech was
        recognized; otherwise the fallback ``locale``.

    Raises:
        Exception: If recognition was canceled because of an error.
    """
    # Build a per-call copy: appending to the shared module-level list would
    # make it grow (and accumulate duplicates) on every invocation.
    candidate_languages = [*languages, locale]

    speech_config = speechsdk.SpeechConfig(subscription=subscription_key, region=service_region)
    speech_config.set_property(
        property_id=speechsdk.PropertyId.SpeechServiceConnection_SingleLanguageIdPriority,
        value='Accuracy')
    auto_detect_source_language_config = speechsdk.languageconfig.AutoDetectSourceLanguageConfig(
        languages=candidate_languages)
    # NOTE(review): AudioConfig(filename=...) appears to expect a local file
    # path; a remote URL (e.g. a blob SAS URI) likely has to be downloaded or
    # streamed first -- confirm against the Speech SDK documentation.
    audio_input = speechsdk.audio.AudioConfig(filename=blobname)
    speech_language_detection = speechsdk.SourceLanguageRecognizer(
        speech_config=speech_config,
        auto_detect_source_language_config=auto_detect_source_language_config,
        audio_config=audio_input)

    result = speech_language_detection.recognize_once()

    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
        logging.info("RECOGNIZED: {}".format(result))
        detected_language = result.properties[
            speechsdk.PropertyId.SpeechServiceConnection_AutoDetectSourceLanguageResult]
        logging.info("Detected Language: {}".format(detected_language))
        return detected_language

    if result.reason == speechsdk.ResultReason.NoMatch:
        logging.warning("No speech could be recognized - use the phone country code as main language")
        return locale

    if result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        logging.info("Speech Recognition canceled: {}".format(cancellation_details.reason))
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            raise Exception("Error details: {}".format(cancellation_details.error_details))

    # Canceled without an error, or any other result reason: return the
    # fallback explicitly instead of implicitly returning None.
    return locale
Where blobname is a valid SAS URI to the .wav file in the Azure Storage account. Here I receive the following error: it seems like the SDK is not able to handle remote file locations.
- Reading the audio stream that I receive from the Blob Storage trigger in the Azure Function
def language_identification_stream(locale, audioBlob):
    """Detect the spoken language of an audio stream via continuous recognition.

    Pushes the bytes read from ``audioBlob`` into a ``PushAudioInputStream``
    in 10000-byte chunks while a ``SourceLanguageRecognizer`` runs continuous
    recognition, then waits for the session to stop or be canceled.

    Args:
        locale: Language code added to the module-level candidate
            ``languages`` for this call.
        audioBlob: Readable binary stream exposing ``read(size=...)`` and
            ``close()``; it is closed before this function returns.

    Returns:
        The parsed JSON result (``json.loads`` output) of the last recognized
        event that carried a detected language, or ``""`` if there was none.
    """
    # Build a per-call copy: appending to the shared module-level list would
    # make it grow (and accumulate duplicates) on every invocation.
    candidate_languages = [*languages, locale]

    speech_config = speechsdk.SpeechConfig(subscription=subscription_key, region=service_region)
    speech_config.set_property(
        property_id=speechsdk.PropertyId.SpeechServiceConnection_SingleLanguageIdPriority,
        value='Accuracy')
    speech_config.set_property(speechsdk.PropertyId.Speech_LogFilename, "./LogFile.txt")
    auto_detect_source_language_config = speechsdk.languageconfig.AutoDetectSourceLanguageConfig(
        languages=candidate_languages)
    # NOTE(review): no stream_format is given, so the SDK's default input
    # format applies (presumably 16 kHz, 16-bit, mono PCM) -- confirm the blob
    # audio matches, or pass an explicit AudioStreamFormat.
    audio_stream = speechsdk.audio.PushAudioInputStream()
    audio_input = speechsdk.audio.AudioConfig(stream=audio_stream)
    source_language_recognizer = speechsdk.SourceLanguageRecognizer(
        speech_config=speech_config,
        auto_detect_source_language_config=auto_detect_source_language_config,
        audio_config=audio_input)

    done = False
    detailResult = ""

    def stop_cb(evt):
        """callback that signals to stop continuous recognition upon receiving an event `evt`"""
        print('CLOSING on {}'.format(evt))
        nonlocal done
        done = True

    def audio_recognized(evt):
        """
        callback that catches the recognized result of audio from an event 'evt'.
        :param evt: event listened to catch recognition result.
        :return:
        """
        nonlocal detailResult
        if evt.result.reason != speechsdk.ResultReason.RecognizedSpeech:
            return
        logging.info("RECOGNIZED: {}".format(evt.result.properties))
        detected_language = evt.result.properties.get(
            speechsdk.PropertyId.SpeechServiceConnection_AutoDetectSourceLanguageResult)
        if detected_language is None:
            logging.info("Unable to detect any language")
            return
        json_result = evt.result.properties[speechsdk.PropertyId.SpeechServiceResponse_JsonResult]
        detailResult = json.loads(json_result)
        start_offset = detailResult['Offset']
        duration = detailResult['Duration']
        end_offset = duration + start_offset if duration >= 0 else 0
        # The service reports Offset/Duration in 100-nanosecond ticks, not in
        # nanoseconds, so the message names the unit accordingly.
        logging.info(
            "Detected language = %s, startOffset = %s ticks, endOffset = %s ticks, "
            "Duration = %s ticks (1 tick = 100 ns).",
            detected_language, start_offset, end_offset, duration)

    # Connect callbacks to the events fired by the speech recognizer
    source_language_recognizer.recognized.connect(audio_recognized)
    source_language_recognizer.session_started.connect(lambda evt: print('SESSION STARTED: {}'.format(evt)))
    source_language_recognizer.session_stopped.connect(lambda evt: print('SESSION STOPPED {}'.format(evt)))
    source_language_recognizer.canceled.connect((lambda evt: print('CANCELED {}'.format(evt))))
    # stop continuous recognition on either session stopped or canceled events
    source_language_recognizer.session_stopped.connect(stop_cb)
    source_language_recognizer.canceled.connect(stop_cb)

    # Start continuous speech recognition
    source_language_recognizer.start_continuous_recognition()
    try:
        try:
            while True:
                chunk = audioBlob.read(size=10000)
                if not chunk:
                    print("Print Writing Stream done")
                    logging.info("Logging Writing Stream done")
                    break
                audio_stream.write(chunk)
                time.sleep(.1)
        finally:
            # Close both streams even if closing the first one fails.
            try:
                audio_stream.close()
            finally:
                audioBlob.close()
        # Wait until a session_stopped or canceled event has fired.
        while not done:
            time.sleep(.5)
    finally:
        # Always stop the recognizer, including when reading/writing raised.
        source_language_recognizer.stop_continuous_recognition()
    return detailResult
Where languages is an array of 4 language codes and audioBlob is of type azure.functions.blob.InputStream. Error message:
Expected behavior Expecting a correct response from the SourceLanguageRecognizer for either stream or remote file.
Version of the Cognitive Services Speech SDK 1.19.0
Platform, Operating System, and Programming Language
- Linux Azure Function v3
- Programming language: Python 3.9.7
Additional context
LogFile.txt harounitalien_+396648414202_0064030f5e74b281-11111.zip
Issue Analytics
- State:
- Created 2 years ago
- Reactions:1
- Comments:16
Top GitHub Comments
Closing this issue as answered and solution given. @harounshehata Thanks a lot for the details, we will improve our documentation and LID service side to allow multiple formats.
@harounshehata Glad to know it helps,