Node.js sample of continuous Language Identification with push stream
I'm trying to set up continuous language identification for speech-to-text recognition. I converted a Python code sample to Node.js. While the Python code works fine, the Node.js code fails with the error this.privAudioSource.id is not a function.
This is what we have so far:
"use strict";
const speechSdk = require('microsoft-cognitiveservices-speech-sdk');
const fs = require('fs');
(function () {
const filename = "inputtest_sg_125.wav";
const key = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXX";
const region = "xxxxxx"
const pushStream = speechSdk.AudioInputStream.createPushStream();
var audioConfig = speechSdk.AudioConfig.fromStreamInput(pushStream);
// custom endpoint(as mentioned in the speechSDK docs)
const endpointString = `wss://${region}.stt.speech.microsoft.com/speech/universal/v2`;
const speechConfig = speechSdk.SpeechConfig.fromEndpoint(endpointString,key);
speechConfig.setProperty(speechSdk.PropertyId.SpeechServiceConnection_LanguageIdMode, "continuous");
// slect candidate languages
var autoDetectConfig = (speechSdk.AutoDetectSourceLanguageConfig.fromLanguages(['en-SG', 'zh-CN']));
// create the speech recognizer.
var reco = new speechSdk.SpeechRecognizer(speechConfig, autoDetectConfig, audioConfig);
// The event recognizing signals that an intermediate recognition result is received.
// You will receive one or more recognizing events as a speech phrase is recognized, with each containing
// more recognized speech. The event will contain the text for the recognition since the last phrase was recognized.
reco.recognizing = function (s, e) {
var str = "(recognizing) Reason: " + speechSdk.ResultReason[e.result.reason] + " Text: " + e.result.text;
console.log(str);
};
// The event recognized signals that a final recognition result is received.
// This is the final event that a phrase has been recognized.
// For continuous recognition, you will get one recognized event for each phrase recognized.
reco.recognized = function (s, e) {
// Indicates that recognizable speech was not detected, and that recognition is done.
if (e.result.reason === speechSdk.ResultReason.NoMatch) {
var noMatchDetail = speechSdk.NoMatchDetails.fromResult(e.result);
console.log("\r\n(recognized) Reason: " + speechSdk.ResultReason[e.result.reason] + " NoMatchReason: " + speechSdk.NoMatchReason[noMatchDetail.reason]);
} else {
console.log("\r\n(recognized) Reason: " + speechSdk.ResultReason[e.result.reason] + " Text: " + e.result.text);
}
};
// The event signals that the service has stopped processing speech.
// https://docs.microsoft.com/javascript/api/microsoft-cognitiveservices-speech-sdk/speechrecognitioncanceledeventargs?view=azure-node-latest
// This can happen for two broad classes of reasons.
// 1. An error is encountered.
// In this case the .errorDetails property will contain a textual representation of the error.
// 2. Speech was detected to have ended.
// This can be caused by the end of the specified file being reached, or ~20 seconds of silence from a microphone input.
reco.canceled = function (s, e) {
var str = "(cancel) Reason: " + speechSdk.CancellationReason[e.reason];
if (e.reason === speechSdk.CancellationReason.Error) {
str += ": " + e.errorDetails;
}
console.log(str);
};
// Signals that a new session has started with the speech service
reco.sessionStarted = function (s, e) {
var str = "(sessionStarted) SessionId: " + e.sessionId;
console.log(str);
const { WaveFile } = require("wavefile");
const wav = new WaveFile(fs.readFileSync(filename));
pushStream.write(wav.data.samples);
};
// Signals the end of a session with the speech service.
reco.sessionStopped = function (s, e) {
var str = "(sessionStopped) SessionId: " + e.sessionId;
console.log(str);
};
// Signals that the speech service has started to detect speech.
reco.speechStartDetected = function (s, e) {
var str = "(speechStartDetected) SessionId: " + e.sessionId;
console.log(str);
};
// Signals that the speech service has detected that speech has stopped.
reco.speechEndDetected = function (s, e) {
var str = "(speechEndDetected) SessionId: " + e.sessionId;
console.log(str);
};
// start the recognizer and wait for a result.
reco.startContinuousRecognitionAsync(
function () { utils.call_log(_that.clientId, "Recognizer online..."); }
);
// }
}());
The error I'm getting when I run the above code snippet:
C:\onecg\autopilot\node_modules\microsoft-cognitiveservices-speech-sdk\distrib\lib\src\common.speech\ServiceRecognizerBase.js:86
this.privRequestSession = new Exports_4.RequestSession(this.privAudioSource.id());
^
TypeError: this.privAudioSource.id is not a function
at SpeechServiceRecognizer.ServiceRecognizerBase (C:\onecg\autopilot\node_modules\microsoft-cognitiveservices-speech-sdk\distrib\lib\src\common.speech\ServiceRecognizerBase.js:86:85)
at new SpeechServiceRecognizer (C:\onecg\autopilot\node_modules\microsoft-cognitiveservices-speech-sdk\distrib\lib\src\common.speech\SpeechServiceRecognizer.js:61:28)
at SpeechRecognizer.createServiceRecognizer (C:\onecg\autopilot\node_modules\microsoft-cognitiveservices-speech-sdk\distrib\lib\src\sdk\SpeechRecognizer.js:299:16)
at SpeechRecognizer.Recognizer.implCommonRecognizerSetup (C:\onecg\autopilot\node_modules\microsoft-cognitiveservices-speech-sdk\distrib\lib\src\sdk\Recognizer.js:156:30)
at SpeechRecognizer.Recognizer (C:\onecg\autopilot\node_modules\microsoft-cognitiveservices-speech-sdk\distrib\lib\src\sdk\Recognizer.js:61:14)
at new SpeechRecognizer (C:\onecg\autopilot\node_modules\microsoft-cognitiveservices-speech-sdk\distrib\lib\src\sdk\SpeechRecognizer.js:76:24)
at C:\onecg\autopilot\test-examples\azure_speech_to.js:23:16
at Object.<anonymous> (C:\onecg\autopilot\test-examples\azure_speech_to.js:94:2)
at Module._compile (node:internal/modules/cjs/loader:1196:14)
at Object.Module._extensions..js (node:internal/modules/cjs/loader:1250:10)
Am I missing something?
Edit 1: Added the .wav extension. Edit 2: Swapped the positions of the key and endpoint arguments.
Issue Analytics
- Created 2 months ago
- Comments: 8 (4 by maintainers)
Top GitHub Comments
The region is not getting set correctly. You don't actually need to set the custom endpoint above if you set

autoDetectConfig.mode = speechSdk.LanguageIdMode.Continuous;

(That automatically sets the endpoint to v2.) You also don't need to set SpeechServiceConnection_LanguageIdMode if you use the autoDetectConfig.mode property. With that mode property set on autoDetectConfig, you can then use the normal
const speechConfig = speechSdk.SpeechConfig.fromSubscription(key, region);
pattern, which should set the region for your endpoint properly.

@glharper Thank you so much! It's working now.
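For anyone landing here later, here is how the accepted fix composes with the original snippet. This is a minimal sketch, assuming a recent microsoft-cognitiveservices-speech-sdk release; the AutoDetectSourceLanguageResult lookup for the detected language and the placeholder key/region values are illustrative, and the wavefile-based push is carried over from the question.

const speechSdk = require('microsoft-cognitiveservices-speech-sdk');
const fs = require('fs');
const { WaveFile } = require('wavefile');

const filename = "inputtest_sg_125.wav";  // placeholder wav from the question
const key = "YOUR_SPEECH_KEY";            // placeholder
const region = "YOUR_REGION";             // placeholder, e.g. "southeastasia"

const pushStream = speechSdk.AudioInputStream.createPushStream();
const audioConfig = speechSdk.AudioConfig.fromStreamInput(pushStream);

// Plain subscription config; no hand-built v2 endpoint is needed.
const speechConfig = speechSdk.SpeechConfig.fromSubscription(key, region);

// Candidate languages plus continuous language identification.
// Setting mode to Continuous switches the connection to the v2 endpoint
// automatically, so SpeechServiceConnection_LanguageIdMode is not needed either.
const autoDetectConfig = speechSdk.AutoDetectSourceLanguageConfig.fromLanguages(['en-SG', 'zh-CN']);
autoDetectConfig.mode = speechSdk.LanguageIdMode.Continuous;

const reco = new speechSdk.SpeechRecognizer(speechConfig, autoDetectConfig, audioConfig);

reco.recognized = function (s, e) {
    if (e.result.reason === speechSdk.ResultReason.RecognizedSpeech) {
        // The detected language can be read from the result.
        const lid = speechSdk.AutoDetectSourceLanguageResult.fromResult(e.result);
        console.log("[" + lid.language + "] " + e.result.text);
    }
};
reco.canceled = function (s, e) {
    console.log("(canceled) " + speechSdk.CancellationReason[e.reason] + " " + (e.errorDetails || ""));
};

// Push the wav samples once the session starts, then close the stream
// so the service sees the end of the audio.
reco.sessionStarted = function () {
    const wav = new WaveFile(fs.readFileSync(filename));
    pushStream.write(wav.data.samples);
    pushStream.close();
};

reco.startContinuousRecognitionAsync(function () { console.log("Recognizer online..."); });

The design point: LanguageIdMode.Continuous on the auto-detect config selects the v2 endpoint for you, so fromSubscription with a plain region sets up the connection correctly, and closing the push stream after the last write lets the service detect end-of-audio instead of waiting for a timeout.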