
Node.js sample of continuous Language Identification with push stream

See original GitHub issue

I’m trying to set up continuous language identification for speech-to-text recognition. I converted a Python code sample to Node.js. While the Python code works fine, the Node.js code fails with the error `this.privAudioSource.id is not a function`.

This is what we have done so far:

"use strict";
const speechSdk = require('microsoft-cognitiveservices-speech-sdk');
const fs = require('fs');



(function () {
    const filename = "inputtest_sg_125.wav";
    const key = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXX";
    const region = "xxxxxx";

    const pushStream = speechSdk.AudioInputStream.createPushStream();
    var audioConfig = speechSdk.AudioConfig.fromStreamInput(pushStream);

    // custom endpoint (as mentioned in the Speech SDK docs)
    const endpointString = `wss://${region}.stt.speech.microsoft.com/speech/universal/v2`;
    const speechConfig = speechSdk.SpeechConfig.fromEndpoint(endpointString,key);
    speechConfig.setProperty(speechSdk.PropertyId.SpeechServiceConnection_LanguageIdMode, "continuous");
    // select candidate languages
    var autoDetectConfig = (speechSdk.AutoDetectSourceLanguageConfig.fromLanguages(['en-SG', 'zh-CN']));

    // create the speech recognizer.
    var reco = new speechSdk.SpeechRecognizer(speechConfig, autoDetectConfig, audioConfig);

    // The event recognizing signals that an intermediate recognition result is received.
    // You will receive one or more recognizing events as a speech phrase is recognized, with each containing
    // more recognized speech. The event will contain the text for the recognition since the last phrase was recognized.
    reco.recognizing = function (s, e) {
        var str = "(recognizing) Reason: " + speechSdk.ResultReason[e.result.reason] + " Text: " + e.result.text;
        console.log(str);
    };

    // The event recognized signals that a final recognition result is received.
    // This is the final event that a phrase has been recognized.
    // For continuous recognition, you will get one recognized event for each phrase recognized.
    reco.recognized = function (s, e) {
        // Indicates that recognizable speech was not detected, and that recognition is done.
        if (e.result.reason === speechSdk.ResultReason.NoMatch) {
            var noMatchDetail = speechSdk.NoMatchDetails.fromResult(e.result);
            console.log("\r\n(recognized)  Reason: " + speechSdk.ResultReason[e.result.reason] + " NoMatchReason: " + speechSdk.NoMatchReason[noMatchDetail.reason]);
        } else {
            console.log("\r\n(recognized)  Reason: " + speechSdk.ResultReason[e.result.reason] + " Text: " + e.result.text);
        }
    };

    // The event signals that the service has stopped processing speech.
    // https://docs.microsoft.com/javascript/api/microsoft-cognitiveservices-speech-sdk/speechrecognitioncanceledeventargs?view=azure-node-latest
    // This can happen for two broad classes of reasons.
    // 1. An error is encountered.
    //    In this case the .errorDetails property will contain a textual representation of the error.
    // 2. Speech was detected to have ended.
    //    This can be caused by the end of the specified file being reached, or ~20 seconds of silence from a microphone input.
    reco.canceled = function (s, e) {
        var str = "(cancel) Reason: " + speechSdk.CancellationReason[e.reason];
        if (e.reason === speechSdk.CancellationReason.Error) {
            str += ": " + e.errorDetails;
        }
        console.log(str);
    };

    // Signals that a new session has started with the speech service
    reco.sessionStarted = function (s, e) {
        var str = "(sessionStarted) SessionId: " + e.sessionId;
        console.log(str);
        const { WaveFile } = require("wavefile");

        const wav = new WaveFile(fs.readFileSync(filename));
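        // wav.data.samples holds the raw PCM bytes without the RIFF header,
        // which is what the push stream expects (its default input format is
        // 16 kHz, 16-bit, mono PCM).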
        pushStream.write(wav.data.samples);
        // Close the stream so the service knows no more audio is coming.
        pushStream.close();
    };

    // Signals the end of a session with the speech service.
    reco.sessionStopped = function (s, e) {
        var str = "(sessionStopped) SessionId: " + e.sessionId;
        console.log(str);
    };

    // Signals that the speech service has started to detect speech.
    reco.speechStartDetected = function (s, e) {
        var str = "(speechStartDetected) SessionId: " + e.sessionId;
        console.log(str);
    };

    // Signals that the speech service has detected that speech has stopped.
    reco.speechEndDetected = function (s, e) {
        var str = "(speechEndDetected) SessionId: " + e.sessionId;
        console.log(str);
    };

    // Start continuous recognition.
    reco.startContinuousRecognitionAsync(
        function () { console.log("Recognizer online..."); }
    );

}());

This is the error I get when I run the above code snippet:

C:\onecg\autopilot\node_modules\microsoft-cognitiveservices-speech-sdk\distrib\lib\src\common.speech\ServiceRecognizerBase.js:86
        this.privRequestSession = new Exports_4.RequestSession(this.privAudioSource.id());
                                                                                    ^

TypeError: this.privAudioSource.id is not a function
    at SpeechServiceRecognizer.ServiceRecognizerBase (C:\onecg\autopilot\node_modules\microsoft-cognitiveservices-speech-sdk\distrib\lib\src\common.speech\ServiceRecognizerBase.js:86:85)
    at new SpeechServiceRecognizer (C:\onecg\autopilot\node_modules\microsoft-cognitiveservices-speech-sdk\distrib\lib\src\common.speech\SpeechServiceRecognizer.js:61:28)
    at SpeechRecognizer.createServiceRecognizer (C:\onecg\autopilot\node_modules\microsoft-cognitiveservices-speech-sdk\distrib\lib\src\sdk\SpeechRecognizer.js:299:16)
    at SpeechRecognizer.Recognizer.implCommonRecognizerSetup (C:\onecg\autopilot\node_modules\microsoft-cognitiveservices-speech-sdk\distrib\lib\src\sdk\Recognizer.js:156:30)
    at SpeechRecognizer.Recognizer (C:\onecg\autopilot\node_modules\microsoft-cognitiveservices-speech-sdk\distrib\lib\src\sdk\Recognizer.js:61:14)
    at new SpeechRecognizer (C:\onecg\autopilot\node_modules\microsoft-cognitiveservices-speech-sdk\distrib\lib\src\sdk\SpeechRecognizer.js:76:24)
    at C:\onecg\autopilot\test-examples\azure_speech_to.js:23:16
    at Object.<anonymous> (C:\onecg\autopilot\test-examples\azure_speech_to.js:94:2)
    at Module._compile (node:internal/modules/cjs/loader:1196:14)
    at Object.Module._extensions..js (node:internal/modules/cjs/loader:1250:10)

Am I missing something?

Edit 1: Added the .wav extension.
Edit 2: Swapped the positions of the key and endpoint arguments.

Issue Analytics

  • State: closed
  • Created: 2 months ago
  • Comments: 8 (4 by maintainers)

Top GitHub Comments

1 reaction
glharper commented, Jul 12, 2023

but now I’m getting this error: (cancel) Reason: Error: Unable to contact server. StatusCode: 1006, undefined Reason: getaddrinfo ENOTFOUND undefined.stt.speech.microsoft.com

The region is not getting set correctly. You don’t actually need to set the custom endpoint above if you set autoDetectConfig.mode = speechSdk.LanguageIdMode.Continuous; (That automatically sets the endpoint to v2.) You also don’t need to set the SpeechServiceConnection_LanguageIdMode if you use the autoDetectConfig.mode property.

Using that mode property for autoDetectConfig, you can then use the normal const speechConfig = speechSdk.SpeechConfig.fromSubscription(key, region); pattern, which should set the region for your endpoint properly.
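
For reference, here is a minimal sketch of the setup glharper describes, with placeholder key, region, and candidate languages. It uses the SDK’s SpeechRecognizer.FromConfig factory, which is the overload that accepts an AutoDetectSourceLanguageConfig; passing that config to the plain constructor appears to be what triggers the original this.privAudioSource.id error, since the constructor treats its second argument as an audio config.

"use strict";
const speechSdk = require("microsoft-cognitiveservices-speech-sdk");

const key = "YOUR_SUBSCRIPTION_KEY";   // placeholder
const region = "YOUR_REGION";          // placeholder, e.g. "southeastasia"

const pushStream = speechSdk.AudioInputStream.createPushStream();
const audioConfig = speechSdk.AudioConfig.fromStreamInput(pushStream);

// Plain subscription config; no hand-built v2 endpoint needed.
const speechConfig = speechSdk.SpeechConfig.fromSubscription(key, region);

// Candidate languages, with continuous language identification enabled
// via the mode property (this selects the v2 endpoint automatically).
const autoDetectConfig = speechSdk.AutoDetectSourceLanguageConfig.fromLanguages(["en-SG", "zh-CN"]);
autoDetectConfig.mode = speechSdk.LanguageIdMode.Continuous;

// FromConfig is the factory that accepts an AutoDetectSourceLanguageConfig.
const reco = speechSdk.SpeechRecognizer.FromConfig(speechConfig, autoDetectConfig, audioConfig);

reco.recognized = function (s, e) {
    if (e.result.reason === speechSdk.ResultReason.RecognizedSpeech) {
        // The detected language can be read off the result.
        const lid = speechSdk.AutoDetectSourceLanguageResult.fromResult(e.result);
        console.log("(recognized) [" + lid.language + "] " + e.result.text);
    }
};

reco.startContinuousRecognitionAsync();

Audio can then be fed in with the same pushStream.write / pushStream.close pattern as in the question.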

0 reactions
imkhubaibraza commented, Jul 12, 2023

@glharper Thank you so much! It’s working now

Read more comments on GitHub >

Top Results From Across the Web

Cognitive Services Speech SDK for JavaScript
The previous example uses single-shot recognition, which recognizes a single utterance. You can also use continuous recognition to control when ...

Node.js Streams: Everything you need to know
When we push a null object, that means we want to signal that the stream does not have any more data. To consume...

Node.js Readable Streams Explained
Implementing read streams in Node.js can be confusing. Streams are very stateful, so how they function can depend on the mode they're in....

Speech Recognition Streaming API
This Streaming API provides an interface to accept chunks of continuous audio stream ... in multiple programming languages like python, nodejs, java, etc....

HTTP/2 | Node.js v20.5.1 Documentation
A new HTTP/2 HEADERS frame with a previously unused stream ID is received; the http2stream.pushStream() method is called. On the client side, instances...
