Convert Speech to Text Using Web Speech API in JavaScript | by Gourav Kajal | Nov, 2022

Let’s read those voices!

photo by Vika Straberica Feather unsplash

Some time back, I wrote an article in which we learned how to convert text to speech using the Web Speech API. You can read that article here.

We can use the Web Speech API to convert text to speech and also speech (voice) to text. Today we are going to learn how to do the latter.

We will focus only on the Web Speech API in this tutorial. To save some time, I have prepared two starter files, language.js and index.html.





integrity="sha384-giJF6kkoqNQ00vy+HMDP7azOuL0xtbfIcaT9wjKHr8RbDVddVHyTfAAsrekwKmP1" crossorigin="anonymous" />
Speech To Text



Select Language





Transcript












// language.js

// Languages offered in the language menu. Each entry has the shape
//   [displayName, [localeCode, regionLabel], ...more dialects]
// A language with a single dialect lists just [localeCode] (no region label).
// Locale codes are BCP 47 tags, i.e. subtags are separated by hyphens.
var langs =
[['Afrikaans', ['af-ZA']],
 ['Bahasa Indonesia', ['id-ID']],
 ['Bahasa Melayu', ['ms-MY']],
 ['Català', ['ca-ES']],
 ['Čeština', ['cs-CZ']],
 ['Deutsch', ['de-DE']],
 ['English', ['en-AU', 'Australia'],
             ['en-CA', 'Canada'],
             ['en-IN', 'India'],
             ['en-NZ', 'New Zealand'],
             ['en-ZA', 'South Africa'],
             ['en-GB', 'United Kingdom'],
             ['en-US', 'United States']],
 ['Español', ['es-AR', 'Argentina'],
             ['es-BO', 'Bolivia'],
             ['es-CL', 'Chile'],
             ['es-CO', 'Colombia'],
             ['es-CR', 'Costa Rica'],
             ['es-EC', 'Ecuador'],
             ['es-SV', 'El Salvador'],
             ['es-ES', 'España'],
             ['es-US', 'Estados Unidos'],
             ['es-GT', 'Guatemala'],
             ['es-HN', 'Honduras'],
             ['es-MX', 'México'],
             ['es-NI', 'Nicaragua'],
             ['es-PA', 'Panamá'],
             ['es-PY', 'Paraguay'],
             ['es-PE', 'Perú'],
             ['es-PR', 'Puerto Rico'],
             ['es-DO', 'República Dominicana'],
             ['es-UY', 'Uruguay'],
             ['es-VE', 'Venezuela']],
 ['Euskara', ['eu-ES']],
 ['Français', ['fr-FR']],
 ['Galego', ['gl-ES']],
 // Was 'hr_HR': an underscore is not a valid BCP 47 subtag separator.
 ['Hrvatski', ['hr-HR']],
 ['IsiZulu', ['zu-ZA']],
 ['Íslenska', ['is-IS']],
 ['Italiano', ['it-IT', 'Italia'],
              ['it-CH', 'Svizzera']],
 ['Magyar', ['hu-HU']],
 ['Nederlands', ['nl-NL']],
 ['Norsk bokmål', ['nb-NO']],
 ['Polski', ['pl-PL']],
 ['Português', ['pt-BR', 'Brasil'],
               ['pt-PT', 'Portugal']],
 ['Română', ['ro-RO']],
 ['Slovenčina', ['sk-SK']],
 ['Suomi', ['fi-FI']],
 ['Svenska', ['sv-SE']],
 ['Türkçe', ['tr-TR']],
 ['български', ['bg-BG']],
 // First letter is the Cyrillic "Р" (it used to be a look-alike Latin "P").
 ['Русский', ['ru-RU']],
 ['Српски', ['sr-RS']],
 ['한국어', ['ko-KR']],
 ['中文', ['cmn-Hans-CN', '普通话 (中国大陆)'],
         ['cmn-Hans-HK', '普通话 (香港)'],
         ['cmn-Hant-TW', '中文 (台灣)'],
         ['yue-Hant-HK', '粵語 (香港)']],
 ['日本語', ['ja-JP']],
 ['Lingua latīna', ['la']]];

// Look up the language and dialect menus by their element ids.
let select_language = document.querySelector('#select_language');
let select_dialect = document.querySelector('#select_dialect');

// Add one option per language; each option's value is the entry's index in langs.
for (var i = 0; i < langs.length; i++)
select_language.options[i] = new Option(langs[i][0], i);

// Preselect langs[6] ('English'), rebuild the dialect menu for it, and only
// then preselect dialect option 6 ('en-US', United States) - the dialect
// options do not exist until updateCountry() has run.
select_language.selectedIndex = 6;
updateCountry();
select_dialect.selectedIndex = 6;

/**
 * Rebuild the dialect menu so that it lists the dialects of the language
 * currently selected in the language menu.
 *
 * Reads the globals `langs`, `select_language` and `select_dialect`.
 * The dialect menu is hidden when the language's first dialect entry has
 * only a locale code (no region label), as there is nothing to choose from.
 */
function updateCountry() {
  // Remove the previous language's dialects, last to first so that the
  // remaining indices stay valid while removing.
  for (let i = select_dialect.options.length - 1; i >= 0; i--) {
    select_dialect.remove(i);
  }

  // list[0] is the language name; the dialect entries start at index 1.
  const list = langs[select_language.selectedIndex];
  for (let i = 1; i < list.length; i++) {
    // Option(text, value): region label as text, locale code as value.
    select_dialect.options.add(new Option(list[i][1], list[i][0]));
  }

  select_dialect.style.visibility = list[1].length === 1 ? 'hidden' : 'visible';
}

In the starter code, we've included two buttons to start and stop voice recognition, a box to show the transcript, and menus for selecting the language and dialect. We used Bootstrap to style the page. Now, if you open index.html in Chrome, you'll see the following output:

Output

First, create a new JavaScript file and name it speechRecognition.js. After that, include the file in index.html using a script tag.

Now, inside the script file, let's check whether the window object contains the webkitSpeechRecognition class.

// Feature-detect the webkit-prefixed speech recognition constructor before
// using it, and report when it is missing.
if ("webkitSpeechRecognition" in window) {
  // Speech Recognition Stuff goes here
} else {
  console.log("Speech Recognition Not Available");
}

From now on, everything we write will go inside the if statement.

Let's create a webkitSpeechRecognition object.

// Create the recognizer instance that the rest of the script configures.
let speechRecognition = new webkitSpeechRecognition();

Now let's configure some properties of this speechRecognition object.

Continuous listening: the speech recognition object can either stop listening as soon as the user stops speaking, or keep listening until the user tells it to stop. Set this property to false if you only want to recognize a single word or phrase. For this article, we set it to true.

// Keep listening until the user stops recognition, rather than ending after one phrase.
speechRecognition.continuous = true;

Interim results are results that are not yet final. If you enable this option, the speechRecognition object will return the interim results along with the final results. Let's set it to true.

// Deliver interim (not yet final) results in addition to the final ones.
speechRecognition.interimResults = true;

Language: this is the language the user will speak in. You must use a locale code to set this property. Please be aware that the API does not currently support all languages.

Let's set it to the language the user selected in the menu: the lang property must be set to the value of the dialect select menu.

// Use the locale code currently selected in the #select_dialect menu.
speechRecognition.lang = document.querySelector("#select_dialect").value;

You can provide callbacks for events such as onstart, onend, onresult, and onerror.

onstart

This event is triggered when speech recognition starts. Let's pass a callback function that shows on the webpage that the speech recognition instance is listening.

In the starter code, there is an element with the id status that says Listening.... This element is initially hidden using CSS.

Let's set it to display: block when speech recognition starts.

// Show the status element while the recognizer is listening.
speechRecognition.onstart = () => {
  document.querySelector("#status").style.display = "block";
};

onend

This event is triggered when speech recognition ends. Let's pass a callback function that hides the status element on the webpage.

Let's set it to display: none when speech recognition ends.

// Hide the status element once recognition has ended.
speechRecognition.onend = () => {
  document.querySelector("#status").style.display = "none";
};

onerror

This event is triggered when any kind of speech recognition error occurs. Let's pass a callback function that hides the status element on the webpage.

Let's set it to display: none when an error occurs.

// Hide the status element when recognition fails.
// The handler property is all lowercase: assigning to `onError` would only
// create an unused property, and the callback would never be invoked.
speechRecognition.onerror = () => {
  document.querySelector("#status").style.display = "none";
};

onresult

This event is triggered when the speechRecognition object has some results from the recognition. It will contain both final and interim results. Let's pass a callback function that puts each result into the appropriate span inside the transcript box.

This is the HTML code for the transcript box of the website.




We need to put the interim results in the span with the id interim and the final results in the span with the id final.

The callback function receives an event object from the result event. This object holds the results as an array. You can tell whether each entry in the array is a final or an interim result by looking at its isFinal property.

Declare a variable for the interim transcript inside the callback method and one for the final transcript outside it.

// Final text accumulates across result events, so it is declared outside the callback.
let final_transcript = "";

speechRecognition.onresult = (event) => {
  // Create the interim transcript string locally because we don't want it to persist like final transcript
  let interim_transcript = "";
};

Let us now build the transcript strings from the results array. We loop through it: if a result item is final, we add it to the final transcript; otherwise, we add it to the interim transcript.

// Loop through the results from the speech recognition object.
for (let i = event.resultIndex; i < event.results.length; ++i)
// If the result item is Final, add it to Final Transcript, Else add it to Interim transcript
if (event.results[i].isFinal)
final_transcript += event.results[i][0].transcript;
else
interim_transcript += event.results[i][0].transcript;

Let's finally add the transcript data to the DOM.

// The transcripts are plain text, so write them with textContent; innerHTML
// would parse any "<" or "&" in the recognized text as markup.
document.querySelector("#final").textContent = final_transcript;
document.querySelector("#interim").textContent = interim_transcript;

Now, here is the complete code for the onresult event:

// Final text accumulates across result events, so it is declared outside the callback.
let final_transcript = "";

speechRecognition.onresult = (event) => {
  // Create the interim transcript string locally because we don't want it to persist like final transcript
  let interim_transcript = "";

  // Loop through the results from the speech recognition object.
  for (let i = event.resultIndex; i < event.results.length; ++i) {
    // If the result item is Final, add it to Final Transcript, Else add it to Interim transcript
    if (event.results[i].isFinal) {
      final_transcript += event.results[i][0].transcript;
    } else {
      interim_transcript += event.results[i][0].transcript;
    }
  }

  // Set the Final transcript and Interim transcript. textContent is used
  // because the transcripts are plain text and must not be parsed as HTML.
  document.querySelector("#final").textContent = final_transcript;
  document.querySelector("#interim").textContent = interim_transcript;
};

Finally, let's start and stop the recognition.

To start and stop voice recognition, we need to set the onclick property of the Start and Stop buttons.

// Start recognition when the Start button is clicked.
document.querySelector("#start").onclick = () => {
  // Re-read the dialect menu on every start; otherwise a dialect chosen
  // after the page loaded would never reach the recognizer.
  speechRecognition.lang = document.querySelector("#select_dialect").value;
  speechRecognition.start();
};
// Stop recognition when the Stop button is clicked.
document.querySelector("#stop").onclick = () => {
  speechRecognition.stop();
};

Here is the complete speechRecognition.js file:

// speechRecognition.js: wires the Start/Stop buttons, the status element and
// the transcript spans to a webkitSpeechRecognition instance. Everything
// lives inside the feature check so nothing runs when the API is missing.
if ("webkitSpeechRecognition" in window) {
  // Initialize webkitSpeechRecognition
  const speechRecognition = new webkitSpeechRecognition();

  // String for the Final Transcript. It is declared outside onresult so that
  // finalized text accumulates across result events.
  let final_transcript = "";

  // Set the properties for the Speech Recognition object
  speechRecognition.continuous = true;
  speechRecognition.interimResults = true;
  speechRecognition.lang = document.querySelector("#select_dialect").value;

  // Callback Function for the onStart Event
  speechRecognition.onstart = () => {
    // Show the Status Element
    document.querySelector("#status").style.display = "block";
  };
  speechRecognition.onerror = () => {
    // Hide the Status Element
    document.querySelector("#status").style.display = "none";
  };
  speechRecognition.onend = () => {
    // Hide the Status Element
    document.querySelector("#status").style.display = "none";
  };

  speechRecognition.onresult = (event) => {
    // Create the interim transcript string locally because we don't want it to persist like final transcript
    let interim_transcript = "";

    // Loop through the results from the speech recognition object.
    for (let i = event.resultIndex; i < event.results.length; ++i) {
      // If the result item is Final, add it to Final Transcript, Else add it to Interim transcript
      if (event.results[i].isFinal) {
        final_transcript += event.results[i][0].transcript;
      } else {
        interim_transcript += event.results[i][0].transcript;
      }
    }

    // Set the Final transcript and Interim transcript. textContent is used
    // because the transcripts are plain text and must not be parsed as HTML.
    document.querySelector("#final").textContent = final_transcript;
    document.querySelector("#interim").textContent = interim_transcript;
  };

  // Set the onClick property of the start button
  document.querySelector("#start").onclick = () => {
    // Re-read the dialect menu on every start; the assignment above only
    // captures the selection that was active when the script loaded.
    speechRecognition.lang = document.querySelector("#select_dialect").value;
    // Start the Speech Recognition
    speechRecognition.start();
  };
  // Set the onClick property of the stop button
  document.querySelector("#stop").onclick = () => {
    // Stop the Speech Recognition
    speechRecognition.stop();
  };
} else {
  console.log("Speech Recognition Not Available");
}

Now, open the index.html file in your favorite web browser to view the final output. It will ask for access to your system's microphone; press Allow. Then click the Start button and speak. You will see that it recognizes your voice and shows you the words you speak.

Output
final output

Congratulations! You learned something new today.

Leave a Reply