Я работаю над чат-ботом WhatsApp, где я получаю URL-адрес файла аудиофайла (формат ogg) от Whatsapp, получаю буфер и загружаю этот файл на S3 (sample.ogg). Теперь, что нужно использовать Я создаю поток чтения файла и отправляю его на AWS транскрибировать. Я использую websocket, но получаю пустой ответ Иногда, когда я отвечаю Ммм мм мм. Кто-нибудь может сказать, что я делаю неправильно в своем коде
const express = require('express')
const app = express()
const fs = require('fs');
const crypto = require('crypto'); // tot sign our pre-signed URL
const v4 = require('./aws-signature-v4'); // to generate our pre-signed URL
const marshaller = require("@aws-sdk/eventstream-marshaller"); // for converting binary event stream messages to and from JSON
const util_utf8_node = require("@aws-sdk/util-utf8-node");
var WebSocket = require('ws') //for opening a web socket
// our converter between binary event streams messages and JSON
const eventStreamMarshaller = new marshaller.EventStreamMarshaller(util_utf8_node.toUtf8, util_utf8_node.fromUtf8);
// our global variables for managing state
let languageCode;
let region = 'ap-south-1';
let sampleRate;
let inputSampleRate;
let transcription = "";
let socket;
let micStream;
let socketError = false;
let transcribeException = false;
// let languageCode = 'en-us'
app.listen(8081, (error, data) => {
if(!error) {
console.log(`running at 8080----->>>>`)
}
})
let handleEventStreamMessage = function (messageJson) {
let results = messageJson.Transcript.Results;
if (results.length > 0) {
if (results[0].Alternatives.length > 0) {
let transcript = results[0].Alternatives[0].Transcript;
// fix encoding for accented characters
transcript = decodeURIComponent(escape(transcript));
console.log(`Transcpted is----->>${transcript}`)
}
}
}
function downsampleBuffer (buffer, inputSampleRate = 44100, outputSampleRate = 16000){
if (outputSampleRate === inputSampleRate) {
return buffer;
}
var sampleRateRatio = inputSampleRate / outputSampleRate;
var newLength = Math.round(buffer.length / sampleRateRatio);
var result = new Float32Array(newLength);
var offsetResult = 0;
var offsetBuffer = 0;
while (offsetResult < result.length) {
var nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio);
var accum = 0,
count = 0;
for (var i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i++ ) {
accum += buffer[i];
count++;
}
result[offsetResult] = accum / count;
offsetResult++;
offsetBuffer = nextOffsetBuffer;
}
return result;
}
function pcmEncode(input) {
var offset = 0;
var buffer = new ArrayBuffer(input.length * 2);
var view = new DataView(buffer);
for (var i = 0; i < input.length; i++, offset += 2) {
var s = Math.max(-1, Math.min(1, input[i]));
view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
}
return buffer;
}
function getAudioEventMessage(buffer) {
// wrap the audio data in a JSON envelope
return {
headers: {
':message-type': {
type: 'string',
value: 'event'
},
':event-type': {
type: 'string',
value: 'AudioEvent'
}
},
body: buffer
};
}
function convertAudioToBinaryMessage(raw) {
if (raw == null)
return;
// downsample and convert the raw audio bytes to PCM
let downsampledBuffer = downsampleBuffer(raw, inputSampleRate);
let pcmEncodedBuffer = pcmEncode(downsampledBuffer);
setTimeout(function() {}, 1);
// add the right JSON headers and structure to the message
let audioEventMessage = getAudioEventMessage(Buffer.from(pcmEncodedBuffer));
//convert the JSON object + headers into a binary event stream message
let binary = eventStreamMarshaller.marshall(audioEventMessage);
return binary;
}
function createPresignedUrl() {
let endpoint = "transcribestreaming." + "us-east-1" + ".amazonaws.com:8443";
// get a preauthenticated URL that we can use to establish our WebSocket
return v4.createPresignedURL(
'GET',
endpoint,
'/stream-transcription-websocket',
'transcribe',
crypto.createHash('sha256').update('', 'utf8').digest('hex'), {
'key': <AWS_KEY>,
'secret': <AWS_SECRET_KEY>,
'protocol': 'wss',
'expires': 15,
'region': 'us-east-1',
'query': "language-code=" + 'en-US' + "&media-encoding=pcm&sample-rate=" + 8000
}
);
}
function showError(message) {
console.log("Error: ",message)
}
app.get('/convert', (req, res) => {
var file = 'recorded.mp3'
const eventStreamMarshaller = new marshaller.EventStreamMarshaller(util_utf8_node.toUtf8, util_utf8_node.fromUtf8);
let url = createPresignedUrl();
let socket = new WebSocket(url);
socket.binaryType = "arraybuffer";
let output = '';
const readStream = fs.createReadStream(file, { highWaterMark: 32 * 256 })
readStream.setEncoding('binary')
//let sampleRate = 0;
let inputSampleRate = 44100
readStream.on('end', function() {
console.log('finished reading----->>>>');
// write to file here.
// Send an empty frame so that Transcribe initiates a closure of the WebSocket after submitting all transcripts
let emptyMessage = getAudioEventMessage(Buffer.from(new Buffer([])));
let emptyBuffer = eventStreamMarshaller.marshall(emptyMessage);
socket.send(emptyBuffer);
})
// when we get audio data from the mic, send it to the WebSocket if possible
socket.onopen = function() {
readStream.on('data', function(chunk) {
let binary = convertAudioToBinaryMessage(chunk);
if (socket.readyState === socket.OPEN) {
console.log(`sending to steaming API------->>>>`)
socket.send(binary);
}
});
// the audio stream is raw audio bytes. Transcribe expects PCM with additional metadata, encoded as binary
}
// the audio stream is raw audio bytes. Transcribe expects PCM with additional metadata, encoded as binary
socket.onerror = function () {
socketError = true;
showError('WebSocket connection error. Try again.');
};
// handle inbound messages from Amazon Transcribe
socket.onmessage = function (message) {
//convert the binary event stream message to JSON
let messageWrapper = eventStreamMarshaller.unmarshall(Buffer(message.data));
//console.log(`messag -->>${JSON.stringify(messageWrapper)}`)
let messageBody = JSON.parse(String.fromCharCode.apply(String, messageWrapper.body));
console.log("results:.. ",JSON.stringify(messageBody))
if (messageWrapper.headers[":message-type"].value === "event") {
handleEventStreamMessage(messageBody);
}
else {
transcribeException = true;
showError(messageBody.Message);
}
}
let closeSocket = function () {
if (socket.OPEN) {
// Send an empty frame so that Transcribe initiates a closure of the WebSocket after submitting all transcripts
let emptyMessage = getAudioEventMessage(Buffer.from(new Buffer([])));
let emptyBuffer = eventStreamMarshaller.marshall(emptyMessage);
socket.send(emptyBuffer);
}
}
})