Я пытался расшифровать (.ogg) файл, используя SpeechSDK когнитивных служб из Azure. Но я не могу заставить это работать. Ниже приведен мой код:
import azure.cognitiveservices.speech as speechsdk
import time
speech_key, service_region = "my-subscription", "eastus"
speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
speech_config.speech_recognition_language="es-ES"
# audio_filename = "AudioTest.wav"
audio_filename = "AudioFile.ogg"
def speech_recognize_continuous_from_file():
"""performs continuous speech recognition with input from an audio file"""
# <SpeechContinuousRecognitionWithFile>
audio_config = speechsdk.audio.AudioConfig(filename=audio_filename)
speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
done = False
def stop_cb(evt):
"""callback that signals to stop continuous recognition upon receiving an event `evt`"""
print('CLOSING on {}'.format(evt))
nonlocal done
done = True
# Connect callbacks to the events fired by the speech recognizer
# speech_recognizer.recognizing.connect(lambda evt: print('RECOGNIZING: {}'.format(evt)))
speech_recognizer.recognized.connect(lambda evt: print('RECOGNIZED: {}'.format(evt)))
# speech_recognizer.session_started.connect(lambda evt: print('SESSION STARTED: {}'.format(evt)))
# speech_recognizer.session_stopped.connect(lambda evt: print('SESSION STOPPED {}'.format(evt)))
# speech_recognizer.canceled.connect(lambda evt: print('CANCELED {}'.format(evt)))
# stop continuous recognition on either session stopped or canceled events
speech_recognizer.session_stopped.connect(stop_cb)
speech_recognizer.canceled.connect(stop_cb)
# Start continuous speech recognition
speech_recognizer.start_continuous_recognition()
while not done:
time.sleep(.5)
speech_recognizer.stop_continuous_recognition()
# </SpeechContinuousRecognitionWithFile>
if __name__ == "__main__":
speech_recognize_continuous_from_file()
Проблема в том, что, когда я пробую его с файлом (.wav), он работает отлично, но когда я пробую его с файлом .ogg, я получаю следующую запись об ошибке
(796): 24ms SPX_THROW_HR_IF: (0x00a) = 0xa
(41): 85ms SPX_RETURN_ON_FAIL: hr = 0x47a4dbe0
SPX_RETURN_ON_FAIL: hr = recognizer_start_continuous_recognition_async_wait_for(m_hasyncStartContinuous, 0xffffffffui32) = 0x47a4dbe0
SPX_THROW_ON_FAIL: hr = 0x47a4dbe0
Traceback (most recent call last):
File "c:\Users\jramirezs\Documents\VisualStudioCode\Testing5.py", line 44, in <module>
speech_recognize_continuous_from_file()
File "c:\Users\jramirezs\Documents\VisualStudioCode\Testing5.py", line 36, in speech_recognize_continuous_from_file
speech_recognizer.start_continuous_recognition()
File "C:\Python64bit\lib\site-packages\azure\cognitiveservices\speech\speech.py", line 404, in start_continuous_recognition
return self._impl.start_continuous_recognition()
File "C:\Python64bit\lib\site-packages\azure\cognitiveservices\speech\speech_py_impl.py", line 3679, in start_continuous_recognition
return _speech_py_impl.SpeechRecognizer_start_continuous_recognition(self)
RuntimeError: Exception with an error code: 0xa (SPXERR_INVALID_HEADER)
[CALL STACK BEGIN]
> CreateModuleObject
- CreateModuleObject
- CreateModuleObject
- CreateModuleObject
- CreateModuleObject
- 00007FFFF2B50BAF (SymFromAddr() error: Se ha intentado tener acceso a una direcci�n no v�lida.)
- CreateModuleObject
- CreateModuleObject
- CreateModuleObject
- CreateModuleObject
- o_exp
- BaseThreadInitThunk
- RtlUserThreadStart
[CALL STACK END]
Любая помощь будет признателен, большое спасибо