Как описано в статье здесь, recognize_once_async() (метод, который вы используете) распознаёт только одно высказывание: от начала обнаруженной речи до следующей паузы.
Насколько я понимаю, ваше требование будет выполнено, если вы воспользуетесь функцией start_continuous_recognition(). Она запускает распознавание и продолжает обрабатывать все высказывания, пока вы не вызовете функцию остановки.
С этим методом связано несколько событий; событие «recognized» срабатывает каждый раз, когда распознано очередное высказывание. Вам понадобится обработчик этого события, чтобы обрабатывать результат распознавания и извлекать текст. Дополнительную информацию можно найти в статье здесь.
Ниже приведён пример фрагмента кода, который использует start_continuous_recognition() для преобразования звука в текст.
import azure.cognitiveservices.speech as speechsdk
import time
import datetime
# Transcribe a whole audio file with continuous recognition and collect the
# final text of every recognized utterance in ``all_results``.

# Creates an instance of a speech config with specified subscription key and service region.
# Replace with your own subscription key and region identifier from here: https://aka.ms/speech/sdkregion
speech_key, service_region = "YOURSUBSCRIPTIONKEY", "YOURREGION"
speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
speech_config.speech_recognition_language = "en-US"
speech_config.request_word_level_timestamps()
speech_config.enable_dictation()
# Named enum member instead of the magic value OutputFormat(1).
speech_config.output_format = speechsdk.OutputFormat.Detailed

# Creates an audio configuration that points to an audio file.
# Replace with your own audio filename.
audio_filename = "sample.wav"
audio_input = speechsdk.audio.AudioConfig(filename=audio_filename)

# Creates a recognizer with the given settings
speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_input)

all_results = []  # final text of each recognized utterance, in arrival order
done = False  # set by stop_cb; polled by the wait loop below


# https://docs.microsoft.com/en-us/python/api/azure-cognitiveservices-speech/azure.cognitiveservices.speech.recognitionresult?view=azure-python
def handle_final_result(evt):
    """Append the recognized text carried by ``evt`` to ``all_results``."""
    all_results.append(evt.result.text)


def stop_cb(evt):
    """Flag the wait loop to finish; connected to session_stopped and canceled.

    The recognizer itself is stopped by the main thread after the loop, not
    here: this callback may fire twice (canceled, then session_stopped), and
    stopping from inside an SDK event callback risks blocking on the SDK's
    own callback thread.
    """
    global done
    print('CLOSING on {}'.format(evt))
    done = True


# Appends the recognized text to the all_results variable.
speech_recognizer.recognized.connect(handle_final_result)

# Connect callbacks to the events fired by the speech recognizer & displays the info/status
# Ref: https://docs.microsoft.com/en-us/python/api/azure-cognitiveservices-speech/azure.cognitiveservices.speech.eventsignal?view=azure-python
speech_recognizer.recognizing.connect(lambda evt: print('RECOGNIZING: {}'.format(evt)))
speech_recognizer.recognized.connect(lambda evt: print('RECOGNIZED: {}'.format(evt)))
speech_recognizer.session_started.connect(lambda evt: print('SESSION STARTED: {}'.format(evt)))
speech_recognizer.session_stopped.connect(lambda evt: print('SESSION STOPPED {}'.format(evt)))
speech_recognizer.canceled.connect(lambda evt: print('CANCELED {}'.format(evt)))

# Finish waiting on either session stopped or canceled events
speech_recognizer.session_stopped.connect(stop_cb)
speech_recognizer.canceled.connect(stop_cb)

speech_recognizer.start_continuous_recognition()
try:
    while not done:
        time.sleep(.5)
finally:
    # Stop exactly once, from the main thread, even if the wait is interrupted.
    speech_recognizer.stop_continuous_recognition()

print("Printing all results:")
print(all_results)
Пример вывода: введите описание изображения здесь
Вызов того же самого через функцию
Тот же код инкапсулирован в функцию, которая затем вызывается.
Я лишь немного подправил код и обернул его в функцию, убедившись, что переменная "done" объявлена как nonlocal и доступна во вложенном обработчике. Пожалуйста, проверьте и дайте мне знать.
import azure.cognitiveservices.speech as speechsdk
import time
import datetime
def speech_to_text(speech_key="<>", service_region="<>", audio_filename="whatstheweatherlike.wav"):
    """Transcribe an audio file with continuous recognition.

    Runs the recognizer until the session stops or is canceled, printing
    status events along the way, then prints and returns the collected text.

    Args:
        speech_key: Speech service subscription key. Replace the placeholder
            default with your own key.
        service_region: Region identifier of the Speech resource, see
            https://aka.ms/speech/sdkregion. Replace the placeholder default.
        audio_filename: Path of the audio file to transcribe.

    Returns:
        A list with the final recognized text of each utterance, in the
        order the recognizer reported them.
    """
    # Creates an instance of a speech config with specified subscription key and service region.
    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
    speech_config.speech_recognition_language = "en-US"
    speech_config.request_word_level_timestamps()
    speech_config.enable_dictation()
    # Named enum member instead of the magic value OutputFormat(1).
    speech_config.output_format = speechsdk.OutputFormat.Detailed

    # Creates an audio configuration that points to an audio file.
    audio_input = speechsdk.audio.AudioConfig(filename=audio_filename)

    # Creates a recognizer with the given settings
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_input)

    all_results = []  # final text of each recognized utterance
    done = False  # set by stop_cb; polled by the wait loop below

    # https://docs.microsoft.com/en-us/python/api/azure-cognitiveservices-speech/azure.cognitiveservices.speech.recognitionresult?view=azure-python
    def handle_final_result(evt):
        """Append the recognized text carried by ``evt`` to ``all_results``."""
        all_results.append(evt.result.text)

    def stop_cb(evt):
        """Flag the wait loop to finish; connected to session_stopped and canceled.

        The recognizer is stopped by the caller's thread after the loop, not
        here: this callback may fire twice (canceled, then session_stopped),
        and stopping from inside an SDK event callback risks blocking on the
        SDK's own callback thread.
        """
        nonlocal done
        print('CLOSING on {}'.format(evt))
        done = True

    # Appends the recognized text to the all_results variable.
    speech_recognizer.recognized.connect(handle_final_result)

    # Connect callbacks to the events fired by the speech recognizer & displays the info/status
    # Ref: https://docs.microsoft.com/en-us/python/api/azure-cognitiveservices-speech/azure.cognitiveservices.speech.eventsignal?view=azure-python
    speech_recognizer.recognizing.connect(lambda evt: print('RECOGNIZING: {}'.format(evt)))
    speech_recognizer.recognized.connect(lambda evt: print('RECOGNIZED: {}'.format(evt)))
    speech_recognizer.session_started.connect(lambda evt: print('SESSION STARTED: {}'.format(evt)))
    speech_recognizer.session_stopped.connect(lambda evt: print('SESSION STOPPED {}'.format(evt)))
    speech_recognizer.canceled.connect(lambda evt: print('CANCELED {}'.format(evt)))

    # Finish waiting on either session stopped or canceled events
    speech_recognizer.session_stopped.connect(stop_cb)
    speech_recognizer.canceled.connect(stop_cb)

    speech_recognizer.start_continuous_recognition()
    try:
        while not done:
            time.sleep(.5)
    finally:
        # Stop exactly once, from the calling thread, even if the wait is interrupted.
        speech_recognizer.stop_continuous_recognition()

    print("Printing all results:")
    print(all_results)
    return all_results
# Run the speech-to-text conversion by calling the wrapper function.
speech_to_text()