Возможно ли использовать SFSpeechRecognizer со звуком, который воспроизводит AVPlayer?
Я пытаюсь распознавать речь во время воспроизведения, но это не работает.
Я использую MTAudioProcessingTap, чтобы получить CMSampleBuffer, который затем добавляю в SFSpeechAudioBufferRecognitionRequest,
но всегда получаю одну и ту же ошибку:
Error Domain=kAFAssistantErrorDomain Code=203 "Timeout", и звук останавливается.
Может быть, кто-нибудь сможет мне помочь? Вот мой код.
Спасибо!
/// Enables partial results on the recognition request and starts a speech
/// recognition task whose handler logs every transcript or error it receives.
init() {
    self.recognitionRequest.shouldReportPartialResults = true
    SFSpeechRecognizer()?.recognitionTask(with: self.recognitionRequest, resultHandler: { (result, error) in
        if let error = error {
            // Dynamic text is passed as an argument, never as the NSLog format
            // string: a stray "%" in it would be parsed as a format specifier.
            NSLog("Error %@", String(describing: error))
            return
        }
        // Nothing to report when the handler is invoked without a result.
        guard let result = result else { return }
        NSLog("Transcript %@", result.bestTranscription.formattedString)
    })
}
/// Attaches the audio tap to the player's current item (if any) and starts playback.
func play() {
    // Attach the tap before starting playback so it is already in place when
    // audio begins. When no item is loaded there is nothing to tap, so skip
    // the installation instead of crashing on a force-unwrap.
    if let currentItem = self.avPlayer.currentItem {
        self.installTap(playerItem: currentItem)
    }
    self.avPlayer.play()
}
/// Creates an audio processing tap and attaches it, through an audio mix, to
/// the first audio track of `playerItem`.
///
/// Does nothing when the item's asset has no audio track or when the tap
/// cannot be created.
///
/// - Parameter playerItem: The item whose audio should be routed through the tap.
func installTap(playerItem: AVPlayerItem) {
    // Look the track up once; no audio track means there is nothing to tap.
    guard let audioTrack = playerItem.asset.tracks(withMediaType: AVMediaType.audio).first else {
        return
    }

    var callbacks = MTAudioProcessingTapCallbacks(
        version: kMTAudioProcessingTapCallbacksVersion_0,
        clientInfo: nil,
        init: nil,
        finalize: nil,
        prepare: tapPrepare,
        unprepare: nil,
        process: tapProcess)

    var tap: Unmanaged<MTAudioProcessingTap>?
    let err = MTAudioProcessingTapCreate(kCFAllocatorDefault, &callbacks, kMTAudioProcessingTapCreationFlag_PostEffects, &tap)
    // `assert` alone is compiled out of release builds, which would let a
    // failed creation fall through and install a nil processor. Fail loudly in
    // debug and bail out cleanly in release.
    guard err == noErr, let createdTap = tap else {
        assertionFailure("MTAudioProcessingTapCreate failed with status \(err)")
        return
    }

    let inputParams = AVMutableAudioMixInputParameters(track: audioTrack)
    // The create call hands back a +1 reference; take ownership of it here.
    inputParams.audioTapProcessor = createdTap.takeRetainedValue()

    let audioMix = AVMutableAudioMix()
    audioMix.inputParameters = [inputParams]
    playerItem.audioMix = audioMix
}
/// Prepare callback for the audio processing tap: stores the stream
/// description handed to the callback in `audioFormat`.
let tapPrepare: MTAudioProcessingTapPrepareCallback = { (_, _, streamDescription) in
    // AudioStreamBasicDescription is a plain value type, so copying the
    // pointee carries over every field in one assignment.
    self.audioFormat = streamDescription.pointee
}
/// Process callback for the audio processing tap.
///
/// Pulls the source audio into `bufferListInOut` (so playback keeps producing
/// sound), wraps the same audio in a `CMSampleBuffer`, and appends it to the
/// speech recognition request.
///
/// NOTE(review): this closure references `self`, but tap callbacks use the C
/// calling convention and cannot capture context. Consider passing the owner
/// through `clientInfo` / tap storage instead; confirm against how this
/// property is declared in the enclosing type.
let tapProcess: MTAudioProcessingTapProcessCallback = {
    (tap, numberFrames, flags, bufferListInOut, numberFramesOut, flagsOut) in

    // The tap must fetch its own input. Without this call the buffer list is
    // never filled: playback goes silent and the recognizer receives no audio,
    // which ends in a timeout error.
    let fetchStatus = MTAudioProcessingTapGetSourceAudio(tap, numberFrames, bufferListInOut, flagsOut, nil, numberFramesOut)
    guard fetchStatus == noErr else {
        print("Error MTAudioProcessingTapGetSourceAudio :\(fetchStatus)")
        return
    }

    // From here on, failures only skip recognition for this slice; the audio
    // has already been passed through for playback.
    guard var audioFormat = self.audioFormat, let playerItem = self.playerItem else {
        return
    }

    var format: CMFormatDescription?
    let formatStatus = CMAudioFormatDescriptionCreate(allocator: kCFAllocatorDefault, asbd: &audioFormat, layoutSize: 0, layout: nil, magicCookieSize: 0, magicCookie: nil, extensions: nil, formatDescriptionOut: &format)
    guard formatStatus == noErr, let formatDescription = format else {
        print("Error CMAudioFormatDescriptionCreate :\(formatStatus)")
        return
    }

    // One timing entry describing a single frame; it applies to every sample.
    var timing = CMSampleTimingInfo(duration: CMTimeMake(value: 1, timescale: Int32(audioFormat.mSampleRate)), presentationTimeStamp: playerItem.currentTime(), decodeTimeStamp: CMTime.invalid)

    var sbuf: CMSampleBuffer?
    let createStatus = CMSampleBufferCreate(allocator: kCFAllocatorDefault,
                                            dataBuffer: nil,
                                            dataReady: false,
                                            makeDataReadyCallback: nil,
                                            refcon: nil,
                                            formatDescription: formatDescription,
                                            // Use the number of frames actually delivered by the fetch above.
                                            sampleCount: numberFramesOut.pointee,
                                            sampleTimingEntryCount: 1,
                                            sampleTimingArray: &timing,
                                            sampleSizeEntryCount: 0,
                                            sampleSizeArray: nil,
                                            sampleBufferOut: &sbuf)
    guard createStatus == noErr, let sampleBuffer = sbuf else {
        print("Error CMSampleBufferCreate :\(createStatus)")
        return
    }

    let attachStatus = CMSampleBufferSetDataBufferFromAudioBufferList(sampleBuffer, blockBufferAllocator: kCFAllocatorDefault, blockBufferMemoryAllocator: kCFAllocatorDefault, flags: 0, bufferList: bufferListInOut)
    guard attachStatus == noErr else {
        print("Error CMSampleBufferSetDataBufferFromAudioBufferList :\(attachStatus)")
        return
    }

    self.recognitionRequest.appendAudioSampleBuffer(sampleBuffer)
}