SFSpeechAudioBufferRecognitionRequest with AVPlayer audio

Is it possible to use SFSpeechRecognizer with AVPlayer audio? I am trying to transcribe the audio while it plays, but it is not working.

I use an MTAudioProcessingTap to get a CMSampleBuffer, which I then append to the SFSpeechAudioBufferRecognitionRequest,

but I always get the same error:

Error Domain=kAFAssistantErrorDomain Code=203 "Timeout", and the audio stops.
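For completeness: speech recognition only delivers results once the user has granted permission. A minimal sketch of that check (assuming the NSSpeechRecognitionUsageDescription key is set in Info.plist):

import Speech

// Sketch: ask for speech-recognition permission before starting any task.
SFSpeechRecognizer.requestAuthorization { status in
    // The handler runs on an arbitrary queue; hop to main before touching UI.
    print("Speech recognition authorized: \(status == .authorized)")
}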

Maybe someone can help me! Here is my code.

Thanks!

import AVFoundation
import CoreMedia
import MediaToolbox
import Speech

init() {
    self.recognitionRequest.shouldReportPartialResults = true
    SFSpeechRecognizer()?.recognitionTask(with: self.recognitionRequest, resultHandler: { (result, error) in
        if let error = error {
            NSLog("Error \(error)")
        } else {
            NSLog("Transcript \(result?.bestTranscription.formattedString ?? "")")
            if result?.isFinal == true {}
        }
    })
}

func play() {
    self.avPlayer.play()
    self.installTap(playerItem: self.avPlayer.currentItem!)
}

func installTap(playerItem: AVPlayerItem) {
    if playerItem.asset.tracks(withMediaType: AVMediaType.audio).count > 0 {
        var callbacks = MTAudioProcessingTapCallbacks(
            version: kMTAudioProcessingTapCallbacksVersion_0,
            clientInfo: nil,
            init: nil,
            finalize: nil,
            prepare: tapPrepare,
            unprepare: nil,
            process: tapProcess)

        var tap: Unmanaged<MTAudioProcessingTap>?
        let err = MTAudioProcessingTapCreate(kCFAllocatorDefault, &callbacks, kMTAudioProcessingTapCreationFlag_PostEffects, &tap)
        assert(noErr == err)

        let audioTrack = playerItem.asset.tracks(withMediaType: AVMediaType.audio).first!
        let inputParams = AVMutableAudioMixInputParameters(track: audioTrack)
        inputParams.audioTapProcessor = tap?.takeRetainedValue()

        let audioMix = AVMutableAudioMix()
        audioMix.inputParameters = [inputParams]

        playerItem.audioMix = audioMix
    }
}


let tapPrepare: MTAudioProcessingTapPrepareCallback = {
    (tap, itemCount, basicDescription) in
    // Cache the stream format so the process callback can build matching sample buffers.
    self.audioFormat = basicDescription.pointee
}

let tapProcess: MTAudioProcessingTapProcessCallback = {
    (tap, numberFrames, flags, bufferListInOut, numberFramesOut, flagsOut) in

    // Pull the source audio into bufferListInOut; without this call the
    // buffer list handed to the recognizer contains no samples.
    var status = MTAudioProcessingTapGetSourceAudio(tap, numberFrames, bufferListInOut, flagsOut, nil, numberFramesOut)
    guard status == noErr else {
        print("Error MTAudioProcessingTapGetSourceAudio: \(status)")
        return
    }

    guard var audioFormat = self.audioFormat else {
        return
    }

    // Describe the tapped stream so the sample buffer matches the real format.
    var format: CMFormatDescription?
    status = CMAudioFormatDescriptionCreate(allocator: kCFAllocatorDefault, asbd: &audioFormat, layoutSize: 0, layout: nil, magicCookieSize: 0, magicCookie: nil, extensions: nil, formatDescriptionOut: &format)
    guard status == noErr else {
        print("Error CMAudioFormatDescriptionCreate: \(status)")
        return
    }

    var timing = CMSampleTimingInfo(duration: CMTimeMake(value: 1, timescale: Int32(audioFormat.mSampleRate)),
                                    presentationTimeStamp: self.playerItem!.currentTime(),
                                    decodeTimeStamp: CMTime.invalid)

    var sbuf: CMSampleBuffer?
    status = CMSampleBufferCreate(allocator: kCFAllocatorDefault,
                                  dataBuffer: nil,
                                  dataReady: false,
                                  makeDataReadyCallback: nil,
                                  refcon: nil,
                                  formatDescription: format,
                                  sampleCount: CMItemCount(numberFrames),
                                  sampleTimingEntryCount: 1,
                                  sampleTimingArray: &timing,
                                  sampleSizeEntryCount: 0,
                                  sampleSizeArray: nil,
                                  sampleBufferOut: &sbuf)
    guard status == noErr, let sampleBuffer = sbuf else {
        print("Error CMSampleBufferCreate: \(status)")
        return
    }

    // Attach the tapped audio data to the newly created sample buffer.
    status = CMSampleBufferSetDataBufferFromAudioBufferList(sampleBuffer, blockBufferAllocator: kCFAllocatorDefault, blockBufferMemoryAllocator: kCFAllocatorDefault, flags: 0, bufferList: bufferListInOut)
    guard status == noErr else {
        print("Error CMSampleBufferSetDataBufferFromAudioBufferList: \(status)")
        return
    }

    self.recognitionRequest.appendAudioSampleBuffer(sampleBuffer)
}
...
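One more thing, in case it is related: I never call endAudio() on the request anywhere. If the recognizer needs the stream to be closed explicitly, I assume it would look something like this (just a sketch, not in my current code):

// Observe the end of playback and close the audio stream so the
// recognizer can finalize a result instead of timing out.
// (Sketch: the returned token should be kept to remove the observer later.)
_ = NotificationCenter.default.addObserver(forName: .AVPlayerItemDidPlayToEndTime,
                                           object: self.avPlayer.currentItem,
                                           queue: .main) { _ in
    self.recognitionRequest.endAudio()
}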