Мне удалось закодировать аудио в Opus с самоограничивающимися пакетами (self-delimited Opus), используя AVAudioRecorder
и AVAssetReader.
FYI: это, вероятно, не самое элегантное решение, но другие способы — например, установка tap на InputNode в AVAudioEngine или использование AudioQueue — у меня не сработали. Если кто-то найдёт более элегантное решение, пожалуйста, предложите правку к этому ответу.
Вкратце (код ниже):
- записать линейный PCM с помощью AVAudioRecorder и сохранить записанный аудиофайл, например temp.wav;
- прочитать записанный аудиофайл с помощью AVAssetReader и извлечь из него только PCM (не загружайте аудиофайл через Data(contentsOf: ...): в файле есть метаданные / заголовок, которые могут давать шум);
- разбить данные PCM на блоки по x байт и пройти по ним в цикле (в моём случае x = 640);
- закодировать каждый блок PCM размером x байт в Opus с помощью пода OpusKit;
- вычислить размер закодированного блока Opus и добавить его перед блоком в качестве заголовка.
Код (Swift 5, iOS 13, Xcode 11.3):
import UIKit
import MapKit
import MessageKit
import AVFoundation
import OpusKit
import os
/// View controller that initializes OpusKit when its view loads and toggles
/// audio recording each time the record button is tapped.
class BasicChatViewController: ChatViewController {

    // MARK: - Recording state

    /// `true` from the moment a recording is requested until a stop is requested.
    var isRecording = false
    var avAudioPlayer: AVAudioPlayer!
    var audioRecorder: AVAudioRecorder!

    // MARK: - Lifecycle

    override func viewDidLoad() {
        super.viewDidLoad()
        Logger.logIt(#function)
        Logger.logIt("Initilizing opus lib kit")
        OpusKit.shared.initialize(
            sampleRate: Opus.SAMPLE_RATE_DEFAULT,
            numberOfChannels: Opus.CHANNEL_COUNT_DEFAULT,
            packetSize: Opus.OPUS_ENCODER_BUFFER_SIZE,
            encodeBlockSize: Opus.FRAME_SIZE_DEFAULT
        )
        // configure record button here
    }

    // MARK: - Actions

    /// Target-action entry point for the record button.
    @objc
    func onTapRecordButton(sender: UIButton) {
        Logger.logIt(#function)
        toggleRecording()
    }

    /// Flips `isRecording`, then stops the running recording or starts a new one
    /// (after the permission check), depending on the previous state.
    private func toggleRecording() {
        Logger.logIt(#function)
        Logger.logIt("isRecording: \(isRecording)")
        let wasRecording = isRecording
        isRecording = !wasRecording
        guard wasRecording else {
            checkPermissionAndStartRecording()
            return
        }
        stopRecording()
    }
}
//
// Audio recording related extensions
//
/// Recording pipeline of the chat screen: permission check, linear PCM capture
/// with `AVAudioRecorder`, then encoding of the captured file to self-delimited Opus.
extension BasicChatViewController: AVAudioRecorderDelegate {

    /// Checks the recording permission through `AudioUtil` and starts recording
    /// when it is granted.
    private func checkPermissionAndStartRecording() {
        Logger.logIt(#function)
        AudioUtil.checkRecordingPermission() { [weak self] isPermissionGranted in
            Logger.logIt("isPermissionGranted: \(isPermissionGranted)")
            guard let self = self else { return }
            guard isPermissionGranted else {
                Logger.logIt("don't have permission to record")
                // Nothing was started: undo the flag the toggle set optimistically,
                // otherwise the next tap would try to "stop" a recording that never began.
                self.isRecording = false
                return
            }
            self.recordUsingAVAudioRecorder()
        }
    }

    /// Activates the audio session and creates a recorder that writes linear PCM
    /// to `AudioUtil.TEMP_WAV_FILE`. On failure the error is logged and
    /// `audioRecorder` keeps its previous value.
    private func setupRecorder() {
        Logger.logIt(#function)
        let tempAudioFileUrl = AudioUtil.TEMP_WAV_FILE
        Logger.logIt("tempAudioFileUrl: \(tempAudioFileUrl)")
        let linearPcmRecordingSettings = LinearPCMRecording.LINEAR_PCM_RECODING_SETTINGS_DEFAULT
        Logger.logIt("RecordingSettings: \(linearPcmRecordingSettings)")
        startRecordingSession()
        do {
            let recorder = try AVAudioRecorder(url: tempAudioFileUrl, settings: linearPcmRecordingSettings)
            recorder.delegate = self
            //recorder.isMeteringEnabled = true
            recorder.prepareToRecord()
            audioRecorder = recorder
        } catch {
            Logger.logIt("\(error.localizedDescription)")
        }
    }

    /// Starts recording, creating the recorder first when there is none yet.
    /// Resets `isRecording` when the recorder is unavailable or refuses to start.
    private func startRecording() {
        Logger.logIt(#function)
        if audioRecorder == nil {
            setupRecorder()
        }
        // `audioRecorder` is implicitly unwrapped; it is still nil when setup threw.
        guard let recorder = audioRecorder else {
            Logger.logIt("recorder is not available - recording not started")
            isRecording = false
            return
        }
        if !recorder.record() {
            Logger.logIt("recorder failed to start recording")
            isRecording = false
        }
    }

    /// Stops the recorder, if there is one.
    private func stopRecording() {
        Logger.logIt(#function)
        guard let recorder = audioRecorder else {
            return
        }
        recorder.stop()
    }

    /// Deletes the temporary recording unless the recorder is missing or still recording.
    private func deleteTempAudioFile() {
        Logger.logIt(#function)
        guard let recorder = audioRecorder, !recorder.isRecording else {
            return
        }
        if recorder.deleteRecording() {
            Logger.logIt("temp (recorded) audio file deleted")
        } else {
            Logger.logIt("failed to delete temp (recorded) audio file")
        }
    }

    /// Configures the shared audio session for spoken-audio recording and activates it.
    private func startRecordingSession() {
        Logger.logIt(#function)
        let session = AVAudioSession.sharedInstance()
        do {
            try session.setCategory(.record, mode: .spokenAudio)
            try session.setActive(true)
        } catch {
            Logger.logIt("Failed to activate recording session: \(error.localizedDescription)")
        }
    }

    /// Deactivates the shared audio session.
    private func stopRecordingSession() {
        Logger.logIt(#function)
        do {
            try AVAudioSession.sharedInstance().setActive(false)
        } catch {
            Logger.logIt("Failed to deactivate recording session: \(error.localizedDescription)")
        }
    }

    /// Creates a fresh recorder and starts recording with it.
    private func recordUsingAVAudioRecorder() {
        Logger.logIt(#function)
        setupRecorder()
        startRecording()
    }

    /// Extracts the PCM from the temporary recording, encodes it to self-delimited
    /// Opus, saves the result, then removes the temporary file and deactivates the session.
    private func encodeRecordedAudio() {
        Logger.logIt(#function)
        let pcmData = AudioUtil.extractPcmOnly(from: AudioUtil.TEMP_WAV_FILE)
        if pcmData.count > 1 {
            Logger.logIt("encoding pcm to self-delimited opus")
            let encodedOpusData = AudioUtil.encodeToSelfDelimitedOpus(pcmData: pcmData, splitSize: PCM.SPLIT_CHUNK_SIZE_DEFAULT)
            Logger.logIt("encoded opus: \(encodedOpusData)")
            Logger.logIt("save encoded opus")
            AudioUtil.saveAudio(to: AudioUtil.ENCODED_OPUS_FILE, audioData: encodedOpusData)
        } else {
            Logger.logIt("no data to encode")
        }
        deleteTempAudioFile()
        stopRecordingSession()
    }

    // MARK: - AVAudioRecorderDelegate

    /// Called by the recorder when recording ends; encodes the result on success.
    func audioRecorderDidFinishRecording(_ recorder: AVAudioRecorder, successfully flag: Bool) {
        Logger.logIt(#function)
        // The recorder has stopped, whether we asked for it or not; keep the flag in sync.
        isRecording = false
        guard flag else {
            Logger.logIt("recording failed - audio encoding error")
            // Release the audio session on the failure path as well.
            stopRecordingSession()
            return
        }
        Logger.logIt("finished recording successfully")
        encodeRecordedAudio()
    }
}
AudioUtil
import Foundation
import AVFoundation
import OpusKit
//
// Opus audio info.
//
/// Parameters describing an Opus stream. `packetSize` is derived from
/// `sampleRate` and is recomputed whenever `sampleRate` changes.
public class OpusAudioInfo {
    public static let `default` = OpusAudioInfo()

    var channels: opus_int32
    var headerSize: Int // bytes
    var packetSize: opus_int32
    var sampleRate: opus_int32 {
        didSet {
            packetSize = OpusAudioInfo.computePacketSize(for: sampleRate)
        }
    }

    public init(sampleRate: opus_int32 = Opus.SAMPLE_RATE_16_KHZ,
                channels: opus_int32 = Opus.CHANNEL_COUNT_DEFAULT,
                headerSize: Int = 1) {
        self.channels = channels
        self.headerSize = headerSize
        self.sampleRate = sampleRate
        self.packetSize = OpusAudioInfo.computePacketSize(for: sampleRate)
    }

    /// Default frame duration (ms) multiplied by the number of samples per millisecond.
    private static func computePacketSize(for rate: opus_int32) -> opus_int32 {
        return Int32(Opus.FRAME_DURATION_DEFAULT) * (rate / 1000)
    }
}
//
// RAW PCM info.
//
/// Describes a raw PCM stream by sample rate, channel count and bit depth.
public class PCMInfo {
    /// Instance built from the initializer's default arguments.
    public static let `default` = PCMInfo()

    var sampleRate: Int32
    var channels: Int16
    var bitDepth: Int16

    public init(
        sampleRate: Int32 = PCM.SAMPLE_RATE_16_KHZ,
        channels: Int16 = Int16(PCM.MONO),
        bitDepth: Int16 = Int16(PCM.BIT_DEPTH_DEFAULT)
    ) {
        self.bitDepth = bitDepth
        self.channels = channels
        self.sampleRate = sampleRate
    }
}
//
// Utility class for audio related operations
//
/// Namespace of static helpers for WAV headers, recording permission,
/// self-delimited Opus encoding/decoding and PCM extraction.
public class AudioUtil {
    private init() {}

    //
    // Default audio files url in document directory
    //
    public static let RAW_PCM_FILE = FileUtil.createFileUrl(for: "pcm.raw", in: FileUtil.DOCUMENTS_DIR)
    public static let TEMP_WAV_FILE = FileUtil.createFileUrl(for: "wav.wav", in: FileUtil.DOCUMENTS_DIR)
    public static let ENCODED_OPUS_FILE = FileUtil.createFileUrl(for: "encoded_opus_ios.opus", in: FileUtil.DOCUMENTS_DIR)
    public static let DECODED_WAV_WITH_HEADER_FILE = FileUtil.createFileUrl(for: "decoded_wav_with_header.wav", in: FileUtil.DOCUMENTS_DIR)

    /**
     Creates a 44-byte RIFF/WAVE header for linear PCM data, so that raw PCM can be
     wrapped into a playable WAV buffer.

     Layout (all integers little-endian), see
     http://www.topherlee.com/software/pcm-tut-wavformat.html

         bytes   content
         1 - 4   "RIFF"
         5 - 8   size of the overall file minus 8 bytes
         9 - 12  "WAVE"
         13 - 16 "fmt "
         17 - 20 16 (length of the format chunk)
         21 - 22 1 (format code: PCM)
         23 - 24 number of channels
         25 - 28 sample rate
         29 - 32 byte rate  = sample rate * block align
         33 - 34 block align = bits per sample * channels / 8
         35 - 36 bits per sample
         37 - 40 "data"
         41 - 44 size of the data section

     - parameter sampleRate: samples per second
     - parameter channelCount: number of channels
     - parameter bitDepth: bits per sample
     - parameter pcmDataSizeInBytes: PCM data size in bytes
     - returns : Data - wav header data
     */
    public static func createWavHeader(sampleRate: Int32, channelCount: Int16, bitDepth: Int16, pcmDataSizeInBytes dataSize: Int32) -> Data {
        let headerSize: Int32 = 44
        let riffPrefixSize: Int32 = 8 // "RIFF" tag + the size field itself are not counted
        let formatChunkSize: Int32 = 16
        let formatCodePcm: Int16 = 1

        // RIFF chunk size = whole file - 8 bytes; clamp instead of trapping on overflow.
        let (riffSum, riffOverflow) = dataSize.addingReportingOverflow(headerSize - riffPrefixSize)
        let riffChunkSize: Int32 = riffOverflow ? Int32.max : riffSum

        // Widen before multiplying so the intermediate product cannot overflow Int16.
        let blockAlignWide: Int32 = Int32(channelCount) * Int32(bitDepth) / 8
        let byteRate: Int32 = sampleRate * blockAlignWide
        let blockAlign = Int16(clamping: blockAlignWide)

        var header = Data()
        header.reserveCapacity(Int(headerSize))
        header.append(contentsOf: Array("RIFF".utf8))
        header.append(contentsOf: byteArray(from: riffChunkSize))
        header.append(contentsOf: Array("WAVE".utf8))
        header.append(contentsOf: Array("fmt ".utf8))
        header.append(contentsOf: byteArray(from: formatChunkSize))
        header.append(contentsOf: byteArray(from: formatCodePcm))
        header.append(contentsOf: byteArray(from: channelCount))
        header.append(contentsOf: byteArray(from: sampleRate))
        header.append(contentsOf: byteArray(from: byteRate))
        header.append(contentsOf: byteArray(from: blockAlign))
        header.append(contentsOf: byteArray(from: bitDepth))
        header.append(contentsOf: Array("data".utf8))
        header.append(contentsOf: byteArray(from: dataSize))
        return header
    }

    /**
     Creates default wav header based on default PCM constants
     - parameter dataSize: size of PCM data in bytes
     - returns : Data - wav header data
     */
    public static func createDefaultWavHeader(dataSize: Int32) -> Data {
        return createWavHeader(sampleRate: PCM.SAMPLE_RATE_DEFAULT,
                               channelCount: Int16(PCM.CHANNEL_COUNT_DEFAULT),
                               bitDepth: Int16(PCM.BIT_DEPTH_DEFAULT),
                               pcmDataSizeInBytes: dataSize)
    }

    /**
     Converts given value to its little-endian byte representation
     - parameter value: FixedWidthInteger type
     - returns: array of bytes, least significant byte first
     */
    public static func byteArray<T>(from value: T) -> [UInt8] where T: FixedWidthInteger {
        // https://stackoverflow.com/a/56964191/4802664
        // .littleEndian is required
        return withUnsafeBytes(of: value.littleEndian) { Array($0) }
    }

    /**
     Generates wav audio data buffer by placing the header in front of the raw PCM
     - parameter wavHeader: RIFF WAV header
     - parameter pcmData: Linear PCM data
     - returns: header followed by PCM data
     */
    public static func generateWav(header wavHeader: Data, pcmData: Data) -> Data {
        var wavData = Data()
        wavData.reserveCapacity(wavHeader.count + pcmData.count)
        wavData.append(wavHeader)
        wavData.append(pcmData)
        return wavData
    }

    /**
     Checks permission for recording and invokes callback with the result.

     When the permission is still undetermined the user is asked, and the callback
     is invoked on the main queue once the user has answered. In every other case
     the callback is invoked synchronously.
     - parameter callback: closure invoked with `true` when recording is permitted
     */
    public static func checkRecordingPermission(onPermissionChecked callback: @escaping(_ isPermissionGranted: Bool) -> Void) {
        Logger.logIt(#function)
        let session = AVAudioSession.sharedInstance()
        switch session.recordPermission {
        case .granted:
            callback(true)
        case .denied:
            callback(false)
        case .undetermined:
            // The answer arrives asynchronously; report it only when it is known.
            session.requestRecordPermission { allowed in
                DispatchQueue.main.async {
                    callback(allowed)
                }
            }
        @unknown default:
            callback(false)
        }
    }

    /**
     Saves given audio data to specified url; a write error is logged, not thrown
     - parameter fileUri: file url where audio data will be saved
     - parameter audioData: bytes to write
     */
    public static func saveAudio(to fileUri: URL, audioData: Data) {
        Logger.logIt(#function)
        Logger.logIt("save to: \(fileUri)")
        do {
            try audioData.write(to: fileUri)
        } catch {
            Logger.logIt(error.localizedDescription)
        }
    }

    /**
     Encodes given PCM data into self delimited opus (`|header|data|header|data|...|`) using OpusKit.

     The PCM is cut into consecutive chunks of `splitSize` bytes (the last chunk may be
     shorter). Each chunk is encoded separately and written as a 1-byte size header
     followed by the encoded packet. Chunks that fail to encode, or whose packet size
     cannot be stored in one byte (0 or more than 255 bytes), are logged and skipped so
     that the framing of the remaining stream stays valid.
     - parameter pcmData: Linear PCM data buffer (loaded from file or coming from AudioEngine tapping)
     - parameter splitSize: size of chunk to split the given pcmData, must be > 0
     - returns : encoded data (encoded as: `|header|data|header|data|...|`), empty when `splitSize` is not positive
     */
    public static func encodeToSelfDelimitedOpus(pcmData: Data, splitSize: Int) -> Data {
        Logger.logIt(#function)
        guard splitSize > 0 else {
            Logger.logIt("invalid split size: \(splitSize)")
            return Data()
        }
        Logger.logIt("split count: \(pcmData.count / splitSize)")

        var encodedData = Data()
        var offset = 0
        while offset < pcmData.count {
            let length = min(splitSize, pcmData.count - offset)
            // Data indices are not guaranteed to start at 0 (slices keep parent indices).
            let lowerBound = pcmData.startIndex + offset
            let pcmChunk = pcmData.subdata(in: lowerBound..<(lowerBound + length))
            // NOTE(review): a final chunk shorter than splitSize is passed to the encoder
            // as-is, like before - confirm OpusKit accepts partial frames.
            appendOpusPacket(for: pcmChunk, pcmOffset: offset, to: &encodedData)
            offset += length
        }
        return encodedData
    }

    /// Encodes one PCM chunk and appends `|size byte|packet|` to `stream`.
    private static func appendOpusPacket(for pcmChunk: Data, pcmOffset: Int, to stream: inout Data) {
        guard let packet = OpusKit.shared.encodeData(pcmChunk) else {
            Logger.logIt("failed to encode at index: \(pcmOffset)")
            return
        }
        guard packet.count >= 1 && packet.count <= Int(UInt8.max) else {
            Logger.logIt("skipping opus packet of \(packet.count) bytes at index: \(pcmOffset) - size does not fit the 1 byte header")
            return
        }
        stream.append(UInt8(packet.count))
        stream.append(packet)
    }

    /**
     Decodes given self delimited opus data to PCM
     Custom opus is encoded as `|header|data|header|data|...|`
     Loops over the data, reads the packet size from the first header byte, takes that
     many bytes after the header and decodes them using OpusKit. Decoding stops (and what
     was decoded so far is returned) when the remaining bytes are too few for the
     announced header or packet.
     - parameter opusData: Encoded opus data buffer
     - parameter headerSizeInBytes: size of header in bytes (default is 1), must be >= 1
     - returns : decoded pcm data
     */
    public static func decodeSelfDelimitedOpusToPcm(opusData: Data, headerSizeInBytes headerSize: Int = 1) -> Data {
        var decodedData = Data()
        guard headerSize >= 1 else {
            Logger.logIt("invalid header size: \(headerSize)")
            return decodedData
        }
        let base = opusData.startIndex
        var readIndex = 0
        while readIndex < opusData.count {
            let payloadStart = readIndex + headerSize
            guard payloadStart <= opusData.count else {
                Logger.logIt("truncated header at index: \(readIndex)")
                break
            }
            // Only the first header byte carries the packet size.
            let packetSize = Int(opusData[base + readIndex])
            let payloadEnd = payloadStart + packetSize
            guard payloadEnd <= opusData.count else {
                Logger.logIt("truncated packet at index: \(payloadStart)")
                break
            }
            let packet = opusData.subdata(in: (base + payloadStart)..<(base + payloadEnd))
            if let decodedChunk = OpusKit.shared.decodeData(packet) {
                decodedData.append(decodedChunk)
            } else {
                Logger.logIt("failed to decode at index: \(payloadStart)")
            }
            readIndex = payloadEnd
        }
        return decodedData
    }

    /**
     Extracts PCM only from an audio file using AVAssetReader
     Reading the file through AVAssetReader yields the sample data without the
     container's header / meta data.
     - parameter fileUrl : audio file url
     - returns: PCM Data; empty when the file has no audio track or cannot be read
     */
    public static func extractPcmOnly(from fileUrl: URL) -> Data {
        var pcmOnly = Data()
        do {
            let asset = AVAsset(url: fileUrl)
            let assetReader = try AVAssetReader(asset: asset)
            guard let track = asset.tracks(withMediaType: AVMediaType.audio).first else {
                Logger.logIt("no audio track in: \(fileUrl)")
                return pcmOnly
            }
            let outputSettings = LinearPCMRecording.LINEAR_PCM_RECODING_SETTINGS_DEFAULT
            let trackOutput = AVAssetReaderTrackOutput(track: track, outputSettings: outputSettings)
            guard assetReader.canAdd(trackOutput) else {
                Logger.logIt("can not add track output to asset reader")
                return pcmOnly
            }
            assetReader.add(trackOutput)
            guard assetReader.startReading() else {
                Logger.logIt("failed to start reading: \(assetReader.error?.localizedDescription ?? "unknown error")")
                return pcmOnly
            }
            Logger.logIt("reading data with AVAssetReader")
            while assetReader.status == AVAssetReader.Status.reading {
                // nil means there is nothing more to read (end of track or an error).
                guard let sampleBuffer = trackOutput.copyNextSampleBuffer() else {
                    break
                }
                defer { CMSampleBufferInvalidate(sampleBuffer) }
                guard let blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer) else {
                    continue
                }
                let bufferLength = CMBlockBufferGetDataLength(blockBuffer)
                guard bufferLength > 0 else {
                    continue
                }
                var chunk = Data(count: bufferLength)
                let status = chunk.withUnsafeMutableBytes { (raw: UnsafeMutableRawBufferPointer) -> OSStatus in
                    guard let destination = raw.baseAddress else {
                        return kCMBlockBufferBadPointerParameterErr
                    }
                    return CMBlockBufferCopyDataBytes(blockBuffer, atOffset: 0, dataLength: bufferLength, destination: destination)
                }
                if status == kCMBlockBufferNoErr {
                    pcmOnly.append(chunk)
                } else {
                    Logger.logIt("failed to copy sample data, status: \(status)")
                }
            }
            if assetReader.status == AVAssetReader.Status.failed {
                Logger.logIt("asset reader failed: \(assetReader.error?.localizedDescription ?? "unknown error")")
            }
        } catch {
            Logger.logIt(error.localizedDescription)
        }
        return pcmOnly
    }
}
Дополнительные классы
import Foundation
import AVFoundation
import OpusKit
/// Base set of audio constants shared by the PCM, WAV and Opus constant classes.
public class Audio {
    // Sample rates, in samples per second
    public static let SAMPLE_RATE_8_KHZ: opus_int32 = 8_000
    public static let SAMPLE_RATE_16_KHZ: opus_int32 = 16_000
    public static let SAMPLE_RATE_DEFAULT = SAMPLE_RATE_16_KHZ

    // Channel layout
    public static let MONO: Int32 = 1
    public static let CHANNEL_COUNT_DEFAULT: Int32 = MONO

    // Bits per sample
    public static let BIT_DEPTH_DEFAULT: Int32 = 16

    // Frame duration in milliseconds
    public static let FRAME_DURATION_DEFAULT = 20

    // Samples per frame = frame duration (ms) * samples per millisecond
    public static let FRAME_SIZE_DEFAULT: Int32 = Int32(FRAME_DURATION_DEFAULT) * (SAMPLE_RATE_DEFAULT / 1000)
}
/// PCM-specific constants on top of the shared `Audio` constants.
public class PCM: Audio {
    /// Bytes in one default frame: samples per frame * bytes per sample.
    public static let SPLIT_CHUNK_SIZE_DEFAULT: Int = Int(FRAME_SIZE_DEFAULT) * Int(BIT_DEPTH_DEFAULT / 8)
}
/// WAV header constants on top of the shared `Audio` constants.
public class WAV: Audio {
    /// Format code stored in the header for PCM.
    public static let WAV_HEADER_FORMAT_PCM: Int16 = 1
    /// Length of the format sub-chunk; always 16.
    public static let WAV_HEADER_SUB_CHUNK_SIZE: Int32 = 16
    /// Size of the header in bytes; always 44.
    public static let HEADER_SIZE: Int32 = 44
}
/// Opus-specific constants on top of the shared `Audio` constants.
public class Opus: Audio {
    /// Encoder buffer size, ref: https://stackoverflow.com/a/55707654/4802664
    public static let OPUS_ENCODER_BUFFER_SIZE: Int32 = 1275
    /// Size of the allocated memory for the output payload.
    public static let ENCODED_OUTPUT_MEMORY_SIZE_LIMIT: Int32 = 255
}
/// Linear PCM recording configuration. `bitRate` and `recordingSettings` are derived
/// from `sampleRate`, `channelCount` and `bitDepth` and are refreshed after each of
/// those properties changes.
public class PCMRecordingSetting {
    private static let SAMPLE_RATE_16_KHZ = 16_000
    private static let BIT_DEPTH_16 = 16
    private static let CHANNEL_MONO = 1

    // The observers below must be `didSet`: inside `willSet` the property still holds
    // its OLD value, so the derived values would lag one change behind.
    public var sampleRate: Int = SAMPLE_RATE_16_KHZ {
        didSet {
            updateBitRate()
            updateLinearPCMRecordingSettings()
        }
    }
    public var channelCount: Int = CHANNEL_MONO {
        didSet {
            updateBitRate()
            updateLinearPCMRecordingSettings()
        }
    }
    public var bitDepth: Int = BIT_DEPTH_16 {
        didSet {
            updateBitRate()
            updateLinearPCMRecordingSettings()
        }
    }

    /// Bits per second: sample rate * bit depth * channel count.
    public private(set) var bitRate = SAMPLE_RATE_16_KHZ * BIT_DEPTH_16 * CHANNEL_MONO

    private func updateBitRate() {
        bitRate = sampleRate * bitDepth * channelCount
    }

    /// Settings dictionary for 16 kHz, mono, 16-bit integer linear PCM.
    public static let LINEAR_PCM_DEFAULT = [
        AVFormatIDKey: kAudioFormatLinearPCM,
        AVSampleRateKey: SAMPLE_RATE_16_KHZ,
        AVNumberOfChannelsKey: CHANNEL_MONO,
        AVLinearPCMBitDepthKey: BIT_DEPTH_16,
        AVLinearPCMIsFloatKey: false
    ] as [String : Any]

    /// Settings dictionary matching the current property values.
    public var recordingSettings = LINEAR_PCM_DEFAULT

    private func updateLinearPCMRecordingSettings() {
        Logger.debug(#function)
        recordingSettings = [
            AVFormatIDKey: kAudioFormatLinearPCM,
            AVSampleRateKey: sampleRate,
            AVNumberOfChannelsKey: channelCount,
            AVLinearPCMBitDepthKey: bitDepth,
            AVLinearPCMIsFloatKey: false
        ] as [String : Any]
    }

    public init(sampleRate: Int, channelCount: Int, bitDepth: Int) {
        // Property observers do not fire for assignments made in the class's own
        // initializer, so the derived values are refreshed explicitly below.
        self.sampleRate = sampleRate
        self.channelCount = channelCount
        self.bitDepth = bitDepth
        updateBitRate()
        updateLinearPCMRecordingSettings()
    }

    public static let `default` = PCMRecordingSetting(sampleRate: SAMPLE_RATE_16_KHZ, channelCount: CHANNEL_MONO, bitDepth: BIT_DEPTH_16)
}