使用 AVAssetWriter 录制无缝音频



最终的目标是同时包含视频,但我发现仅音频本身在使用 `ffmpeg -f concat -i list.txt -c copy out.mp4` 合并各个片段时就会产生间隙。

如果我将音频放入 HLS 播放列表中,也会有间隙,所以我认为这不是 ffmpeg 所独有的。


import Foundation
import UIKit
import AVFoundation

// View controller that receives capture sample buffers (it adopts both the audio
// and the video data-output delegate protocols) and rotates AVAssetWriter
// instances through three slots: next -> current -> closing.
// NOTE(review): this listing is truncated — closing braces and several statement
// bodies are missing, so it does not compile as shown.
class StreamController: UIViewController, AVCaptureAudioDataOutputSampleBufferDelegate, AVCaptureVideoDataOutputSampleBufferDelegate {
    // Writer/input pair that is being finished (see closeWriterIfNecessary).
    var closingAudioInput: AVAssetWriterInput?
    var closingAssetWriter: AVAssetWriter?

    // Writer/input pair that became active at the most recent switch in captureOutput.
    var currentAudioInput: AVAssetWriterInput?
    var currentAssetWriter: AVAssetWriter?

    // Writer/input pair prepared ahead of time by createSegmentWriter.
    var nextAudioInput: AVAssetWriterInput?
    var nextAssetWriter: AVAssetWriter?

    var videoHelper: VideoHelper?

    // Wall-clock time captured in viewDidLoad; log messages print offsets relative to it.
    var startTime: NSTimeInterval = 0
    // Queue used with dispatch_async at the end of captureOutput.
    let closeAssetQueue: dispatch_queue_t = dispatch_queue_create("closeAssetQueue", nil);

    // Records the start time, creates the VideoHelper with this controller as its
    // delegate, and schedules a repeating 1-second timer that calls createSegmentWriter.
    // NOTE(review): no call to super.viewDidLoad() is visible in this listing.
    override func viewDidLoad() {
        startTime = NSDate().timeIntervalSince1970
        videoHelper = VideoHelper()
        videoHelper!.delegate = self
        NSTimer.scheduledTimerWithTimeInterval(1, target: self, selector: "createSegmentWriter", userInfo: nil, repeats: true)

    // Timer callback: prepares the next segment's AVAssetWriter and its audio input.
    // NOTE(review): adding the input to the writer and starting the writer are not
    // visible in this listing — presumably cut off.
    func createSegmentWriter() {
        print("Creating segment writer at t=\(NSDate().timeIntervalSince1970 - self.startTime)")
        let outputPath = OutputFileNameHelper.instance.pathForOutput()
        // Best-effort removal of any file already at the output path; errors are ignored.
        try? NSFileManager.defaultManager().removeItemAtPath(outputPath)
        // try! crashes if the writer cannot be created.
        nextAssetWriter = try! AVAssetWriter(URL: NSURL(fileURLWithPath: outputPath), fileType: AVFileTypeMPEG4)
        nextAssetWriter!.shouldOptimizeForNetworkUse = true

        let audioSettings: [String:AnyObject] = EncodingSettings.AUDIO
        nextAudioInput = AVAssetWriterInput(mediaType: AVMediaTypeAudio, outputSettings: audioSettings)
        nextAudioInput!.expectsMediaDataInRealTime = true


    // Finishes the closing writer once a close was requested (`closing`) and the
    // audio side has been marked done (`audioFinished`); both flags are reset first.
    func closeWriterIfNecessary() {
        if closing && audioFinished {
            closing = false
            audioFinished = false
            // Captured before finishing so the completion handler can log the file name.
            let outputFile = closingAssetWriter?.outputURL.pathComponents?.last
            closingAssetWriter?.finishWritingWithCompletionHandler() {
                let delta = NSDate().timeIntervalSince1970 - self.startTime
                print("segment \(outputFile!) finished at t=\(delta)")
            self.closingAudioInput = nil
            self.closingAssetWriter = nil

    // Set in captureAudioSample while a close is pending; read by closeWriterIfNecessary.
    var audioFinished = false
    // Set in captureOutput when a writer switch retires an existing current writer.
    var closing = false

    // Sample-buffer delegate callback. When a prepared next writer exists and its
    // status raw value is non-zero, the writers are rotated:
    // current -> closing, next -> current, next cleared.
    func captureOutput(captureOutput: AVCaptureOutput!, didOutputSampleBuffer sampleBuffer: CMSampleBufferRef, fromConnection connection: AVCaptureConnection!) {
        if let nextWriter = nextAssetWriter {
            // Raw value 0 is presumably AVAssetWriterStatus.Unknown (writer not started) — confirm.
            if nextWriter.status.rawValue != 0 {
                // Only flag a close when there is a current writer to retire.
                if (currentAssetWriter != nil) {
                    closing = true

                // Reads the timing of the first sample in the buffer; how sampleTiming is
                // used afterwards is not visible in this listing.
                var sampleTiming: CMSampleTimingInfo = kCMTimingInfoInvalid
                CMSampleBufferGetSampleTimingInfo(sampleBuffer, 0, &sampleTiming)

                print("Switching asset writers at t=\(NSDate().timeIntervalSince1970 - self.startTime)")
                closingAssetWriter = currentAssetWriter
                closingAudioInput = currentAudioInput

                currentAssetWriter = nextAssetWriter
                currentAudioInput = nextAudioInput

                nextAssetWriter = nil
                nextAudioInput = nil


        // Branches on which kind of output produced the buffer; both branch bodies
        // are missing from this listing.
        if let _ = captureOutput as? AVCaptureVideoDataOutput {
        } else if let _ = captureOutput as? AVCaptureAudioDataOutput {

        // The closure body submitted to closeAssetQueue is missing from this listing.
        dispatch_async(closeAssetQueue) {

    // Reads the timing info of the buffer's first sample and converts its
    // presentation timestamp to seconds (value / timescale).
    // NOTE(review): the statement that uses `prefix` and `presentationTime` is not
    // visible in this listing.
    func printTimingInfo(sampleBuffer: CMSampleBufferRef, prefix: String) {
        var sampleTiming: CMSampleTimingInfo = kCMTimingInfoInvalid
        CMSampleBufferGetSampleTimingInfo(sampleBuffer, 0, &sampleTiming)
        let presentationTime = Double(sampleTiming.presentationTimeStamp.value) / Double(sampleTiming.presentationTimeStamp.timescale)

    // Handles one audio sample buffer: while a close is pending and audio is not yet
    // marked finished, the closing input is consulted and audioFinished is set;
    // otherwise the current input is consulted.
    // NOTE(review): the bodies of the readyForMoreMediaData checks are not visible
    // in this listing.
    func captureAudioSample(sampleBuffer: CMSampleBufferRef) {
        printTimingInfo(sampleBuffer, prefix: "A")
        if (closing && !audioFinished) {
            if closingAudioInput?.readyForMoreMediaData == true {
            audioFinished = true
        } else {
            if currentAudioInput?.readyForMoreMediaData == true {

对于像 AAC 这样的分包(packetised)格式,文件开头会有静默的启动帧(priming frames,也称为编码器延迟),结尾会有剩余帧(remainder frames,当音频长度不是数据包大小的整数倍时出现)。在你的例子中,每个文件的开头有 2112 个启动帧。启动帧和剩余帧使得文件无法在不转码的情况下无缝拼接,因此不能真的责怪 `ffmpeg -c copy` 没有产生无缝输出。

我不确定这会给您带来视频的什么结果 - 显然,即使存在启动帧,音频也会与视频同步。


CMGetAttachment(buffer, kCMSampleBufferAttachmentKey_TrimDurationAtStart, NULL) 
CMGetAttachment(audioBuffer, kCMSampleBufferAttachmentKey_TrimDurationAtEnd, NULL) 

作为短期解决方案,您可以切换到非“分包”(non-packetised)的格式,以获得无缝、可(使用 ffmpeg)拼接的文件。


例如:AVFormatIDKey: kAudioFormatAppleIMA4、fileType: AVFileTypeAIFC、后缀“.aifc”;或者 AVFormatIDKey: kAudioFormatLinearPCM、fileType: AVFileTypeWAVE、后缀“.wav”。

附注:您可以使用随处可用的 afinfo 工具查看启动帧、剩余帧以及数据包大小。

afinfo chunk.mp4

数据格式:2通道,44100 Hz,'aac'(0x00000000)0位/通道,0字节/包,1024帧/包,0字节/帧
音频 39596 个有效帧 + 2112 个启动帧 + 276 个剩余帧 = 41984


