iOS: orientation problem when recording video with a funny face mask
07 January 2019

I'm working on an app that has a face-masking feature and records video with the mask applied. We use AVAssetWriter to record the video, because we process every sample buffer and draw the mask onto it before writing it out.
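
For context, the capture side that feeds these sample buffers is set up roughly like this (a sketch only; the session/output setup is not shown in the code below, and the _videoDataOutput ivar and the queue name are illustrative):

#import <AVFoundation/AVFoundation.h>

- (void)setupCaptureSession {
    _session = [[AVCaptureSession alloc] init];

    AVCaptureDevice *camera = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo];
    AVCaptureDeviceInput *input = [AVCaptureDeviceInput deviceInputWithDevice:camera error:nil];
    if ([_session canAddInput:input]) {
        [_session addInput:input];
    }

    // Ask for 32BGRA frames, the same format the pixel buffer adaptor below expects.
    _videoDataOutput = [[AVCaptureVideoDataOutput alloc] init];
    _videoDataOutput.videoSettings = @{ (id)kCVPixelBufferPixelFormatTypeKey : @(kCVPixelFormatType_32BGRA) };
    [_videoDataOutput setSampleBufferDelegate:self
                                        queue:dispatch_queue_create("video.sample.queue", DISPATCH_QUEUE_SERIAL)];
    if ([_session canAddOutput:_videoDataOutput]) {
        [_session addOutput:_videoDataOutput];
    }
}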

We use the Google Mobile Vision SDK for face detection.
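
The detector referenced below as self.faceDetector is created roughly like this (a sketch; the exact option values in my project may differ):

#import <GoogleMobileVision/GoogleMobileVision.h>

// Tracking and all landmarks are enabled, since the drawing code below uses
// trackingID and the eye/mouth/ear/cheek positions.
NSDictionary *detectorOptions = @{
    GMVDetectorFaceTrackingEnabled : @(YES),
    GMVDetectorFaceLandmarkType : @(GMVDetectorFaceLandmarkAll)
};
self.faceDetector = [GMVDetector detectorOfType:GMVDetectorTypeFace options:detectorOptions];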

The problem is that when the video is recorded in portrait orientation, the recorded video always comes out rotated to landscape, while the mask stays positioned for portrait.

I have tried setting various video orientations, but none of them work.
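
Roughly, the kinds of things I tried look like this (a sketch only; _videoDataOutput is the video data output that feeds the delegate below, and the exact call sites in my real code may differ):

// e.g. forcing the capture connection to portrait before recording
AVCaptureConnection *videoConnection = [_videoDataOutput connectionWithMediaType:AVMediaTypeVideo];
if (videoConnection.isVideoOrientationSupported) {
    videoConnection.videoOrientation = AVCaptureVideoOrientationPortrait;
}

// e.g. tagging the writer input with a 90-degree rotation so the track is displayed as portrait
_assetWriterInput.transform = CGAffineTransformMakeRotation(M_PI_2);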

Here is my code.

-(void)startRecordingWithAssetWriter:(NSURL *)url{
/* to prepare for output; I'll output 640x480 in H.264, via an asset writer */
NSDictionary *outputSettings =
[NSDictionary dictionaryWithObjectsAndKeys:

 [NSNumber numberWithInt:640], AVVideoWidthKey,
 [NSNumber numberWithInt:480], AVVideoHeightKey,
 AVVideoCodecH264, AVVideoCodecKey,

 nil];

_assetWriterInput = [AVAssetWriterInput
                                        assetWriterInputWithMediaType:AVMediaTypeVideo
                                        outputSettings:outputSettings];
/* I'm going to push pixel buffers to it, so I'll need an
 AVAssetWriterInputPixelBufferAdaptor, to expect the same 32BGRA input as I've
 asked the AVCaptureVideoDataOutput to supply */
_pixelBufferAdaptor =
[[AVAssetWriterInputPixelBufferAdaptor alloc]
 initWithAssetWriterInput:_assetWriterInput
 sourcePixelBufferAttributes:
 [NSDictionary dictionaryWithObjectsAndKeys:
  [NSNumber numberWithInt:kCVPixelFormatType_32BGRA],
  kCVPixelBufferPixelFormatTypeKey,
  nil]];

/* that's going to go somewhere, I imagine you've got the URL for that sorted,
 so create a suitable asset writer; we'll put our H.264 within the normal
 MPEG4 container */
_assetWriter = [[AVAssetWriter alloc]
                              initWithURL:url
                              fileType:AVFileTypeMPEG4
                              error:nil];
[_assetWriter addInput:_assetWriterInput];
/* we need to warn the input to expect real time data incoming, so that it tries
 to avoid being unavailable at inopportune moments */
_assetWriterInput.expectsMediaDataInRealTime = YES;


[_assetWriter startWriting];
[_assetWriter startSessionAtSourceTime:kCMTimeZero];
[_session startRunning];
}

-(void)stopAssetWriterRecording{
    [_session stopRunning];
    [_assetWriterInput markAsFinished];
    // -finishWriting is deprecated; use the completion-handler variant instead.
    [_assetWriter finishWritingWithCompletionHandler:^{
        // The file at the output URL is complete once this block runs.
    }];
}

- (void)captureOutput:(AVCaptureOutput *)captureOutput
didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer
   fromConnection:(AVCaptureConnection *)connection {


NSLog(@"Video Orientaion %ld",(long)connection.videoOrientation);
if(!self.faceRecognitionIsOn){
    dispatch_sync(dispatch_get_main_queue(), ^{
        [[self.preview subviews] makeObjectsPerformSelector:@selector(removeFromSuperview)];
    });

    // Guard so the movie file output is only added once, not on every frame callback.
    if (![self.session.outputs containsObject:_movieFileOutput] && [self.session canAddOutput:_movieFileOutput]) {
        [self.session addOutput:_movieFileOutput];
    }
    return;
}

CGFloat cpuLoadPercentage = cpu_usage();

NSArray *devices = [AVCaptureDevice devices];
NSError *error;

if(cpuLoadPercentage > 100 && fpsCount > 8){
    NSLog(@"FPS COUNT: %i, %f", fpsCount, cpuLoadPercentage);
    fpsCount--;
}

for (AVCaptureDevice *device in devices){
    if ([device hasMediaType:AVMediaTypeVideo]) {
        if([device lockForConfiguration:&error]) {
            [device setActiveVideoMaxFrameDuration:CMTimeMake(1, fpsCount)];
            [device setActiveVideoMinFrameDuration:CMTimeMake(1, fpsCount)];
            [device unlockForConfiguration];
        }
    }
}



UIImage *image = [GMVUtility sampleBufferTo32RGBA:sampleBuffer];
AVCaptureDevicePosition devicePosition = self.position ? AVCaptureDevicePositionFront : AVCaptureDevicePositionBack;

// Establish the image orientation.
UIDeviceOrientation deviceOrientation = [[UIDevice currentDevice] orientation];

if(deviceOrientation == UIDeviceOrientationUnknown)
{
    deviceOrientation = UIDeviceOrientationPortrait;
}

GMVImageOrientation orientation = [GMVUtility
                                   imageOrientationFromOrientation:deviceOrientation
                                   withCaptureDevicePosition:devicePosition
                                   defaultDeviceOrientation:UIDeviceOrientationPortrait];
NSDictionary *options = @{
                          GMVDetectorImageOrientation : @(orientation)
                          };
// Detect features using GMVDetector.
NSArray<GMVFaceFeature *> *faces = [self.faceDetector featuresInImage:image options:options];
// The video frames captured by the camera are a different size than the video preview.
// Calculates the scale factors and offset to properly display the features.
CMFormatDescriptionRef fdesc = CMSampleBufferGetFormatDescription(sampleBuffer);
CGRect clap = CMVideoFormatDescriptionGetCleanAperture(fdesc, false);
CGSize parentFrameSize = self.captureVideoPreviewLayer.frame.size;

// Assume AVLayerVideoGravityResizeAspect
CGFloat cameraRatio = clap.size.height / clap.size.width;
CGFloat viewRatio = parentFrameSize.width / parentFrameSize.height;
CGFloat xScale = 1;
CGFloat yScale = 1;
CGRect videoBox = CGRectZero;

videoBox.size.width = parentFrameSize.width;
videoBox.size.height = clap.size.width * (parentFrameSize.width / clap.size.height);
videoBox.origin.x = (videoBox.size.width - parentFrameSize.width) / 2;
videoBox.origin.y = (parentFrameSize.height - videoBox.size.height) / 2;

xScale = videoBox.size.width / clap.size.height;
yScale = videoBox.size.height / clap.size.width;

//    let context = CGContext(data: CVPixelBufferGetBaseAddress(pixelBuffer), width: CVPixelBufferGetWidth(pixelBuffer), height: CVPixelBufferGetHeight(pixelBuffer), bitsPerComponent: 8, bytesPerRow: CVPixelBufferGetBytesPerRow(pixelBuffer), space: self.sDeviceRgbColorSpace, bitmapInfo: self.bitmapInfo.rawValue)!

CVImageBufferRef pixelbuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
CVPixelBufferLockBaseAddress(pixelbuffer, 0);
// Draw into the capture pixel buffer itself so the mask ends up in the frame that gets written.
// Note: the buffer is 32BGRA, so kCGImageAlphaPremultipliedFirst | kCGBitmapByteOrder32Little
// matches its byte layout more closely than kCGImageAlphaPremultipliedLast.
CGColorSpaceRef rgbColorSpace = CGColorSpaceCreateDeviceRGB();
CGContextRef context = CGBitmapContextCreate(CVPixelBufferGetBaseAddress(pixelbuffer),
                                             CVPixelBufferGetWidth(pixelbuffer),
                                             CVPixelBufferGetHeight(pixelbuffer),
                                             8,
                                             CVPixelBufferGetBytesPerRow(pixelbuffer),
                                             rgbColorSpace,
                                             kCGImageAlphaPremultipliedLast);
CGColorSpaceRelease(rgbColorSpace); // the context retains the color space, so it can be released here

dispatch_sync(dispatch_get_main_queue(), ^{
    // Remove previously added feature views.
    [[self.preview subviews] makeObjectsPerformSelector:@selector(removeFromSuperview)];

    // Display detected features in overlay.
    for (GMVFaceFeature *face in faces) {
        funnyFaceImageView = [UIImageView new];
        funnyFaceImageView.image = self.funnyFaceImage;

        CGRect faceRect = [self scaledRect:face.bounds
                                    xScale:xScale
                                    yScale:yScale
                                    offset:videoBox.origin];
        CGPoint mouthB, mouthL, mouthR, mouth;
        CGPoint nose, cheekL, cheekR;
        CGPoint earL, earR;
        CGPoint eyeL, eyeR;

        mouthB = mouthL = mouthR = mouth = nose = cheekL = cheekR = earL = earR = eyeL = eyeR = CGPointZero;

        // Mouth
        if (face.hasBottomMouthPosition) {
            CGPoint point = [self scaledPoint:face.bottomMouthPosition
                                       xScale:xScale
                                       yScale:yScale
                                       offset:videoBox.origin];
            mouthB = point;
        }
        if (face.hasMouthPosition) {
            CGPoint point = [self scaledPoint:face.mouthPosition
                                       xScale:xScale
                                       yScale:yScale
                                       offset:videoBox.origin];
            mouth = point;
        }
        if (face.hasRightMouthPosition) {
            CGPoint point = [self scaledPoint:face.rightMouthPosition
                                       xScale:xScale
                                       yScale:yScale
                                       offset:videoBox.origin];
            mouthR = point;
        }
        if (face.hasLeftMouthPosition) {
            CGPoint point = [self scaledPoint:face.leftMouthPosition
                                       xScale:xScale
                                       yScale:yScale
                                       offset:videoBox.origin];
            mouthL = point;
        }

        // Nose
        if (face.hasNoseBasePosition) {
            CGPoint point = [self scaledPoint:face.noseBasePosition
                                       xScale:xScale
                                       yScale:yScale
                                       offset:videoBox.origin];
            nose = point;
        }

        // Eyes
        if (face.hasLeftEyePosition) {
            CGPoint point = [self scaledPoint:face.leftEyePosition
                                       xScale:xScale
                                       yScale:yScale
                                       offset:videoBox.origin];
            eyeL = point;
        }
        if (face.hasRightEyePosition) {
            CGPoint point = [self scaledPoint:face.rightEyePosition
                                       xScale:xScale
                                       yScale:yScale
                                       offset:videoBox.origin];
            eyeR = point;
        }

        // Ears
        if (face.hasLeftEarPosition) {
            CGPoint point = [self scaledPoint:face.leftEarPosition
                                       xScale:xScale
                                       yScale:yScale
                                       offset:videoBox.origin];
            earL = point;
        }
        if (face.hasRightEarPosition) {
            CGPoint point = [self scaledPoint:face.rightEarPosition
                                       xScale:xScale
                                       yScale:yScale
                                       offset:videoBox.origin];
            earR = point;
        }

        // Cheeks
        if (face.hasLeftCheekPosition) {
            CGPoint point = [self scaledPoint:face.leftCheekPosition
                                       xScale:xScale
                                       yScale:yScale
                                       offset:videoBox.origin];
            cheekL = point;
        }
        if (face.hasRightCheekPosition) {
            CGPoint point = [self scaledPoint:face.rightCheekPosition
                                       xScale:xScale
                                       yScale:yScale
                                       offset:videoBox.origin];
            cheekR = point;
        }

        CGFloat faceAngle = - (M_PI / 180) * face.headEulerAngleZ;
        funnyFaceImageView.frame = faceRect;
        CGFloat shift = 100.0f;
        //            CGPoint maskEyeR = CGPointMake(972, 799);
        CGPoint maskEyeL = CGPointMake(789 - shift, 799);

        CGFloat realEyeDistance = eyeR.x - eyeL.x;
        CGFloat realEyeDistanceY = eyeR.y - eyeL.y;
        //            CGFloat maskEyeDistance = maskEyeR.x - maskEyeL.x;
        CGFloat maskEyeDistance = 283.0f;
        CGFloat proximityRatio = 1.3f;

        CGFloat ratio = sqrtf(realEyeDistance * realEyeDistance + realEyeDistanceY * realEyeDistanceY) / maskEyeDistance * proximityRatio;

        CGPoint eyeLScaled = CGPointMake(maskEyeL.x * ratio, maskEyeL.y * ratio);
        //            CGPoint eyeRScaled = CGPointMake(maskEyeL.x / ratio, maskEyeL.y / ratio);
        CGSize sizeScaled = CGSizeMake(funnyFaceImageView.image.size.height * ratio, funnyFaceImageView.image.size.height * ratio);
        funnyFaceImageView.frame = CGRectMake(eyeL.x - eyeLScaled.x,
                                              eyeL.y - eyeLScaled.y,
                                              sizeScaled.width,
                                              sizeScaled.height);
//            CGRect imageframe;
        CGRect transformedBounds;
        if (face.hasHeadEulerAngleZ) {
            funnyFaceImageView.transform = CGAffineTransformRotate(CGAffineTransformIdentity, faceAngle);
//                imageframe = funnyFaceImageView.frame
            transformedBounds = CGRectApplyAffineTransform(CGRectMake(self.preview.frame.origin.x,
                                                                      self.preview.frame.origin.y,
                                                                      funnyFaceImageView.frame.size.width,
                                                                      funnyFaceImageView.frame.size.height),
                                                           CGAffineTransformRotate(CGAffineTransformIdentity, faceAngle));
        } else {
            funnyFaceImageView.transform = CGAffineTransformIdentity;
            transformedBounds = CGRectApplyAffineTransform(CGRectMake(self.preview.frame.origin.x,
                                                                      self.preview.frame.origin.y,
                                                                      funnyFaceImageView.frame.size.width,
                                                                      funnyFaceImageView.frame.size.height),
                                                           CGAffineTransformIdentity);
        }
        funnyFaceImageView.contentMode = UIViewContentModeScaleToFill;
//            context.draw(metadata.image.cgImage!, in: metadata.faceRect)

        CGContextSetBlendMode(context, kCGBlendModeCopy);


        CGContextDrawImage(context, transformedBounds , funnyFaceImageView.image.CGImage);
        CGContextRotateCTM(context, faceAngle);

//            CGImageRelease(cgImage);

        [self.preview addSubview:funnyFaceImageView];

        // Tracking Id.
        if (face.hasTrackingID) {
            CGPoint point = [self scaledPoint:face.bounds.origin
                                       xScale:xScale
                                       yScale:yScale
                                       offset:videoBox.origin];
            UILabel *label = [[UILabel alloc] initWithFrame:CGRectMake(point.x, point.y, 100, 20)];
            label.text = [NSString stringWithFormat:@"id: %lu", (unsigned long)face.trackingID];
            [self.preview addSubview:label]; // add the label to the overlay so the tracking id is visible
        }

    }

//        CVImageBufferRef imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
    CGContextRelease(context);

    // a very dense way to keep track of the time at which this frame
    // occurs relative to the output stream, but it's just an example!
    static int64_t frameNumber = 0;
    if(self.assetWriterInput.readyForMoreMediaData)
        [self.pixelBufferAdaptor appendPixelBuffer:pixelbuffer
                              withPresentationTime:CMTimeMake(frameNumber, 25)];
    frameNumber++;
    CVPixelBufferUnlockBaseAddress(pixelbuffer, 0);
});


}

Actual recording:

[screenshot of the actual recording]

Output:

[screenshot of the recorded output]

...