Лукас Канаде Трекер дает плохие результаты. Как я могу улучшить свое решение? - PullRequest
0 голосов
/ 21 апреля 2020

Я пытаюсь реализовать Lucas Kande Tracker с нуля, используя 9 шагов, показанных в лекции . Однако после завершения алгоритма я получаю действительно плохие результаты (он не отслеживает, не отслеживает в темных условиях).

При дальнейшем чтении я обнаружил, что могу улучшить результаты, внедрив Huber Функция потери, но я не совсем понимаю, как ее реализовать. Как я могу реализовать это или как получить лучшие результаты? Вот набор данных Я использую:

'''
Copyright (c) 2020 
@file       LKTracker.py
@date       2020/04/18
@brief      TBD
@license    This project is released under the BSD-3-Clause license.
'''

import numpy as np
import cv2
import matplotlib.pyplot as plt


'''
@brief      TBD
'''


class LKTracker:
    template = None         # Template image
    templateBox = None      # 4x2 NumPy array of bounding box coordinates for the template ROI
    iterations = 0          # Max iterations for optimizing warp matrix parameters
    epsilon = 0             # Tolerance for change in parameter vector to terminate optimization
    warpMatrix = None       # 3x3 NumPy affine transformation matrix from frame to template coordinates

    def __init__(self, template, templateBox, iterations=100, epsilon=0.001):
        self.template = template
        self. templateBox = templateBox
        self.iterations = iterations
        self.epsilon = epsilon
        self.warpMatrix = np.float32([[1, 0, 0],
                                      [0, 1, 0]])    # Initialize warpMatrix to the identity matrix

    '''
    @brief      Optimizes for a new set of warp matrix parameters given a new frame
    @param      frame           Image or video frame to optimize warp matrix parameters with
    @return     warpMatrix      Updates self.warpMatrix and returns it
    '''

    def updateWarpParams(self, frame):
        # Continue to modify affine parameters (p) until max iterations
        # or until the norm of delta p is < epsilon
        iteration = 0
        deltaPNorm = float('inf')
        while(iteration < self.iterations and deltaPNorm > self.epsilon):
            # Step 1
            # Warp frame image using current estimate of warpMatrix
            warppedFrame = cv2.warpAffine(frame, self.warpMatrix, dsize=(frame.shape[1], frame.shape[0]))

            # Step 2
            # Crop out the ROIs of the template and frame and compute the error
            frameROI = warppedFrame[self.templateBox[1][1]: self.templateBox[2][1],
                                    self.templateBox[0][0]: self.templateBox[1][0]]

            templateROI = self.template[self.templateBox[1][1]: self.templateBox[2][1],
                                        self.templateBox[0][0]: self.templateBox[1][0]]

            errorImage = templateROI - frameROI

            # Step 3
            # Compute the x and y gradient components of the warpped frame ROI
            gradx = cv2.Sobel(frameROI, cv2.CV_64F, 1, 0, ksize=5)
            grady = cv2.Sobel(frameROI, cv2.CV_64F, 0, 1, ksize=5)

            # Steps 4 and 5
            # Compute steepest descent "images" (grad(I) * dW/dp)
            # dW/dp Jacobian is [[x,0,y,0,1,0], [0,x,0,y,0,1]]
            numVectors = frameROI.shape[0] * frameROI.shape[1]
            steepestDescentVectors = np.zeros((numVectors, 6))

            # x and y are OpenCV image indices, not NumPy
            for x in range(frameROI.shape[1]):
                for y in range(frameROI.shape[0]):
                    # Convolutied indexing to fill in list properly
                    steepestDescentVectors[(x + 1) * (y + 1) - 1] = np.array([gradx[y, x] * x,
                                                                              grady[y, x] * x,
                                                                              gradx[y, x] * y,
                                                                              grady[y, x] * y,
                                                                              gradx[y, x],
                                                                              grady[y, x]])

            # Step 6
            # Compute frame ROI Hessian matrix
            H = np.matmul(steepestDescentVectors.T, steepestDescentVectors)
            # print(self.warpMatrix)
            #cv2.imshow('Frame', warppedFrame)
            # cv2.waitKey(0)
            # print(gradx)
            # print(steepestDescentVectors)
            # print(H)
            cv2.imshow("1", templateROI)
            cv2.imshow("2", frameROI)

            # Steps 7 and 8
            # Compute delta p (change in affine parameters)
            deltaP = np.matmul(np.linalg.inv(H), np.matmul(steepestDescentVectors.T, errorImage.flatten()))

            deltaWarpMatrix = np.array([[deltaP[0], deltaP[2], deltaP[4]],
                                        [deltaP[1], deltaP[3], deltaP[5]]])

            # Step 9
            # Update warpMatrix
            self.warpMatrix += deltaWarpMatrix

            # print(self.warpMatrix)

            # Compute the norm of the delta p
            deltaPNorm = np.linalg.norm(deltaP)
            # print(deltaPNorm)

            iteration += 1

        return self.warpMatrix

    '''
    @brief      TBD
    @param      
    @return     
    '''

    def detectObject(self, frame):
        # Convert frame to grayscale
        grayFrame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Get updated affine transform matrix
        a = self.updateWarpParams(grayFrame)

        # Convert to augmented form and take the inverse
        a = np.vstack((a, np.array([0, 0, 1])))
        print(a)
        aInv = np.linalg.inv(a)

        # Convert template corners to augmented form and inverse warp it to the frame coordinates
        templateCorners = np.hstack((self.templateBox, np.ones((4, 1)))).T
        boundingRect = np.matmul(aInv, templateCorners)

        # Draw resulting bounding box in the frame
        rectMinX = int(min(boundingRect[0, :]))
        rectMinY = int(min(boundingRect[1, :]))
        rectMaxX = int(max(boundingRect[0, :]))
        rectMaxY = int(max(boundingRect[1, :]))
        newFrame = cv2.rectangle(frame, (rectMinX, rectMinY), (rectMaxX, rectMaxY), color=(0, 255, 0), thickness=2)

        return newFrame

    '''
    @brief      TBD
    @param      
    @return     
    '''

    def runApplication(self, videoFile, saveVideo=False):
        # Create video stream object
        videoCapture = cv2.VideoCapture(videoFile)

        # Define video codec and output file if video needs to be saved
        if (saveVideo == True):
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            # 720p 30fps video
            out = cv2.VideoWriter('BuoyDetection.mp4', fourcc, 30, (1280, 720))

        # Continue to process frames if the video stream object is open
        while(videoCapture.isOpened()):
            ret, frame = videoCapture.read()

            # Continue processing if a valid frame is received
            if ret == True:
                newFrame = self.detectObject(frame)

                # Save video if desired, resizing frame to 720p
                if (saveVideo == True):
                    out.write(cv2.resize(newFrame, (1280, 720)))

                # Display frame to the screen in a video preview
                cv2.imshow("Frame", cv2.resize(newFrame, (1280, 720)))

                # Exit if the user presses 'q'
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

            # If the end of the video is reached, wait for final user keypress and exit
            else:
                cv2.waitKey(0)
                break

        # Release video and file object handles
        videoCapture.release()
        if (saveVideo == True):
            out.release()

        print('Video and file handles closed')


if __name__ == '__main__':

    templateFile = 'datasets/boltTemplate.jpg'
    videoFile = 'datasets/bolt.mp4'

    saveVideo = False

    # Note: Template box coorindates by start from top left and proceed clockwise

    # Bolt template box
    templateBox = np.array([[265, 80],
                            [306, 80],
                            [306, 143],
                            [265, 143]])
    '''
    # Car template box
    templateBox = np.array([[64, 51],
                            [168, 51],
                            [168, 139],
                            [64, 139]])'''
    '''
    # Dragon Baby template box
    templateBox = np.array([[177, 53],
                            [243, 53],
                            [243, 125],
                            [177, 125]])'''

    # Load template and image
    template = cv2.cvtColor(cv2.imread(templateFile), cv2.COLOR_BGR2GRAY)

    lkTracker = LKTracker(template, templateBox)

    lkTracker.runApplication(videoFile, saveVideo)
▲
...