I am trying to implement a Lucas-Kanade tracker from scratch, following the nine steps shown in the lecture. However, after finishing the algorithm I get really poor results: the tracker loses the target, and it fails completely in dark conditions.
On further reading I found that I could improve the results by adding a Huber loss function, but I don't quite understand how to implement it. How can I implement it, or otherwise get better results?
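From what I've read, the Huber loss is usually folded into the Lucas-Kanade update via iteratively reweighted least squares (IRLS): each pixel residual gets a weight, and steps 6-8 become a weighted least-squares solve. Below is a minimal sketch of my understanding; the threshold delta=1.345 is just a common default from the robust-statistics literature, not something from the lecture:

import numpy as np

def huber_weights(residuals, delta=1.345):
    # IRLS weights for the Huber loss: w(r) = 1 for |r| <= delta and
    # delta / |r| otherwise, so outlier pixels are down-weighted instead
    # of dominating the least-squares solution. delta is an assumed
    # default for unit-scale residuals, not a value from the lecture.
    r = np.abs(residuals)
    w = np.ones_like(r, dtype=np.float64)
    large = r > delta
    w[large] = delta / r[large]
    return w

# Steps 6-8 of the LK loop would then use weighted normal equations:
#   w = huber_weights(errorImage.flatten())
#   H = steepestDescentVectors.T @ (w[:, None] * steepestDescentVectors)
#   deltaP = np.linalg.solve(H, steepestDescentVectors.T @ (w * errorImage.flatten()))

I am not sure how delta should be scaled for raw 0-255 intensity residuals. Here is the dataset I am using, and my code: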
'''
Copyright (c) 2020
@file LKTracker.py
@date 2020/04/18
@brief Lucas-Kanade template tracker
@license This project is released under the BSD-3-Clause license.
'''
import numpy as np
import cv2
import matplotlib.pyplot as plt
'''
@brief Tracks a template ROI through a video by optimizing an affine warp with the Lucas-Kanade algorithm
'''
class LKTracker:
template = None # Template image
templateBox = None # 4x2 NumPy array of bounding box coordinates for the template ROI
iterations = 0 # Max iterations for optimizing warp matrix parameters
epsilon = 0 # Tolerance for change in parameter vector to terminate optimization
    warpMatrix = None # 2x3 NumPy affine transformation matrix from frame to template coordinates
def __init__(self, template, templateBox, iterations=100, epsilon=0.001):
self.template = template
        self.templateBox = templateBox
self.iterations = iterations
self.epsilon = epsilon
self.warpMatrix = np.float32([[1, 0, 0],
[0, 1, 0]]) # Initialize warpMatrix to the identity matrix
'''
@brief Optimizes for a new set of warp matrix parameters given a new frame
@param frame Image or video frame to optimize warp matrix parameters with
@return warpMatrix Updates self.warpMatrix and returns it
'''
def updateWarpParams(self, frame):
# Continue to modify affine parameters (p) until max iterations
# or until the norm of delta p is < epsilon
iteration = 0
deltaPNorm = float('inf')
while(iteration < self.iterations and deltaPNorm > self.epsilon):
# Step 1
# Warp frame image using current estimate of warpMatrix
            warpedFrame = cv2.warpAffine(frame, self.warpMatrix, dsize=(frame.shape[1], frame.shape[0]))
# Step 2
# Crop out the ROIs of the template and frame and compute the error
            frameROI = warpedFrame[self.templateBox[1][1]: self.templateBox[2][1],
                                   self.templateBox[0][0]: self.templateBox[1][0]]
            templateROI = self.template[self.templateBox[1][1]: self.templateBox[2][1],
                                        self.templateBox[0][0]: self.templateBox[1][0]]
            # Cast to float first so negative differences are not wrapped around by uint8 arithmetic
            errorImage = templateROI.astype(np.float64) - frameROI.astype(np.float64)
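            # (Not part of the original algorithm: a simple gain normalization
            # is a common heuristic when tracking fails in dark frames.
            # Matching the frame ROI's mean brightness to the template before
            # differencing can help; uncomment to try it.)
            # gain = templateROI.mean() / max(frameROI.mean(), 1e-6)
            # errorImage = templateROI.astype(np.float64) - gain * frameROI.astype(np.float64)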
# Step 3
            # Compute the x and y gradient components of the warped frame ROI
gradx = cv2.Sobel(frameROI, cv2.CV_64F, 1, 0, ksize=5)
grady = cv2.Sobel(frameROI, cv2.CV_64F, 0, 1, ksize=5)
# Steps 4 and 5
# Compute steepest descent "images" (grad(I) * dW/dp)
# dW/dp Jacobian is [[x,0,y,0,1,0], [0,x,0,y,0,1]]
numVectors = frameROI.shape[0] * frameROI.shape[1]
steepestDescentVectors = np.zeros((numVectors, 6))
# x and y are OpenCV image indices, not NumPy
for x in range(frameROI.shape[1]):
for y in range(frameROI.shape[0]):
                    # Row-major index (y * width + x) so each pixel's row
                    # lines up with errorImage.flatten()
                    steepestDescentVectors[y * frameROI.shape[1] + x] = np.array([gradx[y, x] * x,
                                                                                  grady[y, x] * x,
                                                                                  gradx[y, x] * y,
                                                                                  grady[y, x] * y,
                                                                                  gradx[y, x],
                                                                                  grady[y, x]])
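            # (Equivalent vectorized construction, shown for reference and left
            # commented out: it builds the same (N, 6) matrix in row-major
            # order without the Python loops above.)
            # ys, xs = np.mgrid[0:frameROI.shape[0], 0:frameROI.shape[1]]
            # gx, gy = gradx.flatten(), grady.flatten()
            # xs, ys = xs.flatten(), ys.flatten()
            # steepestDescentVectors = np.stack(
            #     [gx * xs, gy * xs, gx * ys, gy * ys, gx, gy], axis=1)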
# Step 6
# Compute frame ROI Hessian matrix
H = np.matmul(steepestDescentVectors.T, steepestDescentVectors)
# Steps 7 and 8
# Compute delta p (change in affine parameters)
            # Solve H @ deltaP = J^T @ error; more stable than explicitly inverting H
            deltaP = np.linalg.solve(H, np.matmul(steepestDescentVectors.T, errorImage.flatten()))
deltaWarpMatrix = np.array([[deltaP[0], deltaP[2], deltaP[4]],
[deltaP[1], deltaP[3], deltaP[5]]])
# Step 9
# Update warpMatrix
self.warpMatrix += deltaWarpMatrix
# print(self.warpMatrix)
# Compute the norm of the delta p
deltaPNorm = np.linalg.norm(deltaP)
# print(deltaPNorm)
iteration += 1
return self.warpMatrix
    '''
    @brief Runs one tracking update on a frame and draws the tracked bounding box
    @param frame BGR image or video frame to track the template in
    @return newFrame The frame with the tracked bounding box drawn on it
    '''
def detectObject(self, frame):
# Convert frame to grayscale
grayFrame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
# Get updated affine transform matrix
a = self.updateWarpParams(grayFrame)
# Convert to augmented form and take the inverse
a = np.vstack((a, np.array([0, 0, 1])))
aInv = np.linalg.inv(a)
# Convert template corners to augmented form and inverse warp it to the frame coordinates
templateCorners = np.hstack((self.templateBox, np.ones((4, 1)))).T
boundingRect = np.matmul(aInv, templateCorners)
# Draw resulting bounding box in the frame
rectMinX = int(min(boundingRect[0, :]))
rectMinY = int(min(boundingRect[1, :]))
rectMaxX = int(max(boundingRect[0, :]))
rectMaxY = int(max(boundingRect[1, :]))
newFrame = cv2.rectangle(frame, (rectMinX, rectMinY), (rectMaxX, rectMaxY), color=(0, 255, 0), thickness=2)
return newFrame
    '''
    @brief Runs the tracker over a video file, previewing and optionally saving the result
    @param videoFile Path to the input video file
    @param saveVideo If True, writes the annotated 720p output to BuoyDetection.mp4
    @return None
    '''
def runApplication(self, videoFile, saveVideo=False):
# Create video stream object
videoCapture = cv2.VideoCapture(videoFile)
# Define video codec and output file if video needs to be saved
        if saveVideo:
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
# 720p 30fps video
out = cv2.VideoWriter('BuoyDetection.mp4', fourcc, 30, (1280, 720))
# Continue to process frames if the video stream object is open
while(videoCapture.isOpened()):
ret, frame = videoCapture.read()
# Continue processing if a valid frame is received
            if ret:
newFrame = self.detectObject(frame)
# Save video if desired, resizing frame to 720p
                if saveVideo:
out.write(cv2.resize(newFrame, (1280, 720)))
# Display frame to the screen in a video preview
cv2.imshow("Frame", cv2.resize(newFrame, (1280, 720)))
# Exit if the user presses 'q'
if cv2.waitKey(1) & 0xFF == ord('q'):
break
# If the end of the video is reached, wait for final user keypress and exit
else:
cv2.waitKey(0)
break
# Release video and file object handles
videoCapture.release()
        if saveVideo:
out.release()
print('Video and file handles closed')
if __name__ == '__main__':
templateFile = 'datasets/boltTemplate.jpg'
videoFile = 'datasets/bolt.mp4'
saveVideo = False
    # Note: Template box coordinates start from the top left and proceed clockwise
# Bolt template box
templateBox = np.array([[265, 80],
[306, 80],
[306, 143],
[265, 143]])
'''
# Car template box
templateBox = np.array([[64, 51],
[168, 51],
[168, 139],
[64, 139]])'''
'''
# Dragon Baby template box
templateBox = np.array([[177, 53],
[243, 53],
[243, 125],
[177, 125]])'''
# Load template and image
template = cv2.cvtColor(cv2.imread(templateFile), cv2.COLOR_BGR2GRAY)
lkTracker = LKTracker(template, templateBox)
lkTracker.runApplication(videoFile, saveVideo)