Я пытаюсь извлекать текст в реальном времени (с камеры) с помощью Python + OpenCV + Tesseract, но при извлечении текста из живого кадра и его отображении на экране возникает большая задержка. Есть ли способ ускорить распознавание?
Я пытаюсь достичь той же скорости, что и в этих видео:
https://www.youtube.com/watch?v=NfiCmhLLxMA
https://www.youtube.com/watch?v=vtSGSXKggEo
из future import print_function
import pyzbar.pyzbar as pyzbar
import numpy as np
import cv2
from PIL import Image
import os
import pytesseract as pytesseract
pytesseract.pytesseract.tesseract_cmd=r'C:\Users\minaa\AppData\Local\Tesseract-OCR\tesseract.exe'
import argparse
from pathos.multiprocessing import ProcessingPool
from time import time
filename='video.avi'
fram_per_second=30.0
res='720'
def change_res(cap, width, height):
    """Request a new frame size from the capture device *cap*.

    Property ids 3 and 4 are cv2.CAP_PROP_FRAME_WIDTH and
    cv2.CAP_PROP_FRAME_HEIGHT respectively (numeric ids kept so the
    function has no hard cv2 dependency of its own).
    """
    for prop_id, value in ((3, width), (4, height)):
        cap.set(prop_id, value)
# Standard Video Dimensions Sizes
# Map of resolution label -> (width, height) in pixels; looked up by
# get_dims() below.
STD_DIMENSIONS = {
    "480p": (640, 480),
    "720p": (1280, 720),
    "1080p": (1920, 1080),
    "4k": (3840, 2160),
}
# grab resolution dimensions and set video capture to it.
def get_dims(cap, res='1080p'):
    """Resolve the label *res* to (width, height), apply it to *cap*, and
    return the pair.

    Unknown labels fall back to 480p.
    """
    width, height = STD_DIMENSIONS.get(res, STD_DIMENSIONS["480p"])
    # Reconfigure the capture device to the chosen resolution.
    change_res(cap, width, height)
    return width, height
# Map of container extension (without the dot) -> FourCC codec id for
# cv2.VideoWriter.
VIDEO_TYPE = {
    'avi': cv2.VideoWriter_fourcc(*'XVID'),
    #'mp4': cv2.VideoWriter_fourcc(*'H264'),
    'mp4': cv2.VideoWriter_fourcc(*'XVID'),
}
def get_video_type(filename):
    """Return the FourCC codec for *filename*'s extension, defaulting to
    the 'avi' (XVID) entry for unknown extensions.

    Bug fix: os.path.splitext returns the extension WITH the leading dot
    ('.avi'), while VIDEO_TYPE is keyed without it, so the original
    lookup never matched and everything silently fell back to 'avi'.
    The dot is now stripped (and the extension lowercased) first.
    """
    _, ext = os.path.splitext(filename)
    ext = ext.lstrip('.').lower()
    return VIDEO_TYPE.get(ext, VIDEO_TYPE['avi'])
# DroidCam / IP-camera stream URL -- defined but not used in the main loop
# below (the loop opens device 0); presumably an alternative source -- TODO confirm.
url = 'http://192.168.0.5:4747/video'
# Font used by display() and detecttext() for on-frame labels.
font=cv2.FONT_HERSHEY_PLAIN
def decode(im):
    """Find barcodes and QR codes in image *im* and return the pyzbar
    result list.

    Each detected symbol's type and payload is printed for debugging.
    (The original also assigned an unused local ``data``; removed.)
    """
    decoded_objects = pyzbar.decode(im)
    for obj in decoded_objects:
        print('Type : ', obj.type)
        print('Data : ', obj.data, '\n')
    return decoded_objects
# Display barcode and QR code location
def display(im, decodedObjects):
    """Draw each decoded symbol's outline and payload text onto *im* and
    return the annotated image.

    Bug fix: the original nested a second ``for obj in decodedObjects``
    loop inside the per-object loop, which re-printed every symbol n
    times and labelled every outline with the data of the *last* symbol
    (and raised NameError for an empty inner loop). Each object is now
    drawn with its own data, exactly once.
    """
    for decoded in decodedObjects:
        points = decoded.polygon
        # If the corner points do not form a quad (skewed symbol), fall
        # back to the convex hull of the points.
        if len(points) > 4:
            hull = cv2.convexHull(
                np.array([point for point in points], dtype=np.float32))
            hull = list(map(tuple, np.squeeze(hull)))
        else:
            hull = points
        # Draw the hull as a closed polyline.
        n = len(hull)
        for j in range(n):
            cv2.line(im, hull[j], hull[(j + 1) % n], (255, 0, 0), 3)
        # Label the frame with this symbol's own payload.
        cv2.putText(im, str(decoded.data), (50, 50), font, 3, (255, 0, 0), 10)
    return im
def detecttext(im):
    """Run Tesseract OCR on *im*, draw per-character boxes and the
    recognised text onto it, and return the annotated image.

    Bug fixes relative to the original:
      * it resized the *global* ``frame2`` instead of ``im`` and used the
        undefined names ``frame_grey`` and ``threshold_adaptive``
        (NameError as soon as those lines ran);
      * every preprocessing result was stored in ``img`` while OCR was run
        on the untouched ``im``, so the preprocessing had no effect;
      * ``cv2.threshold`` returns ``(retval, image)`` -- the original
        assigned the whole tuple.
    The cleaned pipeline actually feeds the preprocessed image to
    Tesseract.
    """
    # Grayscale + light blur to suppress noise before binarisation.
    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    # Otsu picks the threshold automatically -- robust to camera lighting.
    _, binary = cv2.threshold(gray, 0, 255,
                              cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    h, w = binary.shape
    # One Tesseract call for the character boxes...
    boxes = pytesseract.image_to_boxes(binary)
    for b in boxes.splitlines():
        b = b.split(' ')
        # Tesseract's box origin is bottom-left; OpenCV's is top-left,
        # hence the h - y flips.
        im = cv2.rectangle(im, (int(b[1]), h - int(b[2])),
                           (int(b[3]), h - int(b[4])), (0, 255, 0), 2)
    # ...and one for the text itself. These two calls dominate per-frame
    # latency; to speed up live detection, run them on a cropped/downscaled
    # image or only every N-th frame.
    text = pytesseract.image_to_string(binary, lang='eng')
    cv2.putText(im, text, (50, 50), font, 3, (0, 255, 0), 2)
    print(text)
    return im
# Main
if __name__ == '__main__':
    test_vid = cv2.VideoCapture(0)
    # Writer configured from the dims actually applied to the capture.
    out = cv2.VideoWriter(filename, get_video_type(filename), 25,
                          get_dims(test_vid, res))
    test_vid.set(cv2.CAP_PROP_FRAME_WIDTH, 720)
    test_vid.set(cv2.CAP_PROP_FRAME_HEIGHT, 640)
    test_vid.set(cv2.CAP_PROP_FPS, 30)
    fps = test_vid.get(cv2.CAP_PROP_FPS)
    # Bug fix: the original printed the literal string "fps"
    # ('"fps".format(fps)' has no placeholder and ignores its argument).
    print("fps: {}".format(fps))

    while test_vid.isOpened():
        ret, frame2 = test_vid.read()
        # Bug fix: the original never checked ret, so a dropped frame
        # crashed cv2.cvtColor with a None image.
        if not ret:
            break
        gray = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)

        # Barcode/QR pass, then OCR pass, on the live frame.
        decodedObjects = decode(frame2)
        mage = display(frame2, decodedObjects)
        cv2.imshow('frame555555555', mage)
        cv2.imshow('Results', gray)
        cv2.imshow('TextDetect', detecttext(frame2))

        # Bug fix: waitKey's argument is the delay in milliseconds, not a
        # key code -- waitKey(27) merely waited 27 ms. Use a short delay
        # and mask to 8 bits so the 'q' comparison works on all platforms.
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break

    # Release capture/writer and close windows so the output file is
    # finalised (the original leaked all three).
    test_vid.release()
    out.release()
    cv2.destroyAllWindows()