pytesseract detects very slowly in real time and has a large on-screen lag when using python

I am trying to extract text from a live camera feed using python + opencv + tesseract, but there is a big delay when the text is extracted from the live frame and drawn on the screen. Is there a way to speed up the detection?
I am trying to reach the same speed as in these videos:
https://www.youtube.com/watch?v=NfiCmhLLxMA

https://www.youtube.com/watch?v=vtSGSXKggEo
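Here is a minimal sketch of one direction I am considering (my own placeholder code, not taken from the videos above): run pytesseract in a background thread on the most recent frame, so the display loop never blocks on the OCR call. The names `ocr_worker`, `latest_frame` and `latest_text` are made up for this example:

    import threading
    import time

    import cv2
    import pytesseract
    # On Windows the Tesseract path may still need to be set, as in my code below:
    # pytesseract.pytesseract.tesseract_cmd = r'C:\...\tesseract.exe'

    latest_frame = None   # most recent camera frame, shared with the worker
    latest_text = ""      # last OCR result produced by the worker
    lock = threading.Lock()
    running = True

    def ocr_worker():
        """Run Tesseract on the newest frame at its own (slow) pace."""
        global latest_text
        while running:
            with lock:
                frame = None if latest_frame is None else latest_frame.copy()
            if frame is None:
                time.sleep(0.01)
                continue
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            latest_text = pytesseract.image_to_string(gray, lang='eng')

    threading.Thread(target=ocr_worker, daemon=True).start()

    cap = cv2.VideoCapture(0)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        with lock:
            latest_frame = frame
        # The display loop only draws the last known text; it never waits for OCR
        cv2.putText(frame, latest_text.strip().replace('\n', ' '), (50, 50),
                    cv2.FONT_HERSHEY_PLAIN, 2, (0, 255, 0), 2)
        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    running = False
    cap.release()
    cv2.destroyAllWindows()

The display window keeps updating at camera speed while Tesseract processes whichever frame is newest, at its own pace.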

    from __future__ import print_function

    import pyzbar.pyzbar as pyzbar
    import numpy as np
    import cv2
    from PIL import Image
    import os
    import pytesseract
    pytesseract.pytesseract.tesseract_cmd=r'C:\Users\minaa\AppData\Local\Tesseract-OCR\tesseract.exe'
    import argparse
    from pathos.multiprocessing import ProcessingPool
    from time import time 

filename='video.avi'
frames_per_second = 30.0
res = '720p'  # must be a key of STD_DIMENSIONS, otherwise get_dims() falls back to 480p

def change_res(cap, width, height):
    cap.set(3, width)
    cap.set(4, height)

# Standard Video Dimensions Sizes
STD_DIMENSIONS =  {
    "480p": (640, 480),
    "720p": (1280, 720),
    "1080p": (1920, 1080),
    "4k": (3840, 2160),
}
# grab resolution dimensions and set video capture to it.
def get_dims(cap, res='1080p'):
    width, height = STD_DIMENSIONS["480p"]
    if res in STD_DIMENSIONS:
        width,height = STD_DIMENSIONS[res]
    ## change the current capture device
    ## to the resulting resolution
    change_res(cap, width, height)
    return width, height
VIDEO_TYPE = {
    'avi': cv2.VideoWriter_fourcc(*'XVID'),
    #'mp4': cv2.VideoWriter_fourcc(*'H264'),
    'mp4': cv2.VideoWriter_fourcc(*'XVID'),
}

def get_video_type(filename):
    filename, ext = os.path.splitext(filename)
    if ext in VIDEO_TYPE:
      return  VIDEO_TYPE[ext]
    return VIDEO_TYPE['avi']


url = 'http://192.168.0.5:4747/video'
font=cv2.FONT_HERSHEY_PLAIN
def decode(im) :
      # Find barcodes and QR codes
  decodedObjects = pyzbar.decode(im)

  # Print results
  for obj in decodedObjects:
    print('Type : ', obj.type)
    print('Data : ', obj.data,'\n')
    data=obj.data
  return decodedObjects


# Display barcode and QR code location  
def display(im, decodedObjects):


  # Loop over all decoded objects
  for decodedObject in decodedObjects:
    points = decodedObject.polygon

    # If the points do not form a quad, find the convex hull
    if len(points) > 4:
      hull = cv2.convexHull(np.array([point for point in points], dtype=np.float32))
      hull = list(map(tuple, np.squeeze(hull)))
    else:
      hull = points

    # Number of points in the convex hull
    n = len(hull)

    data = decodedObject.data
    print('Type : ', decodedObject.type)
    print('Data : ', data, '\n')

    # Draw the convex hull (points must be integer pixel coordinates)
    for j in range(0, n):
      p1 = (int(hull[j][0]), int(hull[j][1]))
      p2 = (int(hull[(j + 1) % n][0]), int(hull[(j + 1) % n][1]))
      cv2.line(im, p1, p2, (255, 0, 0), 3)
    # Draw the decoded data once per object instead of once per edge
    cv2.putText(im, str(data), (50, 50), font, 3, (255, 0, 0), 10)


  # Display results 
  return im
  #cv2.imshow("Results", im);
  #cv2.waitKey(0);
def detecttext(im):

    # Upscale and convert to grayscale
    img = cv2.resize(im, None, fx=1.5, fy=1.5, interpolation=cv2.INTER_CUBIC)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Apply dilation and erosion to remove some noise
    kernel = np.ones((1, 1), np.uint8)
    img = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)
    img = cv2.dilate(img, kernel, iterations=1)
    img = cv2.erode(img, kernel, iterations=1)
    # Apply blur to smooth out the edges
    img = cv2.GaussianBlur(img, (5, 5), 0)
    # Apply Otsu threshold to get an image with only b&w (binarization)
    img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    h, w = img.shape
    # Run Tesseract once per frame; calling image_to_string inside the
    # per-box loop below is what makes the original version so slow
    text = pytesseract.image_to_string(img, lang='eng')
    boxes = pytesseract.image_to_boxes(img)
    # Draw the character boxes on a colour copy so they remain visible
    vis = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    for b in boxes.splitlines():
      b = b.split(' ')
      vis = cv2.rectangle(vis, (int(b[1]), h - int(b[2])), (int(b[3]), h - int(b[4])), (0, 255, 0), 2)
    cv2.putText(vis, text, (50, 50), font, 3, (0, 255, 0), 2)
    print(text)
    return vis


# Main 
if __name__ == '__main__':



  # Read image
  im = cv2.imread('zbar-test.png')

  test_vid = cv2.VideoCapture(0)
  out = cv2.VideoWriter(filename, get_video_type(filename), 25, get_dims(test_vid, res))

  # get_dims() above already set the capture resolution to match the writer
  test_vid.set(cv2.CAP_PROP_FPS,30)

  fps = test_vid.get(cv2.CAP_PROP_FPS)
  print("fps: {}".format(fps))
  index = 0

  while test_vid.isOpened():

      ret, frame2 = test_vid.read()
      if not ret:
          break
      gray = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
      # do frame processing...
      decodedObjects = decode(frame2)
      image = display(frame2, decodedObjects)
      #  print(text)
      cv2.imshow('frame555555555', image)
      #name2 = './image_frames/frame' + str(index) + '.png'
      #print ('Extracting frames...' + name)
      #cv2.imshow('frame', frame)
      #cv2.imwrite(name2, image)
      #text = pytesseract.image_to_string(gray, lang = 'eng')
      #cv2.putText(im, text, (40, 40), font, 3, (0, 255, 0), 10)
      #print(text)

      cv2.imshow('Results', gray)
      TextDetect = detecttext(frame2)
      cv2.imshow('TextDetect', TextDetect)
      #out.write(image)
      #demo = Image.open(name2)
      #text = pytesseract.image_to_string(demo, lang = 'eng')
      #print(text)

    # calculate wait time based on the camera fps
    # (as an extension, measure and subtract the image processing time)
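      # For example (hypothetical variables, not measured in this code):
      #   delay_ms = max(1, int(1000 / fps) - processing_time_ms)
      #   key = cv2.waitKey(delay_ms)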
      key = cv2.waitKey(27)
      if key == ord('q'):
          break


  # Release the capture and the writer and close all windows when finished
  test_vid.release()
  out.release()
  cv2.destroyAllWindows()

  #decodedObjects = decode(im)
  #display(im, decodedObjects)
...