Как исправить порядок сортировки в моей таблице контуров? - PullRequest
0 голосов
/ 19 июня 2019

Всякий раз, когда я пытаюсь установить иерархию с заголовком столбца в качестве родителя и нижележащими ячейками в качестве его дочерних элементов, порядок прерывается на полпути из-за нарушения порядка контуров.

Я пытаюсь выполнить OCR на PDF, содержащем отсканированные изображения таблицы. Я сделал всю необходимую предварительную обработку, обнаружил требуемые контуры и отсортировал их «слева направо».

def sort_contours(cnts, method="left-to-right"):
    """Sort contours by the position of their bounding rectangles.

    Args:
        cnts: Sequence of contours accepted by ``cv2.boundingRect``.
        method: One of ``"left-to-right"``, ``"right-to-left"``,
            ``"top-to-bottom"`` or ``"bottom-to-top"``. Any other value
            is treated like ``"left-to-right"``.

    Returns:
        A pair ``(cnts, boundingBoxes)`` of equal-length tuples: the
        contours in sorted order and the bounding rectangle of each one.
        Both tuples are empty when ``cnts`` is empty.
    """
    # Vertical methods compare the y-coordinate (index 1) of the bounding
    # rectangle; horizontal methods compare the x-coordinate (index 0).
    axis = 1 if method in ("top-to-bottom", "bottom-to-top") else 0
    reverse = method in ("right-to-left", "bottom-to-top")

    boundingBoxes = [cv2.boundingRect(c) for c in cnts]

    # zip(*[]) produces nothing, so the two-name unpacking below would
    # raise ValueError on empty input; return empty results instead.
    if not boundingBoxes:
        return ((), ())

    # Sort (contour, box) pairs together so both outputs stay aligned.
    (cnts, boundingBoxes) = zip(*sorted(zip(cnts, boundingBoxes),
        key=lambda pair: pair[1][axis], reverse=reverse))

    return (cnts, boundingBoxes)

def get_cell_contours(img, i):
    """
    Isolate the vertical and horizontal lines of ``img`` and find the
    contours of the resulting line image.

    Args:
        img: Input image. It is passed to ``cv2.erode`` and the combined
            result is converted with ``COLOR_GRAY2BGR``, so it is
            presumably a single-channel (binary) image -- TODO confirm.
        i: Divisor for the kernel length; the line kernels are
            ``image height // i`` pixels long.

    Returns:
        ``(im2, contours, hierarchy)``: the line image converted to BGR
        with every contour drawn on it, the contours sorted with
        ``sort_contours(..., method="right-to-left")``, and the hierarchy
        exactly as ``cv2.findContours`` returned it.

    NOTE(review): ``hierarchy`` is produced before the contours are
    re-sorted, so its entries presumably still refer to the unsorted
    contour order -- verify before indexing the sorted contours with it.
    """
    # Length of the line-detecting kernels, derived from the image height.
    line_span = np.array(img).shape[0] // i

    # 1 x line_span kernel for vertical lines, line_span x 1 for horizontal.
    column_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, line_span))
    row_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (line_span, 1))

    # 3 x 3 rectangular kernel; it is built here but not used further down.
    square_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))

    # Erode then dilate with the same kernel, three iterations each.
    vertical_lines_img = cv2.dilate(
        cv2.erode(img, column_kernel, iterations=3),
        column_kernel,
        iterations=3,
    )
    horizontal_lines_img = cv2.dilate(
        cv2.erode(img, row_kernel, iterations=3),
        row_kernel,
        iterations=3,
    )

    # Combine both line images with equal weights (1 and 1) and no offset.
    grid = cv2.addWeighted(vertical_lines_img, 1, horizontal_lines_img, 1, 0.0)

    # Contours are searched on the single-channel grid, then drawn on a
    # BGR copy of it.
    contours, hierarchy = cv2.findContours(
        grid, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    grid_bgr = cv2.cvtColor(grid, cv2.COLOR_GRAY2BGR)
    im2 = cv2.drawContours(grid_bgr, contours, -1, (128, 255, 0), 3)

    # Only the sorted contours are kept; the bounding boxes are discarded.
    contours, _boxes = sort_contours(contours, method="right-to-left")

    return im2, contours, hierarchy

def get_contour_tree(img, contours):
    """Group contours into columns by comparing perimeters to a running mean.

    The contours are visited in the order given. For each one, its closed
    perimeter (``cv2.arcLength``) is compared with ``mean_length``:

    * more than 1.5 x the mean: the contour is prepended to the current
      column, the column is appended to the result, and a new empty
      column is started;
    * less than 0.67 x the mean: the mean is reset to this perimeter, the
      counter ``j`` is reset to 1, and the contour is prepended;
    * otherwise: the contour is prepended, ``j`` is incremented and the
      perimeter is folded into the running mean.

    Args:
        img: Image converted with ``COLOR_GRAY2BGR`` for the debug
            drawing; presumably single-channel -- TODO confirm.
        contours: Indexable sequence of contours.

    Returns:
        A list of columns. Each column is a list of ``(j, [contour])``
        tuples, with the most recently visited contour first.

    Side effects: prints the number of columns and a separator per
    column, overwrites ``Cell.jpg`` once per column, and sleeps five
    seconds per column.

    NOTE(review): the mean is seeded from ``contours[1]``, so fewer than
    two contours raises ``IndexError``.
    NOTE(review): contours collected after the last "large" contour stay
    in ``column`` and are never appended to the result -- confirm this
    is intended.
    """
    # Seed the running mean with the perimeter of the second contour.
    mean_length = cv2.arcLength(contours[1], closed = True)
    array = []
    column = []
    j = 0
    # The enumerate index i is not used inside this loop.
    for i, contour in enumerate(contours):
        length = cv2.arcLength(contour, closed = True)
        if length > (1.5 * mean_length):
            # Much larger than the mean: close the current column with
            # this contour at its front and start a new one. j is not
            # reset here.
            column = [(j, [contour])] + column
            array.append(column)
            column = []
            mean_length = length
        elif length < (0.67 * mean_length):
            # Much smaller than the mean: restart the mean and the counter.
            mean_length = length
            j = 1
            column = [(j, [contour])] + column
        else:
            # Comparable to the mean: store with the current j, then
            # update the counter and the running mean.
            column = [(j, [contour])] + column
            j = j + 1
            mean_length = ((mean_length * (j - 1)) + length)/j

    img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    print(len(array))

    # For debugging purposes: draw each column onto img and save it.
    # j, column, i and contour from the loop above are rebound here.
    for j, column in enumerate(array):
            print("........")
            # Here i is the stored counter value and contour is the
            # one-element list [contour] saved in the tuple.
            for i, contour in (array[j]):
                # dup_img = img.copy()
                # img is reassigned each time, so drawings accumulate
                # across columns.
                img = cv2.drawContours(img, contour, -1, (128,255,0), -1)
            # The same file name is written on every iteration.
            cv2.imwrite("Cell.jpg", img)
            time.sleep(5)


    # contour = array[3][-2][1]
    # im2 = cv2.drawContours(img, np.array(contour), -1, (128,255,0), -1)
    # cv2.imwrite("Cell.jpg", im2)

    return array

Я хочу, чтобы числа были в правильном порядке, т. е. после 671 должно идти 672, а не 643.

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...