Question

Я использовал Open CV и Skimage для анализа документов. Я пытаюсь выделить теневую область отдельно.

В настоящее время я могу выделить часть и номер в виде различных кластеров.

Используя felzenszwalb() из skimage, я сегментирую детали:

import matplotlib.pyplot as plt
import numpy as np     
from skimage.segmentation import felzenszwalb
from skimage.io import imread

# Load the drawing that is to be segmented.
img = imread('test.jpg')

# Run Felzenszwalb segmentation; the result is a label image.
segments_fz = felzenszwalb(img, scale=100, sigma=0.2, min_size=50)

# Count the distinct labels and report them.
segment_count = len(np.unique(segments_fz))
print("Felzenszwalb number of segments {}".format(segment_count))

# Display the label image.
plt.imshow(segments_fz)
plt.tight_layout()
plt.show()

Но я не могу их соединить. Буду очень признателен за любую идею, как методично соединить их и пометить каждый сегмент номером соответствующей детали. Заранее спасибо за ваше время — если я что-то упустил, переоценил или недооценил какой-то момент, дайте мне знать в комментариях.

Richard · Answer 1 · 08 августа 2018

Предварительные сведения

Предварительный код:

%matplotlib inline
%load_ext Cython
import numpy as np
import cv2
from matplotlib import pyplot as plt
import skimage as sk
import skimage.morphology as skm
import itertools

def ShowImage(title,img,ctype):
    """Display `img` in a 20x20-inch matplotlib figure with the axes hidden.

    title -- text placed above the image.
    img   -- image array laid out according to `ctype`.
    ctype -- colour layout of `img`: 'bgr', 'hsv', 'gray' or 'rgb'.

    Raises ValueError (a subclass of Exception) when `ctype` is not one of
    the supported layouts. The check happens before any figure is created,
    so a bad `ctype` does not leave an empty figure behind.
    """
    if ctype not in ('bgr', 'hsv', 'gray', 'rgb'):
        raise ValueError("Unknown colour type")

    plt.figure(figsize=(20, 20))
    if ctype=='bgr':
        # matplotlib expects RGB channel order, OpenCV images are BGR
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    elif ctype=='hsv':
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_HSV2RGB))
    elif ctype=='gray':
        plt.imshow(img, cmap='gray')
    else:  # 'rgb' needs no conversion
        plt.imshow(img)
    plt.axis('off')
    plt.title(title)
    plt.show()

Для справки вот ваше оригинальное изображение:

#Read in image
img = cv2.imread('part.jpg')
#cv2.imread reports a missing or unreadable file by returning None rather
#than raising, so fail here instead of deep inside a later OpenCV call
if img is None:
    raise FileNotFoundError("Could not read image 'part.jpg'")
ShowImage('Original',img,'bgr')

Идентификационные номера

Чтобы упростить задачу, мы хотим классифицировать пиксели как включённые или выключенные. Это можно сделать с помощью пороговой обработки. Поскольку наше изображение содержит два чётких класса пикселей (чёрные и белые), мы можем использовать метод Оцу. Мы инвертируем цвета, так как используемые нами библиотеки считают чёрные пиксели неинтересными (фоном), а белые — интересными.

#Reduce the BGR image to a single grey channel
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

#Inverted binary threshold with the cut-off chosen by Otsu's method
otsu_inverted = cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU
ret, thresh = cv2.threshold(gray, 0, 255, otsu_inverted)

ShowImage(title='Applying Otsu', img=thresh, ctype='gray')

#List the distinct pixel values left after thresholding
np.unique(thresh)

Наша стратегия будет заключаться в том, чтобы найти номера и затем следовать линиям рядом с ними до частей, а затем маркировать эти части. Поскольку для удобства все арабские цифры формируются из смежных пикселей, мы можем начать с поиска связанных компонентов.

#Label the connected regions of the thresholded image
ret, components = cv2.connectedComponents(thresh)
#Show the label matrix; every label is drawn with its own shade
ShowImage(title='Connected Components', img=components, ctype='rgb')

Затем мы можем отфильтровать связные компоненты по размеру, чтобы найти цифры. Обратите внимание, что это не очень надёжный способ. Лучшим вариантом было бы использовать распознавание символов, но это оставлено читателю в качестве упражнения :-)

class Box:
    """Axis-aligned rectangle described by the ranges x0..x1 and y0..y1.

    A box whose `x0` is None has been absorbed by another box through
    `merge` and must be treated as invalid (see `good`).
    """

    def __init__(self, x0, x1, y0, y1):
        self.x0 = x0
        self.x1 = x1
        self.y0 = y0
        self.y1 = y1

    def overlaps(self, box2, tol):
        """Return True if `box2` intersects this box grown by `tol` per side.

        Invalid boxes (x0 is None) never overlap anything.
        """
        if self.x0 is None or box2.x0 is None:
            return False
        # The boxes miss each other when they are separated along either axis
        apart_in_x = self.x1 + tol <= box2.x0 or self.x0 - tol >= box2.x1
        apart_in_y = self.y1 + tol <= box2.y0 or self.y0 - tol >= box2.y1
        return not (apart_in_x or apart_in_y)

    def merge(self, box2):
        """Grow this box to also cover `box2`, then invalidate `box2`."""
        self.x0, self.x1 = min(self.x0, box2.x0), max(self.x1, box2.x1)
        self.y0, self.y1 = min(self.y0, box2.y0), max(self.y1, box2.y1)
        # x0 = None marks `box2` as consumed so it can be filtered out later
        box2.x0 = None

    def dist(self, x, y):
        """Return the Euclidean distance from (x, y) to the box centre."""
        centre_x = (self.x0 + self.x1) / 2
        centre_y = (self.y0 + self.y1) / 2
        return np.sqrt((centre_x - x) ** 2 + (centre_y - y) ** 2)

    def good(self):
        """Return True while this box has not been merged into another."""
        return self.x0 is not None

def ExtractComponent(original_image, component_matrix, component_number):
    """Crop `original_image` to the bounding box of one labelled component.

    `component_matrix` is a 2-D label matrix; the bounding box encloses every
    cell equal to `component_number`.

    Returns (x0, x1, y0, y1, crop) where x1 and y1 are exclusive upper bounds
    and `crop` is `original_image[y0:y1, x0:x1]`.
    """
    #Row and column indices of every cell that carries the requested label
    rows, cols = np.nonzero(component_matrix == component_number)

    #Tight bounds; +1 because slice ends are exclusive
    y0 = rows.min()
    y1 = rows.max() + 1
    x0 = cols.min()
    x1 = cols.max() + 1

    crop = original_image[y0:y1, x0:x1]
    return x0, x1, y0, y1, crop

numbers_img = thresh.copy() #This is used purely to show that we can identify numbers
numbers = []
#Labels run from 0 to components.max() inclusive. Label 0 is the background,
#so start at 1, and add 1 to the upper bound so the last component is not skipped.
for component in range(1, components.max()+1):
    tx0,tx1,ty0,ty1,this_component = ExtractComponent(thresh, components, component)
    #ShowImage('Component #{0}'.format(component), this_component, 'gray')
    cheight, cwidth = this_component.shape
    #print(cwidth,cheight) #Enable this to see dimensions
    #Identify digits by bounding-box size in pixels: about 24 high and either
    #about 14 or about 7 wide (tolerance of less than 3 pixels each way)
    if (abs(cwidth-14)<3 or abs(cwidth-7)<3) and abs(cheight-24)<3:
        numbers_img[ty0:ty1,tx0:tx1] = 128   #Paint the digit's box grey for display
        numbers.append(Box(tx0,tx1,ty0,ty1))
ShowImage('Numbers', numbers_img, 'gray')

Теперь мы соединяем числа в смежные блоки, слегка расширяя их ограничительные рамки и ища перекрытия.

#Brute-force pairwise merging. Not clever, but fine for small quantities (hundreds).
while True:                                        #Repeat whole passes until one changes nothing
    merged = False                                 #Nothing merged yet in this pass
    for a, b in itertools.combinations(numbers, 2):
        if not a.overlaps(b, 10):                  #Boxes further than 10px apart stay separate
            continue
        a.merge(b)                                 #`a` swallows `b`; `b` becomes invalid
        merged = True
    if not merged:
        break
#Drop the boxes that were absorbed into another box
numbers = [box for box in numbers if box.good()]

#Visualisation copy: merged number boxes are painted grey on it
numbers_img = thresh.copy()
for n in numbers:
    box_rows = slice(n.y0, n.y1)
    box_cols = slice(n.x0, n.x1)
    numbers_img[box_rows, box_cols] = 128
    thresh[box_rows, box_cols] = 0  #Erase the numbers from the thresholded image itself
ShowImage(title='Numbers', img=numbers_img, ctype='gray')

Хорошо, теперь мы определили номера! Мы будем использовать их позже для идентификации деталей.

Идентификационные стрелки

Далее мы хотим выяснить, на какие части указывают числа. Для этого мы хотим обнаружить линии. Преобразование Хафа хорошо для этого. Чтобы уменьшить количество ложных срабатываний, мы скелетизируем данные, которые преобразуют их в представление шириной не более одного пикселя.

#Skeletonize the non-zero pixels and convert the result back to an 8-bit image
foreground = thresh > 0
skel = sk.img_as_ubyte(skm.skeletonize(foreground))
ShowImage(title='Skeleton', img=skel, ctype='gray')

Теперь мы выполняем преобразование Хафа. Нам нужен такой набор параметров, при котором обнаруживаются все линии, идущие от номеров к деталям. Чтобы добиться этого, придётся немного поэкспериментировать с параметрами.

lines = cv2.HoughLinesP(
    skel,
    1,                 #Resolution of r in pixels
    np.pi / 180,       #Resolution of theta in radians
    30,                #Minimum number of intersections to detect a line
    minLineLength=80,  #Min line length
    maxLineGap=10,     #Max line gap
)
#HoughLinesP returns None (not an empty array) when it finds nothing.
#Otherwise each entry wraps one (x1, y1, x2, y2) segment in an extra axis.
lines = [] if lines is None else [x[0] for x in lines]

#Draw every detected segment in a random colour, for inspection only
line_img = thresh.copy()
line_img = cv2.cvtColor(line_img, cv2.COLOR_GRAY2BGR)
for l in lines:
    #randint's upper bound is exclusive, so 256 allows the full 0..255 range
    color = tuple(map(int, np.random.randint(low=0, high=256, size=3)))
    #Plain Python ints are accepted as point coordinates by every OpenCV build
    x_start, y_start, x_end, y_end = (int(v) for v in l)
    cv2.line(line_img, (x_start, y_start), (x_end, y_end), color, 3, cv2.LINE_AA)
ShowImage('Lines', line_img, 'bgr')

Теперь мы хотим найти линию или линии, ближайшие к каждому номеру, и сохранить только их. По сути, мы отфильтровываем все линии, которые не являются стрелками. Для этого мы сравниваем конечные точки каждой линии с центральной точкой каждого прямоугольника с номером.

  comp_labels = np.zeros(img.shape[0:2], dtype=np.uint8)

for n_idx,n in enumerate(numbers):
    distvals = []
    for i,l in enumerate(lines):
        #Distances from each point of line to midpoint of rectangle
        dists    = [n.dist(l[0],l[1]),n.dist(l[2],l[3])] 
        #Minimum distance and the end point (0 or 1) of the line associated with that point
        #Tuples of (Line Number, Line Point, Dist to Line Point) are produced
        distvals.append( (i,np.argmin(dists),np.min(dists)) )
    #Sort by distance between the number box and the line
    distvals = sorted(distvals, key=lambda x: x[2])
    #Include nearby lines, not just the closest one. This accounts for forking.
    distvals = [x for x in distvals if x[2]<1.5*distvals[0][2]]

    #Draw a white rectangle where the number box was
    cv2.rectangle(comp_labels, (n.x0,n.y0), (n.x1,n.y1), 1, cv2.FILLED)

    #Draw white lines where the arrows are
    for dv in distvals:
        l = lines[dv[0]]
        lp = (l[0],l[1]) if dv[1]==0 else (l[2],l[3])
        cv2.line(comp_labels, (l[0], l[1]), (l[2], l[3]), 1, 3, cv2.LINE_AA)
        cv2.line(comp_labels, (lp[0], lp[1]), ((n.x0+n.x1)//2, (n.y0+n.y1)//2), 1, 3, cv2.LINE_AA)
ShowImage('Lines', comp_labels, 'gray')

Поиск деталей

Эта часть была трудной! Теперь мы хотим сегментировать части изображения. Если бы был какой-то способ отсоединить линии, связывающие части, это было бы легко. К сожалению, линии, соединяющие части, имеют ту же ширину, что и многие линии, составляющие части.

Чтобы обойти это, мы могли бы использовать много логики. Было бы больно и подвержено ошибкам.

В качестве альтернативы, мы могли бы предположить, что у вас есть эксперт в цикле. Единственная работа этого эксперта - разрезать линии, соединяющие части. Это должно быть легко и быстро для них. Маркировка всего будет медленной и грустной для людей, но быстрой для компьютеров. Разделять вещи легко для людей, но трудно для компьютеров. Поэтому мы позволяем обоим делать то, что у них получается лучше всего.

В этом случае вы, вероятно, могли бы обучить кого-то выполнять эту работу за несколько минут, поэтому настоящий «эксперт» на самом деле не нужен. Просто умеренно компетентный человек.

Если вы выберете этот путь, вам нужно написать инструмент для работы эксперта в цикле. Для этого сохраните изображения скелета, попросите эксперта отредактировать их и загрузите отредактированные изображения обратно. Например, так:

#Save the image, or display it on a GUI
#cv2.imwrite("/z/skel.png", skel);
#EXPERT DOES THEIR THING HERE
#Read the expert-mediated image back in
skelhuman = cv2.imread('/z/skel.png')
#cv2.imread returns None instead of raising when the file cannot be read
if skelhuman is None:
    raise FileNotFoundError("Could not read image '/z/skel.png'")
#Convert back to the form we need: single channel, strictly two-valued
skelhuman = cv2.cvtColor(skelhuman,cv2.COLOR_BGR2GRAY)
ret, skelhuman = cv2.threshold(skelhuman,0,255,cv2.THRESH_OTSU)
ShowImage('SkelHuman', skelhuman, 'gray')

Теперь, когда у нас есть отдельные детали, мы уберём как можно больше стрелок. Мы уже извлекли их выше, поэтому сможем добавить их обратно позже, если потребуется.

Чтобы исключить стрелки, мы найдём все линии, которые обрываются, не примыкая к другой линии. То есть мы найдём пиксели, у которых есть только один соседний пиксель. Затем мы удалим такой пиксель и перейдём к его соседу. Повторяя это итеративно, мы устраняем стрелки. Поскольку я не знаю другого термина, я назову это Fuse Transform («преобразование запального шнура»). Поскольку это требует манипулирования отдельными пикселями, что было бы очень медленно в Python, мы напишем преобразование на Cython.

%%cython -a --cplus
import cython

from libcpp.queue cimport queue
import numpy as np
cimport numpy as np

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.nonecheck(False)
@cython.cdivision(True) 
cpdef void FuseTransform(unsigned char [:, :] image):
    """Erase dangling strokes from `image` in place.

    A seed is a non-zero pixel, not on the outermost row or column, with
    exactly one non-zero 8-neighbour. Starting from each seed the stroke is
    "burnt" pixel by pixel: the current pixel is set to 0 and, if exactly one
    non-zero neighbour remains, burning continues from that neighbour. Burning
    stops at the first pixel with zero or several remaining neighbours.
    """
    # All loop variables are typed as C ints, including `n` and `neighbour`.
    # Left undeclared they are Python objects inside the per-pixel loops,
    # which is the overhead this Cython version exists to avoid.
    cdef int c, x, y, nx, ny, width, height, neighbours
    cdef int n, neighbour
    cdef queue[int] q

    # grab the image dimensions
    height = image.shape[0]
    width  = image.shape[1]

    cdef int dx[8]
    cdef int dy[8]

    #Offsets to the 8 neighbouring cells
    dx[:] = [-1,-1,0,1,1,1,0,-1]
    dy[:] = [0,-1,-1,-1,0,1,1,1]

    #Find seed cells: those with only one neighbour.
    #The border is skipped so that neighbour look-ups stay inside the image.
    for y in range(1, height-1):
        for x in range(1, width-1):
            if image[y,x]==0: #Seed cells cannot be blank cells
                continue
            neighbours = 0
            for n in range(0,8):   #Looks at all neighbours
                nx = x+dx[n]
                ny = y+dy[n]
                if image[ny,nx]>0: #This neighbour has a value
                    neighbours += 1
            if neighbours==1:      #Was there only one neighbour?
                q.push(y*width+x)  #If so, this is a seed cell (stored as a flat index)

    #Starting with the seed cells, gobble up the lines
    while not q.empty():
        c = q.front()
        q.pop()
        y = c//width         #Convert flat index into 2D x-y index
        x = c%width
        image[y,x] = 0       #Gobble up this part of the fuse
        neighbour  = -1      #No neighbours yet
        for n in range(0,8): #Look at all neighbours
            nx = x+dx[n]     #Find coordinates of neighbour cells
            ny = y+dy[n]
            #If the neighbour would be off the side of the matrix, ignore it.
            #Bounds checking is disabled above, so this test is the only guard.
            if nx<0 or ny<0 or nx>=width or ny>=height:
                continue
            if image[ny,nx]>0:      #Is the neighbouring cell active?
                if neighbour!=-1:   #If we've already found an active neighbour
                    neighbour=-1    #Then pretend we found no neighbours
                    break           #And stop looking. This is the end of the fuse.
                else:               #Otherwise, make a note of the neighbour's index.
                    neighbour = ny*width+nx
        if neighbour!=-1:           #If there was only one neighbour
            q.push(neighbour)       #Continue burning the fuse

Назад в стандартном Python:

#Run the Fuse Transform on a copy, because it modifies its argument in place
skh_dilated = skelhuman.copy()
FuseTransform(skh_dilated)
ShowImage(title='Fuse Transform', img=skh_dilated, ctype='gray')

Теперь, когда мы удалили все стрелки и линии, соединяющие детали, мы сильно расширяем (дилатируем) оставшиеся пиксели.

#3x3 all-ones structuring element, applied six times to thicken what is left
kernel = np.ones(shape=(3, 3), dtype=np.uint8)
dilated = cv2.dilate(skh_dilated, kernel, iterations=6)
ShowImage(title='Dilation', img=dilated, ctype='gray')

Собираем все вместе

И наложение меток и стрелок, которые мы сегментировали ранее ...

#Thicken the number boxes and arrows with the same kernel
comp_labels_dilated = cv2.dilate(comp_labels, kernel, iterations=5)
#A pixel is set (1) if it belongs to either mask
either_mask = np.logical_or(comp_labels_dilated, dilated)
labels_combined = either_mask.astype(np.uint8)
ShowImage(title='Comp Labels', img=labels_combined, ctype='gray')

Наконец, мы берем объединенные поля с числами, стрелки компонентов и детали и окрашиваем каждый из них красивыми цветами из Color Brewer . Затем мы накладываем это на исходное изображение, чтобы получить желаемую подсветку.

#`ret` is the number of labels including the background label 0
ret, labels = cv2.connectedComponents(labels_combined)
#Start from an all-white canvas so the background (label 0) is white
#even when there are no foreground components at all
colormask = np.full(img.shape, 255, dtype=np.uint8)
#Colors from Color Brewer, written as (R,G,B)
colors = [(228,26,28),(55,126,184),(77,175,74),(152,78,163),(255,127,0),(255,255,51),(166,86,40),(247,129,191),(153,153,153)]
#Foreground labels are 1..ret-1 inclusive. The palette is indexed from its
#first entry and wraps around, so more than nine components cannot raise IndexError.
for l in range(1, ret):
    r, g, b = colors[(l-1) % len(colors)]
    #The image is handled as BGR (it is blended with `img` and shown as 'bgr'),
    #so the RGB palette entry is stored with its channels reversed
    colormask[labels==l] = (b, g, r)
ShowImage('Comp Labels', colormask, 'bgr')
blended = cv2.addWeighted(img,0.7,colormask,0.3,0)
ShowImage('Blended', blended, 'bgr')

Финальное изображение

Итак, подведем итог, мы определили числа, стрелки и части. В некоторых случаях мы смогли их разделить автоматически. В других случаях мы использовали эксперта в цикле. Там, где нам приходилось индивидуально манипулировать пикселями, мы использовали Cython для скорости.

Конечно, опасность такого рода вещей в том, что какое-то другое изображение нарушит (многие) предположения, которые я здесь сделал. Но это риск, который вы берете на себя, когда пытаетесь использовать одно изображение для представления проблемы.

Соедините ближайшие точки в сегменте и маркируйте сегмент

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

1 Ответ

Отборочные

Идентификационные номера

Идентификационные стрелки

Поиск деталей

Собираем все вместе

Финальное изображение

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Соедините ближайшие точки в сегменте и маркируйте сегмент

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

1 Ответ

Отборочные

Идентификационные номера

Идентификационные стрелки

Поиск деталей

Собираем все вместе

Финальное изображение

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Похожие темы