Ниже приведено относительно простое решение. Комментарии в коде объясняют лежащую в его основе идею.
import cv2, numpy as np
# Load the input picture as a single-channel grayscale image.
img = cv2.imread("test.jpg", cv2.IMREAD_GRAYSCALE)
if img is None:
    # cv2.imread reports a missing or undecodable file by returning None
    # instead of raising; fail here with a clear message rather than with
    # an opaque error inside the first morphology call.
    raise OSError('cannot read image "test.jpg"')
# Morphological gradient with a 3x3 elliptical structuring element.
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
grad = cv2.morphologyEx(img, cv2.MORPH_GRADIENT, kernel)
# Binarize the gradient; THRESH_OTSU is set, so the 0.0 threshold argument
# is only a placeholder.
_, bw = cv2.threshold(grad, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
# Morphological closing with a 9x1 rectangle (9 wide, 1 high).
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 1))
connected = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, kernel)
# [-2:] keeps the last two returned values, so the unpacking works whether
# findContours returns two or three values (this differs across OpenCV
# versions). Only the outermost contours are requested (RETR_EXTERNAL).
contours, hierarchy = cv2.findContours(connected.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2:]
# Vertical midpoint of a rectangle whose top edge is at y and whose height is h.
def ymid(y, h):
    """Return y plus half of h, the half being truncated toward zero by int()."""
    half_height = int(h/2)
    return y + half_height
# Assign each contour's bounding rectangle to a text line (ids 0, 1, ...)
# according to its ymid() value, and sum the rectangle widths per line.
ym2l = {}   # ymid value -> line id
l2w = {}    # line id -> summed width of the rectangles on that line
rects = []  # every bounding rectangle as [x, y, w, h]
l = 0       # id that the next newly discovered line will receive
for contour in contours:
    box = cv2.boundingRect(contour)
    rects.append(list(box))
    mid = ymid(box[1], box[3])
    if mid in ym2l:
        # This midline already belongs to a known line: only add the width.
        l2w[ym2l[mid]] += box[2]
        continue
    # New line: claim the midline values mid-2 .. mid+2 for it, leaving any
    # value that an earlier line has already claimed untouched.
    for offset in (-2, -1, 0, 1, 2):
        ym2l.setdefault(mid + offset, l)
    l2w[l] = box[2]
    l += 1
# Merge the rectangles of every "good" line -- a line whose summed width
# exceeds 90% of the widest line's -- into one enclosing box per line.
# l2r maps line id -> [x_min, y_min, x_max, y_max], where the max values
# are x+w and y+h of the rectangles merged so far.
# When l2w is empty (no rectangles were collected) maxw falls back to 0
# instead of letting max() raise ValueError on an empty sequence.
maxw, l2r = (max(l2w.values()) if l2w else 0), {}
for x, y, w, h in rects:
    l = ym2l[ymid(y, h)]
    if l2w[l] <= .9*maxw:
        continue  # line is too narrow compared with the widest one
    if l not in l2r:
        l2r[l] = [x, y, x+w, y+h]
    else:
        # Grow the stored box so that it also encloses this rectangle.
        x1, y1, X1, Y1 = l2r[l]
        l2r[l] = [min(x, x1), min(y, y1), max(x+w, X1), max(y+h, Y1)]
# Outline every merged line box on the image, then show the result and
# wait for a key press.
for box in l2r.values():
    top_left = (box[0], box[1])
    # Stored right/bottom values are x+w and y+h, hence the -1.
    bottom_right = (box[2]-1, box[3]-1)
    cv2.rectangle(img, top_left, bottom_right, (255, 255, 255), 2)
cv2.imshow("img", img)
cv2.waitKey(0)
Вот результат: