Вот мое решение на основе моего комментария выше:
require 'pp'
# Sample input for ColumnAnalyzer: one string per line of column-formatted text.
# NOTE(review): the strings below contain only single spaces, but the analyzer
# detects column edges from runs of two whitespace characters and the sample
# output reports column offsets such as 7, 28, 62 and 87. The original data was
# presumably aligned with wider gaps that were collapsed when this was pasted —
# confirm against the source data before relying on it.
test_array = [" WP(PIL)/7/2013 PUBLIC AND PANCHAYAT MS PEMA BHUTIA MR. S.K. CHETTRI,",
" KABI LUNGCHUK MS PANILA THEENGH ASST. GOVT.",
" CONSTITUENCY, NORTH MS MON MAYA SUBBA ADVOCATE",
" SIKKIM MS TASHI DOMA SHERPA MR. KARMA THINLAY,",
" Vs MR SANGAY GURMEY CENTRAL GOVT.",
" THE SECRETARY, MINISTRY BHUTIA COUNSEL",
" OF SURFACE TRANSPORT MR. JORGAY NAMKA MR THINLAY DORJEE",
" AND ORS. MR. ZANGPO SHERPA, BHUTIA",
" AMICUS CURIAE MS POLLIN RAI, ASST.",
" GOVT. ADVOCATE"]
# Detects the column layout of whitespace-aligned text lines and splits each
# line into cells along that layout.
#
# A column edge is marked by a run of at least two whitespace characters: text
# preceded by two whitespace characters starts a column, text followed by two
# whitespace characters (or by the end of the line) ends one. Lines with fewer
# columns than the widest line are treated as right-aligned, i.e. their
# missing columns are taken to be the leftmost ones.
#
# Text that begins within the first two characters of a line is not recognised
# as a column start, because the start pattern needs two whitespace characters
# in front of it.
class ColumnAnalyzer
  # Two whitespace characters followed by text: the left edge of a column.
  COLUMN_START = /\s\s\S/.freeze
  # Text followed by two whitespace characters: the right edge of a column.
  COLUMN_END = /\S\s\s/.freeze

  # One hash per detected column, ordered left to right; :l and :r are the
  # inclusive character offsets used to slice that column out of a line.
  attr_reader :columns
  # The lines being analyzed. Assigning a new array does not re-run the
  # analysis; call #analyze to refresh #columns.
  attr_accessor :array

  # @param array [Array<String>] the text lines to analyze
  def initialize(array)
    @array = array
    analyze
  end

  # Computes #columns from the current lines.
  #
  # For every line the absolute offsets of all column starts and ends are
  # collected; the left edge of a column is the smallest start offset seen in
  # any line and the right edge is the largest end offset.
  #
  # @return [Array<Hash>] the freshly computed column layout
  def analyze
    # A start offset points at the whitespace character directly before the
    # text, hence the +1 on the match position (the match begins one earlier).
    lefts = @array.map { |line| match_offsets(line, COLUMN_START).map { |pos| pos + 1 } }
    # The end of the line always closes the last column.
    rights = @array.map { |line| match_offsets(line, COLUMN_END) << line.length }
    cols = (lefts + rights).map(&:size).max || 0 # no. of columns; 0 for empty input

    @columns = Array.new(cols) { {} }
    cols.times do |col|
      # Rows without any detected start contribute nothing; if no row has a
      # start for this column, fall back to the beginning of the line.
      @columns[col][:l] = lefts.map { |row| row[col] }.compact.min || 0
      # Shift short rows one column to the right per pass. This must happen
      # after the minimum above so that the padding zeros never win the min.
      [lefts, rights].each do |table|
        table.each { |row| row.unshift(0) if row.size < cols }
      end
    end
    # By now every row of rights is padded to full width; zeros never win max.
    cols.times do |col|
      @columns[col][:r] = rights.map { |row| row[col] }.max
    end
    @columns
  end

  # Cuts every line into cells along #columns.
  #
  # @return [Array<Array<String>>] one array of stripped cell strings per
  #   line; a cell is "" when the line has no text in that column or is too
  #   short to reach it
  def extract
    @array.map do |line|
      # to_s covers slices that start past the end of the line (nil); the
      # non-destructive strip always returns a string, unlike strip!.
      @columns.map { |col| line[col[:l]..col[:r]].to_s.strip }
    end
  end

  private

  # Returns the absolute character offset of each match of pattern in line.
  def match_offsets(line, pattern)
    offsets = []
    line.scan(pattern) { offsets << Regexp.last_match.begin(0) }
    offsets
  end
end
# Analyze the sample lines, then print the detected column layout followed by
# the extracted cells.
analyzer = ColumnAnalyzer.new(test_array)
pp analyzer.columns
cells = analyzer.extract
pp cells
=> [{:l=>7, :r=>21}, {:l=>28, :r=>54}, {:l=>62, :r=>85}, {:l=>87, :r=>113}]
[["WP(PIL)/7/2013",
"PUBLIC AND PANCHAYAT",
"MS PEMA BHUTIA",
"MR. S.K. CHETTRI,"],
["", "KABI LUNGCHUK", "MS PANILA THEENGH", "ASST. GOVT."],
["", "CONSTITUENCY, NORTH", "MS MON MAYA SUBBA", "ADVOCATE"],
["", "SIKKIM", "MS TASHI DOMA SHERP", "MR. KARMA THINLAY,"],
["", "Vs", "MR SANGAY GURMEY", "CENTRAL GOVT."],
["", "THE SECRETARY, MINISTRY", "BHUTIA", "COUNSEL"],
["", "OF SURFACE TRANSPORT", "MR. JORGAY NAMKA", "MR THINLAY DORJEE"],
["", "AND ORS.", "MR. ZANGPO SHERPA,", "BHUTIA"],
["", "", "AMICUS CURIAE", "MS POLLIN RAI, ASST."],
["", "", "", "GOVT. ADVOCATE"]]