Сначала необходимо создать массив хэшей и собрать ключи в качестве заголовков, а затем поместить значения в соответствующие столбцы. Все узлы сводятся в столбцы; корневой ключ и ключи записей при этом игнорируются.
Примерно так:
require 'nokogiri'
require 'set'
# Flatten jobProfile.xml into `records` (one hash per Job_Profile_Data element)
# and collect every field name that was stored into the `keys` set.
file = File.read('jobProfile.xml')
doc = Nokogiri::XML(file)
record = {}    # fields of the record currently being assembled
keys = Set.new # field names seen so far, each kept once
records = []   # completed records

# NOTE(review): this relies on traverse yielding an element after its children,
# so that a record is complete when its Job_Profile_Data element is reached —
# confirm against the Nokogiri documentation.
doc.traverse do |node|
  name = node.name
  next if name.nil? || name == "text" # skip these nodes

  value = node.text.gsub(/\n +/, '')
  next if value.empty? # skip empty nodes

  key = name.gsub(/sd:/, '').to_sym
  if key == :Job_Profile_Data && !record.empty?
    # a new and not empty record: add it to our records collection
    records << record
    record = {}
  elsif key[/Job_Profile|^root$|^document$/]
    # neglect these keys: any name containing Job_Profile, plus root and document
  else
    # in case our value is html instead of text
    record[key] = Nokogiri::HTML.parse(value).text
    # Set#<< leaves the set unchanged when the key is already in it
    keys << key
  end
end
# build our csv: a header row of all keys, then one row per record
File.open('./xmloutput.csv', 'w') do |out|
  # Quote every field and double any embedded quote, so that a value
  # containing `"` or `,` stays inside its own column.
  quote = ->(field) { %Q{"#{field.to_s.gsub('"', '""')}"} }

  out.puts keys.map(&quote).join(',')
  records.each do |row|
    # A key missing from this record reads as nil and is written as an empty
    # field. Joining the fields (instead of appending a comma after each one)
    # keeps every row at the same column count as the header.
    out.puts keys.map { |key| quote.call(row[key]) }.join(',')
  end
end
В результате в нашем CSV-файле получается следующее:
"Job_Code","Effective_Date","Job_Title","Inactive","Include_Job_Code_in_Name","Job_Description","Additional_Job_Description","Work_Shift_Required","Public_Job"
"30000","1900-01-01","Chief Executive Officer","","","","","","",
"30100","1900-01-01","Administrator Job Profile","","","","","","",
"30200","1900-01-01","Facilities & Grounds Maintenance Attendant","0","0","Job Description rich text!","Additional Job Description rich text!","0","1",
"30300","1900-01-01","Sample_Job_Title","0","0","Sample Job Description","Sample Additional Job Description","1","",