Это можно разобрать с помощью рекурсии. Приведённый ниже код использует пустые поля в начале строки, чтобы определить, что нужно разобрать вложенную запись.
Этот код не отображает данные в требуемую схему, так как это следует делать вторым шагом. Я бы предложил шаблон ETL, где этот код был бы частью шага извлечения (Extract). Шаг преобразования (Transform) должен быть отдельной процедурой, которая отображает каждую из этих записей в нужную схему.
Код:
def get_records_from_csv(csv_filename):
    """Yield groups of nested records parsed from a hierarchical CSV file.

    The nesting level of a row is the index of its first non-blank field.
    Every level starts with its own header row; following rows at the same
    level become dicts keyed by that header. A row exactly one level deeper
    is taken as the header of a nested block, which is parsed recursively
    and attached to the preceding record as a list stored under the first
    column name of that nested header. A row indented by more than one
    level is treated as a continuation of the preceding record: its blank
    leading fields are filled in from that record.

    The row that terminates a nested block is discarded and the remembered
    top-level header is reused for the next group, so the input is expected
    to repeat the top-level header row before each top-level group.
    Rows whose fields are all blank are skipped.

    Args:
        csv_filename: Path of the CSV file to read.

    Yields:
        A list of top-level record dicts for each group; a group ends
        right after its first nested block has been parsed.
    """
    # newline='' is what the csv module asks for so that newlines embedded
    # in quoted fields are interpreted by the reader, not by the file layer.
    with open(csv_filename, 'r', newline='') as f:
        csv_file = csv.reader(f)
        # Push-back stack shared by every rows() generator.
        last_line = []

        def rows():
            """Yield pushed-back rows first, then non-blank rows from the file."""
            while True:
                if last_line:
                    yield last_line.pop()
                    continue
                row = next(csv_file, None)
                if row is None:
                    # End the generator with a plain return: a StopIteration
                    # escaping a generator body is a RuntimeError since
                    # Python 3.7 (PEP 479).
                    return
                if any(col.strip() for col in row):
                    yield row

        def row_level(row):
            """Return the index of the first non-blank field, or None."""
            for i, col in enumerate(row):
                if col.strip():
                    return i
            return None

        def get_records(level=0):
            """Parse one block at `level`; return (first header name, records)."""
            header = next(rows(), None)
            if header is None:
                # Input exhausted: report "no records" so the caller stops.
                return None, []
            records = []
            for row in rows():
                this_level = row_level(row)
                if this_level == level:
                    records.append(dict(zip(header[level:], row[level:])))
                elif this_level < level:
                    # A shallower row closes this block (the row is dropped).
                    break
                elif this_level == level + 1:
                    # The row is the header of a nested block: hand it back
                    # so the recursive call reads it as its own header.
                    last_line.append(row)
                    record_type, sub_records = get_records(this_level)
                    if record_type:
                        records[-1].setdefault(
                            record_type, []).extend(sub_records)
                    if not level:
                        # Re-queue the top-level header for the next group,
                        # replacing the one consumed while closing the
                        # nested block.
                        last_line.append(header)
                    break
                else:
                    # Continuation row: copy the blank leading columns from
                    # the previous record at this level.
                    sub_record = [
                        records[-1][k] for k in header[level:this_level]]
                    this_row = sub_record + row[this_level:]
                    records.append(dict(zip(header[level:], this_row)))
            return header[level], records

        while True:
            record = get_records(0)[1]
            if not record:
                break
            yield record
Код теста:
import csv
import json
# Materialise every record group from the sample file, then pretty-print as JSON.
parsed_groups = list(get_records_from_csv('csvfile.csv'))
print(json.dumps(parsed_groups, indent=2))
Результаты:
[
[
{
"Order Ref": "1234",
"Order Status": "PayOfflineConfirmedManual",
"Affiliate": " ",
"Source": " ",
"Agent": "akjsd@ad.com",
"Customer Name": "Mr Kas Iasd",
"Email Address": "asd@asd.com",
"Telephone": "3342926655",
"Mobile": " ",
"Address 1": " ",
"Address 2": " ",
"City": " ",
"County/State": " ",
"Postal Code": " ",
"Country": " ",
"Voucher Code": " ",
"Voucher Amount": "0",
"Order Date": "11/01/2018 18:51",
"Item ID": [
{
"Item ID": "125",
"Type": "Flight",
"Supplier Code": "SB",
"Supplier Name": "Sabre",
"Booking Ref": "ABC123",
"Supplier Price": "5002",
"Currency": "PKR",
"Selling Price": "5002",
"Depart": "12/01/2018 13:15",
"Arrive": "ONEWAY",
"Origin": "KHI",
"Destination": "LHE",
"Carrier": "PK",
"Flight No": "354",
"Class": "Economy",
"": "",
"Pax Type": [
{
"Pax Type": "Adult",
"Title": "Mr",
"Firstname": "Aasdsa",
"Surname": "Mas",
"DOB": "19/09/1995",
"Gender": "Male",
"FOID Type": "None",
"": ""
},
{
"Pax Type": "Adult",
"Title": "Mr",
"Firstname": "Asdad",
"Surname": "Dasd",
"DOB": "07/12/1995",
"Gender": "Male",
"FOID Type": "None",
"": ""
}
]
}
]
}
],
[
{
"Order Ref": "1235",
"Order Status": "PayOfflinePendingManualProcessing",
"Affiliate": " ",
"Source": " ",
"Agent": "asdss@asda.com",
"Customer Name": "Mr Asdsd Asdsd",
"Email Address": "ads@ads.com",
"Telephone": "3332324252",
"Mobile": "3332784342",
"Address 1": " ",
"Address 2": " ",
"City": " ",
"County/State": " ",
"Postal Code": " ",
"Country": " ",
"Voucher Code": "ABC123",
"Voucher Amount": "100",
"Order Date": "11/01/2018 17:06",
"Item ID": [
{
"Item ID": "123",
"Type": "Flight",
"Supplier Code": "PITCH",
"Supplier Name": "Kicker",
"Booking Ref": "FAILED",
"Supplier Price": "154340",
"Currency": "PKR",
"Selling Price": "154340",
"Depart": "18/01/2018 11:40",
"Arrive": "18/01/2018 14:25",
"Origin": "KHI",
"Destination": "DXB",
"Carrier": "PA",
"Flight No": "210",
"Class": "Economy",
"": ""
},
{
"Item ID": "123",
"Type": "Flight",
"Supplier Code": "PITCH",
"Supplier Name": "Kicker",
"Booking Ref": "FAILED",
"Supplier Price": "154340",
"Currency": "PKR",
"Selling Price": "154340",
"Depart": "25/01/2018 6:25",
"Arrive": "25/01/2018 10:40",
"Origin": "DXB",
"Destination": "LHE",
"Carrier": "PA",
"Flight No": "211",
"Class": "Economy",
"": "",
"Pax Type": [
{
"Pax Type": "Adult",
"Title": "Mr",
"Firstname": "Asd",
"Surname": "Azam",
"DOB": "11/08/1991",
"Gender": "Male",
"FOID Type": "None",
"": ""
},
{
"Pax Type": "Adult",
"Title": "Mr",
"Firstname": "Aziz",
"Surname": "Asdsd",
"DOB": "01/07/1974",
"Gender": "Male",
"FOID Type": "None",
"": ""
},
{
"Pax Type": "Adult",
"Title": "Mr",
"Firstname": "mureed",
"Surname": "ahmed",
"DOB": "28/05/1995",
"Gender": "Male",
"FOID Type": "None",
"": ""
},
{
"Pax Type": "Child",
"Title": "Mr",
"Firstname": "abdullah",
"Surname": "Cdsd",
"DOB": "14/04/2012",
"Gender": "Female",
"FOID Type": "None",
"": ""
},
{
"Pax Type": "Adult",
"Title": "Mr",
"Firstname": "Asdsd",
"Surname": "Ahmed",
"DOB": "17/12/1999",
"Gender": "Male",
"FOID Type": "None",
"": ""
}
]
}
]
}
],
[
{
"Order Ref": "1236",
"Order Status": "PayOfflinePendingManualProcessing",
"Affiliate": " ",
"Source": " ",
"Agent": "asda@asdad.com",
"Customer Name": "Mr Asds Sdsd",
"Email Address": "asd@asdsd.com",
"Telephone": "3067869234",
"Mobile": "3067869234",
"Address 1": " ",
"Address 2": " ",
"City": " ",
"County/State": " ",
"Postal Code": " ",
"Country": " ",
"Voucher Code": " ",
"Voucher Amount": "0",
"Order Date": "11/01/2018 16:23",
"Item ID": [
{
"Item ID": "124",
"Type": "Flight",
"Supplier Code": "PITCH",
"Supplier Name": "Kicker",
"Booking Ref": " ",
"Supplier Price": "20134",
"Currency": "PKR",
"Selling Price": "20134",
"Depart": "23/01/2018 2:00",
"Arrive": "ONEWAY",
"Origin": "KHI",
"Destination": "SHJ",
"Carrier": "PK",
"Flight No": "812",
"Class": "Economy",
"": "",
"Pax Type": [
{
"Pax Type": "Adult",
"Title": "Mr",
"Firstname": "Asds",
"Surname": "raza",
"DOB": "01/12/1994",
"Gender": "Male",
"FOID Type": "Passport",
"": ""
}
]
}
]
}
]
]
csvfile.csv
Order Ref,Order Status,Affiliate,Source,Agent,Customer Name,Email Address,Telephone,Mobile,Address 1,Address 2,City,County/State,Postal Code,Country,Voucher Code,Voucher Amount,Order Date
1234,PayOfflineConfirmedManual, , ,akjsd@ad.com,Mr Kas Iasd,asd@asd.com,3342926655, , , , , , , , ,0,11/01/2018 18:51
,Item ID,Type,Supplier Code,Supplier Name,Booking Ref,Supplier Price,Currency,Selling Price,Currency,Depart,Arrive,Origin,Destination,Carrier,Flight No,Class,
,125,Flight,SB,Sabre,ABC123,5002,PKR,5002,PKR,12/01/2018 13:15,ONEWAY,KHI,LHE,PK,354,Economy,
, ,Pax Type,Title,Firstname,Surname,DOB,Gender,FOID Type,,,,,,,,,
, ,Adult,Mr,Aasdsa,Mas,19/09/1995,Male,None,,,,,,,,,
, ,Adult,Mr,Asdad,Dasd,07/12/1995,Male,None,,,,,,,,,
Order Ref,Order Status,Affiliate,Source,Agent,Customer Name,Email Address,Telephone,Mobile,Address 1,Address 2,City,County/State,Postal Code,Country,Voucher Code,Voucher Amount,Order Date
1235,PayOfflinePendingManualProcessing, , ,asdss@asda.com,Mr Asdsd Asdsd,ads@ads.com,3332324252,3332784342, , , , , , ,ABC123,100,11/01/2018 17:06
,Item ID,Type,Supplier Code,Supplier Name,Booking Ref,Supplier Price,Currency,Selling Price,Currency,Depart,Arrive,Origin,Destination,Carrier,Flight No,Class,
,123,Flight,PITCH,Kicker,FAILED,154340,PKR,154340,PKR,18/01/2018 11:40,18/01/2018 14:25,KHI,DXB,PA,210,Economy,
, , , , , , , , , ,25/01/2018 6:25,25/01/2018 10:40,DXB,LHE,PA,211,Economy,
, ,Pax Type,Title,Firstname,Surname,DOB,Gender,FOID Type,,,,,,,,,
, ,Adult,Mr,Asd,Azam,11/08/1991,Male,None,,,,,,,,,
, ,Adult,Mr,Aziz,Asdsd,01/07/1974,Male,None,,,,,,,,,
, ,Adult,Mr,mureed,ahmed,28/05/1995,Male,None,,,,,,,,,
, ,Child,Mr,abdullah,Cdsd,14/04/2012,Female,None,,,,,,,,,
, ,Adult,Mr,Asdsd,Ahmed,17/12/1999,Male,None,,,,,,,,,
Order Ref,Order Status,Affiliate,Source,Agent,Customer Name,Email Address,Telephone,Mobile,Address 1,Address 2,City,County/State,Postal Code,Country,Voucher Code,Voucher Amount,Order Date
1236,PayOfflinePendingManualProcessing, , ,asda@asdad.com,Mr Asds Sdsd,asd@asdsd.com,3067869234,3067869234, , , , , , , ,0,11/01/2018 16:23
,Item ID,Type,Supplier Code,Supplier Name,Booking Ref,Supplier Price,Currency,Selling Price,Currency,Depart,Arrive,Origin,Destination,Carrier,Flight No,Class,
,124,Flight,PITCH,Kicker, ,20134,PKR,20134,PKR,23/01/2018 2:00,ONEWAY,KHI,SHJ,PK,812,Economy,
, ,Pax Type,Title,Firstname,Surname,DOB,Gender,FOID Type,,,,,,,,,
, ,Adult,Mr,Asds,raza,01/12/1994,Male,Passport,,,,,,,,,