Мой вопрос очень похож на этот: Python синтаксический анализ текста из нескольких текстовых файлов
Я хотел бы получить такой же результат.
Я работаю над использованием конечного автомата, как предлагает mangupt, но я не уверен, как лучше всего адаптировать этот код для моих файлов. Вот что у меня есть.
state = ["other", "head", "present", "qa", "speaker", "data"]
#codes : 0, 1, 2, 3, 4, 5
def writecell(out, data):
data = data.replace('"', '""')
out.write('"')
out.write(data)
out.write('"')
out.write(",")
def readfile(fname, outname):
initstate = 0
f = open(fname, "r")
out = open(outname, "w")
head = ""
head_written = 0
quotes = 0
had_speaker = 0
for line in f:
line = line.strip()
if not line: continue
if initstate in [0,5]:
if initstate == 5:
out.write('"')
quotes = 0
out.write("\n")
initstate = 1
elif initstate in [0,5]:
if initstate == 5:
out.write('"')
quotes = 0
out.write("\n")
initstate = 4
elif initstate == 1 and line == "Presentation" or line == "P R E S E N T A T I O N":
initstate = 2
head = "Presentation"
head_written = 0
elif initstate == 1 and line == "Questions and Answers" or line = "Q U E S T I O N S A N D A N S W E R S":
initstate = 3
head = "Questions and Answers"
head_written = 0
elif initstate == 1:
initstate = 0
elif initstate in [2, 3]:
initstate = 4
elif initstate == 4 and ":" in line in line:
comma = line.find(',')
speech_st = line.find(':')
speech_end = line.find(':')
if speech_st == -1:
initstate = 0
continue
if comma == -1:
firm = ""
speaker = line[:speech_st].strip()
else:
speaker = line[:comma].strip()
firm = line[comma+1:speech_st].strip()
head_written = 1
if head_written:
writecell(out, head)
head_written = 0
order = line[speech_st+1:speech_end]
writecell(out, speaker)
writecell(out, firm)
writecell(out, order)
had_speaker = 1
elif initstate == 4:
if had_speaker:
initstate = 5
out.write('"')
quotes = 1
had_speaker = 0
elif initstate == 5:
line = line.replace('"', '""')
out.write(line)
elif initstate == 0:
continue
else:
continue
f.close()
if quotes:
out.write('"')
out.close()
Вот несколько примеров моих файлов:
https://pastebin.com/EACpRDvy
https://pastebin.com/EqGPp4vg
https://pastebin.com/X08vAX5J
Спасибо !!