У меня был проблемный XML-файл, который мне нужно загрузить в блог WordPress. Любой импортер работает с этим файлом, похоже что-то связанное с авторами поста. Чтобы разобраться с этим и получить сообщения из старого блога блоггера, я пытаюсь проанализировать XML-файл в CSV, и поэтому я напишу плагин или попробую другой импорт, чтобы наполнить блог.
Я уже проанализировал заполнение, но когда я пытаюсь записать в csv
, данные не заполняют все столбцы, только один из них.
Мой скрипт на питоне:
from xml.dom import minidom
import csv
xmldoc = minidom.parse('data/data.xml')
feed = xmldoc.getElementsByTagName('feed')[0]
entries = feed.getElementsByTagName('entry')
ids = []
publisheds = []
titles = []
updateds = []
categories = []
contents = []
links = []
authors = []
authors_emails = []
i = 0
for entry in entries:
id = entry.getElementsByTagName('id')[0].firstChild.data
published = entry.getElementsByTagName('published')[0].firstChild.data
if (entry.getElementsByTagName('updated')[0].firstChild is not None):
updated = entry.getElementsByTagName('updated')[0].firstChild.data
else:
updated = ''
category = entry.getElementsByTagName('category')[0]
if (entry.getElementsByTagName('title')[0].firstChild is not None):
title = entry.getElementsByTagName('title')[0].firstChild.data
else:
title = ''
if (entry.getElementsByTagName('content')[0].firstChild is not None):
content = entry.getElementsByTagName('content')[0].firstChild.data
else:
content = ''
link = entry.getElementsByTagName('link')
author = entry.getElementsByTagName('author')[0]
author_name = author.getElementsByTagName('name')[0].firstChild.data
if (author.getElementsByTagName('email')[0].firstChild is not None):
author_email = author.getElementsByTagName('email')[0].firstChild.data
else:
author_email = ''
author_gd = author.getElementsByTagName('gd:image')
term = category.attributes["term"]
if (term.value == 'http://schemas.google.com/blogger/2008/kind#post'):
titles.append(title)
ids.append(id)
publisheds.append(published)
updateds.append(updated)
categories.append(category)
contents.append(category)
links.append(link)
authors.append(author_name)
authors_emails.append(author_email)
for data in titles:
with open('data.csv', 'a') as csvfile:
fieldnames = ['id','published', 'updated', 'title', 'content', 'links', 'author', 'author_email']
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
while (i == 0):
writer.writeheader()
i = i + 1
writer.writerow({'title': data})
for data in id:
with open('data.csv', 'a') as csvfile:
fieldnames = ['id','published', 'updated', 'title', 'content', 'links', 'author', 'author_email']
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
while (i == 0):
writer.writeheader()
i = i + 1
writer.writerow({'id': id})
for data in publisheds:
with open('data.csv', 'a') as csvfile:
fieldnames = ['id','published', 'updated', 'title', 'content', 'links', 'author', 'author_email']
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
while (i == 0):
writer.writeheader()
i = i + 1
writer.writerow(
{'published': published,})
А мой XML выглядит так:
<feed>
<entry>
<id>tag:blogger.com,1999:blog-3819124901608450649.post-3344567350301029496</id>
<published>2018-08-28T20:31:00.001-07:00</published>
<updated>2018-08-28T20:31:06.083-07:00</updated>
<category scheme="http://schemas.google.com/g/2005#kind" term="http://schemas.google.com/blogger/2008/kind#post" />
<title type="text">Post 03</title>
<content type="html">Foo bar</content>
<link rel="replies" type="application/atom+xml" href="https://marcosademir.blogspot.com/feeds/3344567350301029496/comments/default" title="Postar comentários" />
<link rel="replies" type="text/html" href="http://marcosademir.blogspot.com/2018/08/post-03.html#comment-form" title="0 Comentários" />
<link rel="edit" type="application/atom+xml" href="https://www.blogger.com/feeds/3819124901608450649/posts/default/3344567350301029496" />
<link rel="self" type="application/atom+xml" href="https://www.blogger.com/feeds/3819124901608450649/posts/default/3344567350301029496" />
<link rel="alternate" type="text/html" href="http://marcosademir.blogspot.com/2018/08/post-03.html" title="Post 03" />
<author>
<name>Lucas Maraal</name>
<uri>https://www.blogger.com/profile/17261797217287987677</uri>
<email>noreply@blogger.com</email>
<gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="32" height="32" src="//4.bp.blogspot.com/_pSv61Q08Q6Y/S_IujH-j3aI/AAAAAAAAAAM/M-DlhT-CaoA/S220-s32/OAAAABcf5r93dqAGgOuYvkuDQTljXHKx0_qR6EqgupGM6Ym2hS3vvA10Kp5Mcva6IMm7XV1K4Ac5s0_3IGX4vGUEkTwAm1T1UCi0ddtOBY2GdNPNBcTka0l1Cl-T.jpg" />
</author>
<thr:total>0</thr:total>
</entry>
</feed>