Я хочу конвертировать файл mbox в формат MSG. Я попытался это сделать, но не получаю правильный формат. Я могу прочитать файл mbox, но не понимаю, как создать из него файл MSG. Мне удалось конвертировать файл mbox в файл eml, и я хочу точно так же создать файл msg, но не понимаю, как это сделать.
Ниже приведен код для преобразования mbox в eml.
import os
import mailbox
from email import generator
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
# Number of .eml files written so far; also used to name the next file.
count = 0


def emlGenerator(body, thisemail, outdir='xxxxx/test2'):
    """Write one message as ``<count>.eml`` inside *outdir*.

    A fresh ``multipart/alternative`` message is built, the Subject, From,
    To, Cc, Bcc and Date headers are copied from *thisemail*, and *body* is
    attached as a single text part. The module-level ``count`` supplies the
    file name and is incremented on every call.

    Args:
        body: Text of the message. ``None`` is written as an empty body and
            ``bytes`` are decoded as UTF-8 (undecodable bytes replaced),
            because ``MIMEText`` only accepts ``str``.
        thisemail: Source message; only item access by header name is used.
        outdir: Directory that receives the file. It is created if missing.
    """
    global count

    if body is None:
        body = ''
    elif isinstance(body, (bytes, bytearray)):
        body = body.decode('utf-8', errors='replace')

    msg = MIMEMultipart('alternative')
    for header in ('Subject', 'From', 'To', 'Cc', 'Bcc', 'Date'):
        value = thisemail[header]
        # A header assigned None is flattened as an empty "Cc: " line, so
        # headers the source message does not have are left out entirely.
        if value is not None:
            msg[header] = value
    msg.attach(MIMEText(body))

    name = str(count) + '.eml'
    count += 1

    os.makedirs(outdir, exist_ok=True)
    outfile_name = os.path.join(outdir, name)
    with open(outfile_name, 'w') as outfile:
        generator.Generator(outfile).flatten(msg)
def getcharsets(msg):
    """Return the set of charset names reported by ``msg.get_charsets()``.

    ``None`` entries in that list are left out of the result.
    """
    return {name for name in msg.get_charsets() if name is not None}
def handleerror(errmsg, emailmsg, cs):
    """Print a short report about a decoding failure to standard output.

    The report is a blank line, *errmsg*, the charset *cs* that was in use,
    the charsets found in *emailmsg*, and the message's subject and sender.
    """
    # Blank line followed by the error text.
    print("", errmsg, sep="\n")
    print(f"This error occurred while decoding with  {cs!s}  charset.")
    found = getcharsets(emailmsg)
    print("These charsets were found in the one email.", found)
    for label, header in (("This is the subject:", 'subject'),
                          ("This is the sender:", 'From')):
        print(label, emailmsg[header])
def getbodyfromemail(msg):
    """Return the ``text/plain`` body of *msg* as ``str``.

    Every non-container part of the message is inspected; when several
    ``text/plain`` parts exist, the last one encountered is used.

    Decoding is attempted with the charset declared on the selected part
    first, then with any other charset declared in the message, and finally
    with UTF-8 using replacement characters, so the result is never raw
    bytes.

    Returns:
        The decoded text, or ``None`` when the message has no
        ``text/plain`` part.
    """
    raw = None
    source = None
    # walk() yields the message itself and then every nested part, so one
    # pass covers both single-part and multipart messages.
    for part in msg.walk():
        if part.is_multipart():
            continue
        if part.get_content_type() == 'text/plain':
            raw = part.get_payload(decode=True)
            source = part
    if raw is None:
        return None

    # Try the selected part's own charset first, then the others declared
    # anywhere in the message, without duplicates.
    candidates = []
    for charset in [source.get_content_charset()] + msg.get_charsets():
        if charset is not None and charset not in candidates:
            candidates.append(charset)

    for charset in candidates:
        try:
            return raw.decode(charset)
        except (UnicodeDecodeError, LookupError):
            # LookupError covers charset names Python has no codec for.
            print("Hit a UnicodeDecodeError or AttributeError. Moving right along.")

    # No declared charset worked (or none was declared).
    return raw.decode('utf-8', errors='replace')
if __name__ == "__main__":
    # Convert every message of the mbox file into a numbered .eml file.
    # create=False: by default mailbox.mbox() creates a missing file, which
    # would turn a mistyped path into an empty mailbox and a silent
    # "Total 0 File" run instead of an error.
    box = mailbox.mbox('xxxxxx/topics.mbox', create=False)
    try:
        for thisemail in box:
            print(thisemail['Message-id'])
            body = getbodyfromemail(thisemail)
            emlGenerator(body, thisemail)
    finally:
        # Release the underlying file handle even if a message fails.
        box.close()
    print("=========== DONE ============")
    print("Total ", count, " File")