Как преобразовать файл mbox в формат .msg, используя python? - PullRequest
0 голосов
/ 06 декабря 2018

Я хочу конвертировать файл mbox в формат MSG. Я попытался это сделать, но не получаю правильный формат. Я могу прочитать файл mbox, но не понимаю, как создать из него файл MSG. Мне удалось преобразовать файл mbox в файл EML, и теперь я хочу таким же образом создать файл MSG, но не понимаю, как это сделать.

Ниже приведен код для преобразования mbox в eml.

 import os
import mailbox
from email import generator
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

# Number of .eml files written so far; also used to name the next file.
count = 0


def emlGenerator(body, thisemail, outdir='xxxxx/test2'):
    """Write one ``.eml`` file built from *body* and the headers of *thisemail*.

    The file is named ``<count>.eml`` inside *outdir*, where ``count`` is the
    module-level counter; the counter is incremented after a successful write.

    Args:
        body: Text of the message. ``None`` is written as an empty body and
            ``bytes`` are decoded as UTF-8 with replacement characters,
            because ``MIMEText`` only accepts ``str``.
        thisemail: Source message; must support ``thisemail.get(name)`` for
            header lookup (e.g. ``email.message.Message`` or a ``dict``).
        outdir: Directory that receives the file. It is created if missing.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written.
    """
    global count

    # MIMEText calls ``.encode`` on its argument, so normalise to ``str``.
    if body is None:
        body = ''
    elif isinstance(body, (bytes, bytearray)):
        body = bytes(body).decode('utf-8', errors='replace')

    msg = MIMEMultipart('alternative')
    for header in ('Subject', 'From', 'To', 'Cc', 'Bcc', 'Date'):
        value = thisemail.get(header)
        # Assigning None would be flattened as an empty "Header: " line.
        if value is not None:
            msg[header] = value
    msg.attach(MIMEText(body))

    os.makedirs(outdir, exist_ok=True)
    outfile_name = os.path.join(outdir, str(count) + '.eml')
    with open(outfile_name, 'w', encoding='utf-8') as outfile:
        generator.Generator(outfile).flatten(msg)
    # Count only files that were actually written.
    count += 1


def getcharsets(msg):
    """Return the set of charset names reported by ``msg.get_charsets()``.

    Entries that are ``None`` are skipped, so the result may be empty.
    """
    return {name for name in msg.get_charsets() if name is not None}


def handleerror(errmsg, emailmsg, cs):
    """Print a short report about a decoding failure.

    The report consists of a blank line, *errmsg*, the charset *cs* that was
    being tried, the charsets found in *emailmsg*, and the message's subject
    and sender headers.
    """
    print()
    print(errmsg)
    print("This error occurred while decoding with ", cs, " charset.")

    found = getcharsets(emailmsg)
    print("These charsets were found in the one email.", found)

    subject = emailmsg['subject']
    print("This is the subject:", subject)

    sender = emailmsg['From']
    print("This is the sender:", sender)


def getbodyfromemail(msg):
    """Return the plain-text body of *msg* as ``str``, or ``None`` if absent.

    The first ``text/plain`` part that is not marked as an attachment is
    used. Its payload is transfer-decoded and then decoded with the charset
    declared on that same part (UTF-8 when none is declared). If the declared
    charset is unknown or does not fit the bytes, the payload is decoded as
    UTF-8 with replacement characters so the caller always receives text.

    Args:
        msg: An ``email.message.Message`` (single-part or multipart).

    Returns:
        The decoded body text, or ``None`` when the message has no suitable
        ``text/plain`` part.
    """
    body_part = None
    # walk() already descends into nested multiparts and yields msg itself
    # for a single-part message, so a single pass covers every case.
    for part in msg.walk():
        if part.is_multipart():
            continue
        if part.get_content_type() != 'text/plain':
            continue
        # A text/plain attachment (e.g. notes.txt) is not the message body.
        if part.get_content_disposition() == 'attachment':
            continue
        body_part = part
        break

    if body_part is None:
        return None

    raw = body_part.get_payload(decode=True)
    if raw is None:
        return None

    # Use the charset of the chosen part, not charsets of unrelated parts.
    charset = body_part.get_content_charset() or 'utf-8'
    try:
        return raw.decode(charset)
    except (LookupError, UnicodeDecodeError):
        print("Could not decode the body as", charset,
              "- falling back to UTF-8 with replacement characters.")
        return raw.decode('utf-8', errors='replace')


# Script entry point: convert every message of the mbox into its own .eml
# file, then report how many files the counter recorded.
if __name__ == "__main__":
    source_box = mailbox.mbox('xxxxxx/topics.mbox')
    for message in source_box:
        print(message['Message-id'])
        emlGenerator(getbodyfromemail(message), message)
    print("=========== DONE ============")
    print("Total ", count, " File")
...