在Python中从电子邮件附件中获取文件名

问题描述 投票:0回答:7

我正在使用我的凭据登录服务器,并搜索具有特定主题的电子邮件。 这封电子邮件有一个附件,我想稍后知道它的文件名和可能的扩展名。

我在 Python 中执行此操作,但每次获取文件名时,它都会返回 None,而实际上附件是有文件名的。

from imaplib import *
import base64
import email
import os
import sys
import errno
import mimetypes




# Log in, find every message whose subject matches, and report the file name
# of each non-container MIME part (falling back to a generated name).
server = IMAP4("SERVER LOCATION")
try:
    server.login("USER", "PASS")
    server.select("Inbox")

    typ, data = server.search(None, '(SUBJECT "Hello World")')

    for num in data[0].split():
        typ, msg_data = server.fetch(num, '(RFC822)')
        print(msg_data)

        # The fetched payload is bytes. Wrapping it in str() yields the
        # "b'...'" repr on Python 3, which the parser cannot split into
        # headers and parts -- that is why get_filename() kept returning
        # None. Parse the raw bytes directly instead.
        msg = email.message_from_bytes(msg_data[0][1])

        # Numbering for generated fallback names restarts with each message.
        counter = 1
        for part in msg.walk():
            print(part.as_string() + "\n")
            # multipart/* are just containers
            if part.get_content_maintype() == 'multipart':
                continue
            # Applications should really sanitize the given filename so that
            # an email message can't be used to overwrite important files
            filename = part.get_filename()
            print(filename)

            if not filename:
                ext = mimetypes.guess_extension(part.get_content_type())
                if not ext:
                    # Use a generic bag-of-bits extension
                    ext = '.bin'
                # Build the fallback name whether or not the extension
                # could be guessed.
                filename = 'part-%03d%s' % (counter, ext)
            counter += 1

            # Report the name of this part; the top-level message is only a
            # container and never carries the attachment's file name.
            print("The Filename was:", filename)

    server.close()
finally:
    # Always end the session, even when a command above failed.
    server.logout()

我不知道为什么我一直得不到答案,有什么帮助吗?

python email attachment
7个回答
6
投票

我遇到了同样的问题,这就是我解决它的方法:

import os

# Save every attachment of `msg` into the directory prefix DIR_SBD.
# `msg` is a parsed email message and DIR_SBD is a path prefix that is
# concatenated directly with the file name; both are defined by the caller.
if msg.get_content_maintype() == 'multipart':  # multipart messages only
    # loop on the parts of the mail
    for part in msg.walk():
        # find the attachment part - so skip all the other parts
        if part.get_content_maintype() in ('multipart', 'text'):
            continue
        disposition = part.get('Content-Disposition')
        if disposition is None or disposition == 'inline':
            continue

        print("part:", part.as_string())
        filename = part.get_filename()
        print("filename :", filename)
        if not filename:
            # A part may declare a disposition without naming a file;
            # there is nothing sensible to save it as.
            continue

        # save the attachment in the program directory
        # Keep only the final path component so a crafted name such as
        # "../../etc/passwd" cannot write outside the target directory.
        filepath = DIR_SBD + os.path.basename(filename)
        with open(filepath, 'wb') as fp:
            fp.write(part.get_payload(decode=True))
        print('%s saved!' % filepath)

2
投票

您需要先检查

# Show the MIME content type of every part in the message tree.
for part in msg.walk():
    mime_type = part.get_content_type()
    print(mime_type)

然后在主 for 循环中 -

for part in msg.walk():

对于邮件正文中存在但您不需要的内容类型,直接用 continue 跳过即可。

您也可以直接检查所需的 content_type,然后读取文件名。

ex -我遇到了同样的问题,我的内容类型是 multipart、text/html 和 application/json

我没有检查text/html,而是想阅读“application/json”中的附件。我直接读取文件名,因此出现错误 - 文件名没有出现。

当我加上下面的检查之后- `

if part.get_content_maintype() == 'text/html':
    continue

if part.get('Content-Type')== 'application/json':

    filename = part.get_filename().split('.')

#do the stuff needed 

` 不会出现错误。

希望对你有帮助


1
投票

如果你将所有内容都转储到“部分”中,你真的能看到那里的文件吗?


0
投票

我也面临类似的问题。只需删除 if part.get_content_maintype() == 'multipart': continue 这个条件,它就会正常工作。


0
投票

用上面的答案解决了这个问题...加上我刚刚在循环中打印了结果:

# Diagnostic pass: show the content type and file name of every part.
for part in msg.walk():
    kind = part.get_content_type()
    print('content type: %s' % kind)
    attached_name = part.get_filename()
    print('filename: %s' % attached_name)

然后识别文件名是哪个内容部分:

content type: multipart/mixed
filename: None
content type: multipart/alternative
filename: None
content type: text/plain
filename: None
content type: text/html
filename: None
content type: application/octet-stream
filename: XXXXX__202307141059.csv

现在代码:

# Record the file name carried by application/octet-stream parts.
for part in msg.walk():
    if part.get_content_type() != 'application/octet-stream':
        continue
    file_name = part.get_filename()

0
投票
    email_part_types = [
"text/plain", "text/html", "multipart/mixed", "multipart/alternative",
"multipart/related", "multipart/digest", "multipart/report",
"application/octet-stream", "application/pdf", "application/msword",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"application/vnd.ms-excel",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"application/zip", "image/jpeg", "image/png", "image/gif",
"audio/mpeg", "video/mp4", "message/rfc822"
]

for part_type in email_part_types:
    content_type = part_type
    maintype, subtype = part_type.split('/')
    content_disposition = "attachment" if maintype != "multipart" else None
    filename = "example.file" if maintype != "multipart" else None

    print(f"Content-Type: {content_type}")
    print(f"Maintype: {maintype}")
    print(f"Subtype: {subtype}")
    print(f"Content-Disposition: {content_disposition}")
    print(f"Filename: {filename}")
    print("-" * 40)

0
投票
# Message printed for each recognised (maintype, subtype) pair. A main type
# listed here with an unlisted subtype prints no extra line; a main type that
# is absent from the table is reported as unrecognized.
_PART_NOTES = {
    'multipart': {
        'mixed': "This is a multipart/mixed message.",
        'alternative': "This is a multipart/alternative message.",
        'related': "This is a multipart/related message.",
        'digest': "This is a multipart/digest message.",
        'report': "This is a multipart/report message.",
    },
    'text': {
        'plain': "This is a text/plain part.",
        'html': "This is a text/html part.",
    },
    'application': {
        'octet-stream': "This is an application/octet-stream part.",
        'pdf': "This is an application/pdf part.",
        'msword': "This is an application/msword part.",
        'vnd.openxmlformats-officedocument.wordprocessingml.document':
            "This is an application/vnd.openxmlformats-officedocument.wordprocessingml.document part (for .docx).",
        'vnd.ms-excel': "This is an application/vnd.ms-excel part.",
        'vnd.openxmlformats-officedocument.spreadsheetml.sheet':
            "This is an application/vnd.openxmlformats-officedocument.spreadsheetml.sheet part (for .xlsx).",
        'zip': "This is an application/zip part.",
    },
    'image': {
        'jpeg': "This is an image/jpeg part.",
        'png': "This is an image/png part.",
        'gif': "This is an image/gif part.",
    },
    'audio': {
        'mpeg': "This is an audio/mpeg part.",
    },
    'video': {
        'mp4': "This is a video/mp4 part.",
    },
    'message': {
        'rfc822': "This is a message/rfc822 part (for attached emails).",
    },
}

# Walk every part of the message, print its headers of interest, then print
# a one-line description looked up from the table above.
for part in msg.walk():
    content_type = part.get_content_type()
    maintype = part.get_content_maintype()
    subtype = part.get_content_subtype()
    content_disposition = part.get('Content-Disposition')
    filename = part.get_filename()

    print(f"Content-Type: {content_type}")
    print(f"Maintype: {maintype}")
    print(f"Subtype: {subtype}")
    print(f"Content-Disposition: {content_disposition}")
    print(f"Filename: {filename}")

    # Example filtering based on type
    notes_for_maintype = _PART_NOTES.get(maintype)
    if notes_for_maintype is None:
        # Handling parts with no specific content type or unrecognized types
        print("This is an unrecognized or custom part.")
        continue

    note = notes_for_maintype.get(subtype)
    if note is not None:
        print(note)
© www.soinside.com 2019 - 2024. All rights reserved.