Source code for textract.parsers.eml_parser
from email.parser import Parser
[docs]def extract(filename, **kwargs):
"""Extract text from email messages in .eml format. This gets the
subject and all text from the contents.
"""
# TODO: could make option here to omit all non-original content
# (forwarded content, quoted content in reply, signature, etc),
# perhaps using https://github.com/zapier/email-reply-parser
# TODO: could also potentially grab text/html content instead of
# only grabbing text/plain content
with open(filename) as stream:
parser = Parser()
message = parser.parse(stream)
text_content = []
for part in message.walk():
if part.get_content_type().startswith('text/plain'):
text_content.append(part.get_payload())
return '\n\n'.join(text_content)