Source code for textract.parsers.docx_parser

import docx


[docs]def extract(filename, **kwargs): """Extract text from docx file using python-docx. """ document = docx.Document(filename) return '\n\n'.join([ paragraph.text.encode('utf-8') for paragraph in document.paragraphs ])