Source code for textract.parsers.pptx_parser

import pptx

from .utils import BaseParser


class Parser(BaseParser):
    """Extract text from pptx file using python-pptx
    """

    def extract(self, filename, **kwargs):
        """Return the text of every run in the presentation at ``filename``.

        Slides, shapes, paragraphs and runs are visited in the order
        python-pptx yields them; shapes without a text frame are skipped.
        The text of each run is joined with a blank line (``'\\n\\n'``).
        Extra keyword arguments are accepted and ignored.
        """
        presentation = pptx.Presentation(filename)
        text_runs = []
        for slide in presentation.slides:
            for shape in slide.shapes:
                text_frame = self._text_frame(shape)
                if text_frame is None:
                    continue
                for paragraph in text_frame.paragraphs:
                    text_runs.extend(run.text for run in paragraph.runs)
        return '\n\n'.join(text_runs)

    @staticmethod
    def _text_frame(shape):
        """Return the text frame of ``shape``, or ``None`` if it has none.

        python-pptx renamed ``has_textframe``/``textframe`` to
        ``has_text_frame``/``text_frame``; the current names are tried
        first and the legacy spellings are kept as a fallback so older
        installations keep working.
        """
        if getattr(shape, 'has_text_frame', False):
            return shape.text_frame
        if getattr(shape, 'has_textframe', False):
            return shape.textframe
        return None