Source code for textract.parsers.tesseract

from ..shell import run


def extract(filename, **kwargs):
    """Extract text from various image file formats using tesseract-ocr.

    :param filename: Path of the image file to hand to ``tesseract``.
    :param kwargs: Accepted for interface compatibility; not used here.
    :returns: The first element (stdout) of the pair returned by ``run``,
        which carries the contents of the text file tesseract produced.
    """
    import os
    import shutil
    import tempfile

    try:
        from shlex import quote
    except ImportError:  # Python 2 keeps the same helper in ``pipes``
        from pipes import quote

    # Tesseract is asked to write to "<output base>.txt", which is then
    # read back with ``cat``.  A private temporary directory is used for
    # the output base so that concurrent calls cannot clobber each other
    # and the current working directory does not need to be writable.
    workdir = tempfile.mkdtemp()
    try:
        output_base = os.path.join(workdir, 'tmpout')
        # Every path is shell-quoted: the command is passed to ``run`` as
        # a single string, so spaces or shell metacharacters in a file
        # name would otherwise break (or hijack) the command line.
        command = 'tesseract %s %s && cat %s' % (
            quote('%s' % (filename,)),
            quote(output_base),
            quote(output_base + '.txt'),
        )
        stdout, _stderr = run(command)
    finally:
        # Remove the scratch output even when tesseract or ``run`` fails.
        shutil.rmtree(workdir, ignore_errors=True)
    return stdout