Source code for textract.parsers.tesseract
from ..shell import run
def extract(filename, **kwargs):
    """Extract text from various image file formats using tesseract-ocr.

    :param filename: path of the image file handed to ``tesseract``.
    :param kwargs: accepted for interface compatibility; not used here.
    :returns: the standard output captured by ``run`` for the command,
        i.e. the contents of the text file tesseract produced.
    """
    import os
    import shutil
    import tempfile
    try:
        from shlex import quote
    except ImportError:  # older interpreters without shlex.quote
        from pipes import quote

    # Tesseract writes its result to "<outputbase>.txt" rather than to the
    # console, so give it an output base inside a private scratch directory,
    # echo the result back with ``cat``, and then discard the directory.
    # A unique directory (instead of a fixed "tmpout" in the current working
    # directory) keeps concurrent extractions from overwriting each other.
    scratch_dir = tempfile.mkdtemp()
    try:
        output_base = os.path.join(scratch_dir, 'tmpout')
        # Quote every argument: the command is a shell string, so an
        # unquoted path containing spaces or metacharacters would be split
        # or interpreted by the shell.
        command = 'tesseract %s %s && cat %s' % (
            quote(filename),
            quote(output_base),
            quote(output_base + '.txt'),
        )
        stdout, stderr = run(command)
    finally:
        # Clean up even when the command fails, so no temp output lingers.
        shutil.rmtree(scratch_dir, ignore_errors=True)
    return stdout