import zipfile
import xml.dom.minidom
import StringIO
from .utils import BaseParser
class Parser(BaseParser):
    """Extract text from open document files.

    The methods below read ``self.content``, which is not assigned in this
    section of the file; it is expected to be a DOM document exposing
    ``getElementsByTagName`` (presumably the parsed ``content.xml`` of the
    archive -- confirm against the code that sets it).
    """

    def to_string(self):
        """Convert the document to a string.

        Collects the text of every ``text:p``, ``text:h`` and ``text:list``
        element found in ``self.content`` and terminates each one with a
        newline.

        The output is grouped by tag name in that order (all ``text:p``
        elements first, then ``text:h``, then ``text:list``), not in the
        order the elements appear in the document. Each matched element is
        flattened recursively, so text sitting under more than one matched
        element (for instance a ``text:p`` inside a ``text:list``) is
        emitted once per match.

        Returns:
            A unicode string; empty when no matching element exists.
        """
        chunks = []
        for tag_name in ("text:p", "text:h", "text:list"):
            for element in self.content.getElementsByTagName(tag_name):
                chunks.append(self.text_to_string(element) + "\n")
        # Join once instead of growing a string with ``+=`` in the loop.
        return u"".join(chunks)

    def text_to_string(self, element):
        """Return the concatenated text found beneath ``element``.

        Walks ``element.childNodes`` depth-first: text nodes contribute
        their ``nodeValue``, element nodes are flattened recursively, and
        every other node type (comments, processing instructions, ...) is
        ignored.

        Args:
            element: A DOM node exposing ``childNodes``.

        Returns:
            A unicode string; empty when ``element`` has no text beneath it.
        """
        pieces = []
        for node in element.childNodes:
            if node.nodeType == xml.dom.Node.TEXT_NODE:
                pieces.append(node.nodeValue)
            elif node.nodeType == xml.dom.Node.ELEMENT_NODE:
                pieces.append(self.text_to_string(node))
        return u"".join(pieces)