diff --git a/HISTORY.rst b/HISTORY.rst
index abfaa3fc24..de08bd2d91 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -83,6 +83,8 @@
   and converteed before serializing them.
 - Add the 'ocr_content' attribute to documents to allow access
   to a document's OCR content for indexing and other purposes.
+- Add the 'content' attribute to documents to allow access
+  to a document's parsed content for indexing and other purposes.
 
 3.1.9 (2018-11-01)
 ==================
diff --git a/mayan/apps/document_parsing/apps.py b/mayan/apps/document_parsing/apps.py
index c410ba2237..51a2ab265d 100644
--- a/mayan/apps/document_parsing/apps.py
+++ b/mayan/apps/document_parsing/apps.py
@@ -15,7 +15,7 @@ from common import (
     MayanAppConfig, menu_facet, menu_multi_item, menu_object, menu_secondary,
     menu_tools
 )
-from common.classes import ModelField
+from common.classes import ModelAttribute, ModelField
 from common.settings import settings_db_sync_task_delay
 from documents.search import document_search, document_page_search
 from documents.signals import post_version_upload
@@ -40,7 +40,7 @@ from .permissions import (
     permission_parse_document
 )
 from .signals import post_document_version_parsing
-from .utils import get_document_content
+from .utils import document_property_content, get_document_content
 
 logger = logging.getLogger(__name__)
 
@@ -95,7 +95,7 @@ class DocumentParsingApp(MayanAppConfig):
 
         Document.add_to_class('submit_for_parsing', document_parsing_submit)
         Document.add_to_class(
-            'content', get_document_content
+            'content', document_property_content
         )
         DocumentVersion.add_to_class(
             'content', get_document_content
@@ -104,6 +104,12 @@ class DocumentParsingApp(MayanAppConfig):
             'submit_for_parsing', document_version_parsing_submit
         )
 
+        ModelAttribute(
+            model=Document, name='content', description=_(
+                'The parsed content of the document.'
+            )
+        )
+
         ModelField(
             Document, name='versions__pages__content__content'
         )
diff --git a/mayan/apps/document_parsing/tests/literals.py b/mayan/apps/document_parsing/tests/literals.py
index a4cf5f6802..280e1eaa15 100644
--- a/mayan/apps/document_parsing/tests/literals.py
+++ b/mayan/apps/document_parsing/tests/literals.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-TEST_PARSING_INDEX_NODE_TEMPLATE = '{% if "sample" in document.latest_version.content|join:" "|lower %}sample{% endif %}'
+TEST_PARSING_INDEX_NODE_TEMPLATE = '{% if "sample" in document.content|lower %}sample{% endif %}'
diff --git a/mayan/apps/document_parsing/utils.py b/mayan/apps/document_parsing/utils.py
index ab8e049450..ce3048dea5 100644
--- a/mayan/apps/document_parsing/utils.py
+++ b/mayan/apps/document_parsing/utils.py
@@ -14,6 +14,12 @@ def get_document_content(document):
         try:
             page_content = page.content.content
         except DocumentPageContent.DoesNotExist:
-            pass
+            yield ''
         else:
             yield conditional_escape(force_text(page_content))
+
+
+@property
+def document_property_content(self):
+    """Return the document's parsed content joined into a single string."""
+    return ' '.join(get_document_content(self))