Parsing: Add the 'content' attribute
Add the 'content' attribute to documents to allow access to a document's parsed content for indexing and other purposes. Fixes the document parsing indexing failing test. Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
@@ -83,6 +83,8 @@
|
||||
and converted before serializing them.
|
||||
- Add the 'ocr_content' attribute to documents to allow access
|
||||
to a document's OCR content for indexing and other purposes.
|
||||
- Add the 'content' attribute to documents to allow access
|
||||
to a document's parsed content for indexing and other purposes.
|
||||
|
||||
3.1.9 (2018-11-01)
|
||||
==================
|
||||
|
||||
@@ -15,7 +15,7 @@ from common import (
|
||||
MayanAppConfig, menu_facet, menu_multi_item, menu_object, menu_secondary,
|
||||
menu_tools
|
||||
)
|
||||
from common.classes import ModelField
|
||||
from common.classes import ModelAttribute, ModelField
|
||||
from common.settings import settings_db_sync_task_delay
|
||||
from documents.search import document_search, document_page_search
|
||||
from documents.signals import post_version_upload
|
||||
@@ -40,7 +40,7 @@ from .permissions import (
|
||||
permission_parse_document
|
||||
)
|
||||
from .signals import post_document_version_parsing
|
||||
from .utils import get_document_content
|
||||
from .utils import document_property_content, get_document_content
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -95,7 +95,7 @@ class DocumentParsingApp(MayanAppConfig):
|
||||
|
||||
Document.add_to_class('submit_for_parsing', document_parsing_submit)
|
||||
Document.add_to_class(
|
||||
'content', get_document_content
|
||||
'content', document_property_content
|
||||
)
|
||||
DocumentVersion.add_to_class(
|
||||
'content', get_document_content
|
||||
@@ -104,6 +104,12 @@ class DocumentParsingApp(MayanAppConfig):
|
||||
'submit_for_parsing', document_version_parsing_submit
|
||||
)
|
||||
|
||||
ModelAttribute(
|
||||
model=Document, name='content', description=_(
|
||||
'The parsed content of the document.'
|
||||
)
|
||||
)
|
||||
|
||||
ModelField(
|
||||
Document, name='versions__pages__content__content'
|
||||
)
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
TEST_PARSING_INDEX_NODE_TEMPLATE = '{% if "sample" in document.latest_version.content|join:" "|lower %}sample{% endif %}'
|
||||
TEST_PARSING_INDEX_NODE_TEMPLATE = '{% if "sample" in document.content.lower() %}sample{% endif %}'
|
||||
|
||||
@@ -14,6 +14,11 @@ def get_document_content(document):
|
||||
try:
|
||||
page_content = page.content.content
|
||||
except DocumentPageContent.DoesNotExist:
|
||||
pass
|
||||
yield ''
|
||||
else:
|
||||
yield conditional_escape(force_text(page_content))
|
||||
|
||||
|
||||
@property
def document_property_content(self):
    """
    Return the parsed content of the document as a single string.

    The chunks produced by ``get_document_content`` for this object are
    concatenated with one space between consecutive chunks.
    """
    content_chunks = get_document_content(self)
    separator = ' '
    return separator.join(content_chunks)
|
||||
|
||||
Reference in New Issue
Block a user