Parsing: Add the 'content' attribute
Add the 'content' attribute to documents to allow access to a document's parsed content for indexing and other purposes. Fixes the failing document parsing indexing test. Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
@@ -83,6 +83,8 @@
|
|||||||
and converted before serializing them.
|
and converted before serializing them.
|
||||||
- Add the 'ocr_content' attribute to documents to allow access
|
- Add the 'ocr_content' attribute to documents to allow access
|
||||||
to a document's OCR content for indexing and other purposes.
|
to a document's OCR content for indexing and other purposes.
|
||||||
|
- Add the 'content' attribute to documents to allow access
|
||||||
|
to a document's parsed content for indexing and other purposes.
|
||||||
|
|
||||||
3.1.9 (2018-11-01)
|
3.1.9 (2018-11-01)
|
||||||
==================
|
==================
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ from common import (
|
|||||||
MayanAppConfig, menu_facet, menu_multi_item, menu_object, menu_secondary,
|
MayanAppConfig, menu_facet, menu_multi_item, menu_object, menu_secondary,
|
||||||
menu_tools
|
menu_tools
|
||||||
)
|
)
|
||||||
from common.classes import ModelField
|
from common.classes import ModelAttribute, ModelField
|
||||||
from common.settings import settings_db_sync_task_delay
|
from common.settings import settings_db_sync_task_delay
|
||||||
from documents.search import document_search, document_page_search
|
from documents.search import document_search, document_page_search
|
||||||
from documents.signals import post_version_upload
|
from documents.signals import post_version_upload
|
||||||
@@ -40,7 +40,7 @@ from .permissions import (
|
|||||||
permission_parse_document
|
permission_parse_document
|
||||||
)
|
)
|
||||||
from .signals import post_document_version_parsing
|
from .signals import post_document_version_parsing
|
||||||
from .utils import get_document_content
|
from .utils import document_property_content, get_document_content
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -95,7 +95,7 @@ class DocumentParsingApp(MayanAppConfig):
|
|||||||
|
|
||||||
Document.add_to_class('submit_for_parsing', document_parsing_submit)
|
Document.add_to_class('submit_for_parsing', document_parsing_submit)
|
||||||
Document.add_to_class(
|
Document.add_to_class(
|
||||||
'content', get_document_content
|
'content', document_property_content
|
||||||
)
|
)
|
||||||
DocumentVersion.add_to_class(
|
DocumentVersion.add_to_class(
|
||||||
'content', get_document_content
|
'content', get_document_content
|
||||||
@@ -104,6 +104,12 @@ class DocumentParsingApp(MayanAppConfig):
|
|||||||
'submit_for_parsing', document_version_parsing_submit
|
'submit_for_parsing', document_version_parsing_submit
|
||||||
)
|
)
|
||||||
|
|
||||||
|
ModelAttribute(
|
||||||
|
model=Document, name='content', description=_(
|
||||||
|
'The parsed content of the document.'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
ModelField(
|
ModelField(
|
||||||
Document, name='versions__pages__content__content'
|
Document, name='versions__pages__content__content'
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
TEST_PARSING_INDEX_NODE_TEMPLATE = '{% if "sample" in document.latest_version.content|join:" "|lower %}sample{% endif %}'
|
TEST_PARSING_INDEX_NODE_TEMPLATE = '{% if "sample" in document.content.lower() %}sample{% endif %}'
|
||||||
|
|||||||
@@ -14,6 +14,11 @@ def get_document_content(document):
|
|||||||
try:
|
try:
|
||||||
page_content = page.content.content
|
page_content = page.content.content
|
||||||
except DocumentPageContent.DoesNotExist:
|
except DocumentPageContent.DoesNotExist:
|
||||||
pass
|
yield ''
|
||||||
else:
|
else:
|
||||||
yield conditional_escape(force_text(page_content))
|
yield conditional_escape(force_text(page_content))
|
||||||
|
|
||||||
|
|
||||||
|
@property
|
||||||
|
def document_property_content(self):
|
||||||
|
return ' '.join(get_document_content(self))
|
||||||
|
|||||||
Reference in New Issue
Block a user