diff --git a/HISTORY.rst b/HISTORY.rst index 617874aed7..5a8746d9fc 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -4,6 +4,8 @@ * Change the required permission for the checkout info link from document check in to document checkout details view. * Lower the log severity when links don't resolve. +* Add DOCUMENTS_HASH_BLOCK_SIZE to control the size of the file + block when calculating a document's checksum. 3.1.10 (2019-04-04) =================== diff --git a/docs/releases/3.1.11.rst b/docs/releases/3.1.11.rst index a92cf49531..2efe86bb2d 100644 --- a/docs/releases/3.1.11.rst +++ b/docs/releases/3.1.11.rst @@ -7,12 +7,43 @@ Released: April XX, 2019 Changes ------- +Memory usage +^^^^^^^^^^^^ + +The ``DOCUMENTS_HASH_BLOCK_SIZE`` setting was added to limit the number of +bytes that will be read into memory when calculating the checksum of a new +document. For compatibility with the current behavior this setting defaults to +0 which means that it is disabled. Disabling the setting will cause the +entire document's file to be loaded into memory. If documents are not +processing due to out of memory errors (large documents or devices with +limited memory), set ``DOCUMENTS_HASH_BLOCK_SIZE`` to a value other than 0. +Limited tests suggest 65535 to be a good alternative. + + +Tag wizard step +^^^^^^^^^^^^^^^ + +The tag wizard step was fixed and will now allow attaching multiple tags to a +new document. + + +Permissions +^^^^^^^^^^^ + +Previously the document checkout information link required one of the following +permissions: document check in, document check in override, or document +checkout. Meanwhile the document checkout information view would require the +document checkout detail view permission. This difference in permissions +has been eliminated and the link will now require the document checkout +detail view permission, same as the view. Update your user role permissions +accordingly. + + Other changes ^^^^^^^^^^^^^ -* Fix multiple tag selection wizard step. 
-* Update the check out info link permission. Update the link permission to - match the same required permission as the view. +* Lower the log severity when links don't resolve. + Removals -------- @@ -112,6 +143,6 @@ Backward incompatible changes Bugs fixed or issues closed --------------------------- -* :gitlab-issue:`563` Recursive Watch Folder +* None .. _PyPI: https://pypi.python.org/pypi/mayan-edms/ diff --git a/mayan/apps/documents/models.py b/mayan/apps/documents/models.py index f5b973e01d..441622d941 100644 --- a/mayan/apps/documents/models.py +++ b/mayan/apps/documents/models.py @@ -45,7 +45,8 @@ from .permissions import permission_document_view from .settings import ( setting_disable_base_image_cache, setting_disable_transformed_image_cache, setting_display_width, setting_display_height, setting_fix_orientation, - setting_language, setting_zoom_max_level, setting_zoom_min_level + setting_hash_block_size, setting_language, setting_zoom_max_level, + setting_zoom_min_level ) from .signals import ( post_document_created, post_document_type_change, post_version_upload @@ -56,8 +57,8 @@ logger = logging.getLogger(__name__) # document image cache name hash function -def HASH_FUNCTION(data): - return hashlib.sha256(data).hexdigest() +def hash_function(): + return hashlib.sha256() def UUID_FUNCTION(*args, **kwargs): @@ -697,10 +698,25 @@ class DocumentVersion(models.Model): Open a document version's file and update the checksum field using the user provided checksum function """ + block_size = setting_hash_block_size.value + if block_size == 0: + # If the setting value is 0 that means disable read limit. To disable + # the read limit passing None won't work, we pass -1 instead as per + # the Python documentation. 
+ # https://docs.python.org/2/tutorial/inputoutput.html#methods-of-file-objects + block_size = -1 + if self.exists(): - source = self.open() - self.checksum = force_text(HASH_FUNCTION(source.read())) - source.close() + hash_object = hash_function() + with self.open() as file_object: + while (True): + data = file_object.read(block_size) + if not data: + break + + hash_object.update(data) + + self.checksum = force_text(hash_object.hexdigest()) if save: self.save() diff --git a/mayan/apps/documents/settings.py b/mayan/apps/documents/settings.py index 794c433a5e..25001f96b5 100644 --- a/mayan/apps/documents/settings.py +++ b/mayan/apps/documents/settings.py @@ -62,6 +62,14 @@ setting_fix_orientation = namespace.add_setting( 'feature and it is disabled by default.' ) ) +setting_hash_block_size = namespace.add_setting( + global_name='DOCUMENTS_HASH_BLOCK_SIZE', default=0, + help_text=_( + 'Size of blocks to use when calculating the document file\'s ' + 'checksum. A value of 0 disables the block calculation and the entire ' + 'file will be loaded into memory.' + ) +) setting_language = namespace.add_setting( global_name='DOCUMENTS_LANGUAGE', default=DEFAULT_LANGUAGE, help_text=_('Default documents language (in ISO639-3 format).')