Watch folders: Add support for subfolders

Add support for subfolder scanning to watch folders. Closes
GitLab issues #498 and #563.

This commit adds a new field to watch folders called
"include_subdirectories".

The directory walk was also updated to use pathlib2.

Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
Roberto Rosario
2019-04-05 01:03:34 -04:00
parent 9b21275b7e
commit d35323aa65
7 changed files with 214 additions and 52 deletions

View File

@@ -1,6 +1,8 @@
3.2 (2019-04-XX) 3.2 (2019-04-XX)
================ ================
* Split sources models into separate modules * Split sources models into separate modules
* Add support for subfolder scanning to watch folders. Closes
GitLab issues #498 and #563.
3.1.10 (2019-04-04) 3.1.10 (2019-04-04)
=================== ===================

87
docs/releases/3.2.rst Normal file
View File

@@ -0,0 +1,87 @@
Version 3.2
===========
Released: XX XX, 2019
Changes
-------
Other changes
^^^^^^^^^^^^^
* Split source models into different modules.
Removals
--------
* None
Upgrading from a previous version
---------------------------------
If installed via Python's PIP
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Remove deprecated requirements::
$ curl https://gitlab.com/mayan-edms/mayan-edms/raw/master/removals.txt | pip uninstall -r /dev/stdin
Type in the console::
$ pip install mayan-edms==3.2
The requirements will also be updated automatically.
Using Git
^^^^^^^^^
If you installed Mayan EDMS by cloning the Git repository, issue the commands::
$ git reset --hard HEAD
$ git pull
otherwise download the compressed archive and uncompress it, overwriting the
existing installation.
Remove deprecated requirements::
$ pip uninstall -y -r removals.txt
Next upgrade/add the new requirements::
$ pip install --upgrade -r requirements.txt
Common steps
^^^^^^^^^^^^
Perform these steps after updating the code from either step above.
Migrate existing database schema with::
$ mayan-edms.py performupgrade
Add new static media::
$ mayan-edms.py collectstatic --noinput
The upgrade procedure is now complete.
Backward incompatible changes
-----------------------------
* None
Bugs fixed or issues closed
---------------------------
* :gitlab-issue:`498` Can't scan subdirectories
* :gitlab-issue:`563` Recursive Watch Folder
.. _PyPI: https://pypi.python.org/pypi/mayan-edms/

View File

@@ -137,6 +137,6 @@ class WatchFolderSetupForm(forms.ModelForm):
class Meta: class Meta:
fields = ( fields = (
'label', 'enabled', 'interval', 'document_type', 'uncompress', 'label', 'enabled', 'interval', 'document_type', 'uncompress',
'folder_path' 'folder_path', 'include_subdirectories'
) )
model = WatchFolderSource model = WatchFolderSource

View File

@@ -0,0 +1,26 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.16 on 2018-11-28 07:52
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('sources', '0019_auto_20180803_0440'),
]
operations = [
migrations.AddField(
model_name='watchfoldersource',
name='include_subdirectories',
field=models.BooleanField(default=False, help_text='If checked, not only will the folder path be scanned for files but also its subdirectories.', verbose_name='Include subdirectories?'),
preserve_default=False,
),
migrations.AlterField(
model_name='watchfoldersource',
name='folder_path',
field=models.CharField(help_text='Server side filesystem path to scan for files.', max_length=255, verbose_name='Folder path'),
),
]

View File

@@ -1,11 +1,10 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import logging import logging
import os
from django.core.files import File from pathlib2 import Path
from django.db import models from django.db import models
from django.utils.encoding import force_text
from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext_lazy as _
from ..literals import SOURCE_CHOICE_WATCH, SOURCE_UNCOMPRESS_CHOICE_Y from ..literals import SOURCE_CHOICE_WATCH, SOURCE_UNCOMPRESS_CHOICE_Y
@@ -30,8 +29,15 @@ class WatchFolderSource(IntervalBaseModel):
source_type = SOURCE_CHOICE_WATCH source_type = SOURCE_CHOICE_WATCH
folder_path = models.CharField( folder_path = models.CharField(
help_text=_('Server side filesystem path.'), max_length=255, help_text=_('Server side filesystem path to scan for files.'),
verbose_name=_('Folder path') max_length=255, verbose_name=_('Folder path')
)
include_subdirectories = models.BooleanField(
help_text=_(
'If checked, not only will the folder path be scanned for files '
'but also its subdirectories.'
),
verbose_name=_('Include subdirectories?')
) )
objects = models.Manager() objects = models.Manager()
@@ -41,15 +47,19 @@ class WatchFolderSource(IntervalBaseModel):
verbose_name_plural = _('Watch folders') verbose_name_plural = _('Watch folders')
def check_source(self): def check_source(self):
# Force self.folder_path to unicode to avoid os.listdir returning path = Path(self.folder_path)
# str for non-latin filenames, gh-issue #163
for file_name in os.listdir(force_text(self.folder_path)): if self.include_subdirectories:
full_path = os.path.join(self.folder_path, file_name) iterator = path.rglob('*')
if os.path.isfile(full_path): else:
with File(file=open(full_path, mode='rb')) as file_object: iterator = path.glob('*')
for entry in iterator:
if entry.is_file() or entry.is_symlink():
with entry.open(mode='rb') as file_object:
self.handle_upload( self.handle_upload(
file_object=file_object, file_object=file_object,
expand=(self.uncompress == SOURCE_UNCOMPRESS_CHOICE_Y), expand=(self.uncompress == SOURCE_UNCOMPRESS_CHOICE_Y),
label=file_name label=entry.name
) )
os.unlink(full_path) entry.unlink()

View File

@@ -137,3 +137,4 @@ Content-MD5: 1B2M2Y8AsgTpgAmY7PhCfg==
TEST_SOURCE_LABEL = 'test source' TEST_SOURCE_LABEL = 'test source'
TEST_SOURCE_UNCOMPRESS_N = 'n' TEST_SOURCE_UNCOMPRESS_N = 'n'
TEST_STAGING_PREVIEW_WIDTH = 640 TEST_STAGING_PREVIEW_WIDTH = 640
TEST_WATCHFOLDER_SUBFOLDER = 'test_subfolder'

View File

@@ -3,29 +3,31 @@ from __future__ import unicode_literals
import shutil import shutil
import mock import mock
from pathlib2 import Path
from django.test import override_settings from django.test import override_settings
from django.utils.encoding import force_text
from common.utils import mkdtemp from common.utils import mkdtemp
from common.tests import BaseTestCase from common.tests import BaseTestCase
from documents.models import Document, DocumentType from documents.models import Document, DocumentType
from documents.tests import ( from documents.tests import (
TEST_COMPRESSED_DOCUMENT_PATH, TEST_DOCUMENT_TYPE_LABEL, DocumentTestMixin, TEST_COMPRESSED_DOCUMENT_PATH, TEST_DOCUMENT_TYPE_LABEL,
TEST_NON_ASCII_DOCUMENT_FILENAME, TEST_NON_ASCII_DOCUMENT_PATH, TEST_NON_ASCII_DOCUMENT_FILENAME, TEST_NON_ASCII_DOCUMENT_PATH,
TEST_NON_ASCII_COMPRESSED_DOCUMENT_PATH TEST_NON_ASCII_COMPRESSED_DOCUMENT_PATH
) )
from metadata.models import MetadataType from metadata.models import MetadataType
from ..literals import SOURCE_UNCOMPRESS_CHOICE_Y from ..literals import SOURCE_UNCOMPRESS_CHOICE_Y
from ..models import ( from ..models import POP3Email, WatchFolderSource, WebFormSource
EmailBaseModel, POP3Email, WatchFolderSource, WebFormSource from ..models.email_sources import EmailBaseModel
)
from .literals import ( from .literals import (
TEST_EMAIL_ATTACHMENT_AND_INLINE, TEST_EMAIL_BASE64_FILENAME, TEST_EMAIL_ATTACHMENT_AND_INLINE, TEST_EMAIL_BASE64_FILENAME,
TEST_EMAIL_BASE64_FILENAME_FROM, TEST_EMAIL_BASE64_FILENAME_SUBJECT, TEST_EMAIL_BASE64_FILENAME_FROM, TEST_EMAIL_BASE64_FILENAME_SUBJECT,
TEST_EMAIL_INLINE_IMAGE, TEST_EMAIL_NO_CONTENT_TYPE, TEST_EMAIL_INLINE_IMAGE, TEST_EMAIL_NO_CONTENT_TYPE,
TEST_EMAIL_NO_CONTENT_TYPE_STRING, TEST_EMAIL_ZERO_LENGTH_ATTACHMENT TEST_EMAIL_NO_CONTENT_TYPE_STRING, TEST_EMAIL_ZERO_LENGTH_ATTACHMENT,
TEST_WATCHFOLDER_SUBFOLDER
) )
@@ -244,35 +246,47 @@ class POP3SourceTestCase(BaseTestCase):
@override_settings(OCR_AUTO_OCR=False) @override_settings(OCR_AUTO_OCR=False)
class UploadDocumentTestCase(BaseTestCase): class WatchFolderTestCase(DocumentTestMixin, BaseTestCase):
""" auto_upload_document = False
Test creating documents
"""
def setUp(self):
super(UploadDocumentTestCase, self).setUp()
self.document_type = DocumentType.objects.create(
label=TEST_DOCUMENT_TYPE_LABEL
)
def tearDown(self): def _create_watchfolder(self):
self.document_type.delete() return WatchFolderSource.objects.create(
super(UploadDocumentTestCase, self).tearDown() document_type=self.document_type,
folder_path=self.temporary_directory,
def test_issue_gh_163(self): include_subdirectories=False,
"""
Non-ASCII chars in document name failing in upload via watch folder
gh-issue #163 https://github.com/mayan-edms/mayan-edms/issues/163
"""
temporary_directory = mkdtemp()
shutil.copy(TEST_NON_ASCII_DOCUMENT_PATH, temporary_directory)
watch_folder = WatchFolderSource.objects.create(
document_type=self.document_type, folder_path=temporary_directory,
uncompress=SOURCE_UNCOMPRESS_CHOICE_Y uncompress=SOURCE_UNCOMPRESS_CHOICE_Y
) )
watch_folder.check_source()
def setUp(self):
super(WatchFolderTestCase, self).setUp()
self.temporary_directory = mkdtemp()
def tearDown(self):
shutil.rmtree(self.temporary_directory)
super(WatchFolderTestCase, self).tearDown()
def test_subfolder_support_disabled(self):
watch_folder = self._create_watchfolder()
test_path = Path(self.temporary_directory)
test_subfolder = test_path.joinpath(TEST_WATCHFOLDER_SUBFOLDER)
test_subfolder.mkdir()
shutil.copy(TEST_NON_ASCII_DOCUMENT_PATH, force_text(test_subfolder))
watch_folder.check_source()
self.assertEqual(Document.objects.count(), 0)
def test_subfolder_support_enabled(self):
watch_folder = self._create_watchfolder()
watch_folder.include_subdirectories = True
watch_folder.save()
test_path = Path(self.temporary_directory)
test_subfolder = test_path.joinpath(TEST_WATCHFOLDER_SUBFOLDER)
test_subfolder.mkdir()
shutil.copy(TEST_NON_ASCII_DOCUMENT_PATH, force_text(test_subfolder))
watch_folder.check_source()
self.assertEqual(Document.objects.count(), 1) self.assertEqual(Document.objects.count(), 1)
document = Document.objects.first() document = Document.objects.first()
@@ -285,16 +299,18 @@ class UploadDocumentTestCase(BaseTestCase):
self.assertEqual(document.label, TEST_NON_ASCII_DOCUMENT_FILENAME) self.assertEqual(document.label, TEST_NON_ASCII_DOCUMENT_FILENAME)
self.assertEqual(document.page_count, 1) self.assertEqual(document.page_count, 1)
# Test Non-ASCII named documents inside Non-ASCII named compressed file def test_issue_gh_163(self):
"""
shutil.copy( Non-ASCII chars in document name failing in upload via watch folder
TEST_NON_ASCII_COMPRESSED_DOCUMENT_PATH, temporary_directory gh-issue #163 https://github.com/mayan-edms/mayan-edms/issues/163
) """
watch_folder = self._create_watchfolder()
shutil.copy(TEST_NON_ASCII_DOCUMENT_PATH, self.temporary_directory)
watch_folder.check_source() watch_folder.check_source()
document = Document.objects.all()[1] self.assertEqual(Document.objects.count(), 1)
self.assertEqual(Document.objects.count(), 2) document = Document.objects.first()
self.assertEqual(document.exists(), True) self.assertEqual(document.exists(), True)
self.assertEqual(document.size, 17436) self.assertEqual(document.size, 17436)
@@ -304,4 +320,24 @@ class UploadDocumentTestCase(BaseTestCase):
self.assertEqual(document.label, TEST_NON_ASCII_DOCUMENT_FILENAME) self.assertEqual(document.label, TEST_NON_ASCII_DOCUMENT_FILENAME)
self.assertEqual(document.page_count, 1) self.assertEqual(document.page_count, 1)
shutil.rmtree(temporary_directory) def test_issue_gh_163_expanded(self):
"""
Test Non-ASCII named documents inside Non-ASCII named compressed file
"""
watch_folder = self._create_watchfolder()
shutil.copy(
TEST_NON_ASCII_COMPRESSED_DOCUMENT_PATH, self.temporary_directory
)
watch_folder.check_source()
self.assertEqual(Document.objects.count(), 1)
document = Document.objects.first()
self.assertEqual(document.exists(), True)
self.assertEqual(document.size, 17436)
self.assertEqual(document.file_mimetype, 'image/png')
self.assertEqual(document.file_mime_encoding, 'binary')
self.assertEqual(document.label, TEST_NON_ASCII_DOCUMENT_FILENAME)
self.assertEqual(document.page_count, 1)