From f9a0fb2e79671935f6948548e73c5fc458dd8ac4 Mon Sep 17 00:00:00 2001 From: Roberto Rosario Date: Wed, 28 Nov 2018 04:00:15 -0400 Subject: [PATCH] Watch folders: Add support for subfolders Add support for subfolder scanning to watch folders. Closes GitLab issue #498. This commit adds a new field to watch folders called "include_subdirectories". The directory walk was also updated to use pathlib2. Signed-off-by: Roberto Rosario --- HISTORY.rst | 2 + docs/releases/3.2.rst | 1 + mayan/apps/sources/forms.py | 2 +- .../migrations/0020_auto_20181128_0752.py | 26 +++++ mayan/apps/sources/models.py | 33 ++++-- mayan/apps/sources/tests/literals.py | 1 + mayan/apps/sources/tests/test_models.py | 106 ++++++++++++------ 7 files changed, 124 insertions(+), 47 deletions(-) create mode 100644 mayan/apps/sources/migrations/0020_auto_20181128_0752.py diff --git a/HISTORY.rst b/HISTORY.rst index d671d6bcfc..6b8bd67e52 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -90,6 +90,8 @@ other ensures passwords have a minimum amount of numbers. - Add support to the mailing profiles for specifying a from address. Closes GitLab issue #522. +- Add support for subfolder scanning to watchfolders. Closes + GitLab issue #498. 3.1.9 (2018-11-01) ================== diff --git a/docs/releases/3.2.rst b/docs/releases/3.2.rst index 13e2cc00b9..578bc8f1e5 100644 --- a/docs/releases/3.2.rst +++ b/docs/releases/3.2.rst @@ -77,6 +77,7 @@ Backward incompatible changes Bugs fixed or issues closed --------------------------- +* :gitlab-issue:`498` Can't scan subdirectories * :gitlab-issue:`522` Office 365 SMTP * :gitlab-issue:`539` Setting for default email sender is missing diff --git a/mayan/apps/sources/forms.py b/mayan/apps/sources/forms.py index 53fa556139..634f088cf2 100644 --- a/mayan/apps/sources/forms.py +++ b/mayan/apps/sources/forms.py @@ -137,6 +137,6 @@ class WatchFolderSetupForm(forms.ModelForm): class Meta: fields = ( 'label', 'enabled', 'interval', 'document_type', 'uncompress', - 'folder_path' + 'folder_path', 'include_subdirectories' ) model = WatchFolderSource diff --git a/mayan/apps/sources/migrations/0020_auto_20181128_0752.py b/mayan/apps/sources/migrations/0020_auto_20181128_0752.py new file mode 100644 index 0000000000..49ecd7bbbf --- /dev/null +++ b/mayan/apps/sources/migrations/0020_auto_20181128_0752.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.16 on 2018-11-28 07:52 +from __future__ import unicode_literals + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('sources', '0019_auto_20180803_0440'), + ] + + operations = [ + migrations.AddField( + model_name='watchfoldersource', + name='include_subdirectories', + field=models.BooleanField(default=False, help_text='If checked, not only will the folder path be scanned for files but also its subdirectories.', verbose_name='Include subdirectories?'), + preserve_default=False, + ), + migrations.AlterField( + model_name='watchfoldersource', + name='folder_path', + field=models.CharField(help_text='Server side filesystem path to scan for files.', max_length=255, verbose_name='Folder path'), + ), + ] diff --git a/mayan/apps/sources/models.py b/mayan/apps/sources/models.py index eb6464cd90..72bb2e9268 100644 --- a/mayan/apps/sources/models.py +++ b/mayan/apps/sources/models.py @@ -7,10 +7,10 @@ import os import poplib import subprocess +from pathlib2 import Path import yaml from django.core.exceptions import ValidationError -from django.core.files import File from django.core.files.base import ContentFile from django.db import models, transaction from django.utils.encoding import ( @@ -755,8 +755,15 @@ class WatchFolderSource(IntervalBaseModel): source_type = SOURCE_CHOICE_WATCH folder_path = models.CharField( - help_text=_('Server side filesystem path.'), max_length=255, - verbose_name=_('Folder path') + help_text=_('Server side filesystem path to scan for files.'), + max_length=255, verbose_name=_('Folder path') + ) + include_subdirectories = models.BooleanField( + help_text=_( + 'If checked, not only will the folder path be scanned for files ' + 'but also its subdirectories.' + ), + verbose_name=_('Include subdirectories?') ) objects = models.Manager() @@ -766,18 +773,22 @@ class WatchFolderSource(IntervalBaseModel): verbose_name_plural = _('Watch folders') def check_source(self): - # Force self.folder_path to unicode to avoid os.listdir returning - # str for non-latin filenames, gh-issue #163 - for file_name in os.listdir(force_text(self.folder_path)): - full_path = os.path.join(self.folder_path, file_name) - if os.path.isfile(full_path): - with File(file=open(full_path, mode='rb')) as file_object: + path = Path(self.folder_path) + + if self.include_subdirectories: + iterator = path.rglob('*') + else: + iterator = path.glob('*') + + for entry in iterator: + if entry.is_file() or entry.is_symlink(): + with entry.open(mode='rb') as file_object: self.handle_upload( file_object=file_object, expand=(self.uncompress == SOURCE_UNCOMPRESS_CHOICE_Y), - label=file_name + label=entry.name ) - os.unlink(full_path) + entry.unlink() class SourceLog(models.Model): diff --git a/mayan/apps/sources/tests/literals.py b/mayan/apps/sources/tests/literals.py index 7d020d6ede..4efdfaeb6a 100644 --- a/mayan/apps/sources/tests/literals.py +++ b/mayan/apps/sources/tests/literals.py @@ -112,3 +112,4 @@ QCDLAIEsAwSyDBDIMkAgywCBLAMEsgwQyDJAIMsAgSwDBLIMEMgyQCDLAIEsAwSyDBDIMkAg6wK+ TEST_SOURCE_LABEL = 'test source' TEST_SOURCE_UNCOMPRESS_N = 'n' TEST_STAGING_PREVIEW_WIDTH = 640 +TEST_WATCHFOLDER_SUBFOLDER = 'test_subfolder' diff --git a/mayan/apps/sources/tests/test_models.py b/mayan/apps/sources/tests/test_models.py index 351abc5c2e..fb619ee614 100644 --- a/mayan/apps/sources/tests/test_models.py +++ b/mayan/apps/sources/tests/test_models.py @@ -3,14 +3,16 @@ from __future__ import unicode_literals import shutil import mock +from pathlib2 import Path from django.test import override_settings +from django.utils.encoding import force_text from common.utils import mkdtemp from common.tests import BaseTestCase from documents.models import Document, DocumentType from documents.tests import ( - TEST_COMPRESSED_DOCUMENT_PATH, TEST_DOCUMENT_TYPE_LABEL, + DocumentTestMixin, TEST_COMPRESSED_DOCUMENT_PATH, TEST_DOCUMENT_TYPE_LABEL, TEST_NON_ASCII_DOCUMENT_FILENAME, TEST_NON_ASCII_DOCUMENT_PATH, TEST_NON_ASCII_COMPRESSED_DOCUMENT_PATH ) @@ -25,7 +27,7 @@ from .literals import ( TEST_EMAIL_ATTACHMENT_AND_INLINE, TEST_EMAIL_BASE64_FILENAME, TEST_EMAIL_BASE64_FILENAME_FROM, TEST_EMAIL_BASE64_FILENAME_SUBJECT, TEST_EMAIL_INLINE_IMAGE, TEST_EMAIL_NO_CONTENT_TYPE, - TEST_EMAIL_NO_CONTENT_TYPE_STRING + TEST_EMAIL_NO_CONTENT_TYPE_STRING, TEST_WATCHFOLDER_SUBFOLDER ) @@ -234,35 +236,47 @@ class POP3SourceTestCase(BaseTestCase): @override_settings(OCR_AUTO_OCR=False) -class UploadDocumentTestCase(BaseTestCase): - """ - Test creating documents - """ - def setUp(self): - super(UploadDocumentTestCase, self).setUp() - self.document_type = DocumentType.objects.create( - label=TEST_DOCUMENT_TYPE_LABEL - ) +class WatchFolderTestCase(DocumentTestMixin, BaseTestCase): + auto_upload_document = False - def tearDown(self): - self.document_type.delete() - super(UploadDocumentTestCase, self).tearDown() - - def test_issue_gh_163(self): - """ - Non-ASCII chars in document name failing in upload via watch folder - gh-issue #163 https://github.com/mayan-edms/mayan-edms/issues/163 - """ - - temporary_directory = mkdtemp() - shutil.copy(TEST_NON_ASCII_DOCUMENT_PATH, temporary_directory) - - watch_folder = WatchFolderSource.objects.create( - document_type=self.document_type, folder_path=temporary_directory, + def _create_watchfolder(self): + return WatchFolderSource.objects.create( + document_type=self.document_type, + folder_path=self.temporary_directory, + include_subdirectories=False, uncompress=SOURCE_UNCOMPRESS_CHOICE_Y ) - watch_folder.check_source() + def setUp(self): + super(WatchFolderTestCase, self).setUp() + self.temporary_directory = mkdtemp() + + def tearDown(self): + shutil.rmtree(self.temporary_directory) + super(WatchFolderTestCase, self).tearDown() + + def test_subfolder_support_disabled(self): + watch_folder = self._create_watchfolder() + + test_path = Path(self.temporary_directory) + test_subfolder = test_path.joinpath(TEST_WATCHFOLDER_SUBFOLDER) + test_subfolder.mkdir() + + shutil.copy(TEST_NON_ASCII_DOCUMENT_PATH, force_text(test_subfolder)) + watch_folder.check_source() + self.assertEqual(Document.objects.count(), 0) + + def test_subfolder_support_enabled(self): + watch_folder = self._create_watchfolder() + watch_folder.include_subdirectories = True + watch_folder.save() + + test_path = Path(self.temporary_directory) + test_subfolder = test_path.joinpath(TEST_WATCHFOLDER_SUBFOLDER) + test_subfolder.mkdir() + + shutil.copy(TEST_NON_ASCII_DOCUMENT_PATH, force_text(test_subfolder)) + watch_folder.check_source() self.assertEqual(Document.objects.count(), 1) document = Document.objects.first() @@ -275,16 +289,18 @@ class UploadDocumentTestCase(BaseTestCase): self.assertEqual(document.label, TEST_NON_ASCII_DOCUMENT_FILENAME) self.assertEqual(document.page_count, 1) - # Test Non-ASCII named documents inside Non-ASCII named compressed file - - shutil.copy( - TEST_NON_ASCII_COMPRESSED_DOCUMENT_PATH, temporary_directory - ) + def test_issue_gh_163(self): + """ + Non-ASCII chars in document name failing in upload via watch folder + gh-issue #163 https://github.com/mayan-edms/mayan-edms/issues/163 + """ + watch_folder = self._create_watchfolder() + shutil.copy(TEST_NON_ASCII_DOCUMENT_PATH, self.temporary_directory) watch_folder.check_source() - document = Document.objects.all()[1] + self.assertEqual(Document.objects.count(), 1) - self.assertEqual(Document.objects.count(), 2) + document = Document.objects.first() self.assertEqual(document.exists(), True) self.assertEqual(document.size, 17436) @@ -294,4 +310,24 @@ class UploadDocumentTestCase(BaseTestCase): self.assertEqual(document.label, TEST_NON_ASCII_DOCUMENT_FILENAME) self.assertEqual(document.page_count, 1) - shutil.rmtree(temporary_directory) + def test_issue_gh_163_expanded(self): + """ + Test Non-ASCII named documents inside Non-ASCII named compressed file + """ + watch_folder = self._create_watchfolder() + + shutil.copy( + TEST_NON_ASCII_COMPRESSED_DOCUMENT_PATH, self.temporary_directory + ) + watch_folder.check_source() + self.assertEqual(Document.objects.count(), 1) + + document = Document.objects.first() + + self.assertEqual(document.exists(), True) + self.assertEqual(document.size, 17436) + + self.assertEqual(document.file_mimetype, 'image/png') + self.assertEqual(document.file_mime_encoding, 'binary') + self.assertEqual(document.label, TEST_NON_ASCII_DOCUMENT_FILENAME) + self.assertEqual(document.page_count, 1)