Watch folders: Add support for subfolders

Add support for subfolder scanning to watch folders. Closes
GitLab issue #498.

This commit adds a new field to watch folders called
"include_subdirectories".

The directory walk was also updated to use pathlib2.

Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
Roberto Rosario
2018-11-28 04:00:15 -04:00
parent d973a20c30
commit f9a0fb2e79
7 changed files with 124 additions and 47 deletions

View File

@@ -90,6 +90,8 @@
other ensures passwords have a minimum amount of numbers.
- Add support to the mailing profiles for specifying a from
address. Closes GitLab issue #522.
- Add support for subfolder scanning to watch folders. Closes
GitLab issue #498.
3.1.9 (2018-11-01)
==================

View File

@@ -77,6 +77,7 @@ Backward incompatible changes
Bugs fixed or issues closed
---------------------------
* :gitlab-issue:`498` Can't scan subdirectories
* :gitlab-issue:`522` Office 365 SMTP
* :gitlab-issue:`539` Setting for default email sender is missing

View File

@@ -137,6 +137,6 @@ class WatchFolderSetupForm(forms.ModelForm):
class Meta:
fields = (
'label', 'enabled', 'interval', 'document_type', 'uncompress',
'folder_path'
'folder_path', 'include_subdirectories'
)
model = WatchFolderSource

View File

@@ -0,0 +1,26 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.16 on 2018-11-28 07:52
from __future__ import unicode_literals
from django.db import migrations, models
# Schema migration for the `sources` app: adds the `include_subdirectories`
# flag to `WatchFolderSource` and updates the help text of its `folder_path`
# field.
class Migration(migrations.Migration):
# Must be applied after the previous `sources` migration.
dependencies = [
('sources', '0019_auto_20180803_0440'),
]
operations = [
# New boolean column. `preserve_default=False` marks `default=False` as a
# one-off value used to populate existing rows while the migration runs;
# it is not kept as a permanent default on the field.
migrations.AddField(
model_name='watchfoldersource',
name='include_subdirectories',
field=models.BooleanField(default=False, help_text='If checked, not only will the folder path be scanned for files but also its subdirectories.', verbose_name='Include subdirectories?'),
preserve_default=False,
),
# Records the reworded help text of `folder_path`.
migrations.AlterField(
model_name='watchfoldersource',
name='folder_path',
field=models.CharField(help_text='Server side filesystem path to scan for files.', max_length=255, verbose_name='Folder path'),
),
]

View File

@@ -7,10 +7,10 @@ import os
import poplib
import subprocess
from pathlib2 import Path
import yaml
from django.core.exceptions import ValidationError
from django.core.files import File
from django.core.files.base import ContentFile
from django.db import models, transaction
from django.utils.encoding import (
@@ -755,8 +755,15 @@ class WatchFolderSource(IntervalBaseModel):
source_type = SOURCE_CHOICE_WATCH
folder_path = models.CharField(
help_text=_('Server side filesystem path.'), max_length=255,
verbose_name=_('Folder path')
help_text=_('Server side filesystem path to scan for files.'),
max_length=255, verbose_name=_('Folder path')
)
include_subdirectories = models.BooleanField(
help_text=_(
'If checked, not only will the folder path be scanned for files '
'but also its subdirectories.'
),
verbose_name=_('Include subdirectories?')
)
objects = models.Manager()
@@ -766,18 +773,22 @@ class WatchFolderSource(IntervalBaseModel):
verbose_name_plural = _('Watch folders')
def check_source(self):
# Force self.folder_path to unicode to avoid os.listdir returning
# str for non-latin filenames, gh-issue #163
for file_name in os.listdir(force_text(self.folder_path)):
full_path = os.path.join(self.folder_path, file_name)
if os.path.isfile(full_path):
with File(file=open(full_path, mode='rb')) as file_object:
path = Path(self.folder_path)
if self.include_subdirectories:
iterator = path.rglob('*')
else:
iterator = path.glob('*')
for entry in iterator:
if entry.is_file() or entry.is_symlink():
with entry.open(mode='rb') as file_object:
self.handle_upload(
file_object=file_object,
expand=(self.uncompress == SOURCE_UNCOMPRESS_CHOICE_Y),
label=file_name
label=entry.name
)
os.unlink(full_path)
entry.unlink()
class SourceLog(models.Model):

View File

@@ -112,3 +112,4 @@ QCDLAIEsAwSyDBDIMkAgywCBLAMEsgwQyDJAIMsAgSwDBLIMEMgyQCDLAIEsAwSyDBDIMkAg6wK+
TEST_SOURCE_LABEL = 'test source'
TEST_SOURCE_UNCOMPRESS_N = 'n'
TEST_STAGING_PREVIEW_WIDTH = 640
TEST_WATCHFOLDER_SUBFOLDER = 'test_subfolder'

View File

@@ -3,14 +3,16 @@ from __future__ import unicode_literals
import shutil
import mock
from pathlib2 import Path
from django.test import override_settings
from django.utils.encoding import force_text
from common.utils import mkdtemp
from common.tests import BaseTestCase
from documents.models import Document, DocumentType
from documents.tests import (
TEST_COMPRESSED_DOCUMENT_PATH, TEST_DOCUMENT_TYPE_LABEL,
DocumentTestMixin, TEST_COMPRESSED_DOCUMENT_PATH, TEST_DOCUMENT_TYPE_LABEL,
TEST_NON_ASCII_DOCUMENT_FILENAME, TEST_NON_ASCII_DOCUMENT_PATH,
TEST_NON_ASCII_COMPRESSED_DOCUMENT_PATH
)
@@ -25,7 +27,7 @@ from .literals import (
TEST_EMAIL_ATTACHMENT_AND_INLINE, TEST_EMAIL_BASE64_FILENAME,
TEST_EMAIL_BASE64_FILENAME_FROM, TEST_EMAIL_BASE64_FILENAME_SUBJECT,
TEST_EMAIL_INLINE_IMAGE, TEST_EMAIL_NO_CONTENT_TYPE,
TEST_EMAIL_NO_CONTENT_TYPE_STRING
TEST_EMAIL_NO_CONTENT_TYPE_STRING, TEST_WATCHFOLDER_SUBFOLDER
)
@@ -234,35 +236,47 @@ class POP3SourceTestCase(BaseTestCase):
@override_settings(OCR_AUTO_OCR=False)
class UploadDocumentTestCase(BaseTestCase):
"""
Test creating documents
"""
def setUp(self):
super(UploadDocumentTestCase, self).setUp()
self.document_type = DocumentType.objects.create(
label=TEST_DOCUMENT_TYPE_LABEL
)
class WatchFolderTestCase(DocumentTestMixin, BaseTestCase):
auto_upload_document = False
def tearDown(self):
self.document_type.delete()
super(UploadDocumentTestCase, self).tearDown()
def test_issue_gh_163(self):
"""
Non-ASCII chars in document name failing in upload via watch folder
gh-issue #163 https://github.com/mayan-edms/mayan-edms/issues/163
"""
temporary_directory = mkdtemp()
shutil.copy(TEST_NON_ASCII_DOCUMENT_PATH, temporary_directory)
watch_folder = WatchFolderSource.objects.create(
document_type=self.document_type, folder_path=temporary_directory,
def _create_watchfolder(self):
    """
    Create and return a watch folder source that points at the test's
    temporary directory, with subdirectory scanning turned off and the
    uncompress option set to SOURCE_UNCOMPRESS_CHOICE_Y.
    """
    watch_folder = WatchFolderSource.objects.create(
        document_type=self.document_type,
        folder_path=self.temporary_directory,
        include_subdirectories=False,
        uncompress=SOURCE_UNCOMPRESS_CHOICE_Y
    )
    return watch_folder
watch_folder.check_source()
def setUp(self):
    """
    Run the parent set up and then create the temporary directory that
    the tests use as the watched folder.
    """
    super(WatchFolderTestCase, self).setUp()

    temporary_directory = mkdtemp()
    self.temporary_directory = temporary_directory
def tearDown(self):
    """
    Remove the temporary watched folder, including anything still inside
    it, before running the parent tear down.
    """
    shutil.rmtree(path=self.temporary_directory)
    super(WatchFolderTestCase, self).tearDown()
def test_subfolder_support_disabled(self):
    """
    A file that only exists inside a subfolder of the watched folder must
    not be uploaded when subdirectory scanning is disabled.
    """
    source = self._create_watchfolder()

    # Place the sample document one level below the watched folder.
    subfolder = Path(self.temporary_directory) / TEST_WATCHFOLDER_SUBFOLDER
    subfolder.mkdir()
    shutil.copy(TEST_NON_ASCII_DOCUMENT_PATH, force_text(subfolder))

    source.check_source()

    self.assertEqual(Document.objects.count(), 0)
def test_subfolder_support_enabled(self):
watch_folder = self._create_watchfolder()
watch_folder.include_subdirectories = True
watch_folder.save()
test_path = Path(self.temporary_directory)
test_subfolder = test_path.joinpath(TEST_WATCHFOLDER_SUBFOLDER)
test_subfolder.mkdir()
shutil.copy(TEST_NON_ASCII_DOCUMENT_PATH, force_text(test_subfolder))
watch_folder.check_source()
self.assertEqual(Document.objects.count(), 1)
document = Document.objects.first()
@@ -275,16 +289,18 @@ class UploadDocumentTestCase(BaseTestCase):
self.assertEqual(document.label, TEST_NON_ASCII_DOCUMENT_FILENAME)
self.assertEqual(document.page_count, 1)
# Test Non-ASCII named documents inside Non-ASCII named compressed file
shutil.copy(
TEST_NON_ASCII_COMPRESSED_DOCUMENT_PATH, temporary_directory
)
def test_issue_gh_163(self):
"""
Non-ASCII chars in document name failing in upload via watch folder
gh-issue #163 https://github.com/mayan-edms/mayan-edms/issues/163
"""
watch_folder = self._create_watchfolder()
shutil.copy(TEST_NON_ASCII_DOCUMENT_PATH, self.temporary_directory)
watch_folder.check_source()
document = Document.objects.all()[1]
self.assertEqual(Document.objects.count(), 1)
self.assertEqual(Document.objects.count(), 2)
document = Document.objects.first()
self.assertEqual(document.exists(), True)
self.assertEqual(document.size, 17436)
@@ -294,4 +310,24 @@ class UploadDocumentTestCase(BaseTestCase):
self.assertEqual(document.label, TEST_NON_ASCII_DOCUMENT_FILENAME)
self.assertEqual(document.page_count, 1)
shutil.rmtree(temporary_directory)
def test_issue_gh_163_expanded(self):
    """
    Test Non-ASCII named documents inside Non-ASCII named compressed file

    The compressed file is dropped in the watched folder and the single
    document uploaded from it is checked.
    """
    source = self._create_watchfolder()

    shutil.copy(
        TEST_NON_ASCII_COMPRESSED_DOCUMENT_PATH, self.temporary_directory
    )
    source.check_source()

    # Exactly one document is expected after the scan.
    self.assertEqual(Document.objects.count(), 1)

    uploaded_document = Document.objects.first()
    document_exists = uploaded_document.exists()
    self.assertEqual(document_exists, True)
    self.assertEqual(uploaded_document.size, 17436)
    self.assertEqual(uploaded_document.file_mimetype, 'image/png')
    self.assertEqual(uploaded_document.file_mime_encoding, 'binary')
    self.assertEqual(
        uploaded_document.label, TEST_NON_ASCII_DOCUMENT_FILENAME
    )
    self.assertEqual(uploaded_document.page_count, 1)