Split the code of the mountindex command into a separate class to be able to add tests. Fix the way the children of IndexInstanceNode are accessed (fixes GitLab issue #518; thanks to @TheOneValen for the report). Remove newlines from the index name levels before using them as FUSE directory names. Fix duplicated FUSE directory removal.
Signed-off-by: Roberto Rosario <roberto.rosario.gonzalez@gmail.com>
This commit is contained in:
10
HISTORY.rst
10
HISTORY.rst
@@ -1,3 +1,13 @@
|
||||
3.1.5 (2018-10-XX)
|
||||
==================
|
||||
* Consolidate some document indexing test code into a new mixin.
|
||||
* Split the code of the mountindex command to be able to add tests.
|
||||
* Fix the way the children of IndexInstanceNode are accessed. Fixes
|
||||
GitLab issue #518. Thanks to TheOneValen @TheOneValen for the report.
|
||||
* Remove newlines from the index name levels before using them as FUSE
|
||||
directories.
|
||||
* Fix duplicated FUSE directory removal.
|
||||
|
||||
3.1.4 (2018-10-4)
|
||||
=================
|
||||
* Fix the link to the documentation. Closes GitLab issue #516.
|
||||
|
||||
@@ -5,5 +5,6 @@ from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
|
||||
class MirroringApp(apps.AppConfig):
|
||||
has_tests = True
|
||||
name = 'mirroring'
|
||||
verbose_name = _('Mirroring')
|
||||
|
||||
220
mayan/apps/mirroring/classes.py
Normal file
220
mayan/apps/mirroring/classes.py
Normal file
@@ -0,0 +1,220 @@
|
||||
from __future__ import print_function, unicode_literals
|
||||
|
||||
import datetime
|
||||
from errno import ENOENT
|
||||
import logging
|
||||
from stat import S_IFDIR, S_IFREG
|
||||
from time import time
|
||||
|
||||
from fuse import FuseOSError, Operations
|
||||
|
||||
from django.core.cache import caches
|
||||
from django.core.exceptions import MultipleObjectsReturned
|
||||
from django.db.models import Count, F, Func, Value
|
||||
|
||||
from document_indexing.models import Index, IndexInstanceNode
|
||||
from documents.models import Document
|
||||
|
||||
from .literals import (
|
||||
MAX_FILE_DESCRIPTOR, MIN_FILE_DESCRIPTOR, FILE_MODE, DIRECTORY_MODE
|
||||
)
|
||||
from .settings import (
|
||||
setting_document_lookup_cache_timeout, setting_node_lookup_cache_timeout
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class IndexFilesystem(Operations):
    """
    Read-only FUSE filesystem that exposes a document index as a directory
    tree: index instance nodes become directories and, where the index
    template node links documents, the documents become files.
    """
    @staticmethod
    def _clean_queryset(queryset):
        # Remove newline carriage return to make multiline indexes
        # valid directory names
        return queryset.annotate(
            clean_value=Func(
                F('value'), Value('\r\n'), Value(' '), function='replace'
            )
        )

    def _get_next_file_descriptor(self):
        # Return the next free file descriptor number, wrapping around to
        # MIN_FILE_DESCRIPTOR once MAX_FILE_DESCRIPTOR is exceeded. A slot
        # is free when it is absent or holds a falsy (released) entry.
        while(True):
            self.file_descriptor_count += 1
            if self.file_descriptor_count > MAX_FILE_DESCRIPTOR:
                self.file_descriptor_count = MIN_FILE_DESCRIPTOR

            try:
                if not self.file_descriptors[self.file_descriptor_count]:
                    return self.file_descriptor_count
            except KeyError:
                return self.file_descriptor_count

    def _path_to_node(self, path, access_only=False, directory_only=True):
        """
        Resolve a filesystem path to an IndexInstanceNode (directory) or a
        Document (file).

        path -- absolute path as supplied by FUSE ('/level/.../name').
        access_only -- when True, return True as soon as a cached lookup
            confirms the path exists, skipping the object fetch.
        directory_only -- when True, an unresolved path part returns None
            instead of being retried as a document label.

        Returns an IndexInstanceNode, a Document, True (access_only cache
        hit) or None when the path does not resolve.
        """
        logger.debug('path: %s', path)
        logger.debug('directory_only: %s', directory_only)

        parts = path.split('/')

        logger.debug('parts: %s', parts)

        node = self.index.instance_root

        if len(parts) > 1 and parts[1] != '':
            # Try the cache first; entries hold either a node or a
            # document primary key.
            obj = self.cache.get(path)

            if obj:
                node_pk = obj.get('node_pk')
                if node_pk:
                    if access_only:
                        return True
                    else:
                        return IndexInstanceNode.objects.get(pk=node_pk)

                document_pk = obj.get('document_pk')
                if document_pk:
                    if access_only:
                        return True
                    else:
                        return Document.objects.get(pk=document_pk)

            # Walk each path part down the index tree, matching it against
            # the newline-cleaned node values.
            for count, part in enumerate(parts[1:]):
                try:
                    node = IndexFilesystem._clean_queryset(node.get_children()).get(clean_value=part)
                except IndexInstanceNode.DoesNotExist:
                    logger.debug('%s does not exists', part)

                    if directory_only:
                        return None
                    else:
                        # The final part may be a document label instead of
                        # a node value.
                        try:
                            if node.index_template_node.link_documents:
                                result = node.documents.get(label=part)
                                logger.debug(
                                    'path %s is a valid file path', path
                                )
                                self.cache.set(
                                    path, {'document_pk': result.pk},
                                    setting_document_lookup_cache_timeout.value
                                )

                                return result
                            else:
                                return None
                        except Document.DoesNotExist:
                            logger.debug(
                                'path %s is a file, but is not found', path
                            )
                            return None
                        except MultipleObjectsReturned:
                            # Ambiguous document label; treat as missing.
                            return None
                except MultipleObjectsReturned:
                    # Ambiguous node value; treat as missing.
                    return None

            self.cache.set(
                path, {'node_pk': node.pk},
                setting_node_lookup_cache_timeout.value
            )

        logger.debug('node: %s', node)
        logger.debug('node is root: %s', node.is_root_node())

        return node

    def __init__(self, index_slug):
        # Resolve the index by slug; this class is driven by a management
        # command, so an unknown slug aborts the process.
        self.file_descriptor_count = MIN_FILE_DESCRIPTOR
        self.file_descriptors = {}
        self.cache = caches['default']

        try:
            self.index = Index.objects.get(slug=index_slug)
        except Index.DoesNotExist:
            print('Unknown index slug: {}.'.format(index_slug))
            exit(1)

    def access(self, path, fh=None):
        # FUSE access(2) callback; raise ENOENT when the path resolves to
        # nothing.
        result = self._path_to_node(
            path=path, access_only=True, directory_only=False
        )

        if not result:
            raise FuseOSError(ENOENT)

    def getattr(self, path, fh=None):
        # FUSE getattr(2) callback: directory attributes for index nodes,
        # regular-file attributes for documents. Document timestamps are
        # converted to Unix epoch seconds.
        logger.debug('path: %s, fh: %s', path, fh)

        now = time()
        result = self._path_to_node(path=path, directory_only=False)

        if not result:
            raise FuseOSError(ENOENT)

        if isinstance(result, IndexInstanceNode):
            return {
                'st_mode': (S_IFDIR | DIRECTORY_MODE), 'st_ctime': now,
                'st_mtime': now, 'st_atime': now, 'st_nlink': 2
            }
        else:
            return {
                'st_mode': (S_IFREG | FILE_MODE),
                'st_ctime': (
                    result.date_added.replace(tzinfo=None) - result.date_added.utcoffset() - datetime.datetime(1970, 1, 1)
                ).total_seconds(),
                'st_mtime': (
                    result.latest_version.timestamp.replace(tzinfo=None) - result.latest_version.timestamp.utcoffset() - datetime.datetime(1970, 1, 1)
                ).total_seconds(),
                'st_atime': now,
                'st_size': result.size
            }

    def open(self, path, flags):
        # FUSE open(2) callback; only paths resolving to documents can be
        # opened.
        result = self._path_to_node(path=path, directory_only=False)

        if isinstance(result, Document):
            next_file_descriptor = self._get_next_file_descriptor()
            self.file_descriptors[next_file_descriptor] = result.open()
            return next_file_descriptor
        else:
            raise FuseOSError(ENOENT)

    def read(self, path, size, offset, fh):
        # FUSE read(2) callback; delegate to the document file object
        # stored for this descriptor.
        self.file_descriptors[fh].seek(offset)
        return self.file_descriptors[fh].read(size)

    def readdir(self, path, fh):
        # FUSE readdir(2) callback. Yields '.', '..', the newline-cleaned
        # child node values (slash-free only) and, when the template node
        # links documents, the document labels.
        logger.debug('path: %s', path)

        node = self._path_to_node(path=path, directory_only=True)

        if not node:
            raise FuseOSError(ENOENT)

        yield '.'
        yield '..'

        # Index instance nodes to directories
        queryset = IndexFilesystem._clean_queryset(node.get_children()).exclude(
            clean_value__contains='/'
        ).values('clean_value')

        # Find nodes with the same resulting value and remove them
        for duplicate in queryset.order_by().annotate(count_id=Count('id')).filter(count_id__gt=1):
            queryset = queryset.exclude(clean_value=duplicate['clean_value'])

        for value in queryset.values_list('clean_value', flat=True):
            yield value

        # Documents
        if node.index_template_node.link_documents:
            queryset = node.documents.values('label').exclude(
                label__contains='/'
            )

            # Find duplicated document and remove them
            for duplicate in queryset.order_by().annotate(count_id=Count('id')).filter(count_id__gt=1):
                queryset = queryset.exclude(label=duplicate['label'])

            for document_label in queryset.values_list('label', flat=True):
                yield document_label

    def release(self, path, fh):
        # FUSE release(2) callback; drop the file object so the descriptor
        # slot can be reused.
        self.file_descriptors[fh] = None
        del(self.file_descriptors[fh])
|
||||
@@ -1,215 +1,17 @@
|
||||
from __future__ import print_function, unicode_literals
|
||||
|
||||
import datetime
|
||||
from errno import ENOENT
|
||||
import logging
|
||||
from stat import S_IFDIR, S_IFREG
|
||||
from time import time
|
||||
|
||||
from fuse import FUSE, FuseOSError, Operations
|
||||
from fuse import FUSE
|
||||
|
||||
from django.core import management
|
||||
from django.core.cache import caches
|
||||
from django.core.exceptions import MultipleObjectsReturned
|
||||
from django.core.management.base import CommandError
|
||||
from django.db.models import Count
|
||||
|
||||
from document_indexing.models import Index, IndexInstanceNode
|
||||
from documents.models import Document
|
||||
|
||||
from ...literals import (
|
||||
MAX_FILE_DESCRIPTOR, MIN_FILE_DESCRIPTOR, FILE_MODE, DIRECTORY_MODE
|
||||
)
|
||||
from ...settings import (
|
||||
setting_document_lookup_cache_timeout, setting_node_lookup_cache_timeout
|
||||
)
|
||||
from ...classes import IndexFilesystem
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class IndexFS(Operations):
    """
    FUSE filesystem that exposes a document index as a directory tree:
    index instance nodes become directories and, where the index template
    node links documents, the documents become files.
    """
    def _get_next_file_descriptor(self):
        # Return the next free file descriptor number, wrapping around to
        # MIN_FILE_DESCRIPTOR once MAX_FILE_DESCRIPTOR is exceeded. A slot
        # is free when it is absent or holds a falsy (released) entry.
        while(True):
            self.file_descriptor_count += 1
            if self.file_descriptor_count > MAX_FILE_DESCRIPTOR:
                self.file_descriptor_count = MIN_FILE_DESCRIPTOR

            try:
                if not self.file_descriptors[self.file_descriptor_count]:
                    return self.file_descriptor_count
            except KeyError:
                return self.file_descriptor_count

    def _path_to_node(self, path, access_only=False, directory_only=True):
        """
        Resolve a filesystem path to an IndexInstanceNode (directory) or a
        Document (file). Returns True when access_only is set and the
        cached lookup confirms the path, or None when nothing matches.
        """
        logger.debug('path: %s', path)
        logger.debug('directory_only: %s', directory_only)

        parts = path.split('/')

        logger.debug('parts: %s', parts)

        node = self.index.instance_root

        if len(parts) > 1 and parts[1] != '':
            # Try the cache first; entries hold either a node or a
            # document primary key.
            obj = self.cache.get(path)

            if obj:
                node_pk = obj.get('node_pk')
                if node_pk:
                    if access_only:
                        return True
                    else:
                        return IndexInstanceNode.objects.get(pk=node_pk)

                document_pk = obj.get('document_pk')
                if document_pk:
                    if access_only:
                        return True
                    else:
                        return Document.objects.get(pk=document_pk)

            for count, part in enumerate(parts[1:]):
                try:
                    # Access the child nodes through get_children() instead
                    # of the 'children' attribute (fix for GitLab issue
                    # #518).
                    node = node.get_children().get(value=part)
                except IndexInstanceNode.DoesNotExist:
                    logger.debug('%s does not exists', part)

                    if directory_only:
                        return None
                    else:
                        # The final part may be a document label instead of
                        # a node value.
                        try:
                            if node.index_template_node.link_documents:
                                result = node.documents.get(label=part)
                                logger.debug(
                                    'path %s is a valid file path', path
                                )
                                self.cache.set(
                                    path, {'document_pk': result.pk},
                                    setting_document_lookup_cache_timeout.value
                                )

                                return result
                            else:
                                return None
                        except Document.DoesNotExist:
                            logger.debug(
                                'path %s is a file, but is not found', path
                            )
                            return None
                        except MultipleObjectsReturned:
                            # Ambiguous document label; treat as missing.
                            return None
                except MultipleObjectsReturned:
                    # Ambiguous node value; treat as missing.
                    return None

            self.cache.set(
                path, {'node_pk': node.pk},
                setting_node_lookup_cache_timeout.value
            )

        logger.debug('node: %s', node)
        logger.debug('node is root: %s', node.is_root_node())

        return node

    def __init__(self, index_slug):
        # Resolve the index by slug; this class is driven by a management
        # command, so an unknown slug aborts the process.
        self.file_descriptor_count = MIN_FILE_DESCRIPTOR
        self.file_descriptors = {}
        self.cache = caches['default']

        try:
            self.index = Index.objects.get(slug=index_slug)
        except Index.DoesNotExist:
            print('Unknown index slug: {}.'.format(index_slug))
            exit(1)

    def access(self, path, fh=None):
        # FUSE access(2) callback; raise ENOENT when the path resolves to
        # nothing.
        result = self._path_to_node(
            path=path, access_only=True, directory_only=False
        )

        if not result:
            raise FuseOSError(ENOENT)

    def getattr(self, path, fh=None):
        # FUSE getattr(2) callback: directory attributes for index nodes,
        # regular-file attributes for documents. Document timestamps are
        # converted to Unix epoch seconds.
        logger.debug('path: %s, fh: %s', path, fh)

        now = time()
        result = self._path_to_node(path=path, directory_only=False)

        if not result:
            raise FuseOSError(ENOENT)

        if isinstance(result, IndexInstanceNode):
            return {
                'st_mode': (S_IFDIR | DIRECTORY_MODE), 'st_ctime': now,
                'st_mtime': now, 'st_atime': now, 'st_nlink': 2
            }
        else:
            return {
                'st_mode': (S_IFREG | FILE_MODE),
                'st_ctime': (
                    result.date_added.replace(tzinfo=None) - result.date_added.utcoffset() - datetime.datetime(1970, 1, 1)
                ).total_seconds(),
                'st_mtime': (
                    result.latest_version.timestamp.replace(tzinfo=None) - result.latest_version.timestamp.utcoffset() - datetime.datetime(1970, 1, 1)
                ).total_seconds(),
                'st_atime': now,
                'st_size': result.size
            }

    def open(self, path, flags):
        # FUSE open(2) callback; only paths resolving to documents can be
        # opened.
        result = self._path_to_node(path=path, directory_only=False)

        if isinstance(result, Document):
            next_file_descriptor = self._get_next_file_descriptor()
            self.file_descriptors[next_file_descriptor] = result.open()
            return next_file_descriptor
        else:
            raise FuseOSError(ENOENT)

    def release(self, path, fh):
        # FUSE release(2) callback; drop the file object so the descriptor
        # slot can be reused.
        self.file_descriptors[fh] = None
        del(self.file_descriptors[fh])

    def read(self, path, size, offset, fh):
        # FUSE read(2) callback; delegate to the document file object
        # stored for this descriptor.
        self.file_descriptors[fh].seek(offset)
        return self.file_descriptors[fh].read(size)

    def readdir(self, path, fh):
        # FUSE readdir(2) callback. Yields '.', '..', the child node values
        # (slash-free only) and, when the template node links documents,
        # the document labels.
        logger.debug('path: %s', path)

        node = self._path_to_node(path=path, directory_only=True)

        if not node:
            raise FuseOSError(ENOENT)

        yield '.'
        yield '..'

        # Nodes
        queryset = node.get_children().values('value').exclude(
            value__contains='/'
        )

        # Suppress node values that appear more than once. Fixed: the
        # previous code filtered on a non-existent 'label' key of this
        # values('value') queryset, raising KeyError whenever duplicates
        # existed.
        for duplicate in queryset.order_by().annotate(count_id=Count('id')).filter(count_id__gt=1):
            queryset = queryset.exclude(value=duplicate['value'])

        for child_node in queryset.values_list('value', flat=True):
            yield child_node

        # Documents
        if node.index_template_node.link_documents:
            queryset = node.documents.values('label').exclude(
                label__contains='/'
            )

            # Suppress document labels that appear more than once.
            for duplicate in queryset.order_by().annotate(count_id=Count('id')).filter(count_id__gt=1):
                queryset = queryset.exclude(label=duplicate['label'])

            for document_label in queryset.values_list('label', flat=True):
                yield document_label
|
||||
|
||||
|
||||
class Command(management.BaseCommand):
|
||||
help = 'Mount an index as a FUSE filesystem.'
|
||||
|
||||
@@ -235,7 +37,7 @@ class Command(management.BaseCommand):
|
||||
|
||||
try:
|
||||
FUSE(
|
||||
operations=IndexFS(index_slug=options['slug']),
|
||||
operations=IndexFilesystem(index_slug=options['slug']),
|
||||
mountpoint=options['mount_point'], nothreads=True, foreground=True,
|
||||
allow_other=options['allow_other'],
|
||||
allow_root=options['allow_root']
|
||||
|
||||
0
mayan/apps/mirroring/tests/__init__.py
Normal file
0
mayan/apps/mirroring/tests/__init__.py
Normal file
4
mayan/apps/mirroring/tests/literals.py
Normal file
4
mayan/apps/mirroring/tests/literals.py
Normal file
@@ -0,0 +1,4 @@
|
||||
from __future__ import absolute_import, unicode_literals

# Index node template expression producing a plain, single-line level name.
TEST_NODE_EXPRESSION = 'level_1'
# Multiline expression; the FUSE layer is expected to flatten the '\r\n'
# sequences before using the value as a directory name.
TEST_NODE_EXPRESSION_MULTILINE = 'first\r\nsecond\r\nthird'
||||
116
mayan/apps/mirroring/tests/test_classes.py
Normal file
116
mayan/apps/mirroring/tests/test_classes.py
Normal file
@@ -0,0 +1,116 @@
|
||||
from __future__ import absolute_import, unicode_literals
|
||||
|
||||
import hashlib
|
||||
|
||||
from fuse import FuseOSError
|
||||
|
||||
from django.test import override_settings
|
||||
|
||||
from common.tests import BaseTestCase
|
||||
from documents.tests import DocumentTestMixin
|
||||
|
||||
from document_indexing.tests import DocumentIndexingTestMixin
|
||||
|
||||
from ..classes import IndexFilesystem
|
||||
|
||||
from .literals import (
|
||||
TEST_NODE_EXPRESSION, TEST_NODE_EXPRESSION_MULTILINE
|
||||
)
|
||||
|
||||
|
||||
@override_settings(OCR_AUTO_OCR=False)
class IndexFSTestCase(DocumentIndexingTestMixin, DocumentTestMixin, BaseTestCase):
    """
    Exercise IndexFilesystem against a real index: path access, file
    reads, multiline level flattening and duplicate directory removal.
    """
    auto_upload_document = False

    def _create_index_with_template(self, expression=TEST_NODE_EXPRESSION):
        # Shared fixture extracted from the individual tests: an index
        # whose root carries a single document-linking node template with
        # the given expression.
        self._create_index()

        self.index.node_templates.create(
            parent=self.index.template_root, expression=expression,
            link_documents=True
        )

    def test_document_access(self):
        self._create_index_with_template()

        document = self.upload_document()
        index_filesystem = IndexFilesystem(index_slug=self.index.slug)

        # access() returns None (no exception) for a valid document path.
        self.assertEqual(
            index_filesystem.access(
                '/{}/{}'.format(TEST_NODE_EXPRESSION, document.label)
            ), None
        )

    def test_document_access_failure(self):
        self._create_index_with_template()

        document = self.upload_document()
        index_filesystem = IndexFilesystem(index_slug=self.index.slug)

        # A non-existent document label must raise ENOENT via FuseOSError.
        with self.assertRaises(FuseOSError):
            index_filesystem.access(
                '/{}/{}_non_valid'.format(TEST_NODE_EXPRESSION, document.label)
            )

    def test_document_open(self):
        self._create_index_with_template()

        document = self.upload_document()
        index_filesystem = IndexFilesystem(index_slug=self.index.slug)

        file_handle = index_filesystem.open(
            '/{}/{}'.format(TEST_NODE_EXPRESSION, document.label), 'rb'
        )

        # The bytes read back through the filesystem must hash to the
        # document's stored checksum.
        self.assertEqual(
            hashlib.sha256(
                index_filesystem.read(
                    path=None, size=document.size, offset=0, fh=file_handle
                )
            ).hexdigest(),
            document.checksum
        )

    def test_multiline_indexes(self):
        self._create_index_with_template(
            expression=TEST_NODE_EXPRESSION_MULTILINE
        )

        self.upload_document()
        index_filesystem = IndexFilesystem(index_slug=self.index.slug)

        # Newlines in the level value are flattened to spaces; skip the
        # '.' and '..' entries yielded first by readdir().
        self.assertEqual(
            list(index_filesystem.readdir('/', ''))[2:],
            [TEST_NODE_EXPRESSION_MULTILINE.replace('\r\n', ' ')]
        )

    def test_duplicated_indexes(self):
        self._create_index_with_template()
        # A second identical template produces duplicate directory names,
        # which readdir() must suppress entirely.
        self.index.node_templates.create(
            parent=self.index.template_root, expression=TEST_NODE_EXPRESSION,
            link_documents=True
        )

        self.upload_document()
        index_filesystem = IndexFilesystem(index_slug=self.index.slug)

        self.assertEqual(
            list(index_filesystem.readdir('/', ''))[2:], []
        )
|
||||
Reference in New Issue
Block a user