Add support for exporting an index as a FUSE filesystem. Closes gh-issue #173.
This commit is contained in:
@@ -72,6 +72,7 @@ What's new in Mayan EDMS v2.0
|
||||
* RGB tags
|
||||
* ``performupgrade`` management command.
|
||||
* Removal of eval from metadata type defaults and lookup fields. Django's own template language is now used instead.
|
||||
* Support to share an index as a FUSE filesystem.
|
||||
|
||||
Upgrading from a previous version
|
||||
=================================
|
||||
|
||||
@@ -3,8 +3,6 @@ from __future__ import unicode_literals
|
||||
from django import apps
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
from common import MayanAppConfig
|
||||
|
||||
|
||||
class MirroringApp(apps.AppConfig):
|
||||
name = 'mirroring'
|
||||
|
||||
3
mayan/apps/mirroring/literals.py
Normal file
3
mayan/apps/mirroring/literals.py
Normal file
@@ -0,0 +1,3 @@
|
||||
MAX_FILE_DESCRIPTOR = 65535
|
||||
MIN_FILE_DESCRIPTOR = 0
|
||||
FILE_MODE = DIRECTORY_MODE = 0555
|
||||
@@ -3,27 +3,41 @@ from __future__ import unicode_literals
|
||||
import datetime
|
||||
from errno import ENOENT
|
||||
import logging
|
||||
from optparse import make_option
|
||||
from stat import S_IFDIR, S_IFREG
|
||||
from time import time
|
||||
|
||||
from fuse import FUSE, FuseOSError, Operations
|
||||
import pytz
|
||||
|
||||
from django.core import management
|
||||
|
||||
from djcelery.models import IntervalSchedule, PeriodicTask
|
||||
from django.core.cache import caches
|
||||
|
||||
from document_indexing.models import Index, IndexInstanceNode
|
||||
from documents.models import Document
|
||||
|
||||
MAX_FILE_DESCRIPTOR = 65535
|
||||
MIN_FILE_DESCRIPTOR = 0
|
||||
from ...literals import (
|
||||
MAX_FILE_DESCRIPTOR, MIN_FILE_DESCRIPTOR, FILE_MODE, DIRECTORY_MODE
|
||||
)
|
||||
from ...settings import (
|
||||
setting_document_lookup_cache_timeout, setting_node_lookup_cache_timeout
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class IndexFS(Operations):
|
||||
def _path_to_node(self, path, directory_only=True):
|
||||
def _get_next_file_descriptor(self):
|
||||
while(True):
|
||||
self.file_descriptor_count += 1
|
||||
if self.file_descriptor_count > MAX_FILE_DESCRIPTOR:
|
||||
self.file_descriptor_count = MIN_FILE_DESCRIPTOR
|
||||
|
||||
try:
|
||||
if not self.file_descriptors[self.file_descriptor_count]:
|
||||
return self.file_descriptor_count
|
||||
except KeyError:
|
||||
return self.file_descriptor_count
|
||||
|
||||
def _path_to_node(self, path, access_only=False, directory_only=True):
|
||||
logger.debug('path: %s', path)
|
||||
logger.debug('directory_only: %s', directory_only)
|
||||
|
||||
@@ -34,9 +48,27 @@ class IndexFS(Operations):
|
||||
node = self.index.instance_root
|
||||
|
||||
if len(parts) > 1 and parts[1] != '':
|
||||
for part in parts[1:]:
|
||||
obj = self.cache.get(path)
|
||||
|
||||
if obj:
|
||||
node_pk = obj.get('node_pk')
|
||||
if node_pk:
|
||||
if access_only:
|
||||
return True
|
||||
else:
|
||||
return IndexInstanceNode.objects.get(pk=node_pk)
|
||||
|
||||
document_pk = obj.get('document_pk')
|
||||
if document_pk:
|
||||
if access_only:
|
||||
return True
|
||||
else:
|
||||
return Document.objects.get(pk=document_pk)
|
||||
|
||||
for count, part in enumerate(parts[1:]):
|
||||
try:
|
||||
node = node.children.get(value=part)
|
||||
|
||||
except IndexInstanceNode.DoesNotExist:
|
||||
logger.debug('%s does not exists', part)
|
||||
|
||||
@@ -46,28 +78,52 @@ class IndexFS(Operations):
|
||||
try:
|
||||
if node.index_template_node.link_documents:
|
||||
result = node.documents.get(label=part)
|
||||
logger.debug('path %s is a valid file path', path)
|
||||
logger.debug(
|
||||
'path %s is a valid file path', path
|
||||
)
|
||||
self.cache.set(
|
||||
path, {'document_pk': result.pk},
|
||||
setting_document_lookup_cache_timeout.value
|
||||
)
|
||||
|
||||
return result
|
||||
else:
|
||||
return None
|
||||
except Document.DoesNotExist:
|
||||
logger.debug('path %s is a file, but is not found', path)
|
||||
logger.debug(
|
||||
'path %s is a file, but is not found', path
|
||||
)
|
||||
return None
|
||||
|
||||
self.cache.set(
|
||||
path, {'node_pk': node.pk},
|
||||
setting_node_lookup_cache_timeout.value
|
||||
)
|
||||
|
||||
logger.debug('node: %s', node)
|
||||
logger.debug('node is root: %s', node.is_root_node())
|
||||
|
||||
return node
|
||||
|
||||
def __init__(self, index_slug):
|
||||
self.fd_count = MIN_FILE_DESCRIPTOR
|
||||
self.fd = {}
|
||||
self.file_descriptor_count = MIN_FILE_DESCRIPTOR
|
||||
self.file_descriptors = {}
|
||||
self.cache = caches['default']
|
||||
|
||||
try:
|
||||
self.index = Index.objects.get(slug=index_slug)
|
||||
except Index.DoesNotExist:
|
||||
print 'Unknown index.'
|
||||
print 'Unknown index slug: {}.'.format(index_slug)
|
||||
exit(1)
|
||||
|
||||
def access(self, path, fh=None):
|
||||
result = self._path_to_node(
|
||||
path=path, access_only=True, directory_only=False
|
||||
)
|
||||
|
||||
if not result:
|
||||
raise FuseOSError(ENOENT)
|
||||
|
||||
def getattr(self, path, fh=None):
|
||||
logger.debug('path: %s, fh: %s', path, fh)
|
||||
|
||||
@@ -79,42 +135,39 @@ class IndexFS(Operations):
|
||||
|
||||
if isinstance(result, IndexInstanceNode):
|
||||
return {
|
||||
'st_mode': (S_IFDIR | 0555), 'st_ctime': now, 'st_mtime': now,
|
||||
'st_atime': now, 'st_nlink': 2
|
||||
'st_mode': (S_IFDIR | DIRECTORY_MODE), 'st_ctime': now,
|
||||
'st_mtime': now, 'st_atime': now, 'st_nlink': 2
|
||||
}
|
||||
else:
|
||||
return {
|
||||
'st_mode': (S_IFREG | 0555),
|
||||
'st_ctime': (result.date_added.replace(tzinfo=None) - result.date_added.utcoffset() - datetime.datetime(1970, 1, 1)).total_seconds(),
|
||||
'st_mtime': (result.latest_version.timestamp.replace(tzinfo=None) - result.latest_version.timestamp.utcoffset() - datetime.datetime(1970, 1, 1)).total_seconds(),
|
||||
'st_mode': (S_IFREG | FILE_MODE),
|
||||
'st_ctime': (
|
||||
result.date_added.replace(tzinfo=None) - result.date_added.utcoffset() - datetime.datetime(1970, 1, 1)
|
||||
).total_seconds(),
|
||||
'st_mtime': (
|
||||
result.latest_version.timestamp.replace(tzinfo=None) - result.latest_version.timestamp.utcoffset() - datetime.datetime(1970, 1, 1)
|
||||
).total_seconds(),
|
||||
'st_atime': now,
|
||||
'st_size': result.size
|
||||
}
|
||||
|
||||
def getxattr(self, path, name, position=0):
|
||||
return ''
|
||||
|
||||
def open(self, path, flags):
|
||||
result = self._path_to_node(path=path, directory_only=False)
|
||||
|
||||
if isinstance(result, Document):
|
||||
self.fd_count += 1
|
||||
if self.fd_count > MAX_FILE_DESCRIPTOR:
|
||||
self.fb_count = MIN_FILE_DESCRIPTOR
|
||||
# TODO: implement _get_next_file_descriptor()
|
||||
# TODO: don't provide a file descriptor already in use
|
||||
|
||||
self.fd[self.fd_count] = result.open()
|
||||
return self.fd_count
|
||||
next_file_descriptor = self._get_next_file_descriptor()
|
||||
self.file_descriptors[next_file_descriptor] = result.open()
|
||||
return next_file_descriptor
|
||||
else:
|
||||
raiseFuseOSError(ENOENT)
|
||||
raise FuseOSError(ENOENT)
|
||||
|
||||
def release(self, path, fh):
|
||||
self.fd[fh] = None
|
||||
del(self.fd[fh])
|
||||
self.file_descriptors[fh] = None
|
||||
del(self.file_descriptors[fh])
|
||||
|
||||
def read(self, path, size, offset, fh):
|
||||
return self.fd[self.fd_count].read(size)
|
||||
self.file_descriptors[fh].seek(offset)
|
||||
return self.file_descriptors[fh].read(size)
|
||||
|
||||
def readdir(self, path, fh):
|
||||
logger.debug('path: %s', path)
|
||||
@@ -122,40 +175,32 @@ class IndexFS(Operations):
|
||||
node = self._path_to_node(path=path, directory_only=True)
|
||||
|
||||
if not node:
|
||||
raiseFuseOSError(ENOENT)
|
||||
raise FuseOSError(ENOENT)
|
||||
|
||||
result = ['.', '..']
|
||||
yield '.'
|
||||
yield '..'
|
||||
|
||||
for child_node in node.get_children().values_list('value', flat=True):
|
||||
if '/' not in child_node:
|
||||
result.append(child_node)
|
||||
yield child_node
|
||||
|
||||
if node.index_template_node.link_documents:
|
||||
for document in node.documents.all():
|
||||
if '/' not in document.label:
|
||||
result.append(document.label)
|
||||
|
||||
return result
|
||||
for document_label in node.documents.values_list('label', flat=True):
|
||||
if '/' not in document_label:
|
||||
yield document_label
|
||||
|
||||
|
||||
class Command(management.BaseCommand):
|
||||
help = 'Mount an index as a FUSE filesystem.'
|
||||
|
||||
option_list = management.BaseCommand.option_list + (
|
||||
make_option(
|
||||
'--index',
|
||||
action='store',
|
||||
dest='index',
|
||||
help='Index to mirror at the mount point.'
|
||||
),
|
||||
|
||||
make_option(
|
||||
'--mountpoint',
|
||||
action='store',
|
||||
dest='mountpoint',
|
||||
help='Filesystem location at which to mount the selected index.'
|
||||
),
|
||||
)
|
||||
usage_str = 'Usage: ./manage.py mountindex [index slug] [mount point]'
|
||||
args = '[index slug] [mount point]'
|
||||
|
||||
def handle(self, *args, **options):
|
||||
fuse = FUSE(operations=IndexFS(index_slug=options['index']), mountpoint=options['mountpoint'], foreground=True)
|
||||
if len(args) != 2:
|
||||
print('Incorrect number of arguments')
|
||||
exit(1)
|
||||
|
||||
FUSE(
|
||||
operations=IndexFS(index_slug=args[0]), mountpoint=args[1],
|
||||
nothreads=True, foreground=True
|
||||
)
|
||||
|
||||
16
mayan/apps/mirroring/settings.py
Normal file
16
mayan/apps/mirroring/settings.py
Normal file
@@ -0,0 +1,16 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
from smart_settings import Namespace
|
||||
|
||||
namespace = Namespace(name='mirroring', label=_('Mirroring'))
|
||||
|
||||
setting_document_lookup_cache_timeout = namespace.add_setting(
|
||||
global_name='MIRRORING_DOCUMENT_CACHE_LOOKUP_TIMEOUT', default=10,
|
||||
help_text=_('Time in seconds to cache the path lookup to a document.'),
|
||||
)
|
||||
setting_node_lookup_cache_timeout = namespace.add_setting(
|
||||
global_name='MIRRORING_NODE_CACHE_LOOKUP_TIMEOUT', default=10,
|
||||
help_text=_('Time in seconds to cache the path lookup to an index node.'),
|
||||
)
|
||||
Reference in New Issue
Block a user