Add support for exporting an index as a FUSE filesystem. Closes gh-issue #173.

This commit is contained in:
Roberto Rosario
2015-07-29 13:30:35 -04:00
parent 22f823aca2
commit ab35118b01
5 changed files with 123 additions and 60 deletions

View File

@@ -72,6 +72,7 @@ What's new in Mayan EDMS v2.0
* RGB tags
* ``performupgrade`` management command.
* Removal of eval from metadata type defaults and lookup fields. Django's own template language is now used instead.
* Support for sharing an index as a FUSE filesystem.
Upgrading from a previous version
=================================

View File

@@ -3,8 +3,6 @@ from __future__ import unicode_literals
from django import apps
from django.utils.translation import ugettext_lazy as _
from common import MayanAppConfig
class MirroringApp(apps.AppConfig):
name = 'mirroring'

View File

@@ -0,0 +1,3 @@
# Inclusive upper and lower bounds of the file descriptor numbers that the
# mirroring filesystem cycles through.
MAX_FILE_DESCRIPTOR = 65535
MIN_FILE_DESCRIPTOR = 0

# Permission bits combined with S_IFREG / S_IFDIR to build st_mode:
# r-xr-xr-x (read and execute for everybody, no write bit).
# Written with the ``0o`` prefix, which is accepted by Python 2.6+ and
# Python 3 alike; the bare leading-zero form is a syntax error on Python 3.
FILE_MODE = DIRECTORY_MODE = 0o555

View File

@@ -3,27 +3,41 @@ from __future__ import unicode_literals
import datetime
from errno import ENOENT
import logging
from optparse import make_option
from stat import S_IFDIR, S_IFREG
from time import time
from fuse import FUSE, FuseOSError, Operations
import pytz
from django.core import management
from djcelery.models import IntervalSchedule, PeriodicTask
from django.core.cache import caches
from document_indexing.models import Index, IndexInstanceNode
from documents.models import Document
MAX_FILE_DESCRIPTOR = 65535
MIN_FILE_DESCRIPTOR = 0
from ...literals import (
MAX_FILE_DESCRIPTOR, MIN_FILE_DESCRIPTOR, FILE_MODE, DIRECTORY_MODE
)
from ...settings import (
setting_document_lookup_cache_timeout, setting_node_lookup_cache_timeout
)
logger = logging.getLogger(__name__)
class IndexFS(Operations):
def _path_to_node(self, path, directory_only=True):
def _get_next_file_descriptor(self):
    """
    Return the next file descriptor number that is not currently in use.

    The counter advances by one on every probe and wraps from
    MAX_FILE_DESCRIPTOR back to MIN_FILE_DESCRIPTOR. A number is considered
    free when it has no entry in ``self.file_descriptors`` or when its
    entry is falsy.

    Raises FuseOSError(EMFILE) when every number in the range is taken,
    instead of probing forever.
    """
    from errno import EMFILE

    # Probe each slot of the inclusive range at most once.
    for _ in range(MAX_FILE_DESCRIPTOR - MIN_FILE_DESCRIPTOR + 1):
        self.file_descriptor_count += 1
        if self.file_descriptor_count > MAX_FILE_DESCRIPTOR:
            self.file_descriptor_count = MIN_FILE_DESCRIPTOR

        # A missing key and a falsy placeholder both mean "free".
        if not self.file_descriptors.get(self.file_descriptor_count):
            return self.file_descriptor_count

    raise FuseOSError(EMFILE)
def _path_to_node(self, path, access_only=False, directory_only=True):
logger.debug('path: %s', path)
logger.debug('directory_only: %s', directory_only)
@@ -34,9 +48,27 @@ class IndexFS(Operations):
node = self.index.instance_root
if len(parts) > 1 and parts[1] != '':
for part in parts[1:]:
obj = self.cache.get(path)
if obj:
node_pk = obj.get('node_pk')
if node_pk:
if access_only:
return True
else:
return IndexInstanceNode.objects.get(pk=node_pk)
document_pk = obj.get('document_pk')
if document_pk:
if access_only:
return True
else:
return Document.objects.get(pk=document_pk)
for count, part in enumerate(parts[1:]):
try:
node = node.children.get(value=part)
except IndexInstanceNode.DoesNotExist:
logger.debug('%s does not exists', part)
@@ -46,28 +78,52 @@ class IndexFS(Operations):
try:
if node.index_template_node.link_documents:
result = node.documents.get(label=part)
logger.debug('path %s is a valid file path', path)
logger.debug(
'path %s is a valid file path', path
)
self.cache.set(
path, {'document_pk': result.pk},
setting_document_lookup_cache_timeout.value
)
return result
else:
return None
except Document.DoesNotExist:
logger.debug('path %s is a file, but is not found', path)
logger.debug(
'path %s is a file, but is not found', path
)
return None
self.cache.set(
path, {'node_pk': node.pk},
setting_node_lookup_cache_timeout.value
)
logger.debug('node: %s', node)
logger.debug('node is root: %s', node.is_root_node())
return node
def __init__(self, index_slug):
    """
    Set up the descriptor bookkeeping and cache, then resolve the index.

    index_slug: slug of the Index whose instance tree is to be exposed.

    Prints a single error message and terminates the process with exit
    status 1 when no index has the given slug.
    """
    # Earlier attribute names for the descriptor bookkeeping, kept
    # alongside the newer ones so that code written against either
    # spelling finds its state initialized.
    self.fd_count = MIN_FILE_DESCRIPTOR
    self.fd = {}

    # Rolling counter and descriptor -> open file object table.
    self.file_descriptor_count = MIN_FILE_DESCRIPTOR
    self.file_descriptors = {}

    self.cache = caches['default']

    try:
        self.index = Index.objects.get(slug=index_slug)
    except Index.DoesNotExist:
        # One message only, naming the slug that failed to resolve.
        print('Unknown index slug: {}.'.format(index_slug))
        # Same effect as exit(1) without relying on the site-provided
        # ``exit`` helper.
        raise SystemExit(1)
def access(self, path, fh=None):
    """
    Check that ``path`` resolves to an index node or a document.

    Returns None when the path exists; raises FuseOSError(ENOENT) when
    the lookup yields a falsy result.
    """
    path_exists = self._path_to_node(
        directory_only=False, access_only=True, path=path
    )

    if path_exists:
        return

    raise FuseOSError(ENOENT)
def getattr(self, path, fh=None):
logger.debug('path: %s, fh: %s', path, fh)
@@ -79,42 +135,39 @@ class IndexFS(Operations):
if isinstance(result, IndexInstanceNode):
return {
'st_mode': (S_IFDIR | 0555), 'st_ctime': now, 'st_mtime': now,
'st_atime': now, 'st_nlink': 2
'st_mode': (S_IFDIR | DIRECTORY_MODE), 'st_ctime': now,
'st_mtime': now, 'st_atime': now, 'st_nlink': 2
}
else:
return {
'st_mode': (S_IFREG | 0555),
'st_ctime': (result.date_added.replace(tzinfo=None) - result.date_added.utcoffset() - datetime.datetime(1970, 1, 1)).total_seconds(),
'st_mtime': (result.latest_version.timestamp.replace(tzinfo=None) - result.latest_version.timestamp.utcoffset() - datetime.datetime(1970, 1, 1)).total_seconds(),
'st_mode': (S_IFREG | FILE_MODE),
'st_ctime': (
result.date_added.replace(tzinfo=None) - result.date_added.utcoffset() - datetime.datetime(1970, 1, 1)
).total_seconds(),
'st_mtime': (
result.latest_version.timestamp.replace(tzinfo=None) - result.latest_version.timestamp.utcoffset() - datetime.datetime(1970, 1, 1)
).total_seconds(),
'st_atime': now,
'st_size': result.size
}
def getxattr(self, path, name, position=0):
    """
    Return the value of the extended attribute ``name`` of ``path``.

    Every request, whatever its arguments, is answered with an empty
    string.
    """
    empty_value = ''
    return empty_value
def open(self, path, flags):
    """
    Open the document found at ``path`` and return its descriptor number.

    The descriptor comes from _get_next_file_descriptor() and the opened
    file object is stored under it in ``self.file_descriptors``.

    Raises FuseOSError(ENOENT) when ``path`` does not resolve to a
    Document.
    """
    result = self._path_to_node(path=path, directory_only=False)

    if not isinstance(result, Document):
        raise FuseOSError(ENOENT)

    next_file_descriptor = self._get_next_file_descriptor()
    self.file_descriptors[next_file_descriptor] = result.open()
    return next_file_descriptor


def release(self, path, fh):
    """
    Forget descriptor ``fh`` and close the file object stored under it.

    Releasing a descriptor that is not in the table is a no-op.
    """
    file_object = self.file_descriptors.pop(fh, None)

    # Close the underlying handle so it is not leaked; guarded because
    # the object returned by Document.open() is not visible from here.
    close = getattr(file_object, 'close', None)
    if close is not None:
        close()


def read(self, path, size, offset, fh):
    """
    Return up to ``size`` bytes, starting at ``offset``, from the file
    object registered under descriptor ``fh``.
    """
    file_object = self.file_descriptors[fh]
    # Reads can arrive in any order, so always position explicitly.
    file_object.seek(offset)
    return file_object.read(size)
def readdir(self, path, fh):
logger.debug('path: %s', path)
@@ -122,40 +175,32 @@ class IndexFS(Operations):
node = self._path_to_node(path=path, directory_only=True)
if not node:
raiseFuseOSError(ENOENT)
raise FuseOSError(ENOENT)
result = ['.', '..']
yield '.'
yield '..'
for child_node in node.get_children().values_list('value', flat=True):
if '/' not in child_node:
result.append(child_node)
yield child_node
if node.index_template_node.link_documents:
for document in node.documents.all():
if '/' not in document.label:
result.append(document.label)
return result
for document_label in node.documents.values_list('label', flat=True):
if '/' not in document_label:
yield document_label
class Command(management.BaseCommand):
    """
    Management command that mounts an index as a FUSE filesystem.

    The index slug and the mount point are taken from the two positional
    arguments; when those are not supplied, the ``--index`` and
    ``--mountpoint`` options are used instead.
    """
    help = 'Mount an index as a FUSE filesystem.'
    option_list = management.BaseCommand.option_list + (
        make_option(
            '--index',
            action='store',
            dest='index',
            help='Index to mirror at the mount point.'
        ),
        make_option(
            '--mountpoint',
            action='store',
            dest='mountpoint',
            help='Filesystem location at which to mount the selected index.'
        ),
    )
    usage_str = 'Usage: ./manage.py mountindex [index slug] [mount point]'
    args = '[index slug] [mount point]'

    def handle(self, *args, **options):
        """
        Validate the arguments, then mount the index in the foreground.

        Prints an error and exits with status 1 when neither the two
        positional arguments nor both options were provided.
        """
        if len(args) == 2:
            index_slug, mountpoint = args
        else:
            index_slug = options.get('index')
            mountpoint = options.get('mountpoint')

        if not index_slug or not mountpoint:
            print('Incorrect number of arguments')
            raise SystemExit(1)

        # Single mount call, made only after validation.
        FUSE(
            operations=IndexFS(index_slug=index_slug), mountpoint=mountpoint,
            nothreads=True, foreground=True
        )

View File

@@ -0,0 +1,16 @@
from __future__ import unicode_literals
from django.utils.translation import ugettext_lazy as _
from smart_settings import Namespace
# Settings namespace under which the mirroring options are registered.
namespace = Namespace(label=_('Mirroring'), name='mirroring')

# Timeout, in seconds, for cached path -> document lookups.
setting_document_lookup_cache_timeout = namespace.add_setting(
    default=10,
    global_name='MIRRORING_DOCUMENT_CACHE_LOOKUP_TIMEOUT',
    help_text=_('Time in seconds to cache the path lookup to a document.'),
)

# Timeout, in seconds, for cached path -> index node lookups.
setting_node_lookup_cache_timeout = namespace.add_setting(
    default=10,
    global_name='MIRRORING_NODE_CACHE_LOOKUP_TIMEOUT',
    help_text=_('Time in seconds to cache the path lookup to an index node.'),
)