Merge branch 'development' into feature/document_states
Conflicts: mayan/urls.py
This commit is contained in:
@@ -1,3 +1,3 @@
|
||||
#!/bin/sh
|
||||
|
||||
DJANGO_SETTINGS_MODULE='mayan.settings.celery_redis' celery -A mayan worker -l DEBUG -Q checkouts,mailing,uploads,converter,ocr,tools,indexing,metadata -Ofair
|
||||
DJANGO_SETTINGS_MODULE='mayan.settings.celery_redis' celery -A mayan worker -l DEBUG -Q checkouts,mailing,uploads,converter,ocr,tools,indexing,metadata -Ofair -B
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
sudo apt-get update
|
||||
sudo apt-get -y upgrade
|
||||
sudo apt-get -y install git-core python-virtualenv gcc python-dev libjpeg-dev libpng-dev libtiff-dev tesseract-ocr poppler-utils unpaper redis-server
|
||||
sudo apt-get -y install git-core python-virtualenv gcc python-dev libjpeg-dev libpng-dev libtiff-dev tesseract-ocr poppler-utils unpaper redis-server libreoffice
|
||||
git clone /mayan-edms-repository/ /home/vagrant/mayan-edms
|
||||
cd /home/vagrant/mayan-edms
|
||||
git checkout development
|
||||
|
||||
@@ -99,7 +99,7 @@ Then on a separate console launch a celery worker from the same provisioned Vagr
|
||||
$ vagrant ssh
|
||||
vagrant@vagrant-ubuntu-trusty-32:~$ cd ~/mayan-edms/
|
||||
vagrant@vagrant-ubuntu-trusty-32:~$ source venv/bin/activate
|
||||
vagrant@vagrant-ubuntu-trusty-32:~$ DJANGO_SETTINGS_MODULE='mayan.settings.celery_redis' celery -A mayan worker -l DEBUG -Q checkouts,mailing,uploads,converter,ocr,tools,indexing,metadata -Ofair
|
||||
vagrant@vagrant-ubuntu-trusty-32:~$ DJANGO_SETTINGS_MODULE='mayan.settings.celery_redis' celery -A mayan worker -l DEBUG -Q checkouts,mailing,uploads,converter,ocr,tools,indexing,metadata -Ofair -B
|
||||
|
||||
|
||||
Contributing changes
|
||||
|
||||
@@ -77,10 +77,8 @@ register_links(Document, [document_events_view, document_version_list], menu_nam
|
||||
|
||||
# Document Version links
|
||||
register_links(DocumentVersion, [document_version_revert, document_version_download])
|
||||
secondary_menu_links = [document_list_recent, document_list]
|
||||
# TODO: register this at sources app too
|
||||
register_links(['documents:document_list_recent', 'documents:document_list', 'sources:document_create', 'sources:document_create_multiple', 'sources:upload_interactive', 'sources:staging_file_delete'], secondary_menu_links, menu_name='secondary_menu')
|
||||
register_links(Document, secondary_menu_links, menu_name='secondary_menu')
|
||||
register_links(['documents:document_list_recent', 'documents:document_list'], [document_list_recent, document_list], menu_name='secondary_menu')
|
||||
register_links(Document, [document_list_recent, document_list], menu_name='secondary_menu')
|
||||
|
||||
# Document page links
|
||||
register_links(DocumentPage, [
|
||||
|
||||
@@ -193,12 +193,11 @@ class Document(models.Model):
|
||||
if not self.is_new_versions_allowed(user=user):
|
||||
raise NewDocumentVersionNotAllowed
|
||||
|
||||
new_version = DocumentVersion(
|
||||
new_version = DocumentVersion.objects.create(
|
||||
document=self,
|
||||
file=file_object,
|
||||
comment=comment or '',
|
||||
)
|
||||
new_version.save()
|
||||
|
||||
logger.debug('new_version saved')
|
||||
|
||||
@@ -301,6 +300,9 @@ class DocumentVersion(models.Model):
|
||||
verbose_name = _(u'Document version')
|
||||
verbose_name_plural = _(u'Document version')
|
||||
|
||||
def __unicode__(self):
|
||||
return u'{0} - {1}'.format(self.document, self.timestamp)
|
||||
|
||||
def save(self, *args, **kwargs):
|
||||
"""
|
||||
Overloaded save method that updates the document version's checksum,
|
||||
|
||||
@@ -0,0 +1,123 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from south.utils import datetime_utils as datetime
|
||||
from south.db import db
|
||||
from south.v2 import DataMigration
|
||||
from django.db import models
|
||||
|
||||
|
||||
class Migration(DataMigration):
|
||||
|
||||
def forwards(self, orm):
|
||||
"Write your forwards methods here."
|
||||
# Note: Don't use "from appname.models import ModelName".
|
||||
# Use orm.ModelName to refer to models in this application,
|
||||
# and orm['appname.ModelName'] for models in other applications.
|
||||
try:
|
||||
orphan_document_type = orm.DocumentType.objects.get(name='_orphan_document_')
|
||||
except:
|
||||
pass
|
||||
else:
|
||||
if not orphan_document_type.documents.count():
|
||||
orphan_document_type.delete()
|
||||
|
||||
def backwards(self, orm):
|
||||
"Write your backwards methods here."
|
||||
|
||||
models = {
|
||||
u'auth.group': {
|
||||
'Meta': {'object_name': 'Group'},
|
||||
u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
|
||||
'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '80'}),
|
||||
'permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': u"orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'})
|
||||
},
|
||||
u'auth.permission': {
|
||||
'Meta': {'ordering': "(u'content_type__app_label', u'content_type__model', u'codename')", 'unique_together': "((u'content_type', u'codename'),)", 'object_name': 'Permission'},
|
||||
'codename': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
|
||||
'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['contenttypes.ContentType']"}),
|
||||
u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
|
||||
'name': ('django.db.models.fields.CharField', [], {'max_length': '50'})
|
||||
},
|
||||
u'auth.user': {
|
||||
'Meta': {'object_name': 'User'},
|
||||
'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
|
||||
'email': ('django.db.models.fields.EmailField', [], {'max_length': '75', 'blank': 'True'}),
|
||||
'first_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
|
||||
'groups': ('django.db.models.fields.related.ManyToManyField', [], {'symmetrical': 'False', 'related_name': "u'user_set'", 'blank': 'True', 'to': u"orm['auth.Group']"}),
|
||||
u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
|
||||
'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
|
||||
'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
|
||||
'is_superuser': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
|
||||
'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
|
||||
'last_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
|
||||
'password': ('django.db.models.fields.CharField', [], {'max_length': '128'}),
|
||||
'user_permissions': ('django.db.models.fields.related.ManyToManyField', [], {'symmetrical': 'False', 'related_name': "u'user_set'", 'blank': 'True', 'to': u"orm['auth.Permission']"}),
|
||||
'username': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '30'})
|
||||
},
|
||||
u'contenttypes.contenttype': {
|
||||
'Meta': {'ordering': "('name',)", 'unique_together': "(('app_label', 'model'),)", 'object_name': 'ContentType', 'db_table': "'django_content_type'"},
|
||||
'app_label': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
|
||||
u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
|
||||
'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
|
||||
'name': ('django.db.models.fields.CharField', [], {'max_length': '100'})
|
||||
},
|
||||
u'documents.document': {
|
||||
'Meta': {'ordering': "['-date_added']", 'object_name': 'Document'},
|
||||
'date_added': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
|
||||
'description': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
|
||||
'document_type': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'documents'", 'to': u"orm['documents.DocumentType']"}),
|
||||
u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
|
||||
'label': ('django.db.models.fields.CharField', [], {'default': "u'Uninitialized document'", 'max_length': '255', 'db_index': 'True'}),
|
||||
'language': ('django.db.models.fields.CharField', [], {'default': "u'eng'", 'max_length': '8'}),
|
||||
'uuid': ('django.db.models.fields.CharField', [], {'default': "u'26db4eb3-1050-4d26-8324-74b09d61991f'", 'max_length': '48'})
|
||||
},
|
||||
u'documents.documentpage': {
|
||||
'Meta': {'ordering': "['page_number']", 'object_name': 'DocumentPage'},
|
||||
'content': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
|
||||
'document_version': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'pages'", 'to': u"orm['documents.DocumentVersion']"}),
|
||||
u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
|
||||
'page_label': ('django.db.models.fields.CharField', [], {'max_length': '40', 'null': 'True', 'blank': 'True'}),
|
||||
'page_number': ('django.db.models.fields.PositiveIntegerField', [], {'default': '1', 'db_index': 'True'})
|
||||
},
|
||||
u'documents.documentpagetransformation': {
|
||||
'Meta': {'ordering': "('order',)", 'object_name': 'DocumentPageTransformation'},
|
||||
'arguments': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
|
||||
'document_page': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['documents.DocumentPage']"}),
|
||||
u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
|
||||
'order': ('django.db.models.fields.PositiveIntegerField', [], {'default': '0', 'null': 'True', 'db_index': 'True', 'blank': 'True'}),
|
||||
'transformation': ('django.db.models.fields.CharField', [], {'max_length': '128'})
|
||||
},
|
||||
u'documents.documenttype': {
|
||||
'Meta': {'ordering': "['name']", 'object_name': 'DocumentType'},
|
||||
u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
|
||||
'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '32'}),
|
||||
'ocr': ('django.db.models.fields.BooleanField', [], {'default': 'True'})
|
||||
},
|
||||
u'documents.documenttypefilename': {
|
||||
'Meta': {'ordering': "['filename']", 'unique_together': "(('document_type', 'filename'),)", 'object_name': 'DocumentTypeFilename'},
|
||||
'document_type': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'filenames'", 'to': u"orm['documents.DocumentType']"}),
|
||||
'enabled': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
|
||||
'filename': ('django.db.models.fields.CharField', [], {'max_length': '128', 'db_index': 'True'}),
|
||||
u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'})
|
||||
},
|
||||
u'documents.documentversion': {
|
||||
'Meta': {'object_name': 'DocumentVersion'},
|
||||
'checksum': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
|
||||
'comment': ('django.db.models.fields.TextField', [], {'blank': 'True'}),
|
||||
'document': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'versions'", 'to': u"orm['documents.Document']"}),
|
||||
'encoding': ('django.db.models.fields.CharField', [], {'max_length': '64', 'null': 'True', 'blank': 'True'}),
|
||||
'file': ('django.db.models.fields.files.FileField', [], {'max_length': '100'}),
|
||||
u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
|
||||
'mimetype': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}),
|
||||
'timestamp': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'})
|
||||
},
|
||||
u'documents.recentdocument': {
|
||||
'Meta': {'ordering': "('-datetime_accessed',)", 'object_name': 'RecentDocument'},
|
||||
'datetime_accessed': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'db_index': 'True', 'blank': 'True'}),
|
||||
'document': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['documents.Document']"}),
|
||||
u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
|
||||
'user': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['auth.User']"})
|
||||
}
|
||||
}
|
||||
|
||||
complete_apps = ['documents']
|
||||
symmetrical = True
|
||||
@@ -2,7 +2,7 @@ from __future__ import unicode_literals
|
||||
|
||||
from django.db import models
|
||||
from django.utils.encoding import python_2_unicode_compatible
|
||||
from django.utils.translation import ugettext as _
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
from actstream.models import Action
|
||||
|
||||
|
||||
@@ -4,11 +4,12 @@ from django.conf import settings
|
||||
from django.contrib import messages
|
||||
from django.core.exceptions import PermissionDenied
|
||||
from django.core.urlresolvers import reverse
|
||||
from django.http import HttpResponseRedirect
|
||||
from django.shortcuts import get_object_or_404, render_to_response
|
||||
from django.http import HttpResponseRedirect, Http404
|
||||
from django.shortcuts import (get_list_or_404, get_object_or_404,
|
||||
render_to_response)
|
||||
from django.template import RequestContext
|
||||
from django.utils.http import urlencode
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
from django.utils.translation import ugettext_lazy as _, ungettext
|
||||
|
||||
from acls.models import AccessEntry
|
||||
from documents.models import Document, DocumentType
|
||||
@@ -36,15 +37,9 @@ from .permissions import (PERMISSION_METADATA_DOCUMENT_ADD,
|
||||
|
||||
def metadata_edit(request, document_id=None, document_id_list=None):
|
||||
if document_id:
|
||||
documents = [get_object_or_404(Document, pk=document_id)]
|
||||
if documents[0].metadata.count() == 0:
|
||||
messages.warning(request, _(u'The selected document doesn\'t have any metadata.'))
|
||||
return HttpResponseRedirect(request.META.get('HTTP_REFERER', reverse('main:home')))
|
||||
elif document_id_list:
|
||||
documents = [get_object_or_404(Document.objects.select_related('document_type'), pk=document_id) for document_id in document_id_list.split(',')]
|
||||
if len(set([document.document_type.pk for document in documents])) > 1:
|
||||
messages.error(request, _(u'Only select documents of the same type.'))
|
||||
return HttpResponseRedirect(request.META.get('HTTP_REFERER', reverse('main:home')))
|
||||
document_id_list = unicode(document_id)
|
||||
|
||||
documents = Document.objects.select_related('metadata').filter(pk__in=document_id_list.split(','))
|
||||
|
||||
try:
|
||||
Permission.objects.check_permissions(request.user, [PERMISSION_METADATA_DOCUMENT_EDIT])
|
||||
@@ -52,7 +47,23 @@ def metadata_edit(request, document_id=None, document_id_list=None):
|
||||
documents = AccessEntry.objects.filter_objects_by_access(PERMISSION_METADATA_DOCUMENT_EDIT, request.user, documents)
|
||||
|
||||
if not documents:
|
||||
messages.error(request, _(u'Must provide at least one document.'))
|
||||
if document_id:
|
||||
raise Http404
|
||||
else:
|
||||
messages.error(request, _(u'Must provide at least one document.'))
|
||||
return HttpResponseRedirect(request.META.get('HTTP_REFERER', reverse('main:home')))
|
||||
|
||||
if len(set([document.document_type.pk for document in documents])) > 1:
|
||||
messages.error(request, _(u'Only select documents of the same type.'))
|
||||
return HttpResponseRedirect(request.META.get('HTTP_REFERER', reverse('main:home')))
|
||||
|
||||
if set(documents.values_list('metadata__value' ,flat=True)) == set([None]):
|
||||
message = ungettext(
|
||||
u'The selected document doesn\'t have any metadata.',
|
||||
u'The selected documents doesn\'t have any metadata.',
|
||||
len(documents)
|
||||
)
|
||||
messages.warning(request, message)
|
||||
return HttpResponseRedirect(request.META.get('HTTP_REFERER', reverse('main:home')))
|
||||
|
||||
post_action_redirect = reverse('documents:document_list_recent')
|
||||
@@ -111,11 +122,18 @@ def metadata_edit(request, document_id=None, document_id_list=None):
|
||||
'form': formset,
|
||||
'next': next,
|
||||
}
|
||||
|
||||
if len(documents) == 1:
|
||||
context['object'] = documents[0]
|
||||
context['title'] = _(u'Edit metadata for document: %s') % ', '.join([unicode(d) for d in documents])
|
||||
elif len(documents) > 1:
|
||||
context['title'] = _(u'Edit metadata for documents: %s') % ', '.join([unicode(d) for d in documents])
|
||||
|
||||
context['title'] = ungettext(
|
||||
u'Edit metadata for document: %(document)s',
|
||||
u'Edit metadata for the %(count)d selected documents',
|
||||
len(documents)
|
||||
) % {
|
||||
u'count': len(documents),
|
||||
u'document': documents[0],
|
||||
}
|
||||
|
||||
return render_to_response('main/generic_form.html', context,
|
||||
context_instance=RequestContext(request))
|
||||
@@ -189,11 +207,18 @@ def metadata_add(request, document_id=None, document_id_list=None):
|
||||
'form': form,
|
||||
'next': next,
|
||||
}
|
||||
|
||||
if len(documents) == 1:
|
||||
context['object'] = documents[0]
|
||||
context['title'] = _(u'Add metadata type to document: %s') % ', '.join([unicode(d) for d in documents])
|
||||
elif len(documents) > 1:
|
||||
context['title'] = _(u'Add metadata type to documents: %s') % ', '.join([unicode(d) for d in documents])
|
||||
|
||||
context['title'] = ungettext(
|
||||
u'Add metadata types to document: %(document)s',
|
||||
u'Add metadata types to the %(count)d selected documents',
|
||||
len(documents)
|
||||
) % {
|
||||
u'count': len(documents),
|
||||
u'document': documents[0],
|
||||
}
|
||||
|
||||
return render_to_response('main/generic_form.html', context,
|
||||
context_instance=RequestContext(request))
|
||||
@@ -205,24 +230,33 @@ def metadata_multiple_add(request):
|
||||
|
||||
def metadata_remove(request, document_id=None, document_id_list=None):
|
||||
if document_id:
|
||||
documents = [get_object_or_404(Document, pk=document_id)]
|
||||
if documents[0].metadata.count() == 0:
|
||||
messages.warning(request, _(u'The selected document doesn\'t have any metadata.'))
|
||||
return HttpResponseRedirect(request.META.get('HTTP_REFERER', reverse('main:home')))
|
||||
document_id_list = unicode(document_id)
|
||||
|
||||
elif document_id_list:
|
||||
documents = [get_object_or_404(Document.objects.select_related('document_type'), pk=document_id) for document_id in document_id_list.split(',')]
|
||||
if len(set([document.document_type.pk for document in documents])) > 1:
|
||||
messages.error(request, _(u'Only select documents of the same type.'))
|
||||
return HttpResponseRedirect(request.META.get('HTTP_REFERER', reverse('main:home')))
|
||||
documents = Document.objects.select_related('metadata').filter(pk__in=document_id_list.split(','))
|
||||
|
||||
try:
|
||||
Permission.objects.check_permissions(request.user, [PERMISSION_METADATA_DOCUMENT_REMOVE])
|
||||
Permission.objects.check_permissions(request.user, [PERMISSION_METADATA_DOCUMENT_EDIT])
|
||||
except PermissionDenied:
|
||||
documents = AccessEntry.objects.filter_objects_by_access(PERMISSION_METADATA_DOCUMENT_REMOVE, request.user, documents)
|
||||
documents = AccessEntry.objects.filter_objects_by_access(PERMISSION_METADATA_DOCUMENT_EDIT, request.user, documents)
|
||||
|
||||
if not documents:
|
||||
messages.error(request, _(u'Must provide at least one document.'))
|
||||
if document_id:
|
||||
raise Http404
|
||||
else:
|
||||
messages.error(request, _(u'Must provide at least one document.'))
|
||||
return HttpResponseRedirect(request.META.get('HTTP_REFERER', reverse('main:home')))
|
||||
|
||||
if len(set([document.document_type.pk for document in documents])) > 1:
|
||||
messages.error(request, _(u'Only select documents of the same type.'))
|
||||
return HttpResponseRedirect(request.META.get('HTTP_REFERER', reverse('main:home')))
|
||||
|
||||
if set(documents.values_list('metadata__value' ,flat=True)) == set([None]):
|
||||
message = ungettext(
|
||||
u'The selected document doesn\'t have any metadata.',
|
||||
u'The selected documents doesn\'t have any metadata.',
|
||||
len(documents)
|
||||
)
|
||||
messages.warning(request, message)
|
||||
return HttpResponseRedirect(request.META.get('HTTP_REFERER', reverse('main:home')))
|
||||
|
||||
post_action_redirect = reverse('documents:document_list_recent')
|
||||
@@ -273,11 +307,18 @@ def metadata_remove(request, document_id=None, document_id_list=None):
|
||||
'form': formset,
|
||||
'next': next,
|
||||
}
|
||||
|
||||
if len(documents) == 1:
|
||||
context['object'] = documents[0]
|
||||
context['title'] = _(u'Remove metadata types from document: %s') % ', '.join([unicode(d) for d in documents])
|
||||
elif len(documents) > 1:
|
||||
context['title'] = _(u'Remove metadata types from documents: %s') % ', '.join([unicode(d) for d in documents])
|
||||
|
||||
context['title'] = ungettext(
|
||||
u'Remove metadata types from document: %(document)s',
|
||||
u'Remove metadata types from the %(count)d selected documents',
|
||||
len(documents)
|
||||
) % {
|
||||
u'count': len(documents),
|
||||
u'document': documents[0],
|
||||
}
|
||||
|
||||
return render_to_response('main/generic_form.html', context,
|
||||
context_instance=RequestContext(request))
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
import os
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
try:
|
||||
import magic
|
||||
USE_PYTHON_MAGIC = True
|
||||
@@ -11,83 +9,6 @@ except:
|
||||
USE_PYTHON_MAGIC = False
|
||||
|
||||
|
||||
MIMETYPE_ICONS_DIRECTORY_NAME = os.path.join('images', 'mimetypes')
|
||||
|
||||
UNKNWON_TYPE_FILE_NAME = 'unknown.png'
|
||||
ERROR_FILE_NAME = 'error.png'
|
||||
|
||||
mimetype_icons = {
|
||||
'application/pdf': 'file_extension_pdf.png',
|
||||
'application/zip': 'file_extension_zip.png',
|
||||
'application/ogg': 'file_extension_ogg.png',
|
||||
'application/postscript': 'file_extension_ps.png',
|
||||
'application/x-gzip': 'file_extension_gz.png',
|
||||
'application/x-rar-compressed': 'file_extension_rar.png',
|
||||
'application/x-troff-msvideo': 'file_extension_avi.png',
|
||||
'application/acad': 'file_extension_dwg.png',
|
||||
'application/octet-stream': 'file_extension_exe.png',
|
||||
'application/vnd.oasis.opendocument.text': 'ODF_textdocument_32x32.png',
|
||||
'application/vnd.oasis.opendocument.spreadsheet': 'ODF_spreadsheet_32x32.png',
|
||||
'application/vnd.oasis.opendocument.presentation': 'ODF_presentation_32x32.png',
|
||||
'application/vnd.oasis.opendocument.graphics': 'ODF_drawing_32x32.png',
|
||||
'application/vnd.ms-excel': 'file_extension_xls.png',
|
||||
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'file_extension_xls.png',
|
||||
'application/msword': 'file_extension_doc.png',
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'file_extension_doc.png',
|
||||
'application/mspowerpoint': 'file_extension_pps.png',
|
||||
'application/vnd.ms-powerpoint': 'file_extension_pps.png',
|
||||
'application/wav': 'file_extension_wav.png',
|
||||
'application/x-wav': 'file_extension_wav.png',
|
||||
'application/vnd.oasis.opendocument.text': 'ODF_textdocument_32x32.png',
|
||||
|
||||
'image/jpeg': 'file_extension_jpeg.png',
|
||||
'image/png': 'file_extension_png.png',
|
||||
'image/x-png': 'file_extension_png.png',
|
||||
'image/tiff': 'file_extension_tif.png',
|
||||
'image/x-tiff': 'file_extension_tif.png',
|
||||
'image/bmp': 'file_extension_bmp.png',
|
||||
'image/gif': 'file_extension_gif.png',
|
||||
'image/vnd.dwg': 'file_extension_dwg.png',
|
||||
'image/x-dwg': 'file_extension_dwg.png',
|
||||
|
||||
'audio/mpeg': 'file_extension_mp3.png',
|
||||
'audio/mid': 'file_extension_mid.png',
|
||||
'audio/x-wav': 'file_extension_wav.png',
|
||||
'audio/vnd.wav': 'file_extension_wav.png',
|
||||
'audio/x-pn-realaudio': 'file_extension_ram.png',
|
||||
'audio/mp4': 'file_extension_mp4.png',
|
||||
'audio/x-ms-wma': 'file_extension_wma.png',
|
||||
|
||||
'video/avi': 'file_extension_avi.png',
|
||||
'video/mpeg': 'file_extension_mpeg.png',
|
||||
'video/quicktime': 'file_extension_mov.png',
|
||||
'video/x-ms-asf': 'file_extension_asf.png',
|
||||
'video/x-ms-wmv': 'file_extension_wmv.png',
|
||||
|
||||
'text/html': 'file_extension_html.png',
|
||||
'text/plain': 'file_extension_txt.png',
|
||||
}
|
||||
|
||||
|
||||
def get_icon_file_path(mimetype):
|
||||
file_name = mimetype_icons.get(mimetype, UNKNWON_TYPE_FILE_NAME)
|
||||
if settings.DEBUG:
|
||||
return os.path.join(settings.BASE_DIR, 'apps', 'mimetype', 'static', MIMETYPE_ICONS_DIRECTORY_NAME, file_name)
|
||||
else:
|
||||
return os.path.join(settings.STATIC_ROOT, MIMETYPE_ICONS_DIRECTORY_NAME, file_name)
|
||||
|
||||
|
||||
def get_error_icon_file_path():
|
||||
if settings.DEBUG:
|
||||
return os.path.join(settings.BASE_DIR, 'apps', 'mimetype', 'static', MIMETYPE_ICONS_DIRECTORY_NAME, ERROR_FILE_NAME)
|
||||
else:
|
||||
return os.path.join(settings.STATIC_ROOT, MIMETYPE_ICONS_DIRECTORY_NAME, ERROR_FILE_NAME)
|
||||
|
||||
|
||||
def get_error_icon_url():
|
||||
return os.path.join(MIMETYPE_ICONS_DIRECTORY_NAME, ERROR_FILE_NAME)
|
||||
|
||||
|
||||
def get_mimetype(file_description, filepath, mimetype_only=False):
|
||||
"""
|
||||
Determine a file's mimetype by calling the system's libmagic
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
# Create your views here.
|
||||
@@ -1,61 +1,75 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import logging
|
||||
|
||||
from django.dispatch import receiver
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
from south.signals import post_migrate
|
||||
|
||||
from acls.api import class_permissions
|
||||
from common.utils import encapsulate
|
||||
from documents.models import Document, DocumentVersion
|
||||
from documents.signals import post_version_upload
|
||||
from documents.widgets import document_link
|
||||
from main.api import register_maintenance_links
|
||||
from navigation.api import register_links
|
||||
from navigation.api import register_links, register_model_list_columns
|
||||
from navigation.links import link_spacer
|
||||
from project_tools.api import register_tool
|
||||
from rest_api.classes import APIEndPoint
|
||||
|
||||
from .links import (all_document_ocr_cleanup, ocr_tool_link,
|
||||
queue_document_list, queue_document_multiple_delete,
|
||||
re_queue_multiple_document, submit_document,
|
||||
submit_document_multiple)
|
||||
from .models import DocumentQueue
|
||||
from .links import (
|
||||
link_document_all_ocr_cleanup, link_document_submit,
|
||||
link_document_submit_multiple, link_entry_delete,
|
||||
link_entry_delete_multiple, link_entry_list, link_entry_re_queue,
|
||||
link_entry_re_queue_multiple
|
||||
)
|
||||
from .models import DocumentVersionOCRError
|
||||
from .permissions import PERMISSION_OCR_DOCUMENT
|
||||
from .tasks import task_do_ocr
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
register_links(Document, [submit_document])
|
||||
register_links([Document], [submit_document_multiple, link_spacer], menu_name='multi_item_links')
|
||||
register_links(['ocr:queue_document_list'], [re_queue_multiple_document, queue_document_multiple_delete])
|
||||
register_links(['ocr:queue_document_list'], [queue_document_list], menu_name='secondary_menu')
|
||||
register_links(Document, [link_document_submit])
|
||||
register_links([Document], [link_document_submit_multiple, link_spacer], menu_name='multi_item_links')
|
||||
|
||||
register_maintenance_links([all_document_ocr_cleanup], namespace='ocr', title=_(u'OCR'))
|
||||
register_links([DocumentVersionOCRError], [link_entry_re_queue_multiple, link_entry_delete_multiple, link_spacer], menu_name='multi_item_links')
|
||||
register_links([DocumentVersionOCRError], [link_entry_re_queue, link_entry_delete])
|
||||
register_links(['ocr:entry_list', 'ocr:entry_delete_multiple', 'ocr:entry_re_queue_multiple', DocumentVersionOCRError], [link_entry_list], menu_name='secondary_menu')
|
||||
register_maintenance_links([link_document_all_ocr_cleanup], namespace='ocr', title=_('OCR'))
|
||||
|
||||
|
||||
def document_ocr_submit(self):
|
||||
task_do_ocr.apply_async(args=[self.pk], queue='ocr')
|
||||
|
||||
|
||||
def document_version_ocr_submit(self):
|
||||
task_do_ocr.apply_async(args=[self.document.pk], queue='ocr')
|
||||
|
||||
|
||||
@receiver(post_version_upload, dispatch_uid='post_version_upload_ocr', sender=DocumentVersion)
|
||||
def post_version_upload_ocr(sender, instance, **kwargs):
|
||||
logger.debug('received post_version_upload')
|
||||
logger.debug('instance: %s', instance)
|
||||
logger.debug('instance pk: %s', instance.pk)
|
||||
if instance.document.document_type.ocr:
|
||||
instance.document.submit_for_ocr()
|
||||
|
||||
|
||||
@receiver(post_migrate, dispatch_uid='create_default_queue')
|
||||
def create_default_queue_signal_handler(sender, **kwargs):
|
||||
if kwargs['app'] == 'ocr':
|
||||
DocumentQueue.objects.get_or_create(name='default')
|
||||
instance.submit_for_ocr()
|
||||
|
||||
|
||||
Document.add_to_class('submit_for_ocr', document_ocr_submit)
|
||||
DocumentVersion.add_to_class('submit_for_ocr', document_version_ocr_submit)
|
||||
|
||||
class_permissions(Document, [PERMISSION_OCR_DOCUMENT])
|
||||
|
||||
register_tool(ocr_tool_link)
|
||||
register_tool(link_entry_list)
|
||||
|
||||
APIEndPoint('ocr')
|
||||
|
||||
register_model_list_columns(DocumentVersionOCRError, [
|
||||
{
|
||||
'name': _('Document'), 'attribute': encapsulate(lambda entry: document_link(entry.document_version.document))
|
||||
},
|
||||
{
|
||||
'name': _('Added'), 'attribute': 'datetime_submitted'
|
||||
},
|
||||
{
|
||||
'name': _('Result'), 'attribute': 'result'
|
||||
},
|
||||
])
|
||||
|
||||
@@ -1,20 +1,13 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.contrib import admin
|
||||
|
||||
from .models import DocumentQueue, QueueDocument
|
||||
from .models import DocumentVersionOCRError
|
||||
|
||||
|
||||
class QueueDocumentInline(admin.StackedInline):
|
||||
model = QueueDocument
|
||||
extra = 1
|
||||
classes = ('collapse-open',)
|
||||
allow_add = True
|
||||
class DocumentVersionOCRErrorAdmin(admin.ModelAdmin):
|
||||
list_display = ('document_version', 'datetime_submitted')
|
||||
readonly_fields = ('document_version', 'datetime_submitted', 'result')
|
||||
|
||||
|
||||
class DocumentQueueAdmin(admin.ModelAdmin):
|
||||
inlines = [QueueDocumentInline]
|
||||
list_display = ('name', 'label')
|
||||
|
||||
|
||||
admin.site.register(DocumentQueue, DocumentQueueAdmin)
|
||||
admin.site.register(DocumentVersionOCRError, DocumentVersionOCRErrorAdmin)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import logging
|
||||
import os
|
||||
@@ -30,14 +30,14 @@ except sh.CommandNotFound:
|
||||
UNPAPER = None
|
||||
|
||||
|
||||
def do_document_ocr(document):
|
||||
def do_document_ocr(document_version):
|
||||
"""
|
||||
Try first to extract text from document pages using the registered
|
||||
parser, if the parser fails or if there is no parser registered for
|
||||
the document mimetype do a visual OCR by calling the corresponding
|
||||
OCR backend
|
||||
"""
|
||||
for document_page in document.pages.all():
|
||||
for document_page in document_version.pages.all():
|
||||
try:
|
||||
# Try to extract text by means of a parser
|
||||
parse_document_page(document_page)
|
||||
@@ -68,10 +68,10 @@ def do_document_ocr(document):
|
||||
|
||||
os.rename(pre_ocr_filepath, pre_ocr_filepath_w_ext)
|
||||
try:
|
||||
ocr_text = ocr_backend.execute(pre_ocr_filepath_w_ext, document.language)
|
||||
ocr_text = ocr_backend.execute(pre_ocr_filepath_w_ext, document_version.document.language)
|
||||
|
||||
document_page.content = ocr_cleanup(document.language, ocr_text)
|
||||
document_page.page_label = _(u'Text from OCR')
|
||||
document_page.content = ocr_cleanup(document_version.document.language, ocr_text)
|
||||
document_page.page_label = _('Text from OCR')
|
||||
document_page.save()
|
||||
finally:
|
||||
fs_cleanup(pre_ocr_filepath_w_ext)
|
||||
@@ -86,7 +86,7 @@ def ocr_cleanup(language, text):
|
||||
cleanup filter
|
||||
"""
|
||||
try:
|
||||
language_backend = load_backend(u'.'.join([u'ocr', u'lang', language, u'LanguageBackend']))()
|
||||
language_backend = load_backend('.'.join(['ocr', 'lang', language, 'LanguageBackend']))()
|
||||
except ImportError:
|
||||
language_backend = None
|
||||
|
||||
@@ -104,9 +104,9 @@ def ocr_cleanup(language, text):
|
||||
result = word
|
||||
if result:
|
||||
output.append(result)
|
||||
output.append(u'\n')
|
||||
output.append('\n')
|
||||
|
||||
return u' '.join(output)
|
||||
return ' '.join(output)
|
||||
|
||||
|
||||
def clean_pages():
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import absolute_import, unicode_literals
|
||||
|
||||
from django.core.exceptions import PermissionDenied
|
||||
from django.shortcuts import get_object_or_404
|
||||
@@ -8,33 +8,33 @@ from rest_framework.response import Response
|
||||
from rest_framework.settings import api_settings
|
||||
|
||||
from acls.models import AccessEntry
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentVersion
|
||||
from permissions.models import Permission
|
||||
from rest_api.permissions import MayanPermission
|
||||
|
||||
from .permissions import PERMISSION_OCR_DOCUMENT
|
||||
from .serializers import DocumentOCRSerializer
|
||||
from .serializers import DocumentVersionOCRSerializer
|
||||
|
||||
|
||||
class DocumentOCRView(generics.GenericAPIView):
|
||||
serializer_class = DocumentOCRSerializer
|
||||
class DocumentVersionOCRView(generics.GenericAPIView):
|
||||
serializer_class = DocumentVersionOCRSerializer
|
||||
|
||||
permission_classes = (MayanPermission,)
|
||||
|
||||
def post(self, request, *args, **kwargs):
|
||||
"""Submit document OCR queue."""
|
||||
"""Submit document version for OCR."""
|
||||
|
||||
serializer = self.get_serializer(data=request.DATA, files=request.FILES)
|
||||
|
||||
if serializer.is_valid():
|
||||
document = get_object_or_404(Document, pk=serializer.data['document_id'])
|
||||
document_version = get_object_or_404(DocumentVersion, pk=serializer.data['document_version_id'])
|
||||
|
||||
try:
|
||||
Permission.objects.check_permissions(request.user, [PERMISSION_OCR_DOCUMENT])
|
||||
except PermissionDenied:
|
||||
AccessEntry.objects.check_access(PERMISSION_OCR_DOCUMENT, request.user, document)
|
||||
AccessEntry.objects.check_access(PERMISSION_OCR_DOCUMENT, request.user, document_version.document)
|
||||
|
||||
document.submit_for_ocr()
|
||||
document_version.submit_for_ocr()
|
||||
|
||||
headers = self.get_success_headers(serializer.data)
|
||||
return Response(serializer.data, status=status.HTTP_202_ACCEPTED,
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
class BackendBase(object):
|
||||
def execute(self, input_filename, language=None): # NOQA
|
||||
def execute(self, input_filename, language=None):
|
||||
raise NotImplementedError
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import codecs
|
||||
import errno
|
||||
@@ -20,11 +20,11 @@ class Tesseract(BackendBase):
|
||||
"""
|
||||
fd, filepath = tempfile.mkstemp()
|
||||
os.close(fd)
|
||||
ocr_output = os.extsep.join([filepath, u'txt'])
|
||||
ocr_output = os.extsep.join([filepath, 'txt'])
|
||||
command = [unicode(TESSERACT_PATH), unicode(input_filename), unicode(filepath)]
|
||||
|
||||
if language is not None:
|
||||
command.extend([u'-l', language])
|
||||
command.extend(['-l', language])
|
||||
|
||||
try:
|
||||
proc = subprocess.Popen(command, close_fds=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
class OCRError(Exception):
|
||||
"""
|
||||
Raised by the OCR backend
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
@@ -6,14 +6,13 @@ from .permissions import (PERMISSION_OCR_CLEAN_ALL_PAGES,
|
||||
PERMISSION_OCR_DOCUMENT,
|
||||
PERMISSION_OCR_DOCUMENT_DELETE)
|
||||
|
||||
submit_document = {'text': _('Submit to OCR queue'), 'view': 'ocr:submit_document', 'args': 'object.id', 'famfam': 'hourglass_add', 'permissions': [PERMISSION_OCR_DOCUMENT]}
|
||||
submit_document_multiple = {'text': _('Submit to OCR queue'), 'view': 'ocr:submit_document_multiple', 'famfam': 'hourglass_add', 'permissions': [PERMISSION_OCR_DOCUMENT]}
|
||||
re_queue_document = {'text': _('Re-queue'), 'view': 'ocr:re_queue_document', 'args': 'object.id', 'famfam': 'hourglass_add', 'permissions': [PERMISSION_OCR_DOCUMENT]}
|
||||
re_queue_multiple_document = {'text': _('Re-queue'), 'view': 'ocr:re_queue_multiple_document', 'famfam': 'hourglass_add', 'permissions': [PERMISSION_OCR_DOCUMENT]}
|
||||
queue_document_delete = {'text': _(u'Delete'), 'view': 'ocr:queue_document_delete', 'args': 'object.id', 'famfam': 'hourglass_delete', 'permissions': [PERMISSION_OCR_DOCUMENT_DELETE]}
|
||||
queue_document_multiple_delete = {'text': _(u'Delete'), 'view': 'ocr:queue_document_multiple_delete', 'famfam': 'hourglass_delete', 'permissions': [PERMISSION_OCR_DOCUMENT_DELETE]}
|
||||
link_document_submit = {'text': _('Submit to OCR queue'), 'view': 'ocr:document_submit', 'args': 'object.id', 'famfam': 'hourglass_add', 'permissions': [PERMISSION_OCR_DOCUMENT]}
|
||||
link_document_submit_multiple = {'text': _('Submit to OCR queue'), 'view': 'ocr:document_submit_multiple', 'famfam': 'hourglass_add'}
|
||||
link_entry_re_queue = {'text': _('Re-queue'), 'view': 'ocr:entry_re_queue', 'args': 'object.id', 'famfam': 'hourglass_add', 'permissions': [PERMISSION_OCR_DOCUMENT]}
|
||||
link_entry_re_queue_multiple = {'text': _('Re-queue'), 'view': 'ocr:entry_re_queue_multiple', 'famfam': 'hourglass_add'}
|
||||
link_entry_delete = {'text': _('Delete'), 'view': 'ocr:entry_delete', 'args': 'object.id', 'famfam': 'hourglass_delete', 'permissions': [PERMISSION_OCR_DOCUMENT_DELETE]}
|
||||
link_entry_delete_multiple = {'text': _('Delete'), 'view': 'ocr:entry_delete_multiple', 'famfam': 'hourglass_delete'}
|
||||
|
||||
all_document_ocr_cleanup = {'text': _(u'Clean up pages content'), 'view': 'ocr:all_document_ocr_cleanup', 'famfam': 'text_strikethrough', 'permissions': [PERMISSION_OCR_CLEAN_ALL_PAGES], 'description': _(u'Runs a language filter to remove common OCR mistakes from document pages content.')}
|
||||
link_document_all_ocr_cleanup = {'text': _('Clean up pages content'), 'view': 'ocr:document_all_ocr_cleanup', 'famfam': 'text_strikethrough', 'permissions': [PERMISSION_OCR_CLEAN_ALL_PAGES], 'description': _('Runs a language filter to remove common OCR mistakes from document pages content.')}
|
||||
|
||||
queue_document_list = {'text': _(u'Queue document list'), 'view': 'ocr:queue_document_list', 'famfam': 'hourglass', 'permissions': [PERMISSION_OCR_DOCUMENT]}
|
||||
ocr_tool_link = {'text': _(u'OCR'), 'view': 'ocr:queue_document_list', 'famfam': 'hourglass', 'icon': 'text.png', 'permissions': [PERMISSION_OCR_DOCUMENT]}
|
||||
link_entry_list = {'text': _('OCR Errors'), 'view': 'ocr:entry_list', 'famfam': 'hourglass', 'icon': 'text.png', 'permissions': [PERMISSION_OCR_DOCUMENT]}
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
DEFAULT_OCR_FILE_FORMAT = u'tiff'
|
||||
DEFAULT_OCR_FILE_EXTENSION = u'tif'
|
||||
UNPAPER_FILE_FORMAT = u'ppm'
|
||||
from __future__ import unicode_literals
|
||||
|
||||
DEFAULT_OCR_FILE_FORMAT = 'tiff'
|
||||
DEFAULT_OCR_FILE_EXTENSION = 'tif'
|
||||
LOCK_EXPIRE = 60 * 10 # Adjust to worst case scenario
|
||||
UNPAPER_FILE_FORMAT = 'ppm'
|
||||
|
||||
@@ -1,39 +1,22 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.db import models
|
||||
from django.core.exceptions import ObjectDoesNotExist
|
||||
from django.utils.translation import ugettext
|
||||
from django.utils.encoding import python_2_unicode_compatible
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentVersion
|
||||
|
||||
|
||||
class DocumentQueue(models.Model):
|
||||
name = models.CharField(max_length=64, unique=True, verbose_name=_(u'Name'))
|
||||
label = models.CharField(max_length=64, verbose_name=_(u'Label'))
|
||||
@python_2_unicode_compatible
|
||||
class DocumentVersionOCRError(models.Model):
|
||||
document_version = models.ForeignKey(DocumentVersion, verbose_name=_('Document version'))
|
||||
datetime_submitted = models.DateTimeField(verbose_name=_('Date time submitted'), auto_now=True, db_index=True)
|
||||
result = models.TextField(blank=True, null=True, verbose_name=_('Result'))
|
||||
|
||||
class Meta:
|
||||
verbose_name = _(u'Document queue')
|
||||
verbose_name_plural = _(u'Document queues')
|
||||
|
||||
def __unicode__(self):
|
||||
return self.label
|
||||
|
||||
|
||||
class QueueDocument(models.Model):
|
||||
document_queue = models.ForeignKey(DocumentQueue, related_name='documents', verbose_name=_(u'Document queue'))
|
||||
document = models.ForeignKey(Document, verbose_name=_(u'Document'))
|
||||
datetime_submitted = models.DateTimeField(verbose_name=_(u'Date time submitted'), auto_now=True, db_index=True)
|
||||
result = models.TextField(blank=True, null=True, verbose_name=_(u'Result'))
|
||||
node_name = models.CharField(max_length=256, verbose_name=_(u'Node name'), blank=True, null=True)
|
||||
def __str__(self):
|
||||
return unicode(self.document_version)
|
||||
|
||||
class Meta:
|
||||
ordering = ('datetime_submitted',)
|
||||
verbose_name = _(u'Queue document')
|
||||
verbose_name_plural = _(u'Queue documents')
|
||||
|
||||
def __unicode__(self):
|
||||
try:
|
||||
return unicode(self.document)
|
||||
except ObjectDoesNotExist:
|
||||
return ugettext(u'Missing document.')
|
||||
verbose_name = _('Document Version OCR Error')
|
||||
verbose_name_plural = _('Document Version OCR Errors')
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import logging
|
||||
import os
|
||||
import slate
|
||||
@@ -90,7 +92,7 @@ class SlateParser(Parser):
|
||||
raise ParserError
|
||||
|
||||
document_page.content = pdf_pages[document_page.page_number - 1]
|
||||
document_page.page_label = _(u'Text extracted from PDF')
|
||||
document_page.page_label = _('Text extracted from PDF')
|
||||
document_page.save()
|
||||
|
||||
|
||||
@@ -112,7 +114,7 @@ class OfficeParser(Parser):
|
||||
|
||||
# Now that the office document has been converted to PDF
|
||||
# call the coresponding PDF parser in this new file
|
||||
parse_document_page(document_page, descriptor=open(input_filepath), mimetype=u'application/pdf')
|
||||
parse_document_page(document_page, descriptor=open(input_filepath), mimetype='application/pdf')
|
||||
else:
|
||||
raise ParserError
|
||||
|
||||
@@ -126,7 +128,7 @@ class PopplerParser(Parser):
|
||||
PDF parser using the pdftotext execute from the poppler package
|
||||
"""
|
||||
def __init__(self):
|
||||
self.pdftotext_path = PDFTOTEXT_PATH if PDFTOTEXT_PATH else u'/usr/bin/pdftotext'
|
||||
self.pdftotext_path = PDFTOTEXT_PATH if PDFTOTEXT_PATH else '/usr/bin/pdftotext'
|
||||
if not os.path.exists(self.pdftotext_path):
|
||||
raise ParserError('cannot find pdftotext executable')
|
||||
logger.debug('self.pdftotext_path: %s', self.pdftotext_path)
|
||||
@@ -167,9 +169,9 @@ class PopplerParser(Parser):
|
||||
raise ParserError('No output')
|
||||
|
||||
document_page.content = output
|
||||
document_page.page_label = _(u'Text extracted from PDF')
|
||||
document_page.page_label = _('Text extracted from PDF')
|
||||
document_page.save()
|
||||
|
||||
|
||||
register_parser(mimetypes=[u'application/pdf'], parsers=[PopplerParser, SlateParser])
|
||||
register_parser(mimetypes=['application/pdf'], parsers=[PopplerParser, SlateParser])
|
||||
register_parser(mimetypes=office_converter.CONVERTER_OFFICE_FILE_MIMETYPES, parsers=[OfficeParser])
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import absolute_import, unicode_literals
|
||||
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
from permissions.models import Permission, PermissionNamespace
|
||||
|
||||
ocr_namespace = PermissionNamespace('ocr', _(u'OCR'))
|
||||
PERMISSION_OCR_DOCUMENT = Permission.objects.register(ocr_namespace, 'ocr_document', _(u'Submit documents for OCR'))
|
||||
PERMISSION_OCR_DOCUMENT_DELETE = Permission.objects.register(ocr_namespace, 'ocr_document_delete', _(u'Delete documents from OCR queue'))
|
||||
PERMISSION_OCR_CLEAN_ALL_PAGES = Permission.objects.register(ocr_namespace, 'ocr_clean_all_pages', _(u'Can execute the OCR clean up on all document pages'))
|
||||
ocr_namespace = PermissionNamespace('ocr', _('OCR'))
|
||||
PERMISSION_OCR_DOCUMENT = Permission.objects.register(ocr_namespace, 'ocr_document', _('Submit documents for OCR'))
|
||||
PERMISSION_OCR_DOCUMENT_DELETE = Permission.objects.register(ocr_namespace, 'ocr_document_delete', _('Delete documents from OCR queue'))
|
||||
PERMISSION_OCR_CLEAN_ALL_PAGES = Permission.objects.register(ocr_namespace, 'ocr_clean_all_pages', _('Can execute the OCR clean up on all document pages'))
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from common.utils import load_backend
|
||||
|
||||
from .settings import BACKEND
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from rest_framework import serializers
|
||||
|
||||
|
||||
class DocumentOCRSerializer(serializers.Serializer):
|
||||
document_id = serializers.IntegerField()
|
||||
class DocumentVersionOCRSerializer(serializers.Serializer):
|
||||
document_version_id = serializers.IntegerField()
|
||||
|
||||
@@ -1,16 +1,16 @@
|
||||
"""Configuration options for the ocr app"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
from smart_settings.api import register_settings
|
||||
|
||||
register_settings(
|
||||
namespace=u'ocr',
|
||||
module=u'ocr.settings',
|
||||
namespace='ocr',
|
||||
module='ocr.settings',
|
||||
settings=[
|
||||
{'name': u'TESSERACT_PATH', 'global_name': u'OCR_TESSERACT_PATH', 'default': u'/usr/bin/tesseract', 'exists': True},
|
||||
{'name': u'UNPAPER_PATH', 'global_name': u'OCR_UNPAPER_PATH', 'default': u'/usr/bin/unpaper', 'description': _(u'File path to unpaper program.'), 'exists': True},
|
||||
{'name': u'PDFTOTEXT_PATH', 'global_name': u'OCR_PDFTOTEXT_PATH', 'default': u'/usr/bin/pdftotext', 'description': _(u'File path to poppler\'s pdftotext program used to extract text from PDF files.'), 'exists': True},
|
||||
{'name': u'BACKEND', 'global_name': u'OCR_BACKEND', 'default': u'ocr.backends.tesseract.Tesseract', 'description': _(u'Full path to the backend to be used to do OCR.')},
|
||||
{'name': 'TESSERACT_PATH', 'global_name': 'OCR_TESSERACT_PATH', 'default': '/usr/bin/tesseract', 'exists': True},
|
||||
{'name': 'UNPAPER_PATH', 'global_name': 'OCR_UNPAPER_PATH', 'default': '/usr/bin/unpaper', 'description': _('File path to unpaper program.'), 'exists': True},
|
||||
{'name': 'PDFTOTEXT_PATH', 'global_name': 'OCR_PDFTOTEXT_PATH', 'default': '/usr/bin/pdftotext', 'description': _('File path to poppler\'s pdftotext program used to extract text from PDF files.'), 'exists': True},
|
||||
{'name': 'BACKEND', 'global_name': 'OCR_BACKEND', 'default': 'ocr.backends.tesseract.Tesseract', 'description': _('Full path to the backend to be used to do OCR.')},
|
||||
]
|
||||
)
|
||||
|
||||
@@ -0,0 +1,88 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from south.utils import datetime_utils as datetime
|
||||
from south.db import db
|
||||
from south.v2 import SchemaMigration
|
||||
from django.db import models
|
||||
|
||||
|
||||
class Migration(SchemaMigration):
|
||||
|
||||
def forwards(self, orm):
|
||||
# Deleting model 'DocumentQueue'
|
||||
db.delete_table(u'ocr_documentqueue')
|
||||
|
||||
# Deleting model 'QueueDocument'
|
||||
db.delete_table(u'ocr_queuedocument')
|
||||
|
||||
# Adding model 'DocumentVersionOCRError'
|
||||
db.create_table(u'ocr_documentversionocrerror', (
|
||||
(u'id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
|
||||
('document_version', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['documents.DocumentVersion'])),
|
||||
('datetime_submitted', self.gf('django.db.models.fields.DateTimeField')(auto_now=True, db_index=True, blank=True)),
|
||||
('result', self.gf('django.db.models.fields.TextField')(null=True, blank=True)),
|
||||
))
|
||||
db.send_create_signal(u'ocr', ['DocumentVersionOCRError'])
|
||||
|
||||
|
||||
def backwards(self, orm):
|
||||
# Adding model 'DocumentQueue'
|
||||
db.create_table(u'ocr_documentqueue', (
|
||||
(u'id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
|
||||
('name', self.gf('django.db.models.fields.CharField')(max_length=64, unique=True)),
|
||||
('label', self.gf('django.db.models.fields.CharField')(max_length=64)),
|
||||
))
|
||||
db.send_create_signal(u'ocr', ['DocumentQueue'])
|
||||
|
||||
# Adding model 'QueueDocument'
|
||||
db.create_table(u'ocr_queuedocument', (
|
||||
('node_name', self.gf('django.db.models.fields.CharField')(max_length=256, null=True, blank=True)),
|
||||
('result', self.gf('django.db.models.fields.TextField')(null=True, blank=True)),
|
||||
('datetime_submitted', self.gf('django.db.models.fields.DateTimeField')(auto_now=True, blank=True, db_index=True)),
|
||||
('document_queue', self.gf('django.db.models.fields.related.ForeignKey')(related_name='documents', to=orm['ocr.DocumentQueue'])),
|
||||
('document', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['documents.Document'])),
|
||||
(u'id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
|
||||
))
|
||||
db.send_create_signal(u'ocr', ['QueueDocument'])
|
||||
|
||||
# Deleting model 'DocumentVersionOCRError'
|
||||
db.delete_table(u'ocr_documentversionocrerror')
|
||||
|
||||
|
||||
models = {
|
||||
u'documents.document': {
|
||||
'Meta': {'ordering': "['-date_added']", 'object_name': 'Document'},
|
||||
'date_added': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
|
||||
'description': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
|
||||
'document_type': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'documents'", 'to': u"orm['documents.DocumentType']"}),
|
||||
u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
|
||||
'label': ('django.db.models.fields.CharField', [], {'default': "u'Uninitialized document'", 'max_length': '255', 'db_index': 'True'}),
|
||||
'language': ('django.db.models.fields.CharField', [], {'default': "u'eng'", 'max_length': '8'}),
|
||||
'uuid': ('django.db.models.fields.CharField', [], {'default': "u'b5b498b5-ffe5-4b70-b8a6-6c875ed11bf2'", 'max_length': '48'})
|
||||
},
|
||||
u'documents.documenttype': {
|
||||
'Meta': {'ordering': "['name']", 'object_name': 'DocumentType'},
|
||||
u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
|
||||
'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '32'}),
|
||||
'ocr': ('django.db.models.fields.BooleanField', [], {'default': 'True'})
|
||||
},
|
||||
u'documents.documentversion': {
|
||||
'Meta': {'object_name': 'DocumentVersion'},
|
||||
'checksum': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
|
||||
'comment': ('django.db.models.fields.TextField', [], {'blank': 'True'}),
|
||||
'document': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'versions'", 'to': u"orm['documents.Document']"}),
|
||||
'encoding': ('django.db.models.fields.CharField', [], {'max_length': '64', 'null': 'True', 'blank': 'True'}),
|
||||
'file': ('django.db.models.fields.files.FileField', [], {'max_length': '100'}),
|
||||
u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
|
||||
'mimetype': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}),
|
||||
'timestamp': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'})
|
||||
},
|
||||
u'ocr.documentversionocrerror': {
|
||||
'Meta': {'ordering': "('datetime_submitted',)", 'object_name': 'DocumentVersionOCRError'},
|
||||
'datetime_submitted': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'db_index': 'True', 'blank': 'True'}),
|
||||
'document_version': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['documents.DocumentVersion']"}),
|
||||
u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
|
||||
'result': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'})
|
||||
}
|
||||
}
|
||||
|
||||
complete_apps = ['ocr']
|
||||
@@ -1,65 +1,61 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import logging
|
||||
import platform
|
||||
import sys
|
||||
import traceback
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentVersion
|
||||
from lock_manager import Lock, LockError
|
||||
from mayan.celery import app
|
||||
|
||||
from .api import do_document_ocr
|
||||
from .models import DocumentQueue, QueueDocument
|
||||
from .literals import LOCK_EXPIRE
|
||||
from .models import DocumentVersionOCRError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
LOCK_EXPIRE = 60 * 10 # Adjust to worst case scenario
|
||||
|
||||
|
||||
@app.task(ignore_result=True)
|
||||
def task_do_ocr(document_pk):
|
||||
lock_id = u'task_do_ocr_doc-%d' % document_pk
|
||||
def task_do_ocr(document_version_pk):
|
||||
lock_id = 'task_do_ocr_doc_version-%d' % document_version_pk
|
||||
try:
|
||||
logger.debug('trying to acquire lock: %s', lock_id)
|
||||
# Acquire lock to avoid doing OCR on the same document more than once
|
||||
# concurrently
|
||||
# Acquire lock to avoid doing OCR on the same document version more than
|
||||
# once concurrently
|
||||
lock = Lock.acquire_lock(lock_id, LOCK_EXPIRE)
|
||||
logger.debug('acquired lock: %s', lock_id)
|
||||
document = None
|
||||
document_version = None
|
||||
try:
|
||||
logger.info('Starting document OCR for document: %d', document_pk)
|
||||
document = Document.objects.get(pk=document_pk)
|
||||
do_document_ocr(document)
|
||||
logger.info('Starting document OCR for document version: %d', document_version_pk)
|
||||
document_version = DocumentVersion.objects.get(pk=document_version_pk)
|
||||
do_document_ocr(document_version)
|
||||
except Exception as exception:
|
||||
logger.error('OCR error for document: %d; %s', document_pk, exception)
|
||||
document_queue = DocumentQueue.objects.get(name='default')
|
||||
if document:
|
||||
queue_document, created = document_queue.documents.get_or_create(document=document)
|
||||
queue_document.node_name = platform.node()
|
||||
logger.error('OCR error for document version: %d; %s', document_version_pk, exception)
|
||||
if document_version:
|
||||
entry, created = DocumentVersionOCRError.objects.get_or_create(document_version=document_version)
|
||||
|
||||
if settings.DEBUG:
|
||||
result = []
|
||||
type, value, tb = sys.exc_info()
|
||||
result.append('%s: %s' % (type.__name__, value))
|
||||
result.extend(traceback.format_tb(tb))
|
||||
queue_document.result = '\n'.join(result)
|
||||
entry.result = '\n'.join(result)
|
||||
else:
|
||||
queue_document.result = exception
|
||||
entry.result = exception
|
||||
|
||||
queue_document.save()
|
||||
entry.save()
|
||||
else:
|
||||
logger.info('OCR for document: %d ended', document_pk)
|
||||
document_queue = DocumentQueue.objects.get(name='default')
|
||||
logger.info('OCR for document: %d ended', document_version_pk)
|
||||
try:
|
||||
queue_document = document_queue.documents.get(document=document)
|
||||
except QueueDocument.DoesNotExist:
|
||||
entry = DocumentVersionOCRError.objects.get(document_version=document_version)
|
||||
except DocumentVersionOCRError.DoesNotExist:
|
||||
pass
|
||||
else:
|
||||
queue_document.delete()
|
||||
entry.delete()
|
||||
finally:
|
||||
lock.release()
|
||||
except LockError:
|
||||
logger.debug('unable to obtain lock')
|
||||
logger.debug('unable to obtain lock: %s' % lock_id)
|
||||
pass
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.core.files.base import File
|
||||
from django.test import TransactionTestCase
|
||||
@@ -6,8 +6,6 @@ from django.test import TransactionTestCase
|
||||
from documents.models import Document, DocumentType
|
||||
from documents.tests import TEST_SMALL_DOCUMENT_PATH, TEST_DOCUMENT_TYPE
|
||||
|
||||
from .models import DocumentQueue, QueueDocument
|
||||
|
||||
|
||||
class DocumentOCRTestCase(TransactionTestCase):
|
||||
def setUp(self):
|
||||
@@ -16,11 +14,6 @@ class DocumentOCRTestCase(TransactionTestCase):
|
||||
with open(TEST_SMALL_DOCUMENT_PATH) as file_object:
|
||||
self.document = Document.objects.new_document(file_object=File(file_object), document_type=self.document_type)[0].document
|
||||
|
||||
DocumentQueue.objects.get_or_create(name='default')
|
||||
|
||||
# Clear OCR queue
|
||||
QueueDocument.objects.all().delete()
|
||||
|
||||
def _test_ocr_language_issue_16(self, language, result):
|
||||
"""
|
||||
Reusable OCR test for a specific language
|
||||
|
||||
@@ -1,19 +1,21 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.conf.urls import patterns, url
|
||||
|
||||
from .api_views import DocumentOCRView
|
||||
from .api_views import DocumentVersionOCRView
|
||||
|
||||
urlpatterns = patterns('ocr.views',
|
||||
url(r'^document/(?P<document_id>\d+)/submit/$', 'submit_document', (), 'submit_document'),
|
||||
url(r'^document/multiple/submit/$', 'submit_document_multiple', (), 'submit_document_multiple'),
|
||||
url(r'^queue/document/list/$', 'queue_document_list', (), 'queue_document_list'),
|
||||
url(r'^queue/document/(?P<queue_document_id>\d+)/delete/$', 'queue_document_delete', (), 'queue_document_delete'),
|
||||
url(r'^queue/document/multiple/delete/$', 'queue_document_multiple_delete', (), 'queue_document_multiple_delete'),
|
||||
url(r'^queue/document/(?P<queue_document_id>\d+)/re-queue/$', 're_queue_document', (), 're_queue_document'),
|
||||
url(r'^queue/document/multiple/re-queue/$', 're_queue_multiple_document', (), 're_queue_multiple_document'),
|
||||
url(r'^document/(?P<pk>\d+)/submit/$', 'document_submit', (), 'document_submit'),
|
||||
url(r'^document/multiple/submit/$', 'document_submit_multiple', (), 'document_submit_multiple'),
|
||||
url(r'^document/all/clean_up/$', 'document_all_ocr_cleanup', (), 'document_all_ocr_cleanup'),
|
||||
|
||||
url(r'^document/all/clean_up/$', 'all_document_ocr_cleanup', (), 'all_document_ocr_cleanup'),
|
||||
url(r'^all/$', 'entry_list', (), 'entry_list'),
|
||||
url(r'^(?P<pk>\d+)/delete/$', 'entry_delete', (), 'entry_delete'),
|
||||
url(r'^multiple/delete/$', 'entry_delete_multiple', (), 'entry_delete_multiple'),
|
||||
url(r'^(?P<pk>\d+)/re-queue/$', 'entry_re_queue', (), 'entry_re_queue'),
|
||||
url(r'^multiple/re-queue/$', 'entry_re_queue_multiple', (), 'entry_re_queue_multiple'),
|
||||
)
|
||||
|
||||
api_urls = patterns('',
|
||||
url(r'^submit/$', DocumentOCRView.as_view(), name='document-ocr-submit-view'),
|
||||
url(r'^submit/$', DocumentVersionOCRView.as_view(), name='document-version-ocr-submit-view'),
|
||||
)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import absolute_import, unicode_literals
|
||||
|
||||
from django.contrib import messages
|
||||
from django.core.exceptions import PermissionDenied
|
||||
@@ -6,173 +6,43 @@ from django.core.urlresolvers import reverse
|
||||
from django.http import HttpResponseRedirect
|
||||
from django.shortcuts import get_object_or_404, render_to_response
|
||||
from django.template import RequestContext
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
from django.utils.translation import ugettext_lazy as _, ungettext
|
||||
|
||||
from acls.models import AccessEntry
|
||||
from common.utils import encapsulate
|
||||
from documents.models import Document
|
||||
from documents.widgets import document_link, document_thumbnail
|
||||
from documents.models import Document, DocumentVersion
|
||||
from permissions.models import Permission
|
||||
|
||||
from .api import clean_pages
|
||||
from .models import DocumentQueue, QueueDocument
|
||||
from .models import DocumentVersionOCRError
|
||||
from .permissions import (PERMISSION_OCR_CLEAN_ALL_PAGES,
|
||||
PERMISSION_OCR_DOCUMENT,
|
||||
PERMISSION_OCR_DOCUMENT_DELETE)
|
||||
|
||||
|
||||
def queue_document_list(request, queue_name='default'):
|
||||
Permission.objects.check_permissions(request.user, [PERMISSION_OCR_DOCUMENT])
|
||||
|
||||
document_queue = get_object_or_404(DocumentQueue, name=queue_name)
|
||||
|
||||
context = {
|
||||
'object_list': document_queue.documents.all(),
|
||||
'title': _(u'Documents in queue: %s') % document_queue,
|
||||
'hide_object': True,
|
||||
'queue': document_queue,
|
||||
'navigation_object_name': 'queue',
|
||||
'list_object_variable_name': 'queue_document',
|
||||
'extra_columns': [
|
||||
{'name': _('Document'), 'attribute': encapsulate(lambda x: document_link(x.document) if hasattr(x, 'document') else _(u'Missing document.'))},
|
||||
{'name': _(u'Thumbnail'), 'attribute': encapsulate(lambda x: document_thumbnail(x.document))},
|
||||
{'name': _('Added'), 'attribute': encapsulate(lambda x: unicode(x.datetime_submitted).split('.')[0]), 'keep_together':True},
|
||||
{'name': _('Node'), 'attribute': 'node_name'},
|
||||
{'name': _('Result'), 'attribute': 'result'},
|
||||
],
|
||||
}
|
||||
|
||||
return render_to_response('main/generic_list.html', context,
|
||||
context_instance=RequestContext(request))
|
||||
|
||||
|
||||
def queue_document_delete(request, queue_document_id=None, queue_document_id_list=None):
|
||||
Permission.objects.check_permissions(request.user, [PERMISSION_OCR_DOCUMENT_DELETE])
|
||||
|
||||
if queue_document_id:
|
||||
queue_documents = [get_object_or_404(QueueDocument, pk=queue_document_id)]
|
||||
elif queue_document_id_list:
|
||||
queue_documents = [get_object_or_404(QueueDocument, pk=queue_document_id) for queue_document_id in queue_document_id_list.split(',')]
|
||||
else:
|
||||
messages.error(request, _(u'Must provide at least one queue document.'))
|
||||
return HttpResponseRedirect(request.META.get('HTTP_REFERER', reverse('main:home')))
|
||||
|
||||
next = request.POST.get('next', request.GET.get('next', request.META.get('HTTP_REFERER', None)))
|
||||
previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', None)))
|
||||
|
||||
if request.method == 'POST':
|
||||
for queue_document in queue_documents:
|
||||
try:
|
||||
queue_document.delete()
|
||||
messages.success(request, _(u'Queue document: %(document)s deleted successfully.') % {
|
||||
'document': queue_document.document})
|
||||
|
||||
except Exception as exception:
|
||||
messages.error(request, _(u'Error deleting document: %(document)s; %(error)s') % {
|
||||
'document': queue_document, 'error': exception})
|
||||
return HttpResponseRedirect(next)
|
||||
|
||||
context = {
|
||||
'next': next,
|
||||
'previous': previous,
|
||||
'delete_view': True,
|
||||
}
|
||||
|
||||
if len(queue_documents) == 1:
|
||||
context['object'] = queue_documents[0]
|
||||
context['title'] = _(u'Are you sure you wish to delete queue document: %s?') % ', '.join([unicode(d) for d in queue_documents])
|
||||
elif len(queue_documents) > 1:
|
||||
context['title'] = _(u'Are you sure you wish to delete queue documents: %s?') % ', '.join([unicode(d) for d in queue_documents])
|
||||
|
||||
return render_to_response('main/generic_confirm.html', context,
|
||||
context_instance=RequestContext(request))
|
||||
|
||||
|
||||
def queue_document_multiple_delete(request):
|
||||
return queue_document_delete(request, queue_document_id_list=request.GET.get('id_list', ''))
|
||||
|
||||
|
||||
def submit_document_multiple(request):
|
||||
for item_id in request.GET.get('id_list', '').split(','):
|
||||
submit_document(request, item_id)
|
||||
|
||||
return HttpResponseRedirect(request.META.get('HTTP_REFERER', reverse('main:home')))
|
||||
|
||||
|
||||
def submit_document(request, document_id):
|
||||
document = get_object_or_404(Document, pk=document_id)
|
||||
def document_submit(request, pk):
|
||||
document = get_object_or_404(Document, pk=pk)
|
||||
|
||||
try:
|
||||
Permission.objects.check_permissions(request.user, [PERMISSION_OCR_DOCUMENT])
|
||||
except PermissionDenied:
|
||||
AccessEntry.objects.check_access(PERMISSION_OCR_DOCUMENT, request.user, document)
|
||||
|
||||
return submit_document_to_queue(request, document=document,
|
||||
post_submit_redirect=request.META.get('HTTP_REFERER', reverse('main:home')))
|
||||
|
||||
|
||||
def submit_document_to_queue(request, document, post_submit_redirect=None):
|
||||
"""
|
||||
This view is meant to be reusable
|
||||
"""
|
||||
|
||||
document.submit_for_ocr()
|
||||
messages.success(request, _(u'Document: %(document)s was added to the OCR queue.') % {
|
||||
messages.success(request, _('Document: %(document)s was added to the OCR queue.') % {
|
||||
'document': document}
|
||||
)
|
||||
|
||||
if post_submit_redirect:
|
||||
return HttpResponseRedirect(post_submit_redirect)
|
||||
return HttpResponseRedirect(request.META.get('HTTP_REFERER', reverse('main:home')))
|
||||
|
||||
|
||||
def re_queue_document(request, queue_document_id=None, queue_document_id_list=None):
|
||||
Permission.objects.check_permissions(request.user, [PERMISSION_OCR_DOCUMENT])
|
||||
def document_submit_multiple(request):
|
||||
for item_id in request.GET.get('id_list', '').split(','):
|
||||
document_submit(request, item_id)
|
||||
|
||||
if queue_document_id:
|
||||
queue_documents = [get_object_or_404(QueueDocument, pk=queue_document_id)]
|
||||
elif queue_document_id_list:
|
||||
queue_documents = [get_object_or_404(QueueDocument, pk=queue_document_id) for queue_document_id in queue_document_id_list.split(',')]
|
||||
else:
|
||||
messages.error(request, _(u'Must provide at least one queue document.'))
|
||||
return HttpResponseRedirect(request.META.get('HTTP_REFERER', reverse('main:home')))
|
||||
|
||||
next = request.POST.get('next', request.GET.get('next', request.META.get('HTTP_REFERER', None)))
|
||||
previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', None)))
|
||||
|
||||
if request.method == 'POST':
|
||||
for queue_document in queue_documents:
|
||||
try:
|
||||
queue_document.document.submit_for_ocr()
|
||||
messages.success(
|
||||
request,
|
||||
_(u'Document: %(document)s was re-queued for OCR.') % {
|
||||
'document': queue_document.document
|
||||
}
|
||||
)
|
||||
except Document.DoesNotExist:
|
||||
messages.error(request, _(u'Document id#: %d, no longer exists.') % queue_document.document_id)
|
||||
return HttpResponseRedirect(next)
|
||||
|
||||
context = {
|
||||
'next': next,
|
||||
'previous': previous,
|
||||
}
|
||||
|
||||
if len(queue_documents) == 1:
|
||||
context['object'] = queue_documents[0]
|
||||
context['title'] = _(u'Are you sure you wish to re-queue document: %s?') % ', '.join([unicode(d) for d in queue_documents])
|
||||
elif len(queue_documents) > 1:
|
||||
context['title'] = _(u'Are you sure you wish to re-queue documents: %s?') % ', '.join([unicode(d) for d in queue_documents])
|
||||
|
||||
return render_to_response('main/generic_confirm.html', context,
|
||||
context_instance=RequestContext(request))
|
||||
return HttpResponseRedirect(request.META.get('HTTP_REFERER', reverse('main:home')))
|
||||
|
||||
|
||||
def re_queue_multiple_document(request):
|
||||
return re_queue_document(request, queue_document_id_list=request.GET.get('id_list', []))
|
||||
|
||||
|
||||
def all_document_ocr_cleanup(request):
|
||||
def document_all_ocr_cleanup(request):
|
||||
Permission.objects.check_permissions(request.user, [PERMISSION_OCR_CLEAN_ALL_PAGES])
|
||||
|
||||
previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', None)))
|
||||
@@ -182,27 +52,133 @@ def all_document_ocr_cleanup(request):
|
||||
return render_to_response('main/generic_confirm.html', {
|
||||
'previous': previous,
|
||||
'next': next,
|
||||
'title': _(u'Are you sure you wish to clean up all the pages content?'),
|
||||
'message': _(u'On large databases this operation may take some time to execute.'),
|
||||
'title': _('Are you sure you wish to clean up all the pages content?'),
|
||||
'message': _('On large databases this operation may take some time to execute.'),
|
||||
}, context_instance=RequestContext(request))
|
||||
else:
|
||||
try:
|
||||
# TODO: turn this into a Celery task
|
||||
clean_pages()
|
||||
messages.success(request, _(u'Document pages content clean up complete.'))
|
||||
messages.success(request, _('Document pages content clean up complete.'))
|
||||
except Exception as exception:
|
||||
messages.error(request, _(u'Document pages content clean up error: %s') % exception)
|
||||
messages.error(request, _('Document pages content clean up error: %s') % exception)
|
||||
|
||||
return HttpResponseRedirect(next)
|
||||
|
||||
|
||||
def display_link(obj):
|
||||
output = []
|
||||
if hasattr(obj, 'get_absolute_url'):
|
||||
output.append(u'<a href="%(url)s">%(obj)s</a>' % {
|
||||
'url': obj.get_absolute_url(),
|
||||
'obj': obj
|
||||
})
|
||||
if output:
|
||||
return u''.join(output)
|
||||
def entry_list(request):
|
||||
Permission.objects.check_permissions(request.user, [PERMISSION_OCR_DOCUMENT])
|
||||
|
||||
context = {
|
||||
'object_list': DocumentVersionOCRError.objects.all(),
|
||||
'title': _('OCR errors'),
|
||||
'hide_object': True,
|
||||
}
|
||||
|
||||
return render_to_response('main/generic_list.html', context,
|
||||
context_instance=RequestContext(request))
|
||||
|
||||
|
||||
def entry_delete(request, pk=None, pk_list=None):
|
||||
Permission.objects.check_permissions(request.user, [PERMISSION_OCR_DOCUMENT_DELETE])
|
||||
|
||||
if pk:
|
||||
entries = [get_object_or_404(DocumentVersionOCRError, pk=pk)]
|
||||
elif pk_list:
|
||||
entries = [get_object_or_404(DocumentVersionOCRError, pk=pk) for pk in pk_list.split(',')]
|
||||
else:
|
||||
return obj
|
||||
messages.error(request, _('Make at least one selection.'))
|
||||
return HttpResponseRedirect(request.META.get('HTTP_REFERER', reverse('main:home')))
|
||||
|
||||
next = request.POST.get('next', request.GET.get('next', request.META.get('HTTP_REFERER', None)))
|
||||
previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', None)))
|
||||
|
||||
if request.method == 'POST':
|
||||
for entry in entries:
|
||||
try:
|
||||
entry.delete()
|
||||
messages.success(request, _('Entry: %(entry)s deleted successfully.') % {
|
||||
'entry': entry})
|
||||
|
||||
except Exception as exception:
|
||||
messages.error(request, _('Error entry: %(entry)s; %(error)s') % {
|
||||
'entry': entry, 'error': exception})
|
||||
return HttpResponseRedirect(next)
|
||||
|
||||
context = {
|
||||
'next': next,
|
||||
'previous': previous,
|
||||
'delete_view': True,
|
||||
}
|
||||
|
||||
if len(entries) == 1:
|
||||
context['object'] = entries[0]
|
||||
|
||||
context['title'] = ungettext(
|
||||
'Are you sure you wish to delete the entry: %(entry)s?',
|
||||
'Are you sure you wish to delete these %(count)d entries.',
|
||||
len(entries)
|
||||
) % {
|
||||
'count': len(entries),
|
||||
'entry': entries[0],
|
||||
}
|
||||
|
||||
return render_to_response('main/generic_confirm.html', context,
|
||||
context_instance=RequestContext(request))
|
||||
|
||||
|
||||
def entry_delete_multiple(request):
|
||||
return entry_delete(request, pk_list=request.GET.get('id_list', ''))
|
||||
|
||||
|
||||
def entry_re_queue(request, pk=None, pk_list=None):
|
||||
Permission.objects.check_permissions(request.user, [PERMISSION_OCR_DOCUMENT])
|
||||
|
||||
if pk:
|
||||
entries = [get_object_or_404(DocumentVersionOCRError, pk=pk)]
|
||||
elif pk_list:
|
||||
entries = [get_object_or_404(DocumentVersionOCRError, pk=pk) for pk in pk_list.split(',')]
|
||||
else:
|
||||
messages.error(request, _('Make at least one selection.'))
|
||||
return HttpResponseRedirect(request.META.get('HTTP_REFERER', reverse('main:home')))
|
||||
|
||||
next = request.POST.get('next', request.GET.get('next', request.META.get('HTTP_REFERER', None)))
|
||||
previous = request.POST.get('previous', request.GET.get('previous', request.META.get('HTTP_REFERER', None)))
|
||||
|
||||
if request.method == 'POST':
|
||||
for entry in entries:
|
||||
try:
|
||||
entry.document_version.submit_for_ocr()
|
||||
messages.success(
|
||||
request,
|
||||
_('Entry: %(entry)s was re-queued for OCR.') % {
|
||||
'entry': entry
|
||||
}
|
||||
)
|
||||
except DocumentVersion.DoesNotExist:
|
||||
messages.error(request, _('Document version id#: %d, no longer exists.') % entry.document_version_id)
|
||||
return HttpResponseRedirect(next)
|
||||
|
||||
context = {
|
||||
'next': next,
|
||||
'previous': previous,
|
||||
}
|
||||
|
||||
if len(entries) == 1:
|
||||
context['object'] = entries[0]
|
||||
|
||||
context['title'] = ungettext(
|
||||
'Are you sure you wish to re-queue the entry: %(entry)s?',
|
||||
'Are you sure you wish to re-queue these %(count)d entries.',
|
||||
len(entries)
|
||||
) % {
|
||||
'count': len(entries),
|
||||
'entry': entries[0],
|
||||
}
|
||||
|
||||
return render_to_response('main/generic_confirm.html', context,
|
||||
context_instance=RequestContext(request))
|
||||
|
||||
|
||||
def entry_re_queue_multiple(request):
|
||||
return entry_re_queue(request, pk_list=request.GET.get('id_list', []))
|
||||
|
||||
@@ -3,6 +3,7 @@ from __future__ import absolute_import
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
from common.utils import encapsulate
|
||||
from documents.links import document_list_recent, document_list
|
||||
from documents.models import Document
|
||||
from navigation.api import register_links, register_model_list_columns
|
||||
from project_setup.api import register_setup
|
||||
@@ -24,17 +25,6 @@ from .links import (document_create_multiple, document_create_siblings,
|
||||
from .models import Source, SourceTransformation
|
||||
from .widgets import staging_file_thumbnail
|
||||
|
||||
register_links([StagingFile], [staging_file_delete])
|
||||
|
||||
register_links([Source, 'sources:setup_source_list', 'sources:setup_source_create'], [setup_sources, setup_source_create_webform, setup_source_create_staging_folder, setup_source_create_pop3_email, setup_source_create_imap_email, setup_source_create_watch_folder], menu_name='secondary_menu')
|
||||
register_links([Source], [setup_source_edit, setup_source_transformation_list, setup_source_delete])
|
||||
|
||||
register_links(SourceTransformation, [setup_source_transformation_edit, setup_source_transformation_delete])
|
||||
register_links(['sources:setup_source_transformation_create', 'sources:setup_source_transformation_edit', 'sources:setup_source_transformation_delete', 'sources:setup_source_transformation_list'], [setup_source_transformation_create], menu_name='sidebar')
|
||||
|
||||
# Document version
|
||||
register_links(['documents:document_version_list', 'documents:upload_version', 'documents:document_version_revert'], [upload_version], menu_name='sidebar')
|
||||
|
||||
register_model_list_columns(StagingFile, [
|
||||
{
|
||||
'name': _(u'Thumbnail'), 'attribute':
|
||||
@@ -42,9 +32,16 @@ register_model_list_columns(StagingFile, [
|
||||
},
|
||||
])
|
||||
|
||||
register_setup(setup_sources)
|
||||
|
||||
register_links([StagingFile], [staging_file_delete])
|
||||
register_links([Source, 'sources:setup_source_list', 'sources:setup_source_create'], [setup_sources, setup_source_create_webform, setup_source_create_staging_folder, setup_source_create_pop3_email, setup_source_create_imap_email, setup_source_create_watch_folder], menu_name='secondary_menu')
|
||||
register_links([Source], [setup_source_edit, setup_source_transformation_list, setup_source_delete])
|
||||
register_links(SourceTransformation, [setup_source_transformation_edit, setup_source_transformation_delete])
|
||||
register_links(['sources:setup_source_transformation_create', 'sources:setup_source_transformation_edit', 'sources:setup_source_transformation_delete', 'sources:setup_source_transformation_list'], [setup_source_transformation_create], menu_name='sidebar')
|
||||
register_links(['documents:document_version_list', 'documents:upload_version', 'documents:document_version_revert'], [upload_version], menu_name='sidebar')
|
||||
register_links([Document, 'documents:document_list_recent', 'documents:document_list', 'sources:document_create', 'sources:document_create_multiple', 'sources:upload_interactive', 'sources:staging_file_delete'], [document_create_multiple], menu_name='secondary_menu')
|
||||
register_links(Document, [document_create_siblings])
|
||||
register_links(['sources:document_create', 'sources:document_create_multiple', 'sources:upload_interactive', 'sources:staging_file_delete'], [document_list_recent, document_list], menu_name='secondary_menu')
|
||||
|
||||
register_setup(setup_sources)
|
||||
|
||||
APIEndPoint('sources')
|
||||
|
||||
@@ -13,6 +13,9 @@ def fake_get_or_create(model, *args, **kwargs):
|
||||
|
||||
|
||||
class Migration(DataMigration):
|
||||
needed_by = (
|
||||
('documents', '0024_auto__add_field_documenttype_ocr'),
|
||||
)
|
||||
|
||||
def forwards(self, orm):
|
||||
"Write your forwards methods here."
|
||||
|
||||
@@ -14,6 +14,13 @@ def fake_get_or_create(model, *args, **kwargs):
|
||||
|
||||
|
||||
class Migration(SchemaMigration):
|
||||
depends_on = (
|
||||
('documents', '0024_auto__add_field_documenttype_ocr'),
|
||||
)
|
||||
|
||||
needed_by = (
|
||||
('documents', '0031_remove_orphan_documents'),
|
||||
)
|
||||
|
||||
def forwards(self, orm):
|
||||
# Deleting model 'WatchFolderSource'
|
||||
|
||||
@@ -5,37 +5,37 @@ from django.contrib import admin
|
||||
admin.autodiscover()
|
||||
|
||||
urlpatterns = patterns('',
|
||||
url(r'^', include('common.urls', namespace='common')),
|
||||
url(r'^', include('main.urls', namespace='main')),
|
||||
url(r'^accounts/', include('user_management.urls', namespace='user_management')),
|
||||
url(r'^acls/', include('acls.urls', namespace='acls')),
|
||||
url(r'^admin/', include(admin.site.urls)),
|
||||
url(r'^api/', include('rest_api.urls')),
|
||||
url(r'^checkouts/', include('checkouts.urls', namespace='checkouts')),
|
||||
url(r'^comments/', include('document_comments.urls', namespace='comments')),
|
||||
url(r'^document_acls/', include('document_acls.urls', namespace='document_acls')),
|
||||
url(r'^document_indexing/', include('document_indexing.urls', namespace='indexing')),
|
||||
url(r'^common/', include('common.urls', namespace='common')),
|
||||
url(r'^document/acls/', include('document_acls.urls', namespace='document_acls')),
|
||||
url(r'^document/signatures/', include('document_signatures.urls', namespace='signatures')),
|
||||
url(r'^document/states/', include('document_states.urls', namespace='document_states')),
|
||||
url(r'^documents/', include('documents.urls', namespace='documents')),
|
||||
url(r'^documents/signatures/', include('document_signatures.urls', namespace='signatures')),
|
||||
url(r'^docs/', include('rest_framework_swagger.urls')),
|
||||
url(r'^events/', include('events.urls', namespace='events')),
|
||||
url(r'^folders/', include('folders.urls', namespace='folders')),
|
||||
url(r'^gpg/', include('django_gpg.urls', namespace='django_gpg')),
|
||||
url(r'^indexing/', include('document_indexing.urls', namespace='indexing')),
|
||||
url(r'^installation/', include('installation.urls', namespace='installation')),
|
||||
url(r'^linking/', include('linking.urls', namespace='linking')),
|
||||
url(r'^mailer/', include('mailer.urls', namespace='mailer')),
|
||||
url(r'^metadata/', include('metadata.urls', namespace='metadata')),
|
||||
url(r'^ocr/', include('ocr.urls', namespace='ocr')),
|
||||
url(r'^permissions/', include('permissions.urls', namespace='permissions')),
|
||||
url(r'^project_setup/', include('project_setup.urls', namespace='project_setup')),
|
||||
url(r'^project_tools/', include('project_tools.urls', namespace='project_tools')),
|
||||
url(r'^registration/', include('registration.urls', namespace='registration')),
|
||||
url(r'^search/', include('dynamic_search.urls', namespace='search')),
|
||||
url(r'^settings/', include('smart_settings.urls', namespace='settings')),
|
||||
url(r'^setup/', include('project_setup.urls', namespace='project_setup')),
|
||||
url(r'^sources/', include('sources.urls', namespace='sources')),
|
||||
url(r'^statistics/', include('statistics.urls', namespace='statistics')),
|
||||
url(r'^tags/', include('tags.urls', namespace='tags')),
|
||||
url(r'^user_management/', include('user_management.urls', namespace='user_management')),
|
||||
url(r'^tools/', include('project_tools.urls', namespace='project_tools')),
|
||||
)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user