Finish redactions app

Signed-off-by: Roberto Rosario <roberto.rosario@mayan-edms.com>
This commit is contained in:
Roberto Rosario
2019-06-26 14:16:11 -04:00
parent 3d22f48555
commit 42a7ebeea2
14 changed files with 163 additions and 391 deletions

View File

@@ -265,22 +265,22 @@ class TransformationDrawRectanglePercent(BaseTransformation):
super(TransformationDrawRectanglePercent, self).execute_on(*args, **kwargs)
try:
left = int(self.left or '0')
left = float(self.left or '0')
except ValueError:
left = 0
try:
top = int(self.top or '0')
top = float(self.top or '0')
except ValueError:
top = 0
try:
right = int(self.right or '0')
right = float(self.right or '0')
except ValueError:
right = 0
try:
bottom = int(self.bottom or '0')
bottom = float(self.bottom or '0')
except ValueError:
bottom = 0
@@ -308,11 +308,11 @@ class TransformationDrawRectanglePercent(BaseTransformation):
if bottom > 100:
bottom = 100
if left > right:
left, right = right, left
#if left > right:
# left, right = right, left
if top > bottom:
top, bottom = bottom, top
#if top > bottom:
# top, bottom = bottom, top
logger.debug(
'left: %f, top: %f, right: %f, bottom: %f', left, top, right,

View File

@@ -1,15 +0,0 @@
from __future__ import unicode_literals
from django.contrib import admin
#from .models import OCRZone, OCRZoneContent
#@admin.register(OCRZone)
#class OCRZoneAdmin(admin.ModelAdmin):
# list_display = ('document_type', 'label', 'slug', 'enabled')
# list_display_links = ('label', 'slug')
# prepopulated_fields = {'slug': ('label',)}
#admin.site.register(OCRZoneContent)

View File

@@ -3,20 +3,16 @@ from __future__ import unicode_literals
import logging
from django.apps import apps
from django.db.models.signals import post_save
from django.utils.translation import ugettext_lazy as _
from mayan.apps.acls.classes import ModelPermission
from mayan.apps.common.apps import MayanAppConfig
from mayan.apps.common.menus import (
menu_facet, menu_list_facet, menu_multi_item, menu_object, menu_secondary,
menu_tools
menu_list_facet, menu_object, menu_secondary,
)
from mayan.apps.navigation.classes import SourceColumn
from .handlers import handler_create_default_full_zone
from .links import (
link_redaction_create, link_redaction_edit, link_redaction_list
link_redaction_create, link_redaction_delete, link_redaction_edit,
link_redaction_list
)
logger = logging.getLogger(__name__)
@@ -33,19 +29,10 @@ class RedactionsApp(MayanAppConfig):
def ready(self):
super(RedactionsApp, self).ready()
Document = apps.get_model(
app_label='documents', model_name='Document'
)
DocumentPage = apps.get_model(
app_label='documents', model_name='DocumentPage'
)
Redaction = self.get_model(model_name='Redaction')
Transformation = apps.get_model(
app_label='converter', model_name='Transformation'
)
#columns = SourceColumn.get_for_source(context=None, source=Transformation)
#print("@@@", columns)
menu_list_facet.bind_links(
links=(
@@ -53,7 +40,8 @@ class RedactionsApp(MayanAppConfig):
), sources=(DocumentPage,)
)
menu_object.bind_links(
links=(link_redaction_edit,), sources=(Transformation,)
links=(link_redaction_delete, link_redaction_edit,),
sources=(Redaction,)
)
menu_secondary.bind_links(
links=(link_redaction_create,), sources=(Redaction,)

View File

@@ -1,44 +1,15 @@
from __future__ import unicode_literals
import base64
from PIL import Image
from django import forms
from django.utils.encoding import force_unicode
from django.utils.html import conditional_escape
from django.utils.safestring import mark_safe
from django.utils.translation import ugettext_lazy as _
from mayan.apps.common.widgets import TextAreaDiv
from mayan.apps.converter.models import Transformation
#from metadata.models import MetadataType
from .models import Redaction
class RedactionForm(forms.ModelForm):
def __init__(self, *args, **kwargs):
self.document_page = kwargs.pop('document_page', None)
super(RedactionForm, self).__init__(*args, **kwargs)
#if not self.document_type and self.instance:
# self.document_type = self.instance.document_type
class Meta:
#fields = ('label', 'slug', 'enabled')
fields = ()
model = Redaction
class RedactionCoordinatesForm(forms.ModelForm):
class Meta:
#fields = ('label', 'slug', 'enabled', 'top', 'left', 'right', 'bottom')
#fields = ('top', 'left', 'right', 'bottom')
fields = ('arguments',)
model = Redaction
widgets = {
#'top': forms.widgets.HiddenInput,
#'left': forms.widgets.HiddenInput,
#'right': forms.widgets.HiddenInput,
#'bottom': forms.widgets.HiddenInput,
'arguments': forms.widgets.Textarea(attrs={'class': 'hidden'}),
}

View File

@@ -6,4 +6,6 @@ icon_redaction_create = Icon(
driver_name='fontawesome-dual', primary_symbol='highlighter',
secondary_symbol='plus'
)
icon_redaction_delete = Icon(driver_name='fontawesome', symbol='times')
icon_redaction_edit = Icon(driver_name='fontawesome', symbol='pencil-alt')
icon_redactions = Icon(driver_name='fontawesome', symbol='highlighter')

View File

@@ -4,22 +4,29 @@ from django.utils.translation import ugettext_lazy as _
from mayan.apps.navigation.classes import Link
from .permissions import (
permission_redaction_create, permission_redaction_delete,
permission_redaction_edit, permission_redaction_view
)
link_redaction_create = Link(
icon_class_path='mayan.apps.redactions.icons.icon_redaction_create',
#permissions=(,), text=_('Redactions'),
text=_('Add redaction'),
permissions=(permission_redaction_create,), text=_('Create redaction'),
view='redactions:redaction_create', args='resolved_object.id'
)
link_redaction_delete = Link(
icon_class_path='mayan.apps.redactions.icons.icon_redaction_delete',
permissions=(permission_redaction_delete,), tags='dangerous',
text=_('Delete'), view='redactions:redaction_delete',
args='resolved_object.id'
)
link_redaction_edit = Link(
#icon_class_path='mayan.apps.redactions.icons.icon_redaction_create',
#permissions=(,), text=_('Redactions'),
text=_('Edit'),
icon_class_path='mayan.apps.redactions.icons.icon_redaction_edit',
permissions=(permission_redaction_edit,), text=_('Edit'),
view='redactions:redaction_edit', args='resolved_object.id'
)
link_redaction_list = Link(
icon_class_path='mayan.apps.redactions.icons.icon_redactions',
#permissions=(,), text=_('Redactions'),
text=_('Redactions'),
permissions=(permission_redaction_view,), text=_('Redactions'),
view='redactions:redaction_list', args='resolved_object.id'
)

View File

@@ -1,7 +0,0 @@
#from __future__ import unicode_literals
#
#DEFAULT_OCR_FILE_FORMAT = 'tiff'
#DEFAULT_OCR_FILE_EXTENSION = 'tif'
#DEFAULT_OCR_TASK_RETRY_DELAY = 10
#LOCK_EXPIRE = 60 * 10 # Adjust to worst case scenario
#UNPAPER_FILE_FORMAT = 'ppm'

View File

@@ -1,10 +1,5 @@
from __future__ import unicode_literals
import io
from django.core.exceptions import ValidationError, NON_FIELD_ERRORS
from django.db import models
from django.utils.encoding import python_2_unicode_compatible
from django.utils.translation import ugettext_lazy as _
from mayan.apps.converter.models import Transformation

View File

@@ -1,178 +0,0 @@
from __future__ import unicode_literals
import logging
import os
#import slate
import subprocess
import tempfile
from django.utils.translation import ugettext_lazy as _
from common.settings import setting_temporary_directory
from common.utils import copyfile
from converter.exceptions import OfficeConversionError
from converter.classes import (
CONVERTER_OFFICE_FILE_MIMETYPES
)
from ..settings import setting_pdftotext_path
from .exceptions import ParserError, ParserUnknownFile
mimetype_registry = {}
logger = logging.getLogger(__name__)
def register_parser(mimetypes, parsers):
    """
    Instantiate each parser class and register the instance under every
    mimetype given.

    mimetypes: iterable of mimetype strings.
    parsers: iterable of parser classes, called with no arguments.
    """
    for mimetype in mimetypes:
        for parser_class in parsers:
            try:
                instance = parser_class()
            except ParserError:
                # A parser that fails to initialize is left out of the list
                # for this mimetype; the remaining parsers are still tried.
                continue

            mimetype_registry.setdefault(mimetype, []).append(instance)
def parse_document_page(document_page, descriptor=None, mimetype=None):
    """
    Run the parsers registered for a mimetype against a document page,
    stopping at the first one that succeeds.

    document_page: page object handed to each parser's parse() method.
    descriptor: optional open file object forwarded to the parsers.
    mimetype: optional mimetype override. When omitted the document's own
        mimetype is used, and any text/* type that is not an office format
        falls back to text/plain.

    Raises ParserUnknownFile when no parser is registered for the mimetype
    and ParserError when every registered parser failed.
    """
    logger.debug('executing')
    logger.debug('document_page: %s', document_page)
    logger.debug('document mimetype: %s', document_page.document.file_mimetype)

    if not mimetype:
        mimetype = document_page.document.file_mimetype
        if mimetype.startswith('text/'):
            if mimetype not in CONVERTER_OFFICE_FILE_MIMETYPES:
                mimetype = 'text/plain'
                logger.debug('fallback to mimetype text/plain')
    logger.debug('used mimetype: %s', mimetype)

    # Only the registry lookup is guarded: a KeyError raised from inside a
    # parser is a parser bug and must not be reported as an unknown file type.
    try:
        parsers = mimetype_registry[mimetype]
    except KeyError:
        raise ParserUnknownFile

    for parser in parsers:
        try:
            parser.parse(document_page, descriptor)
        except ParserError:
            # If parser raises error, try next parser in the list
            continue
        else:
            # If parser was successful there is no need to try
            # others in the list for this mimetype
            return

    raise ParserError('Parser list exhausted')
class Parser(object):
    """
    Parser base class
    """
    def parse(self, document_page, descriptor=None):
        """
        Extract the text of document_page. Subclasses must override this.

        Raises NotImplementedError naming the subclass that lacks parse().
        """
        # Exceptions do not interpolate logging-style arguments, so the
        # class name is formatted into the message explicitly.
        raise NotImplementedError(
            'Your %s class has not defined a parse() method, which is '
            'required.' % self.__class__.__name__
        )
class SlateParser(Parser):
    """
    Parser for PDF files using the slate library for Python
    """
    def parse(self, document_page, descriptor=None):
        """
        Store the text slate extracts for this page on document_page.

        descriptor: optional open file object for the PDF; when omitted the
            page's document version is opened. It is closed in both cases.

        Raises ParserError when slate returns only a form feed for the page.
        """
        logger.debug('Starting SlateParser')

        if not descriptor:
            descriptor = document_page.document_version.open()

        # NOTE(review): the module-level `import slate` is commented out in
        # this file, so this name looks unresolved at runtime -- confirm.
        try:
            pdf_pages = slate.PDF(descriptor)
        finally:
            # Close the descriptor even when slate fails to read the file.
            descriptor.close()

        page_text = pdf_pages[document_page.page_number - 1]

        if page_text == b'\x0c':
            logger.debug('The Slate parser didn\'t return any output')
            raise ParserError('No output')

        document_page.content = page_text
        document_page.page_label = _('Text extracted from PDF')
        document_page.save()
class OfficeParser(Parser):
    """
    Parser for office document formats
    """
    def parse(self, document_page, descriptor=None):
        """
        Convert the office document to PDF and hand the result to the PDF
        parsers through parse_document_page().

        Raises ParserError when the conversion fails or produces no output.
        """
        logger.debug('executing')
        try:
            # NOTE(review): no import of OfficeConverter is visible in this
            # file, so this name looks unresolved at runtime -- confirm.
            office_converter = OfficeConverter()
            document_file = document_page.document.document_save_to_temp_dir(
                document_page.document.checksum
            )
            logger.debug('document_file: %s', document_file)

            office_converter.convert(
                document_file, mimetype=document_page.document.file_mimetype
            )
            if office_converter.exists:
                input_filepath = office_converter.output_filepath
                logger.debug(
                    'office_converter.output_filepath: %s', input_filepath
                )

                # Now that the office document has been converted to PDF
                # call the corresponding PDF parser on this new file. The
                # PDF is binary data, and the context manager guarantees the
                # file is closed whichever parser ends up handling it.
                with open(input_filepath, 'rb') as pdf_descriptor:
                    parse_document_page(
                        document_page, descriptor=pdf_descriptor,
                        mimetype='application/pdf'
                    )
            else:
                raise ParserError
        except OfficeConversionError as exception:
            logger.error(exception)
            raise ParserError
class PopplerParser(Parser):
    """
    PDF parser using the pdftotext executable from the poppler package
    """
    def __init__(self):
        """
        Resolve the pdftotext path from the setting, defaulting to
        /usr/bin/pdftotext.

        Raises ParserError when the executable does not exist.
        """
        self.pdftotext_path = (
            setting_pdftotext_path.value or '/usr/bin/pdftotext'
        )
        if not os.path.exists(self.pdftotext_path):
            raise ParserError('cannot find pdftotext executable')

        logger.debug('self.pdftotext_path: %s', self.pdftotext_path)

    def parse(self, document_page, descriptor=None):
        """
        Run pdftotext on a single page and store its output on
        document_page.

        descriptor: optional open file object for the PDF. It is copied to
            a temporary file that is removed once pdftotext has finished.

        Raises ParserError when pdftotext exits with a non-zero status or
        emits only a form feed.
        """
        logger.debug('parsing PDF with PopplerParser')
        pagenum = str(document_page.page_number)
        temp_filepath = None

        if descriptor:
            destination_descriptor, temp_filepath = tempfile.mkstemp(
                dir=setting_temporary_directory.value
            )
            # mkstemp() returns an already open OS-level handle; only the
            # path is needed here, so close it right away to avoid a leak.
            os.close(destination_descriptor)
            copyfile(descriptor, temp_filepath)
            document_file = temp_filepath
        else:
            document_file = document_page.document.document_save_to_temp_dir(
                document_page.document.checksum
            )

        logger.debug('document_file: %s', document_file)
        logger.debug('parsing PDF page %s', pagenum)

        command = [
            self.pdftotext_path, '-f', pagenum, '-l', pagenum, document_file,
            '-'
        ]

        try:
            proc = subprocess.Popen(
                command, close_fds=True, stderr=subprocess.PIPE,
                stdout=subprocess.PIPE
            )
            # communicate() drains both pipes while waiting. Calling wait()
            # first can deadlock once the child fills a pipe buffer.
            output, error_output = proc.communicate()
        finally:
            if temp_filepath:
                try:
                    os.remove(temp_filepath)
                except OSError:
                    # Best-effort cleanup; a leftover temporary file must
                    # not mask the parsing result.
                    logger.debug(
                        'unable to remove temporary file: %s', temp_filepath
                    )

        if proc.returncode != 0:
            logger.error(error_output)
            raise ParserError

        if output == b'\x0c':
            logger.debug('Parser didn\'t return any output')
            raise ParserError('No output')

        document_page.content = output
        document_page.page_label = _('Text extracted from PDF')
        document_page.save()
#register_parser(mimetypes=['application/pdf'], parsers=[PopplerParser, SlateParser])
# register_parser(mimetypes=office_converter.CONVERTER_OFFICE_FILE_MIMETYPES, parsers=[OfficeParser]) # TODO: FIX

View File

@@ -1,10 +0,0 @@
class ParserError(Exception):
    """
    Raised when a text parser fails to understand a file it has been passed
    or when the resulting parsed text is invalid
    """
class ParserUnknownFile(Exception):
    """
    Raised when no parser is able to handle the type of file given
    """

View File

@@ -1 +1,20 @@
from __future__ import absolute_import, unicode_literals
from django.utils.translation import ugettext_lazy as _
from mayan.apps.permissions import PermissionNamespace
# Permission namespace of the redactions app; every permission below is
# added to it.
namespace = PermissionNamespace(label=_('Redactions'), name='redactions')
# One permission per redaction action: create, delete, edit and view.
permission_redaction_create = namespace.add_permission(
label=_('Create new redactions'), name='redaction_create'
)
permission_redaction_delete = namespace.add_permission(
label=_('Delete redactions'), name='redaction_delete'
)
permission_redaction_edit = namespace.add_permission(
label=_('Edit redactions'), name='redaction_edit'
)
permission_redaction_view = namespace.add_permission(
label=_('View existing redactions'), name='redaction_view'
)

View File

@@ -18,22 +18,10 @@
.cropper-main img {
max-width: 100%;
}
/*
.cropper-preview {
width: 100%;
overflow: hidden;
}
.cropper-preview img {
max-width: 100%;
}
*/
</style>
{% endblock %}
{% block content %}
<div id="cropper-result"></div>
<div class="cropper-main">
<img src="{{ document_page.get_api_image_url }}">
</div>
@@ -52,6 +40,36 @@
var containerData;
var $image = $('.cropper-main img');
var cropperInstance;
var defaultArguments = {
left: 10,
top: 10,
right: 10,
bottom: 10,
fillcolor: '#000000',
}
var initialArguments = JSON.parse($('#id_arguments').text() || JSON.stringify(defaultArguments));
// Cropper "crop" callback: converts the crop box, reported in image pixels,
// into percentages of the image size and stores them as JSON in the
// #id_arguments form field.
var callbackCrop = function (data) {
    var detail = data.detail;

    // left/top are offsets from the left and top edges; right/bottom are
    // the space remaining up to the right and bottom edges.
    // NOTE(review): 100.001 looks like a small rounding guard -- confirm.
    var crop_left = (detail.x / pic_real_width * 100).toFixed(2);
    var crop_top = (detail.y / pic_real_height * 100).toFixed(2);
    var crop_right = (100.001 - (detail.x + detail.width) / pic_real_width * 100).toFixed(2);
    var crop_bottom = (100.001 - (detail.y + detail.height) / pic_real_height * 100).toFixed(2);

    // Not named `arguments`: that would shadow the function's built-in
    // arguments object and is a SyntaxError in strict mode.
    var redactionArguments = {
        'left': parseFloat(crop_left),
        'top': parseFloat(crop_top),
        'right': parseFloat(crop_right),
        'bottom': parseFloat(crop_bottom),
        'fillcolor': '#000000',
    };

    $('#id_arguments').text(JSON.stringify(redactionArguments));
};
jQuery(document).ready(function() {
$('.help-block').hide();
$('label').hide();
});
$.getScript("{% static 'redactions/node_modules/cropperjs/dist/cropper.js' %}")
.done(function (script, textStatus) {
@@ -65,45 +83,22 @@
.on('load', function () {
pic_real_width = this.width;
pic_real_height = this.height;
console.log('loaded');
});
cropperInstance = $image.cropper({
crop: function (data) {
crop_left = (data.detail.x / pic_real_width * 100).toFixed(2);
crop_top = (data.detail.y / pic_real_height * 100).toFixed(2);
crop_right = (100.001 - (data.detail.x + data.detail.width) / pic_real_width * 100).toFixed(2);
crop_bottom = (100.001 - (data.detail.y + data.detail.height) / pic_real_height * 100).toFixed(2);
$('#id_left').val(crop_left);
$('#id_top').val(crop_top);
$('#id_right').val(crop_right);
$('#id_bottom').val(crop_bottom);
var arguments = {
'left': crop_left,
'top': crop_top,
'right': crop_right,
'bottom': crop_bottom,
'fillcolor': '#000000',
}
//$('#id_arguments').text(JSON.stringify(arguments));
},
crop: callbackCrop,
mouseWheelZoom: false,
movable: false,
//preview: '.cropper-preview',
ready: function () {
canvasData = $image.cropper('getCanvasData');
containerData = $image.cropper('getContainerData');
var arguments = JSON.parse($('#id_arguments').text());
console.log(arguments);
$image.cropper('setCropBoxData', {
left: Math.round(arguments.left / 100.0 * canvasData.width + canvasData.left),
top: Math.round(arguments.top / 100.0 * canvasData.height + canvasData.top),// + canvasData.top),
width: Math.round((100.0 - arguments.right - arguments.left) / 100.0 * canvasData.width),// + canvasData.left,
height: Math.round((100.0 - arguments.bottom - arguments.top) / 100.0 * canvasData.height),// + canvasData.top),
left: initialArguments.left / 100.0 * canvasData.width + canvasData.left,
top: initialArguments.top / 100.0 * canvasData.height + canvasData.top,
width: (100.0 - initialArguments.right - initialArguments.left) / 100.0 * canvasData.width,
height: (100.0 - initialArguments.bottom - initialArguments.top) / 100.0 * canvasData.height,
});
},
rotatable: false,

View File

@@ -3,7 +3,8 @@ from __future__ import unicode_literals
from django.conf.urls import url
from .views import (
RedactionCreateView, RedactionEditView, RedactionListView,
RedactionCreateView, RedactionDeleteView, RedactionEditView,
RedactionListView,
)
@@ -16,13 +17,14 @@ urlpatterns = [
regex=r'^document_pages/(?P<pk>\d+)/redactions/$',
view=RedactionListView.as_view(), name='redaction_list'
),
#url(
# regex=r'^delete/(?P<pk>\d+)/$', view=RedactionDeleteView.as_view(),
# name='redaction_delete'
#),
url(
regex=r'^edit/(?P<pk>\d+)/$', view=RedactionEditView.as_view(),
name='redaction_edit'
regex=r'^redactions/(?P<pk>\d+)/delete/$',
view=RedactionDeleteView.as_view(), name='redaction_delete'
),
url(
regex=r'^redactions/(?P<pk>\d+)/edit/$',
view=RedactionEditView.as_view(), name='redaction_edit'
),
]
api_urls = []

View File

@@ -1,60 +1,54 @@
from __future__ import absolute_import, unicode_literals
from django.contrib import messages
from django.core.exceptions import PermissionDenied
from django.core.paginator import Paginator, EmptyPage
import logging
from django.core.urlresolvers import reverse
from django.http import Http404, HttpResponseRedirect
from django.shortcuts import get_object_or_404, render_to_response
from django.template import RequestContext
from django.utils.translation import ugettext_lazy as _
from mayan.apps.common.generics import (
SingleObjectCreateView, SingleObjectEditView, SingleObjectListView
SingleObjectCreateView, SingleObjectDeleteView, SingleObjectEditView,
SingleObjectListView
)
from mayan.apps.common.mixins import ExternalObjectMixin
from mayan.apps.converter.models import Transformation
from mayan.apps.converter.transformations import TransformationDrawRectangle
from mayan.apps.converter.views import TransformationListView
from mayan.apps.documents.models import Document, DocumentPage
from mayan.apps.converter.transformations import TransformationDrawRectanglePercent
from mayan.apps.documents.models import DocumentPage
from .forms import RedactionCoordinatesForm, RedactionForm
from .forms import RedactionCoordinatesForm
from .icons import icon_redactions
from .links import link_redaction_create
from .models import Redaction
from .permissions import (
permission_redaction_create, permission_redaction_delete,
permission_redaction_edit, permission_redaction_view
)
logger = logging.getLogger(__name__)
class RedactionCreateView(ExternalObjectMixin, SingleObjectCreateView):
external_object_class = DocumentPage
external_object_pk_url_kwarg = 'pk'
form_class = RedactionForm
form_class = RedactionCoordinatesForm
model = Redaction
#object_permission =
object_permission = permission_redaction_create
template_name = 'redactions/cropper.html'
def form_valid(self, form):
instance = form.save(commit=False)
instance.name = TransformationDrawRectangle.name
instance.content_object = self.external_object
instance.name = TransformationDrawRectanglePercent.name
instance.save()
#messages.success(self.request, _('Redaction created successfully.'))
return HttpResponseRedirect(self.get_success_url())
return super(RedactionCreateView, self).form_valid(form)
def get_extra_context(self, **kwargs):
return {
'object': self.external_object,
'title': _(
'Create redaction for document page: %s'
) % self.external_object
context = {
'document_page': self.external_object,
'redaction': self.object,
'title': _('Create redaction for: %s') % self.external_object
}
def get_form_kwargs(self):
"""
Returns the keyword arguments for instantiating the form.
"""
kwargs = super(RedactionCreateView, self).get_form_kwargs()
kwargs.update({'document_page': self.external_object})
return kwargs
return context
def get_post_action_redirect(self):
return reverse(
@@ -64,27 +58,49 @@ class RedactionCreateView(ExternalObjectMixin, SingleObjectCreateView):
)
class RedactionDeleteView(SingleObjectDeleteView):
    """
    Confirm and delete a single redaction, then return to the redaction
    list of the object the redaction was attached to.
    """
    model = Redaction
    object_permission = permission_redaction_delete

    def get_post_action_redirect(self):
        """
        Return the URL of the redaction list for the redaction's content
        object.
        """
        return reverse(
            viewname='redactions:redaction_list', kwargs={
                'pk': self.object.content_object.pk
            }
        )

    def get_extra_context(self):
        """
        Return the template context for the delete confirmation page.
        """
        content_object = self.object.content_object

        return {
            'content_object': content_object,
            'navigation_object_list': ('content_object', 'redaction'),
            # Same URL as the post action redirect: the redaction list.
            'previous': self.get_post_action_redirect(),
            'redaction': self.object,
            'title': _(
                'Delete redaction for: %(content_object)s?'
            ) % {
                'content_object': content_object
            },
        }
class RedactionEditView(SingleObjectEditView):
form_class = RedactionCoordinatesForm
model = Redaction
#object_permission =
#page_kwarg = 'page'
#paginate_by = 1
object_permission = permission_redaction_edit
template_name = 'redactions/cropper.html'
def get_extra_context(self, **kwargs):
context = {
#'api_image_data_url': document.get_api_image_url,
'document_page': self.object.content_object,
'hide_help_text': True,
'hide_required_text': True,
'hide_title': True,
'navigation_object_list': ['document_page', 'redaction'],
'redaction': self.object,
'title': _('Edit redaction: %s') % self.object
}
return context
def get_post_action_redirect(self):
@@ -97,7 +113,7 @@ class RedactionEditView(SingleObjectEditView):
class RedactionListView(ExternalObjectMixin, SingleObjectListView):
external_object_class = DocumentPage
#external_object_permission =
object_permission = permission_redaction_view
external_object_pk_url_kwarg = 'pk'
def dispatch(self, request, *args, **kwargs):
@@ -107,38 +123,25 @@ class RedactionListView(ExternalObjectMixin, SingleObjectListView):
def get_extra_context(self):
return {
'hide_object': True,
'object': self.external_object,
#'hide_link': True,
#'hide_object': True,
#'navigation_object_list': ('content_object',),
'no_results_icon': icon_redactions,
#'no_results_main_link': link_transformation_create.resolve(
# context=RequestContext(
# request=self.request, dict_={
# 'content_object': self.content_object
# }
# )
#),
#'no_results_text': _(
# 'Transformations allow changing the visual appearance '
# 'of documents without making permanent changes to the '
# 'document file themselves.'
#),
'no_results_title': _('No redactions exist'),
'no_results_main_link': link_redaction_create.resolve(
context=RequestContext(
request=self.request, dict_={
'object': self.external_object
}
)
),
'no_results_text': _(
'Redactions allow removing access to confidential and '
'sensitive information without having to modify the document.'
),
'no_results_title': _('No existing redactions'),
'title': _('Redactions for: %s') % self.external_object,
}
def get_source_queryset(self):
return Transformation.objects.get_for_object(
return Redaction.objects.get_for_object(
obj=self.external_object
).filter(name__startswith='draw')
result = Transformation.objects.none()
for version in self.external_object.versions.all():
for page in version.pages.all():
result = result | Transformation.objects.get_for_object(obj=page)
return result.filter(name__startswith='draw')
#return Transformation.objects.get_for_object(obj=self.external_object)