Support passing MIMETypes and actual document filenames to TextParser for better lexer guessing
This commit is contained in:
@@ -1,7 +1,8 @@
|
||||
import codecs
|
||||
import logging
|
||||
|
||||
from pygments import highlight
|
||||
from pygments.lexers import TextLexer, guess_lexer, get_lexer_for_filename, ClassNotFound
|
||||
from pygments.lexers import TextLexer, guess_lexer, get_lexer_for_filename, ClassNotFound, get_lexer_for_mimetype
|
||||
from pygments.formatters import ImageFormatter
|
||||
|
||||
DEFAULT_PAGE_WIDTH = 70
|
||||
@@ -13,6 +14,8 @@ SPACE = u' '
|
||||
|
||||
TEXT_PARSER_MIMETYPES = ['text/plain' ,'text/x-python', 'text/html', 'text/x-shellscript']
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TextParser(object):
|
||||
def render_to_viewport(self, filename, page_width=DEFAULT_PAGE_WIDTH, page_height=DEFAULT_PAGE_HEIGHT, fill_last_page=False):
|
||||
@@ -56,26 +59,47 @@ class TextParser(object):
|
||||
|
||||
return pages
|
||||
|
||||
def render_to_image(self, filename, page_width=DEFAULT_PAGE_WIDTH, page_height=DEFAULT_PAGE_HEIGHT, page_number=None, lexer=None, line_numbers=True, line_number_pad=DEFAULT_LINE_NUMBER_PAD):
|
||||
def render_to_image(self, filename, mimetype=None, actual_filename=None, page_width=DEFAULT_PAGE_WIDTH, page_height=DEFAULT_PAGE_HEIGHT, page_number=None, lexer=None, line_numbers=True, line_number_pad=DEFAULT_LINE_NUMBER_PAD):
|
||||
"""
|
||||
Turn a list of pages and lines and product and image representation,
|
||||
selecting the best parser possible based on the filename and contents
|
||||
"""
|
||||
pages = self.render_to_viewport(filename, page_width, page_height, fill_last_page=True)
|
||||
|
||||
|
||||
if not lexer:
|
||||
# Read entire file to guess the lexer
|
||||
with codecs.open(filename, 'r', 'utf-8') as descriptor:
|
||||
file_data = descriptor.read()
|
||||
if not lexer:
|
||||
try:
|
||||
lexer = get_lexer_for_filename(filename, file_data)
|
||||
except ClassNotFound, err:
|
||||
if mimetype:
|
||||
try:
|
||||
lexer = get_lexer_for_mimetype(mimetype)
|
||||
except ClassNotFound:
|
||||
pass
|
||||
else:
|
||||
logger.debug('get_lexer_for_mimetype: %s' % lexer)
|
||||
else:
|
||||
# Read entire file to guess the lexer
|
||||
with codecs.open(filename, 'r', 'utf-8') as descriptor:
|
||||
logger.debug('guessing lexer for file: %s' % filename)
|
||||
|
||||
file_data = descriptor.read()
|
||||
if not lexer:
|
||||
try:
|
||||
lexer = guess_lexer(file_data)
|
||||
except ClassNotFound:
|
||||
lexer = TextLexer()
|
||||
if actual_filename:
|
||||
lexer = get_lexer_for_filename(actual_filename, file_data)
|
||||
else:
|
||||
lexer = get_lexer_for_filename(filename, file_data)
|
||||
except ClassNotFound, err:
|
||||
logger.debug('get_lexer_for_filename error: %s', err)
|
||||
try:
|
||||
lexer = guess_lexer(file_data)
|
||||
except ClassNotFound:
|
||||
lexer = TextLexer()
|
||||
logger.debug('unable to guess lexer')
|
||||
else:
|
||||
logger.debug('guess_lexer returned: %s' % lexer)
|
||||
else:
|
||||
logger.debug('get_lexer_for_filename returned: %s' % lexer)
|
||||
|
||||
logger.debug('lexer: %s' % lexer)
|
||||
|
||||
if page_number:
|
||||
# Render a single page into image
|
||||
return highlight(u'\n'.join(pages[page_number - 1]), lexer, ImageFormatter(line_number_start=(page_number - 1) * page_height + 1, line_numbers=line_numbers, line_number_pad=line_number_pad))
|
||||
|
||||
Reference in New Issue
Block a user