diff --git a/apps/ocr/api.py b/apps/ocr/api.py
index 57b1bb54eb..f244b45ae5 100755
--- a/apps/ocr/api.py
+++ b/apps/ocr/api.py
@@ -1,5 +1,6 @@
 #Some code from http://wiki.github.com/hoffstaetter/python-tesseract
 
+import codecs
 import os
 import subprocess
 import tempfile
@@ -14,6 +15,11 @@ from documents.models import Document
 from converter.api import convert_document_for_ocr
 
 from ocr.conf.settings import TESSERACT_PATH
+from ocr.conf.settings import TESSERACT_LANGUAGE
+
+
+class TesseractError(Exception):
+    pass
 
 
 def cleanup(filename):
@@ -23,51 +29,37 @@ def cleanup(filename):
     except OSError:
         pass
 
-class TesseractError(Exception):
-    pass
-#    def __init__(self, status, message):
-#        self.status = status
-#        self.message = message
-
-def get_errors(error_string):
-    '''
-    returns all lines in the error_string that start with the string "error"
-
-    '''
-    lines = error_string.splitlines()
-    return lines[1]
-    #error_lines = (line for line in lines if line.find('error') >= 0)
-    #return '\n'.join(error_lines)
 
 def run_tesseract(input_filename, output_filename_base, lang=None):
     command = [TESSERACT_PATH, input_filename, output_filename_base]
     if lang is not None:
         command += ['-l', lang]
-
-    proc = subprocess.Popen(command, stderr=subprocess.PIPE)
-    return (proc.wait(), proc.stderr.read())
+    
+    proc = subprocess.Popen(command, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
+    return_code = proc.wait()
+    if return_code != 0:
+        error_text = proc.stderr.read()
+        raise TesseractError(error_text)
 
 
 def do_document_ocr(document):
-    for page_index, document_page in enumerate(document.documentpage_set.all()):    
+    for page_index, document_page in enumerate(document.documentpage_set.all()):
         imagefile = convert_document_for_ocr(document, page=page_index)
         desc, filepath = tempfile.mkstemp()
         try:
-            status, error_string = run_tesseract(imagefile, filepath)
-            if status:
-                errors = get_errors(error_string)
-                raise TesseractError(errors)
-        finally:
+            run_tesseract(imagefile, filepath, TESSERACT_LANGUAGE)
             ocr_output = os.extsep.join([filepath, 'txt'])
-
-        f = file(ocr_output)
-        try:
+            f = codecs.open(ocr_output, 'r', 'utf-8')
             document_page = document.documentpage_set.get(page_number=page_index+1)
             document_page.content = f.read().strip()
             document_page.page_label = _(u'Text from OCR')
             document_page.save()
-        finally:
             f.close()
-            cleanup(filepath)
             cleanup(ocr_output)
+        except TesseractError, e:
+            cleanup(filepath)
+            cleanup(imagefile)
+            raise TesseractError(e)
+        finally:
+            cleanup(filepath)
             cleanup(imagefile)