# -*- coding: utf-8 -*-

import sys
import glob
import os
import traceback
from invenio.websubmit_file_converter import pdf2hocr2pdf
from resource import ResourceList
from config import *

input_files = glob.glob("*" + CFG_INPUT_EXT)
print input_files
if len(input_files) > 0:
    rl = ResourceList(input_files[0])
else:
    # Failsafe, no input files, just process all pdfs as english.
    rl = ResourceList()
    pdfs = glob.glob("*.pdf")
    rl.add_option('lang', 'eng')
    for f in pdfs:
        rl.add_resource(f)
        rl.add_output_resource(f+'.hocr')

for (type, value) in rl:
    if type == ResourceList.RESOURCE and value.input == True:
        try:
             print value.uri
             lang = rl.get_current_option('lang')
             if not lang:
                 lang = 'eng'
             pdf2hocr2pdf(value.filename, value.filename, ln = lang)
             if os.path.exists(value.filename):
                 print >> sys.stderr, "__DONE: " + value.filename
             if os.path.exists(value.filename+'.hocr'):
                 print >> sys.stderr, "__DONE: " + value.filename + '.hocr'
        except Exception:
             print >> sys.stderr, "__FAIL: " + value.filename
             print >> sys.stderr, "__FAIL: " + value.filename + '.hocr'
             traceback.print_exc(file=sys.stderr)
             pass
