#!/bin/sh

# Start-of-job banner followed by host diagnostics for the job log:
# kernel/host details, the Red Hat release file, and where pdfopt resolves.
# Nothing here is checked; the output is informational only.
printf '%s\n' "Grid OCR bulk   start."
printf '%s\n' "uname"
uname -a
cat /etc/redhat-release

printf '%s\n' "which pdfopt"
which pdfopt


# Download and unpack the OCRopus software.
#   $1 - location (URL) of the software archive, handed to wget.
# The download is stored in ocropus-0.3.1-i386/, where the service archive
# (org.gcube.application.inspire-ocropus-servicearchive*) is unpacked first and
# the ./Ocropus/Ocropus* archive it provides is unpacked second.
# Every step is checked: continuing after a failed cd would run the download
# and the later "cd .." relative to the wrong directory, and continuing after a
# failed download or extraction leaves no software to run.
if [ -z "${1:-}" ]; then
  echo "Missing argument: location of the ocropus software archive" >&2
  exit 1
fi

# -p: an existing directory (e.g. from an earlier run) is not an error.
mkdir -p ocropus-0.3.1-i386 || exit 1
cd ocropus-0.3.1-i386 || exit 1

echo "Downloading ocropus from location: $1 "
# Quoted so that characters such as ? or * in the location are not globbed.
if ! wget "$1"; then
  echo "Download failed: $1" >&2
  exit 1
fi

echo "Unzipping software archive..."
if ! tar -xvf org.gcube.application.inspire-ocropus-servicearchive*; then
  echo "Could not unpack the software archive" >&2
  exit 1
fi

echo "Unzipping ocropus..."
if ! tar -xvf ./Ocropus/Ocropus*; then
  echo "Could not unpack ocropus" >&2
  exit 1
fi

cd .. || exit 1
ls -la

# Environment for the unpacked OCRopus tree. All paths are relative to the
# current working directory, so the job must keep running from here.
export OCROPUS_PATH=./ocropus-0.3.1-i386
# Entry script started later with python; deliberately not exported.
PYTHON_MAIN=$OCROPUS_PATH/run.py
export OCROSCRIPTS=$OCROPUS_PATH/share/ocropus/scripts
export OCRODATA=$OCROPUS_PATH/share/ocropus
export TESSDATA_PREFIX=$OCROPUS_PATH/share/
# NOTE(review): replaces any inherited LD_LIBRARY_PATH, as before - confirm
# that this is intended.
export LD_LIBRARY_PATH=$OCROPUS_PATH/lib

# Put the bundled Python modules first and keep whatever PYTHONPATH the node
# already provides. Previously PYTHONPATH was assigned before being tested, so
# the test could never see the inherited value, and its branches were swapped.
if [ -n "${PYTHONPATH:-}" ]; then
  PYTHONPATH=$OCROPUS_PATH/python:$PYTHONPATH
else
  PYTHONPATH=$OCROPUS_PATH/python
fi
export PYTHONPATH

# Unpack the input PDFs, run the OCR, archive the results, clean up and report.
#
# job_status collects failures of the steps that matter (unpacking the input,
# the OCR run, building both archives, removing the loose results). The final
# verdict used to look only at $? of the last rm, so a failed OCR run or a
# failed zip could still end in "No errors occured" with exit code 0.
job_status=0

echo "unzipping input pdfs"
unzip input_pdfs.zip
unzip_rc=$?
# unzip exits with 1 when it only issued warnings but still completed the
# extraction; anything above that (including 127, command not found) is fatal.
if [ "$unzip_rc" -gt 1 ]; then
  job_status=1
fi
echo "before OCR"
ls -la

echo "OCR starting...    (executing $PYTHON_MAIN ) "
python "$PYTHON_MAIN" || job_status=1
echo "OCR finished..."

echo "after OCR"
ls -la

# Delete temporary files (tmp*) before the results are archived.
rm -rf tmp*

echo "zipping output pdfs"
zip pdfs.zip  *.pdf || job_status=1
echo "zipping output hocs"
zip hocrs.zip *.hocr || job_status=1

echo "in the end"
ls -la

# Remove the unpacked software and intermediate files; failures here are
# ignored, as before (-f).
rm -rf "$OCROPUS_PATH" conversion* tmp*
# Remove the loose results now that they are archived. As before, a failure
# here (for example no *.hocr file exists) marks the job as failed.
rm *.pdf *.hocr || job_status=1

if [ "$job_status" -ne 0 ]; then
  echo "An error has occured above"
  exit 1
else
  echo "No errors occured"
  exit 0
fi
