/*
 * Decompiled with CFR 0.152.
 */
package org.gcube.execution.textExtraction.job;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.URISyntaxException;
import java.rmi.Remote;
import java.rmi.RemoteException;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicInteger;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import org.apache.axis.message.addressing.Address;
import org.apache.axis.message.addressing.AttributedURI;
import org.apache.axis.message.addressing.EndpointReference;
import org.apache.axis.message.addressing.EndpointReferenceType;
import org.apache.axis.types.URI;
import org.gcube.application.framework.contentmanagement.exceptions.OCRException;
import org.gcube.application.framework.contentmanagement.exceptions.ReadingRSException;
import org.gcube.application.framework.contentmanagement.exceptions.ServiceEPRRetrievalException;
import org.gcube.application.framework.contentmanagement.util.DocumentInfos;
import org.gcube.application.framework.core.cache.RIsManager;
import org.gcube.application.framework.search.library.exception.QuerySubmissionSearchException;
import org.gcube.application.framework.search.library.exception.SearchSystemPortRetrievalException;
import org.gcube.application.framework.search.library.exception.URIRetrievalFromISCacheException;
import org.gcube.application.framework.search.library.exception.gRS2CreationException;
import org.gcube.common.core.contexts.GCUBERemotePortTypeContext;
import org.gcube.common.core.faults.GCUBERetryEquivalentFault;
import org.gcube.common.core.scope.GCUBEScope;
import org.gcube.common.core.security.GCUBESecurityManager;
import org.gcube.execution.textExtraction.job.utils.JobDigitalObject;
import org.gcube.execution.textExtraction.job.utils.JobResultSetConsumer;
import org.gcube.execution.textExtraction.job.utils.TextExtractionUtils;
import org.gcube.informationsystem.cache.SrvType;
import org.gcube.resource.discovery.api.ResourceDiscoveryAPI;
import org.gcube.searchsystem.searchsystemservice.stubs.SearchResponse;
import org.gcube.searchsystem.searchsystemservice.stubs.SearchSystemServicePortType;
import org.gcube.searchsystem.searchsystemservice.stubs.service.SearchMasterServiceAddressingLocator;

public class TextExtractionJob {
    protected static AtomicInteger SMid = new AtomicInteger();

    /**
     * Command-line entry point of a single text-extraction job.
     *
     * <p>Exactly 11 arguments are expected. In every textual argument the sequence
     * {@code "__"} is decoded to {@code "//"}; in the query {@code "_space_"} is additionally
     * decoded to a blank:
     * <ol start="0">
     *   <li>servlet URL (only logged)</li>
     *   <li>search query; its last blank-separated token is passed on as the guid</li>
     *   <li>FTP host</li>
     *   <li>FTP folder name; its last path segment is used as the repository id</li>
     *   <li>scope</li>
     *   <li>DTS input collection id (only logged)</li>
     *   <li>FTP user</li>
     *   <li>FTP password</li>
     *   <li>FTP port (integer)</li>
     *   <li>suggested DTS endpoint</li>
     *   <li>suggested OCR endpoint</li>
     * </ol>
     *
     * <p>The job runs the search, resolves PDF locations for the results, writes one local file
     * listing the resolved documents and one listing the unresolved ones, hands the resolved
     * list to DTS and finally passes whatever DTS could not transform to the OCR service.
     *
     * <p>The process always terminates through {@link System#exit(int)}: status 1 for a wrong
     * argument count, a failed search/discovery step, an unusable local work file or a failed
     * OCR endpoint lookup; status 0 otherwise.
     *
     * @param args the 11 job arguments described above
     */
    public static void main(String[] args) {
        if (args.length != 11) {
            System.out.println("This program requires 11 arguments. The number of arguments passed is: " + args.length);
            System.out.println("Printing arguments: ");
            for (int i = 0; i < args.length; ++i) {
                System.out.println("Argument: " + args[i]);
            }
            System.exit(1);
            return;
        }

        // ---- argument decoding -------------------------------------------------------------
        String servletUrl = args[0].replaceAll("__", "//");
        System.err.println("ServletURL: " + servletUrl);
        String query = args[1].replaceAll("__", "//").replaceAll("_space_", " ");
        System.err.println("query: " + query);
        String ftpHost = args[2].replaceAll("__", "//");
        System.err.println("ftpHost: " + ftpHost);
        String ftpFolderName = args[3].replaceAll("__", "//");
        System.err.println("ftpFolderName: " + ftpFolderName);
        String scope = args[4].replaceAll("__", "//");
        System.err.println("scope: " + scope);
        String dtsInputCollectionId = args[5].replaceAll("__", "//");
        System.err.println("dtsInputCollectionId: " + dtsInputCollectionId);
        String ftpUser = args[6].replaceAll("__", "//");
        System.err.println("ftpUser: " + ftpUser);
        String ftpPassword = args[7].replaceAll("__", "//");
        // The credential itself must never end up in the job log.
        System.err.println("ftpPassword: ********");
        System.err.println("ftpPort: " + args[8]);
        int ftpPort = Integer.parseInt(args[8]);
        String suggestedDtsEpr = args[9].replaceAll("__", "//");
        String suggestedOCREpr = args[10].replaceAll("__", "//");
        String[] parsedFolder = ftpFolderName.split("/");
        System.out.println("The length is:" + parsedFolder.length);
        String repositoryId = parsedFolder[parsedFolder.length - 1];
        String[] parsedQuery = query.split(" ");
        String guid = parsedQuery[parsedQuery.length - 1];
        String metadataFolderName = repositoryId + "/metaDirectory";
        ArrayList<Object> dobjs = new ArrayList<Object>();
        List documentInfos = new ArrayList();
        System.out.println("********************************************************");
        System.out.println("TextExtractionBenchmarking: A Job has started. Parameters are: ");
        System.out.println("Scope: " + scope);
        System.out.println("FTPFolderName: " + ftpFolderName);
        System.out.println("Query: " + query);
        System.out.println("FTPLocationDirectory: " + ftpHost);
        System.out.println("DTSInputCollectionId: " + dtsInputCollectionId);
        System.out.println("********************************************************");

        // ---- search + PDF location discovery -----------------------------------------------
        try {
            System.out.println("TextExtractionBenchmarking: Performing the Search");
            JobResultSetConsumer rs = TextExtractionJob.genericSearch(query, scope);
            dobjs = rs.getAllResultIds(scope, guid);
            System.out.println("TextExtractionBenchmarking: Got Back - " + dobjs.size() + " results.");
            System.out.println("TextExtractionBenchmarking: Looking for PDF locations.");
            ResourceDiscoveryAPI rd = new ResourceDiscoveryAPI();
            ArrayList<String> mimetypes = new ArrayList<String>();
            mimetypes.add("application/pdf");
            rd.setMimeTypes(mimetypes);
            ArrayList<org.gcube.resource.discovery.api.DocumentInfos> docInfos = new ArrayList<org.gcube.resource.discovery.api.DocumentInfos>();
            System.out.println("Before resource discovery - nmber of dObjects: " + dobjs.size());
            for (int i = 0; i < dobjs.size(); ++i) {
                JobDigitalObject digitalObject = (JobDigitalObject)dobjs.get(i);
                org.gcube.resource.discovery.api.DocumentInfos docInfo = new org.gcube.resource.discovery.api.DocumentInfos();
                docInfo.setInputURI(digitalObject.getObjectId());
                docInfo.setReferenceURI(digitalObject.getTitle());
                docInfos.add(docInfo);
            }
            rd.setDocumentInfos(docInfos);
            try {
                documentInfos = rd.getResourcesParallel();
            }
            catch (Exception e) {
                System.err.println(e.getCause());
                System.exit(1);
            }
        }
        catch (Exception e) {
            // Every failure of this phase (including a null search result) is fatal for the job.
            System.err.println("Problem while retrieving resources.");
            System.err.println(e.getCause());
            System.exit(1);
        }

        // ---- local work files: DTS input list and discovery failures ------------------------
        System.out.println("************* Passing document infos to DTS! *************");
        System.out.println("TextExtractionBenchmarking: Creating input file for DTS and file with metadata.");
        String randomUUIDString = UUID.randomUUID().toString();
        String randomUIIDStringFTP = UUID.randomUUID().toString();
        String randomUIIDStringFailures = UUID.randomUUID().toString();

        // Without these files nothing can be handed to DTS, so stop cleanly instead of
        // running into a NullPointerException when wrapping a writer that was never opened.
        BufferedWriter out;
        try {
            out = new BufferedWriter(new FileWriter("metaInfo_" + randomUIIDStringFTP + ".txt"));
        }
        catch (IOException e1) {
            System.err.println("Problem while creating file in disc");
            e1.printStackTrace();
            System.exit(1);
            return;
        }
        BufferedWriter out1;
        try {
            out1 = new BufferedWriter(new FileWriter("metaInfo_" + randomUIIDStringFailures + ".txt"));
        }
        catch (IOException e1) {
            System.err.println("Problem while creating file in disc");
            e1.printStackTrace();
            System.exit(1);
            return;
        }

        ArrayList<DocumentInfos> diList = new ArrayList<DocumentInfos>();
        for (int i = 0; i < documentInfos.size(); ++i) {
            org.gcube.resource.discovery.api.DocumentInfos discovered = (org.gcube.resource.discovery.api.DocumentInfos)documentInfos.get(i);
            if (!discovered.getOutputURI().equals("not_found")) {
                // A PDF location was resolved: queue the document for DTS under a fresh name.
                String randomUUIDStringFileName = UUID.randomUUID().toString();
                String newDocumentRecord = "\"" + discovered.getOutputURI() + "\"" + " \"" + randomUUIDStringFileName + "\" \n";
                DocumentInfos docInfoOCR = new DocumentInfos();
                docInfoOCR.setDocumentId(discovered.getInputURI());
                docInfoOCR.setName(randomUUIDStringFileName);
                docInfoOCR.setPdfURI(discovered.getOutputURI());
                docInfoOCR.setReferenceId(discovered.getReferenceURI());
                diList.add(docInfoOCR);
                try {
                    out.write(newDocumentRecord + "\n");
                }
                catch (IOException e) {
                    e.printStackTrace();
                }
            } else {
                // No PDF location: record the document in the discovery-failures file.
                String newDocumentRecord = "\"" + discovered.getInputURI() + "\"" + " \"" + discovered.getReferenceURI() + "\" \n";
                try {
                    out1.write(newDocumentRecord + "\n");
                }
                catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        try {
            out.close();
        }
        catch (IOException e2) {
            e2.printStackTrace();
        }
        try {
            out1.close();
        }
        catch (IOException e2) {
            e2.printStackTrace();
        }
        File failuresFile = new File("metaInfo_" + randomUIIDStringFailures + ".txt");
        if (failuresFile.exists()) {
            TextExtractionUtils.storeFileInFTP(randomUIIDStringFailures, ftpHost, ftpPort, ftpUser, ftpPassword, metadataFolderName + "/discoveryFailures");
            failuresFile.delete();
        }
        System.out.println("Number of documents in diList: " + diList.size());
        if (diList.size() == 0) {
            // Nothing to transform: remove the empty DTS input file and finish successfully.
            File dtsFile = new File("metaInfo_" + randomUIIDStringFTP + ".txt");
            if (dtsFile.exists()) {
                dtsFile.delete();
            }
            System.exit(0);
        }

        // ---- DTS transformation -------------------------------------------------------------
        ArrayList<DocumentInfos> failedDocuments = new ArrayList<DocumentInfos>();
        try {
            System.out.println("TextExtractionBenchmarking: Calling DTS.");
            System.out.println("Uploading file in FTP");
            TextExtractionUtils.storeFileInFTP(randomUIIDStringFTP, ftpHost, ftpPort, ftpUser, ftpPassword, metadataFolderName + "/dtsInputFiles");
            String dtsInputPath = ftpHost + "/" + metadataFolderName + "/dtsInputFiles/metaInfo_" + randomUIIDStringFTP + ".txt";
            String fileLocation = "ftp://" + ftpUser + ":" + ftpPassword + "@" + dtsInputPath;
            // Log the location with the password masked.
            System.out.println("The FTP URL for DTS input is: " + "ftp://" + ftpUser + ":********@" + dtsInputPath);
            String rsLocator = null;
            try {
                rsLocator = TextExtractionUtils.transformPDFDocumentsToText(fileLocation, ftpHost, ftpUser, Integer.toString(ftpPort), ftpPassword, ftpFolderName, scope, suggestedDtsEpr);
            }
            catch (Exception e) {
                e.printStackTrace();
                System.err.println("Problem while Transforming Documents with DTS - trying with another epr");
                try {
                    // An empty endpoint is passed for the retry instead of the suggested one.
                    rsLocator = TextExtractionUtils.transformPDFDocumentsToText(fileLocation, ftpHost, ftpUser, Integer.toString(ftpPort), ftpPassword, ftpFolderName, scope, "");
                }
                catch (Exception e1) {
                    e1.printStackTrace();
                    System.err.println("Problem while Transforming Documents with DTS - passing all documents to ocr");
                    failedDocuments = diList;
                }
            }
            System.out.println("TextExtractionBenchmarking: About to get Reports");
            if (rsLocator == null) {
                // There is no report to read; keep the "everything goes to OCR" fallback
                // instead of letting a report lookup on a null locator overwrite it.
                failedDocuments = diList;
            } else {
                try {
                    failedDocuments = TextExtractionUtils.getListOfFailuresFromReport(rsLocator, diList, ftpHost, repositoryId, randomUUIDString, query);
                }
                catch (ReadingRSException e) {
                    System.err.println("Problem while getting reports from DTS");
                    e.printStackTrace();
                    failedDocuments = diList;
                }
            }
            TextExtractionUtils.storeFileInFTP(randomUUIDString, ftpHost, ftpPort, ftpUser, ftpPassword, metadataFolderName);
            TextExtractionUtils.deleteFileFromFTP("metaInfo_" + randomUIIDStringFTP + ".txt", ftpHost, ftpPort, ftpUser, ftpPassword, metadataFolderName + "/dtsInputFiles");
            System.out.println("TextExtractionBenchmarking: Transformation from DTS completed. Number of failures: " + failedDocuments.size());
        }
        catch (GCUBEScope.MalformedScopeExpressionException e) {
            System.err.println(e.getCause());
        }
        catch (Exception e) {
            e.printStackTrace();
        }

        // ---- OCR for everything DTS could not transform -------------------------------------
        try {
            System.out.println("TextExtractionBenchmarking: OCR");
            System.out.println("Number of failed documents: " + failedDocuments.size() + " - Passing them to OCR Service");
            if (failedDocuments.size() > 0) {
                String randomUUIDString2 = UUID.randomUUID().toString();
                ArrayList<String> outputURIs = TextExtractionUtils.performOCRtoPDF_HTTPInput(failedDocuments, suggestedOCREpr, scope, ftpHost, ftpUser, ftpPassword, Integer.toString(ftpPort), randomUUIDString2, repositoryId);
                System.out.println("TextExtractionBenchmarking: Finished OCR from Job: ");
                if (outputURIs != null) {
                    System.out.println("TextExtractionBenchmarking: " + outputURIs.size() + " documents transformed.");
                    TextExtractionUtils.storeFileInFTP(randomUUIDString2, ftpHost, ftpPort, ftpUser, ftpPassword, metadataFolderName + "/failedTransformations");
                }
            }
        }
        catch (ServiceEPRRetrievalException e) {
            System.err.println(e.getCause());
            System.exit(1);
        }
        System.exit(0);
    }

    /**
     * Formats one record line made of two double-quoted fields separated by a single blank.
     *
     * @param fileName  value of the first quoted field
     * @param fileRepId value of the second quoted field
     * @return {@code "fileName" "fileRepId"} including the quote characters
     */
    private static String createFileRecord(String fileName, String fileRepId) {
        StringBuilder line = new StringBuilder();
        line.append('"').append(fileName).append('"');
        line.append(' ');
        line.append('"').append(fileRepId).append('"');
        return line.toString();
    }

    /**
     * Renders a record by running the XSLT stylesheet stored in the file
     * {@code FieldsToOAI_DC} (resolved against the working directory) with the two given
     * values bound to the stylesheet parameters {@code identifier} and {@code reference}.
     *
     * <p>The transformation is applied to an empty source; its output is decoded as UTF-8.
     * Transformation and encoding failures are not propagated: the cause of the exception is
     * printed to standard error and {@code null} is returned.
     *
     * @param documentId       value bound to the {@code identifier} parameter
     * @param driverDocumentId value bound to the {@code reference} parameter
     * @return the transformation output as text, or {@code null} if it could not be produced
     * @throws OCRException declared by the signature; nothing in this body raises it
     */
    private static String createOAIDCRecord(String documentId, String driverDocumentId) throws OCRException {
        TransformerFactory factory = TransformerFactory.newInstance();
        StreamSource stylesheet = new StreamSource(new File("FieldsToOAI_DC"));
        try {
            Transformer transformer = factory.newTransformer(stylesheet);
            transformer.setParameter("identifier", documentId);
            transformer.setParameter("reference", driverDocumentId);

            // Collect the output in memory, then decode the bytes in one step.
            ByteArrayOutputStream rendered = new ByteArrayOutputStream();
            transformer.transform(new StreamSource(), new StreamResult(rendered));
            return rendered.toString("UTF8");
        }
        catch (TransformerException e) {
            // Also covers TransformerConfigurationException, which is a subclass.
            System.err.println(e.getCause());
        }
        catch (UnsupportedEncodingException e) {
            System.err.println(e.getCause());
        }
        return null;
    }

    /**
     * Submits the query to the search masters discovered for the scope and wraps the first
     * result-set locator obtained in a {@link JobResultSetConsumer}.
     *
     * <p>At most one attempt is made per discovered search master. The master used for each
     * attempt is chosen round-robin through the shared {@code SMid} counter. A failed
     * submission is logged and the next master is tried.
     *
     * @param query the query to submit
     * @param scope the scope used for discovering the search masters and for the submission
     * @return a consumer for the result set, or {@code null} when no search master is
     *         available or none of them returned a result-set locator
     * @throws URIRetrievalFromISCacheException if the search master lookup fails
     * @throws URISyntaxException declared by the signature (presumably raised while creating the consumer)
     * @throws gRS2CreationException declared by the signature (presumably raised while creating the consumer)
     */
    public static JobResultSetConsumer genericSearch(String query, String scope) throws URIRetrievalFromISCacheException, URISyntaxException, gRS2CreationException {
        EndpointReference[] searchMasters = TextExtractionJob.findSearchMasterEPR(scope);
        System.out.println("The scope is for search master is: " + scope);
        if (searchMasters == null || searchMasters.length == 0) {
            // Nothing to query; returning here avoids dereferencing a null array below.
            System.out.println("No Search Masters Found");
            return null;
        }
        System.out.println("Number of Search Master EPRs: " + searchMasters.length);

        for (int attempt = 0; attempt < searchMasters.length; ++attempt) {
            // The counter may wrap to negative values; keep the index inside the array.
            int masterIndex = SMid.getAndIncrement() % searchMasters.length;
            if (masterIndex < 0) {
                masterIndex += searchMasters.length;
            }

            String searchEPR = null;
            try {
                searchEPR = TextExtractionJob.submitSearch(query, searchMasters[masterIndex].getAddress().toString(), scope);
            }
            catch (URI.MalformedURIException e) {
                e.printStackTrace();
                System.err.println(e.getCause());
            }
            catch (SearchSystemPortRetrievalException e) {
                e.printStackTrace();
                System.err.println(e.getCause());
            }
            catch (QuerySubmissionSearchException e) {
                e.printStackTrace();
                System.err.println(e.getCause());
            }
            if (searchEPR != null) {
                System.out.println("The rs EPR returned is: " + searchEPR);
                return new JobResultSetConsumer(searchEPR);
            }
            System.out.println("No RS epr found - it is null.");
        }
        return null;
    }

    /**
     * Asks the IS cache of the given scope for the endpoints registered under
     * {@code "Search"} / {@code "SearchSystemService"} with service type
     * {@code SrvType.SIMPLE}.
     *
     * @param scope scope expression identifying the IS cache to query
     * @return the endpoints exactly as returned by the cache
     * @throws URIRetrievalFromISCacheException wrapping any exception raised during the lookup
     */
    protected static EndpointReference[] findSearchMasterEPR(String scope) throws URIRetrievalFromISCacheException {
        System.out.println("Looking for a Search Master epr - scope is: " + scope);
        try {
            return RIsManager.getInstance()
                    .getISCache(GCUBEScope.getScope(scope))
                    .getEPRsFor("Search", "SearchSystemService", SrvType.SIMPLE.name());
        }
        catch (Exception lookupFailure) {
            System.out.println(lookupFailure.getMessage());
            throw new URIRetrievalFromISCacheException((Throwable)lookupFailure);
        }
    }

    /**
     * Sends the trimmed query to the search service at the given address and returns the
     * locator of the produced result set.
     *
     * <p>Every failure is reported to the caller. No failure path is allowed to fall through
     * to a dereference of a missing response.
     *
     * @param query           the query; leading and trailing whitespace is removed before submission
     * @param searchMasterURI address of the search service endpoint
     * @param scope           scope expression under which the call is made
     * @return the result-set locator reported by the service response
     * @throws URI.MalformedURIException if the endpoint address cannot be parsed
     * @throws SearchSystemPortRetrievalException declared by the signature; nothing in this body raises it directly
     * @throws QuerySubmissionSearchException if the call fails for any other reason, including
     *         an invalid scope expression or a missing response
     */
    protected static String submitSearch(String query, String searchMasterURI, String scope) throws URI.MalformedURIException, SearchSystemPortRetrievalException, QuerySubmissionSearchException {
        String queryToSubmit = query.trim();
        System.out.println("Query to Submit: " + queryToSubmit);
        System.out.println("Query will be submitted to: " + searchMasterURI);
        SearchResponse ret = null;
        try {
            EndpointReferenceType endpoint = new EndpointReferenceType();
            endpoint.setAddress((AttributedURI)new Address(searchMasterURI));
            SearchSystemServicePortType sspt = new SearchMasterServiceAddressingLocator().getSearchSystemServicePortTypePort(endpoint);
            sspt = (SearchSystemServicePortType)GCUBERemotePortTypeContext.getProxy((Remote)sspt, (GCUBEScope)GCUBEScope.getScope((String)scope), (GCUBESecurityManager[])new GCUBESecurityManager[0]);
            ret = sspt.search(queryToSubmit);
        }
        catch (GCUBERetryEquivalentFault e) {
            System.out.println("Error while submitting search query");
            e.printStackTrace();
            System.err.println(e.getCause());
            throw new QuerySubmissionSearchException((Throwable)e);
        }
        catch (RemoteException e) {
            System.out.println("Error while submitting search query");
            e.printStackTrace();
            System.err.println(e.getCause());
            throw new QuerySubmissionSearchException((Throwable)e);
        }
        catch (Exception e) {
            // Previously logged and ignored, which led to a NullPointerException on the
            // missing response below. Surface the failure through the declared exceptions.
            e.printStackTrace();
            if (e instanceof URI.MalformedURIException) {
                throw (URI.MalformedURIException)e;
            }
            throw new QuerySubmissionSearchException((Throwable)e);
        }
        if (ret == null) {
            throw new QuerySubmissionSearchException((Throwable)new IllegalStateException("Search service returned no response"));
        }
        String resultSetEpr = ret.getResultSetEpr();
        System.out.println("The rs epr is: " + resultSetEpr);
        return resultSetEpr;
    }
}

