package org.gcube.execution.textExtraction.job;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.URISyntaxException;
import java.rmi.RemoteException;
import java.util.ArrayList;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicInteger;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import org.apache.axis.message.addressing.Address;
import org.apache.axis.message.addressing.EndpointReference;
import org.apache.axis.message.addressing.EndpointReferenceType;
import org.apache.axis.types.URI;
import org.gcube.application.framework.contentmanagement.exceptions.OCRException;
import org.gcube.application.framework.contentmanagement.exceptions.ReadingRSException;
import org.gcube.application.framework.contentmanagement.exceptions.ServiceEPRRetrievalException;
import org.gcube.application.framework.core.cache.RIsManager;
import org.gcube.application.framework.search.library.exception.QuerySubmissionSearchException;
import org.gcube.application.framework.search.library.exception.SearchSystemPortRetrievalException;
import org.gcube.application.framework.search.library.exception.URIRetrievalFromISCacheException;
import org.gcube.application.framework.search.library.exception.gRS2CreationException;
import org.gcube.common.core.contexts.GCUBERemotePortTypeContext;
import org.gcube.common.core.faults.GCUBERetryEquivalentFault;
import org.gcube.common.core.scope.GCUBEScope;
import org.gcube.common.core.security.GCUBESecurityManager;
import org.gcube.execution.textExtraction.job.utils.JobDigitalObject;
import org.gcube.execution.textExtraction.job.utils.JobResultSetConsumer;
import org.gcube.execution.textExtraction.job.utils.TextExtractionUtils;
import org.gcube.informationsystem.cache.SrvType;
import org.gcube.resource.discovery.api.DocumentInfos;
import org.gcube.resource.discovery.api.ResourceDiscoveryAPI;
import org.gcube.searchsystem.searchsystemservice.stubs.SearchResponse;
import org.gcube.searchsystem.searchsystemservice.stubs.service.SearchMasterServiceAddressingLocator;

/* loaded from: input_file:org/gcube/execution/textExtraction/job/TextExtractionJob.class */
public class TextExtractionJob {
    /**
     * Shared counter incremented by {@code genericSearch} on every submission attempt;
     * its value modulo the number of discovered search master EPRs selects which EPR
     * is queried next.
     */
    protected static AtomicInteger SMid = new AtomicInteger();

    /**
     * Command-line entry point of the text-extraction job.
     *
     * <p>The job runs these stages in order:
     * <ol>
     *   <li>submits the query through {@link #genericSearch} and collects the result ids;</li>
     *   <li>asks {@code ResourceDiscoveryAPI} for the {@code application/pdf} resource of every result;</li>
     *   <li>writes two local {@code metaInfo_&lt;uuid&gt;.txt} files: one listing documents whose
     *       output URI is missing or {@code "not_found"} (uploaded under
     *       {@code <folder>/metaDirectory/discoveryFailures}) and one listing the documents handed to
     *       DTS (uploaded under {@code <folder>/metaDirectory/dtsInputFiles});</li>
     *   <li>calls {@code TextExtractionUtils.transformPDFDocumentsToText}, first with the EPR given in
     *       argument 9 and, if that throws, again with an empty EPR;</li>
     *   <li>passes every document reported as failed (or all documents when DTS or its report could
     *       not be used) to {@code TextExtractionUtils.performOCRtoPDF_HTTPInput}.</li>
     * </ol>
     *
     * <p>Every argument except the port has {@code "__"} decoded to {@code "//"}; the query
     * additionally has {@code "_space_"} decoded to a blank. Positional arguments, as labelled by
     * the log output: 0 servlet URL (only logged), 1 query, 2 FTP host, 3 FTP folder name, 4 scope,
     * 5 DTS input collection id (only logged), 6 FTP user, 7 FTP password, 8 FTP port,
     * 9 EPR tried first for DTS, 10 value forwarded as second argument to the OCR call.
     *
     * <p>The process always terminates through {@link System#exit(int)}: status 1 for a wrong
     * argument count, a failed search/resource discovery, a metadata file that cannot be created,
     * or an OCR service EPR retrieval failure; status 0 otherwise (a failing DTS stage is only logged).
     * The FTP password is never written to the logs.
     *
     * @param strArr the 11 positional arguments described above
     */
    public static void main(String[] strArr) {
        if (strArr.length != 11) {
            System.out.println("This program requires 11 arguments. The number of arguments passed is: " + strArr.length);
            System.out.println("Printing arguments: ");
            for (String argument : strArr) {
                System.out.println("Argument: " + argument);
            }
            System.exit(1);
            return;
        }

        System.err.println("ServletURL: " + decodeArgument(strArr[0]));
        String query = decodeArgument(strArr[1]).replaceAll("_space_", " ");
        System.err.println("query: " + query);
        String ftpHost = decodeArgument(strArr[2]);
        System.err.println("ftpHost: " + ftpHost);
        String ftpFolderName = decodeArgument(strArr[3]);
        System.err.println("ftpFolderName: " + ftpFolderName);
        String scope = decodeArgument(strArr[4]);
        System.err.println("scope: " + scope);
        String dtsInputCollectionId = decodeArgument(strArr[5]);
        System.err.println("dtsInputCollectionId: " + dtsInputCollectionId);
        String ftpUser = decodeArgument(strArr[6]);
        System.err.println("ftpUser: " + ftpUser);
        String ftpPassword = decodeArgument(strArr[7]);
        // The secret itself must not end up in log files.
        System.err.println("ftpPassword: ********");
        System.err.println("ftpPort: " + strArr[8]);
        int ftpPort = Integer.parseInt(strArr[8]);
        String preferredDtsEpr = decodeArgument(strArr[9]);
        String ocrArgument = decodeArgument(strArr[10]);

        // Only the last path segment of the FTP folder and the last token of the query are used below.
        String[] folderSegments = ftpFolderName.split("/");
        System.out.println("The length is:" + folderSegments.length);
        String jobFolder = folderSegments[folderSegments.length - 1];
        String[] queryTokens = query.split(" ");
        String lastQueryToken = queryTokens[queryTokens.length - 1];
        String metaDirectory = jobFolder + "/metaDirectory";

        System.out.println("********************************************************");
        System.out.println("TextExtractionBenchmarking: A Job has started. Parameters are: ");
        System.out.println("Scope: " + scope);
        System.out.println("FTPFolderName: " + ftpFolderName);
        System.out.println("Query: " + query);
        System.out.println("FTPLocationDirectory: " + ftpHost);
        System.out.println("DTSInputCollectionId: " + dtsInputCollectionId);
        System.out.println("********************************************************");

        // Declared as List<?> so that whatever List flavour the discovery API returns can be assigned.
        java.util.List<?> discovered = new ArrayList<DocumentInfos>();
        try {
            System.out.println("TextExtractionBenchmarking: Performing the Search");
            JobResultSetConsumer consumer = genericSearch(query, scope);
            if (consumer == null) {
                System.err.println("Problem while retrieving resources.");
                System.err.println("The search returned no result set.");
                System.exit(1);
                return;
            }
            ArrayList<JobDigitalObject> results = consumer.getAllResultIds(scope, lastQueryToken);
            System.out.println("TextExtractionBenchmarking: Got Back - " + results.size() + " results.");
            System.out.println("TextExtractionBenchmarking: Looking for PDF locations.");
            ResourceDiscoveryAPI resourceDiscoveryAPI = new ResourceDiscoveryAPI();
            ArrayList<String> mimeTypes = new ArrayList<>();
            mimeTypes.add("application/pdf");
            resourceDiscoveryAPI.setMimeTypes(mimeTypes);
            ArrayList<DocumentInfos> discoveryRequests = new ArrayList<>();
            System.out.println("Before resource discovery - nmber of dObjects: " + results.size());
            for (JobDigitalObject result : results) {
                DocumentInfos request = new DocumentInfos();
                request.setInputURI(result.getObjectId());
                request.setReferenceURI(result.getTitle());
                discoveryRequests.add(request);
            }
            resourceDiscoveryAPI.setDocumentInfos(discoveryRequests);
            discovered = resourceDiscoveryAPI.getResourcesParallel();
        } catch (Exception e) {
            // Covers URI, IS-cache and result-set creation failures alike; none of them is recoverable here.
            System.err.println("Problem while retrieving resources.");
            e.printStackTrace();
            System.exit(1);
            return;
        }

        System.out.println("************* Passing document infos to DTS! *************");
        System.out.println("TextExtractionBenchmarking: Creating input file for DTS and file with metadata.");
        String reportId = UUID.randomUUID().toString();
        String dtsInputId = UUID.randomUUID().toString();
        String discoveryFailuresId = UUID.randomUUID().toString();
        String dtsInputFileName = "metaInfo_" + dtsInputId + ".txt";
        String discoveryFailuresFileName = "metaInfo_" + discoveryFailuresId + ".txt";
        BufferedWriter dtsInputWriter = openMetaInfoWriter(dtsInputFileName);
        BufferedWriter discoveryFailuresWriter = openMetaInfoWriter(discoveryFailuresFileName);

        ArrayList<org.gcube.application.framework.contentmanagement.util.DocumentInfos> dtsDocuments = new ArrayList<>();
        for (Object entry : discovered) {
            DocumentInfos found = (DocumentInfos) entry;
            String pdfUri = found.getOutputURI();
            if (pdfUri == null || pdfUri.equals("not_found")) {
                // No PDF location: record the document as a discovery failure.
                try {
                    discoveryFailuresWriter.write("\"" + found.getInputURI() + "\" \"" + found.getReferenceURI() + "\" \n" + "\n");
                } catch (IOException e) {
                    e.printStackTrace();
                }
            } else {
                // Each document is handed to DTS under a freshly generated name.
                String generatedName = UUID.randomUUID().toString();
                org.gcube.application.framework.contentmanagement.util.DocumentInfos dtsDocument = new org.gcube.application.framework.contentmanagement.util.DocumentInfos();
                dtsDocument.setDocumentId(found.getInputURI());
                dtsDocument.setName(generatedName);
                dtsDocument.setPdfURI(pdfUri);
                dtsDocument.setReferenceId(found.getReferenceURI());
                dtsDocuments.add(dtsDocument);
                try {
                    dtsInputWriter.write("\"" + pdfUri + "\" \"" + generatedName + "\" \n" + "\n");
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        try {
            dtsInputWriter.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
        try {
            discoveryFailuresWriter.close();
        } catch (IOException e) {
            e.printStackTrace();
        }

        File discoveryFailuresFile = new File(discoveryFailuresFileName);
        if (discoveryFailuresFile.exists()) {
            TextExtractionUtils.storeFileInFTP(discoveryFailuresId, ftpHost, ftpPort, ftpUser, ftpPassword, metaDirectory + "/discoveryFailures");
            discoveryFailuresFile.delete();
        }

        System.out.println("Number of documents in diList: " + dtsDocuments.size());
        if (dtsDocuments.size() == 0) {
            // Nothing to transform: drop the (empty) DTS input file and finish successfully.
            File dtsInputFile = new File(dtsInputFileName);
            if (dtsInputFile.exists()) {
                dtsInputFile.delete();
            }
            System.exit(0);
            return;
        }

        ArrayList<org.gcube.application.framework.contentmanagement.util.DocumentInfos> failedDocuments = new ArrayList<>();
        try {
            System.out.println("TextExtractionBenchmarking: Calling DTS.");
            System.out.println("Uploading file in FTP");
            TextExtractionUtils.storeFileInFTP(dtsInputId, ftpHost, ftpPort, ftpUser, ftpPassword, metaDirectory + "/dtsInputFiles");
            String dtsInputLocation = "@" + ftpHost + "/" + metaDirectory + "/dtsInputFiles/" + dtsInputFileName;
            String dtsInputUrl = "ftp://" + ftpUser + ":" + ftpPassword + dtsInputLocation;
            // Log the URL with the password masked; the real URL is only passed to DTS.
            System.out.println("The FTP URL for DTS input is: " + "ftp://" + ftpUser + ":********" + dtsInputLocation);
            String dtsReport = null;
            try {
                dtsReport = TextExtractionUtils.transformPDFDocumentsToText(dtsInputUrl, ftpHost, ftpUser, Integer.toString(ftpPort), ftpPassword, ftpFolderName, scope, preferredDtsEpr);
            } catch (Exception firstFailure) {
                firstFailure.printStackTrace();
                System.err.println("Problem while Transforming Documents with DTS - trying with another epr");
                try {
                    dtsReport = TextExtractionUtils.transformPDFDocumentsToText(dtsInputUrl, ftpHost, ftpUser, Integer.toString(ftpPort), ftpPassword, ftpFolderName, scope, "");
                } catch (Exception secondFailure) {
                    secondFailure.printStackTrace();
                    System.err.println("Problem while Transforming Documents with DTS - passing all documents to ocr");
                    failedDocuments = dtsDocuments;
                }
            }
            System.out.println("TextExtractionBenchmarking: About to get Reports");
            try {
                failedDocuments = TextExtractionUtils.getListOfFailuresFromReport(dtsReport, dtsDocuments, ftpHost, jobFolder, reportId, query);
            } catch (ReadingRSException e) {
                System.err.println("Problem while getting reports from DTS");
                e.printStackTrace();
                failedDocuments = dtsDocuments;
            }
            TextExtractionUtils.storeFileInFTP(reportId, ftpHost, ftpPort, ftpUser, ftpPassword, metaDirectory);
            TextExtractionUtils.deleteFileFromFTP(dtsInputFileName, ftpHost, ftpPort, ftpUser, ftpPassword, metaDirectory + "/dtsInputFiles");
            System.out.println("TextExtractionBenchmarking: Transformation from DTS completed. Number of failures: " + failedDocuments.size());
        } catch (Exception e) {
            // A failing DTS stage is logged only; the OCR stage below still runs on whatever failures were collected.
            e.printStackTrace();
        }

        try {
            System.out.println("TextExtractionBenchmarking: OCR");
            System.out.println("Number of failed documents: " + failedDocuments.size() + " - Passing them to OCR Service");
            if (failedDocuments.size() > 0) {
                String ocrRunId = UUID.randomUUID().toString();
                ArrayList<String> transformed = TextExtractionUtils.performOCRtoPDF_HTTPInput(failedDocuments, ocrArgument, scope, ftpHost, ftpUser, ftpPassword, Integer.toString(ftpPort), ocrRunId, jobFolder);
                System.out.println("TextExtractionBenchmarking: Finished OCR from Job: ");
                if (transformed != null) {
                    System.out.println("TextExtractionBenchmarking: " + transformed.size() + " documents transformed.");
                    TextExtractionUtils.storeFileInFTP(ocrRunId, ftpHost, ftpPort, ftpUser, ftpPassword, metaDirectory + "/failedTransformations");
                }
            }
        } catch (ServiceEPRRetrievalException e) {
            e.printStackTrace();
            System.exit(1);
            return;
        }
        System.exit(0);
    }

    /** Decodes the {@code "__"} escape used on the command line back to {@code "//"}. */
    private static String decodeArgument(String raw) {
        return raw.replace("__", "//");
    }

    /**
     * Opens a buffered writer on a local metadata file, terminating the job with status 1
     * (instead of failing later with a NullPointerException) when the file cannot be created.
     */
    private static BufferedWriter openMetaInfoWriter(String fileName) {
        try {
            return new BufferedWriter(new FileWriter(fileName));
        } catch (IOException e) {
            System.err.println("Problem while creating file in disc");
            e.printStackTrace();
            System.exit(1);
            return null; // unreachable, required by the compiler
        }
    }

    /**
     * Formats two values as one record line: each value wrapped in double quotes,
     * separated by a single blank, without a trailing line break.
     *
     * @param str  first value of the record
     * @param str2 second value of the record
     * @return {@code "<str>" "<str2>"}
     */
    private static String createFileRecord(String str, String str2) {
        StringBuilder record = new StringBuilder();
        record.append('"').append(str).append('"');
        record.append(' ');
        record.append('"').append(str2).append('"');
        return record.toString();
    }

    /**
     * Renders a record by applying the stylesheet read from the local file
     * {@code FieldsToOAI_DC} to an empty source, with the two values supplied as the
     * stylesheet parameters {@code identifier} and {@code reference}.
     *
     * @param str  value bound to the {@code identifier} stylesheet parameter
     * @param str2 value bound to the {@code reference} stylesheet parameter
     * @return the transformation output decoded as UTF-8, or {@code null} when the
     *         stylesheet cannot be loaded or the transformation fails
     * @throws OCRException declared for callers; not raised by the current implementation
     */
    private static String createOAIDCRecord(String str, String str2) throws OCRException {
        TransformerFactory factory = TransformerFactory.newInstance();
        StreamSource stylesheet = new StreamSource(new File("FieldsToOAI_DC"));
        try {
            Transformer transformer = factory.newTransformer(stylesheet);
            transformer.setParameter("identifier", str);
            transformer.setParameter("reference", str2);
            ByteArrayOutputStream rendered = new ByteArrayOutputStream();
            transformer.transform(new StreamSource(), new StreamResult(rendered));
            // Decode the whole buffer at once instead of pumping it through a reader char by char.
            return new String(rendered.toByteArray(), java.nio.charset.StandardCharsets.UTF_8);
        } catch (TransformerException e) {
            // TransformerConfigurationException is a TransformerException, so one handler covers both.
            // Print the exception itself: its cause alone is frequently null.
            System.err.println(e);
            return null;
        }
    }

    /**
     * Submits a query to the search masters registered for a scope and wraps the first
     * result-set EPR obtained in a {@link JobResultSetConsumer}.
     *
     * <p>At most one attempt per discovered search master is made. The master for each
     * attempt is chosen with the shared {@link #SMid} counter; a failing attempt is logged
     * and the next master is tried.
     *
     * @param str  the query to submit
     * @param str2 the scope used both to look up the search masters and to submit the query
     * @return a consumer for the returned result set, or {@code null} when no search master
     *         is available or none of them produced a result-set EPR
     * @throws URIRetrievalFromISCacheException if the search master lookup fails
     * @throws URISyntaxException               declared by the consumer construction
     * @throws gRS2CreationException            declared by the consumer construction
     */
    public static JobResultSetConsumer genericSearch(String str, String str2) throws URIRetrievalFromISCacheException, URISyntaxException, gRS2CreationException {
        EndpointReference[] searchMasters = findSearchMasterEPR(str2);
        System.out.println("The scope is for search master is: " + str2);
        if (searchMasters == null || searchMasters.length == 0) {
            System.out.println("No Search Masters Found");
            // Nothing to query; returning here also avoids dereferencing a null array below.
            return null;
        }
        System.out.println("Number of Search Master EPRs: " + searchMasters.length);
        for (int attempt = 0; attempt < searchMasters.length; attempt++) {
            // Clear the sign bit so the index stays valid even after the counter overflows.
            int index = (SMid.getAndIncrement() & Integer.MAX_VALUE) % searchMasters.length;
            String resultSetEpr = null;
            try {
                resultSetEpr = submitSearch(str, searchMasters[index].getAddress().toString(), str2);
            } catch (SearchSystemPortRetrievalException e) {
                e.printStackTrace();
                System.err.println(e.getCause());
            } catch (QuerySubmissionSearchException e) {
                e.printStackTrace();
                System.err.println(e.getCause());
            } catch (URI.MalformedURIException e) {
                e.printStackTrace();
                System.err.println(e.getCause());
            }
            if (resultSetEpr != null) {
                System.out.println("The rs EPR returned is: " + resultSetEpr);
                return new JobResultSetConsumer(resultSetEpr);
            }
            System.out.println("No RS epr found - it is null.");
        }
        return null;
    }

    /**
     * Looks up, in the IS cache of the given scope, the endpoint references registered for
     * service class {@code "Search"} / service name {@code "SearchSystemService"} with
     * service type {@code SrvType.SIMPLE}.
     *
     * @param str the scope expression whose IS cache is queried
     * @return the endpoint references returned by the cache
     * @throws URIRetrievalFromISCacheException wrapping any exception raised during the lookup
     */
    protected static EndpointReference[] findSearchMasterEPR(String str) throws URIRetrievalFromISCacheException {
        System.out.println("Looking for a Search Master epr - scope is: " + str);
        try {
            RIsManager manager = RIsManager.getInstance();
            GCUBEScope scope = GCUBEScope.getScope(str);
            String serviceType = SrvType.SIMPLE.name();
            return manager.getISCache(scope).getEPRsFor("Search", "SearchSystemService", serviceType);
        } catch (Exception lookupFailure) {
            System.out.println(lookupFailure.getMessage());
            throw new URIRetrievalFromISCacheException(lookupFailure);
        }
    }

    /**
     * Submits a query to one search master and returns the EPR of the result set it produced.
     *
     * <p>Every failure raised while obtaining the service proxy or executing the search
     * (remote faults, a malformed scope expression, or any other exception) is logged and
     * reported as a {@link QuerySubmissionSearchException}. Callers therefore never
     * receive a {@code NullPointerException} caused by a swallowed error.
     *
     * @param str  the query; surrounding whitespace is trimmed before submission
     * @param str2 the address of the search master to contact
     * @param str3 the scope expression under which the call is made
     * @return the result-set EPR contained in the search response
     * @throws URI.MalformedURIException          if {@code str2} is not a valid address
     * @throws SearchSystemPortRetrievalException declared for callers; not raised directly here
     * @throws QuerySubmissionSearchException     if the query cannot be submitted or yields no response
     */
    protected static String submitSearch(String str, String str2, String str3) throws URI.MalformedURIException, SearchSystemPortRetrievalException, QuerySubmissionSearchException {
        String trimmedQuery = str.trim();
        System.out.println("Query to Submit: " + trimmedQuery);
        System.out.println("Query will be submitted to: " + str2);
        // Built outside the try block so an invalid address surfaces as the declared MalformedURIException.
        EndpointReferenceType endpoint = new EndpointReferenceType();
        endpoint.setAddress(new Address(str2));
        SearchResponse response;
        try {
            response = GCUBERemotePortTypeContext.getProxy(new SearchMasterServiceAddressingLocator().getSearchSystemServicePortTypePort(endpoint), GCUBEScope.getScope(str3), new GCUBESecurityManager[0]).search(trimmedQuery);
        } catch (Exception e) {
            // Single handler on purpose: remote faults, scope errors and anything else are all
            // reported the same way, and no ordering between related exception types can go wrong.
            System.out.println("Error while submitting search query");
            e.printStackTrace();
            System.err.println(e.getCause());
            throw new QuerySubmissionSearchException(e);
        }
        if (response == null) {
            throw new QuerySubmissionSearchException(new IllegalStateException("The search master returned no response"));
        }
        String resultSetEpr = response.getResultSetEpr();
        System.out.println("The rs epr is: " + resultSetEpr);
        return resultSetEpr;
    }
}
