package org.gcube.dataanalysis.executor.nodes.transducers.bionym;

import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.UUID;
import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger;
import org.gcube.dataanalysis.ecoengine.configuration.ALG_PROPS;
import org.gcube.dataanalysis.ecoengine.configuration.AlgorithmConfiguration;
import org.gcube.dataanalysis.ecoengine.datatypes.ColumnType;
import org.gcube.dataanalysis.ecoengine.datatypes.DatabaseType;
import org.gcube.dataanalysis.ecoengine.datatypes.InputTable;
import org.gcube.dataanalysis.ecoengine.datatypes.OutputTable;
import org.gcube.dataanalysis.ecoengine.datatypes.PrimitiveType;
import org.gcube.dataanalysis.ecoengine.datatypes.ServiceType;
import org.gcube.dataanalysis.ecoengine.datatypes.StatisticalType;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.PrimitiveTypes;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.ServiceParameters;
import org.gcube.dataanalysis.ecoengine.datatypes.enumtypes.TableTemplates;
import org.gcube.dataanalysis.ecoengine.interfaces.ActorNode;
import org.gcube.dataanalysis.ecoengine.utils.DatabaseFactory;
import org.gcube.dataanalysis.ecoengine.utils.DatabaseUtils;
import org.gcube.dataanalysis.ecoengine.utils.Transformations;
import org.gcube.dataanalysis.executor.generators.D4ScienceDistributedProcessing;
import org.gcube.dataanalysis.executor.job.management.QueueJobManager;
import org.gcube.dataanalysis.executor.nodes.transducers.bionym.abstracts.MatcherOutput;
import org.gcube.dataanalysis.executor.nodes.transducers.bionym.abstracts.SingleEntry;
import org.gcube.dataanalysis.executor.nodes.transducers.bionym.implementations.matchers.FuzzyMatcher;
import org.gcube.dataanalysis.executor.nodes.transducers.bionym.implementations.matchers.GsayMatcher;
import org.gcube.dataanalysis.executor.nodes.transducers.bionym.implementations.matchers.LevensteinMatcher;
import org.gcube.dataanalysis.executor.nodes.transducers.bionym.implementations.matchers.MixedLexicalMatcher;
import org.gcube.dataanalysis.executor.nodes.transducers.bionym.implementations.matchers.SoundexMatcher;
import org.gcube.dataanalysis.executor.nodes.transducers.bionym.implementations.matchers.TrigramMatcher;
import org.gcube.dataanalysis.executor.nodes.transducers.bionym.implementations.workflows.BiOnymWF;
import org.gcube.dataanalysis.executor.nodes.transducers.bionym.interfaces.Matcher;
import org.gcube.dataanalysis.executor.nodes.transducers.bionym.utils.YasmeenGlobalParameters;
import org.gcube.dataanalysis.executor.scripts.OSCommand;
import org.hibernate.SessionFactory;

/* loaded from: input_file:org/gcube/dataanalysis/executor/nodes/transducers/bionym/BionymFlexibleWorkflowTransducer.class */
/**
 * Distributed transducer node that runs the BiOnym taxa-name matching workflow.
 *
 * <p>Lifecycle as implemented in this class:
 * <ul>
 *   <li>{@link #setup(AlgorithmConfiguration)} opens a DB session, counts the distinct raw names,
 *       (re)creates the destination table and temporarily overrides some static job-manager settings;</li>
 *   <li>{@link #executeNode(int, int, int, int, boolean, String, String, String)} processes one
 *       offset/limit slice of the distinct raw names and appends the matches to the destination table;</li>
 *   <li>{@link #postProcess(boolean, boolean)} restores the overridden static settings;</li>
 *   <li>{@link #stop()} drops the destination table if post-processing never happened and closes the session.</li>
 * </ul>
 */
public class BionymFlexibleWorkflowTransducer extends ActorNode {
    /** Threshold applied to a matcher when no {@code Threshold_i} parameter is configured. */
    private static final double DEFAULT_THRESHOLD = 0.2d;
    /** Max results applied when no {@code MaxResults_i} / overall max-results parameter is configured. */
    private static final int DEFAULT_MAX_RESULTS = 10;
    /** Number of rows written per insert chunk into the destination table. */
    private static final int INSERT_CHUNK_SIZE = 5000;
    /** Values temporarily assigned to the static job-manager settings while this algorithm runs. */
    private static final int JOB_MAX_MESSAGES = 50;
    private static final int JOB_BROADCAST_TIME_PERIOD = 1800000;
    private static final int JOB_MAX_STAGES = 10000;

    protected AlgorithmConfiguration currentconfig;
    protected SessionFactory dbconnection;
    float status;
    // Previous values of the static job-manager settings, saved by setup() and restored by postProcess().
    public int prevbroadcastTimePeriod;
    public int prevmaxNumberOfStages;
    public int prevmaxMessages;
    String destinationTable;
    String destinationTableLabel;
    public static String headers = "SOURCE_DATA,TARGET_DATA_SCIENTIFIC_NAME,TARGET_DATA_AUTHORITY,MATCHING_SCORE,TARGET_DATA_SOURCE,TARGET_DATA_ID";
    private static String createOutputTable = "CREATE TABLE %1$s (SOURCE_DATA character varying(255),  TARGET_DATA_SCIENTIFIC_NAME character varying(255), TARGET_DATA_AUTHORITY character varying(255), MATCHING_SCORE real,TARGET_DATA_SOURCE character varying, TARGET_DATA_ID character varying)";
    public static String destinationTableParam = "OutputTable";
    public static String destinationTableLableParam = "OutputTableLabel";
    public static String originTableParam = "RawTaxaNamesTable";
    public static String rawnamesColumnParam = "RawNamesColumn";
    public static String matcherParamPrefix = "Matcher";
    public static String thresholdParamPrefix = "Threshold";
    public static String maxresultsParamPrefix = "MaxResults";
    /** Number of matcher slots exposed as input parameters by {@link #getInputParameters()}. */
    public static int maxMatchersInterface = 5;
    /** Number of matcher slots scanned in the configuration by {@link #buildMatcherList}. */
    public static int maxMatchersReal = 20;
    boolean haspostprocessed = false;
    public int rawnamescount = 0;

    /**
     * Instantiates the matcher implementation corresponding to a built-in matcher identifier.
     *
     * @param builtinMatchers the matcher identifier
     * @param str             first constructor argument forwarded to the matcher
     *                        (callers in this class pass the sandbox/config folder)
     * @param d               matching threshold forwarded to the matcher
     * @param i               maximum number of results forwarded to the matcher
     * @param hashMap         global parameters forwarded to the matcher
     * @return the matcher, or {@code null} for {@code NONE} and for any identifier not handled here
     */
    public static Matcher getEnum2Matcher(YasmeenGlobalParameters.BuiltinMatchers builtinMatchers, String str, double d, int i, HashMap<String, String> hashMap) {
        switch (builtinMatchers) {
            case GSAy:
                return new GsayMatcher(str, d, i, hashMap);
            case FUZZYMATCH:
                return new FuzzyMatcher(str, d, i, hashMap);
            case LEVENSHTEIN:
                return new LevensteinMatcher(str, d, i, hashMap);
            case TRIGRAM:
                return new TrigramMatcher(str, d, i, hashMap);
            case SOUNDEX:
                return new SoundexMatcher(str, d, i, hashMap);
            case LEV_SDX_TRIG:
                return new MixedLexicalMatcher(str, d, i, hashMap);
            case NONE:
            default:
                return null;
        }
    }

    /**
     * Builds the ordered list of matchers configured through the {@code Matcher_i},
     * {@code Threshold_i} and {@code MaxResults_i} parameters, for i in 1..{@link #maxMatchersReal}.
     * Slots without a matcher name, or whose matcher resolves to {@code null} (e.g. NONE), are skipped.
     *
     * @param algorithmConfiguration configuration to read the parameters from
     * @param str                    forwarded to every matcher constructor
     * @param hashMap                global parameters forwarded to every matcher constructor
     * @return the configured matchers, possibly empty, never {@code null}
     * @throws IllegalArgumentException if a matcher name is not a valid built-in matcher, or a
     *                                  threshold / max-results value is not numeric
     */
    public List<Matcher> buildMatcherList(AlgorithmConfiguration algorithmConfiguration, String str, HashMap<String, String> hashMap) {
        List<Matcher> matchers = new ArrayList<Matcher>();
        for (int slot = 1; slot <= maxMatchersReal; slot++) {
            String matcherName = algorithmConfiguration.getParam(matcherParamPrefix + "_" + slot);
            if (matcherName == null) {
                continue;
            }
            String thresholdValue = algorithmConfiguration.getParam(thresholdParamPrefix + "_" + slot);
            String maxResultsValue = algorithmConfiguration.getParam(maxresultsParamPrefix + "_" + slot);
            double threshold = thresholdValue != null ? Double.parseDouble(thresholdValue) : DEFAULT_THRESHOLD;
            int maxResults = maxResultsValue != null ? Integer.parseInt(maxResultsValue) : DEFAULT_MAX_RESULTS;
            Matcher matcher = getEnum2Matcher(YasmeenGlobalParameters.BuiltinMatchers.valueOf(matcherName), str, threshold, maxResults, hashMap);
            if (matcher != null) {
                matchers.add(matcher);
            }
        }
        return matchers;
    }

    /** @return the single property {@code PHENOMENON_VS_PARALLEL_PHENOMENON}. */
    public ALG_PROPS[] getProperties() {
        return new ALG_PROPS[]{ALG_PROPS.PHENOMENON_VS_PARALLEL_PHENOMENON};
    }

    /** @return the algorithm name, {@code "BIONYM"}. */
    public String getName() {
        return "BIONYM";
    }

    /** @return the human-readable description of the algorithm. */
    public String getDescription() {
        return "An algorithm implementing BiOnym, a flexible workflow approach to taxon name matching. The workflow allows to activate several taxa names matching algorithms and to get the list of possible transcriptions for a list of input raw species names with possible authorship indication.";
    }

    /**
     * Declares the input parameters: the source table and column, the destination table and its
     * label, the global workflow options, four pre-configured matcher slots, the optional matcher
     * slots from 5 up to {@link #maxMatchersInterface}, and finally the default database parameters.
     *
     * @return the parameter list, in declaration order
     */
    public List<StatisticalType> getInputParameters() {
        List<TableTemplates> templates = new ArrayList<TableTemplates>();
        templates.add(TableTemplates.GENERIC);

        List<StatisticalType> parameters = new ArrayList<StatisticalType>();
        parameters.add(new InputTable(templates, originTableParam, "Input table containing raw taxa names that you want to match", "byonym"));
        parameters.add(new ServiceType(ServiceParameters.RANDOMSTRING, destinationTableParam, "name of the table that will contain the matches", "bion_"));
        parameters.add(new ColumnType(originTableParam, rawnamesColumnParam, "The column containing the raw taxa names with or without authoship information", "rawnames", false));
        parameters.add(new PrimitiveType(String.class.getName(), (Object) null, PrimitiveTypes.STRING, destinationTableLableParam, "Name of the table which will contain the matches", "bionout"));
        parameters.add(new PrimitiveType(Enum.class.getName(), YasmeenGlobalParameters.BuiltinDataSources.values(), PrimitiveTypes.ENUMERATED, YasmeenGlobalParameters.taxaAuthorityFileParam, "The reference dataset to use", String.valueOf(YasmeenGlobalParameters.BuiltinDataSources.FISHBASE)));
        parameters.add(new PrimitiveType(Enum.class.getName(), YasmeenGlobalParameters.BuiltinParsers.values(), PrimitiveTypes.ENUMERATED, YasmeenGlobalParameters.parserNameParam, "The Species - Authority parser", String.valueOf(YasmeenGlobalParameters.BuiltinParsers.SIMPLE)));
        parameters.add(new PrimitiveType(Boolean.class.getName(), (Object) null, PrimitiveTypes.BOOLEAN, YasmeenGlobalParameters.activatePreParsingProcessing, "Use preparsing rules to correct common errors", "true"));
        parameters.add(new PrimitiveType(Boolean.class.getName(), (Object) null, PrimitiveTypes.BOOLEAN, YasmeenGlobalParameters.useStemmedGenusAndSpecies, "Process using Genus and Species names without declension", "false"));
        parameters.add(new PrimitiveType(Enum.class.getName(), YasmeenGlobalParameters.Performance.values(), PrimitiveTypes.ENUMERATED, YasmeenGlobalParameters.performanceParam, "A trade-off between recognition speed and accuracy. Max speed corresponds to search for strings with the same length only.", String.valueOf(YasmeenGlobalParameters.Performance.MAX_ACCURACY)));

        // Slot 1: GSAy
        parameters.add(new PrimitiveType(Enum.class.getName(), YasmeenGlobalParameters.BuiltinMatchers.values(), PrimitiveTypes.ENUMERATED, matcherParamPrefix + "_1", "Choose a Matcher", YasmeenGlobalParameters.BuiltinMatchers.GSAy.name()));
        parameters.add(new PrimitiveType(Double.class.getName(), (Object) null, PrimitiveTypes.NUMBER, thresholdParamPrefix + "_1", "Threshold", "0.6", true));
        parameters.add(new PrimitiveType(Integer.class.getName(), (Object) null, PrimitiveTypes.NUMBER, maxresultsParamPrefix + "_1", "The maximum number of matching candidates per each raw input species", "10"));
        // Slot 2: FUZZYMATCH
        parameters.add(new PrimitiveType(Enum.class.getName(), YasmeenGlobalParameters.BuiltinMatchers.values(), PrimitiveTypes.ENUMERATED, matcherParamPrefix + "_2", "Choose a Matcher", YasmeenGlobalParameters.BuiltinMatchers.FUZZYMATCH.name()));
        parameters.add(new PrimitiveType(Double.class.getName(), (Object) null, PrimitiveTypes.NUMBER, thresholdParamPrefix + "_2", "Threshold", "0.6", true));
        parameters.add(new PrimitiveType(Integer.class.getName(), (Object) null, PrimitiveTypes.NUMBER, maxresultsParamPrefix + "_2", "The maximum number of matching candidates per each raw input species", "10"));
        // Slot 3: LEVENSHTEIN
        parameters.add(new PrimitiveType(Enum.class.getName(), YasmeenGlobalParameters.BuiltinMatchers.values(), PrimitiveTypes.ENUMERATED, matcherParamPrefix + "_3", "Choose a Matcher", YasmeenGlobalParameters.BuiltinMatchers.LEVENSHTEIN.name()));
        parameters.add(new PrimitiveType(Double.class.getName(), (Object) null, PrimitiveTypes.NUMBER, thresholdParamPrefix + "_3", "Threshold", "0.4", true));
        parameters.add(new PrimitiveType(Integer.class.getName(), (Object) null, PrimitiveTypes.NUMBER, maxresultsParamPrefix + "_3", "The maximum number of matching candidates per each raw input species", "5"));
        // Slot 4: TRIGRAM (its threshold is declared without the trailing boolean flag, as in the original)
        parameters.add(new PrimitiveType(Enum.class.getName(), YasmeenGlobalParameters.BuiltinMatchers.values(), PrimitiveTypes.ENUMERATED, matcherParamPrefix + "_4", "Choose a Matcher", YasmeenGlobalParameters.BuiltinMatchers.TRIGRAM.name()));
        parameters.add(new PrimitiveType(Double.class.getName(), (Object) null, PrimitiveTypes.NUMBER, thresholdParamPrefix + "_4", "Threshold", "0.4"));
        parameters.add(new PrimitiveType(Integer.class.getName(), (Object) null, PrimitiveTypes.NUMBER, maxresultsParamPrefix + "_4", "The maximum number of matching candidates per each raw input species", "5"));

        // Remaining optional slots default to the NONE matcher.
        for (int slot = 5; slot <= maxMatchersInterface; slot++) {
            parameters.add(new PrimitiveType(Enum.class.getName(), YasmeenGlobalParameters.BuiltinMatchers.values(), PrimitiveTypes.ENUMERATED, matcherParamPrefix + "_" + slot, "Choose a Matcher (Optional)", YasmeenGlobalParameters.BuiltinMatchers.NONE.name(), true));
            parameters.add(new PrimitiveType(Double.class.getName(), (Object) null, PrimitiveTypes.NUMBER, thresholdParamPrefix + "_" + slot, "Threshold (def. 0.2)", "0.2", true));
            parameters.add(new PrimitiveType(Integer.class.getName(), (Object) null, PrimitiveTypes.NUMBER, maxresultsParamPrefix + "_" + slot, "The maximum number of matching candidates per each raw input species", "0"));
        }
        DatabaseType.addDefaultDBPars(parameters);
        return parameters;
    }

    /** @return an output-table descriptor built from the destination table label and name set by {@link #setup}. */
    public StatisticalType getOutput() {
        List<TableTemplates> templates = new ArrayList<TableTemplates>();
        templates.add(TableTemplates.GENERIC);
        return new OutputTable(templates, this.destinationTableLabel, this.destinationTable, "Output  table");
    }

    /** No per-node initialization is required. */
    public void initSingleNode(AlgorithmConfiguration algorithmConfiguration) {
    }

    /** @return 0 while {@link #executeNode} is running, 1 once it has terminated (successfully or not). */
    public float getInternalStatus() {
        return this.status;
    }

    /**
     * Processes one slice of the distinct raw names: restores the configuration, reads the slice
     * from the origin table, runs the chained BiOnym workflow and writes the matches into the
     * destination table. The DB session is closed and the temporary parser files are deleted on
     * every exit path.
     *
     * @param i    not used by this implementation
     * @param i2   not used by this implementation
     * @param i3   offset (in the distinct-names query) of the first name to process
     * @param i4   maximum number of names to process
     * @param z    not used by this implementation
     * @param str  folder containing the configuration file; also used as configuration path and
     *             as the location of the temporary parser files
     * @param str2 name of the serialized configuration file inside {@code str}
     * @param str3 not used by this implementation
     * @return 0 on success, -1 if an exception occurred
     */
    public int executeNode(int i, int i2, int i3, int i4, boolean z, String str, String str2, String str3) {
        String uuid = UUID.randomUUID().toString().replace("-", "");
        String parserInputFile = "inputParser" + uuid + ".txt";
        String parserOutputFile = "outputParser" + uuid + ".txt";
        try {
            this.status = 0.0f;
            long startTime = System.currentTimeMillis();
            System.out.println("Restoring configuration");
            AlgorithmConfiguration config = Transformations.restoreConfig(new File(str, str2).getAbsolutePath());
            config.setConfigPath(str);
            this.dbconnection = DatabaseUtils.initDBSession(config);
            String destination = config.getParam(destinationTableParam);
            String originTable = config.getParam(originTableParam);
            String namesColumn = config.getParam(rawnamesColumnParam);
            String parserName = config.getParam(YasmeenGlobalParameters.parserNameParam);
            String performance = config.getParam(YasmeenGlobalParameters.performanceParam);
            String referenceDataset = config.getParam(YasmeenGlobalParameters.taxaAuthorityFileParam);
            String doPreprocessing = config.getParam(YasmeenGlobalParameters.activatePreParsingProcessing);
            String useStemming = config.getParam(YasmeenGlobalParameters.useStemmedGenusAndSpecies);
            String overallMaxResults = config.getParam(YasmeenGlobalParameters.overallMaxResults);
            System.out.println("Destination Table: " + destination);
            System.out.println("Origin Table: " + originTable);
            System.out.println("Column of names: " + namesColumn);
            System.out.println("Parser to use: " + parserName);
            System.out.println("Accuracy vs Speed: " + performance);
            System.out.println("Reference Dataset: " + referenceDataset);
            System.out.println("Do Preprocessing: " + doPreprocessing);
            System.out.println("Use Stemming:" + useStemming);
            System.out.println("Overall MaxResults:" + overallMaxResults);

            HashMap<String, String> globalParameters = new HashMap<>();
            globalParameters.put(YasmeenGlobalParameters.parserInputFileParam, parserInputFile);
            globalParameters.put(YasmeenGlobalParameters.parserOutputFileParam, parserOutputFile);
            globalParameters.put(YasmeenGlobalParameters.activatePreParsingProcessing, doPreprocessing);
            globalParameters.put(YasmeenGlobalParameters.parserNameParam, parserName);
            globalParameters.put(YasmeenGlobalParameters.performanceParam, performance);
            globalParameters.put(YasmeenGlobalParameters.taxaAuthorityFileParam, referenceDataset);
            globalParameters.put(YasmeenGlobalParameters.useStemmedGenusAndSpecies, useStemming);
            System.out.println("Configuration Restored! - Time: " + (System.currentTimeMillis() - startTime));

            long retrievalStart = System.currentTimeMillis();
            System.out.println("Retrieving names to process");
            String namesQuery = DatabaseUtils.getDinstictElements(originTable, namesColumn, "") + " offset " + i3 + " limit " + i4;
            List<?> rows = DatabaseFactory.executeSQLQuery(namesQuery, this.dbconnection);
            // NOTE(review): the query helper may return null for an empty result - treated as "no rows"; confirm.
            int retrieved = rows == null ? 0 : rows.size();
            System.out.println("Retrieved a total of " + retrieved + " species");
            // The query can return fewer rows than the requested limit (e.g. for the last slice):
            // iterate over what was actually returned instead of the requested amount.
            int namesToProcess = Math.min(retrieved, i4);
            System.out.println("Processing from 0 to " + namesToProcess);
            List<String> rawNames = new ArrayList<String>();
            for (int row = 0; row < namesToProcess; row++) {
                // Strip one leading and one trailing single quote, if present.
                rawNames.add(String.valueOf(rows.get(row)).replaceAll("^'", "").replaceAll("'$", ""));
            }
            int rawNamesCount = rawNames.size();
            System.out.println("Retrieve from DB - Time: " + (System.currentTimeMillis() - retrievalStart));

            long initStart = System.currentTimeMillis();
            System.out.println("Processing " + rawNamesCount + " species..");
            try {
                // Best effort: make the files in the working directory executable.
                OSCommand.ExecuteGetLine("chmod +x *", null);
            } catch (Exception e) {
                System.out.println("WARNING: could not change the permissions");
            }
            int workflowMaxResults = overallMaxResults != null ? Integer.parseInt(overallMaxResults) : DEFAULT_MAX_RESULTS;
            BiOnymWF workflow = new BiOnymWF(str, workflowMaxResults, globalParameters);
            List<Matcher> matchers = buildMatcherList(config, str, globalParameters);
            if (matchers != null) {
                workflow.resetMatchers(matchers);
            }
            System.out.println("WF Initialization - Time: " + (System.currentTimeMillis() - initStart));
            MatcherOutput output = workflow.executeChainedWorkflow(rawNames);
            System.out.println("Workflow Executed");

            long writeStart = System.currentTimeMillis();
            int entriesNumber = output.getEntriesNumber();
            List<String[]> outputRows = new ArrayList<String[]>();
            for (int entryIndex = 0; entryIndex < entriesNumber; entryIndex++) {
                SingleEntry entry = output.getEntry(entryIndex);
                // Column order must match the 'headers' field.
                outputRows.add(new String[]{entry.originalName, entry.targetScientificName, entry.targetAuthor, "" + entry.matchingScore, referenceDataset, entry.targetID});
            }
            DatabaseUtils.insertChunksIntoTable(destination, headers, outputRows, INSERT_CHUNK_SIZE, this.dbconnection);
            System.out.println("Write on DB - Time: " + (System.currentTimeMillis() - writeStart));
            System.out.println("The procedure finished successfully. Processed " + rawNamesCount + " species.");
            System.out.println("Elapsed Time " + (System.currentTimeMillis() - startTime) + " ms");
            return 0;
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("warning: error in node execution " + e.getLocalizedMessage());
            System.err.println("Error in node execution " + e.getLocalizedMessage());
            return -1;
        } finally {
            // Runs on success, on handled exceptions and on propagating errors alike.
            closeDbConnectionQuietly();
            deleteTempFileQuietly(new File(str, parserInputFile));
            deleteTempFileQuietly(new File(str, parserOutputFile));
            this.status = 1.0f;
        }
    }

    /**
     * Prepares the computation: opens the DB session, counts the distinct raw names, drops and
     * re-creates the destination table, and overrides the static job-manager settings (their
     * previous values are saved so that {@link #postProcess} can restore them).
     *
     * @param algorithmConfiguration the algorithm configuration
     * @throws Exception if the session cannot be opened or the destination table cannot be created
     */
    public void setup(AlgorithmConfiguration algorithmConfiguration) throws Exception {
        this.haspostprocessed = false;
        AnalysisLogger.getLogger().info("Initializing DB Connection");
        this.dbconnection = DatabaseUtils.initDBSession(algorithmConfiguration);
        this.destinationTable = algorithmConfiguration.getParam(destinationTableParam);
        this.destinationTableLabel = algorithmConfiguration.getParam(destinationTableLableParam);
        String countQuery = DatabaseUtils.getDinstictElements(algorithmConfiguration.getParam(originTableParam), algorithmConfiguration.getParam(rawnamesColumnParam), "");
        List<?> distinctNames = DatabaseFactory.executeSQLQuery(countQuery, this.dbconnection);
        // NOTE(review): guards against a null result for an empty table - confirm the helper's contract.
        this.rawnamescount = distinctNames == null ? 0 : distinctNames.size();
        AnalysisLogger.getLogger().info("Creating Destination Table " + this.destinationTable);
        try {
            DatabaseFactory.executeSQLUpdate(DatabaseUtils.dropTableStatement(this.destinationTable), this.dbconnection);
        } catch (Exception e) {
            // Dropping is best effort: failure is assumed to mean the table was not there.
            AnalysisLogger.getLogger().info("Table " + this.destinationTable + " did not exist");
        }
        DatabaseFactory.executeSQLUpdate(String.format(createOutputTable, this.destinationTable), this.dbconnection);
        this.prevmaxMessages = D4ScienceDistributedProcessing.maxMessagesAllowedPerJob;
        D4ScienceDistributedProcessing.maxMessagesAllowedPerJob = JOB_MAX_MESSAGES;
        this.prevbroadcastTimePeriod = QueueJobManager.broadcastTimePeriod;
        QueueJobManager.broadcastTimePeriod = JOB_BROADCAST_TIME_PERIOD;
        this.prevmaxNumberOfStages = QueueJobManager.maxNumberOfStages;
        QueueJobManager.maxNumberOfStages = JOB_MAX_STAGES;
        AnalysisLogger.getLogger().info("Destination Table Created! Addressing " + this.rawnamescount + " names");
    }

    /** @return the number of distinct raw names counted by {@link #setup}. */
    public int getNumberOfRightElements() {
        return this.rawnamescount;
    }

    /** @return always 1. */
    public int getNumberOfLeftElements() {
        return 1;
    }

    /**
     * Stops the computation. If {@link #postProcess} was never invoked the destination table is
     * dropped; in every case the DB session is closed.
     */
    public void stop() {
        if (this.haspostprocessed) {
            AnalysisLogger.getLogger().info("The procedure has correctly postprocessed: shutting down the connection!");
        } else if (this.dbconnection != null) {
            try {
                AnalysisLogger.getLogger().info("The procedure did NOT correctly postprocessed ....Removing Table " + this.destinationTable + " because of computation stop!");
                DatabaseFactory.executeSQLUpdate(DatabaseUtils.dropTableStatement(this.destinationTable), this.dbconnection);
            } catch (Exception e) {
                AnalysisLogger.getLogger().info("Table " + this.destinationTable + " did not exist");
            }
        } else {
            // Without a session there is nothing that can be dropped.
            AnalysisLogger.getLogger().info("The procedure did NOT correctly postprocessed but no DB connection is available: table " + this.destinationTable + " was not removed");
        }
        closeDbConnectionQuietly();
    }

    /**
     * Restores the static job-manager settings saved by {@link #setup} and marks the computation
     * as post-processed, so that {@link #stop()} keeps the destination table.
     *
     * @param z  not used by this implementation
     * @param z2 not used by this implementation
     */
    public void postProcess(boolean z, boolean z2) {
        QueueJobManager.broadcastTimePeriod = this.prevbroadcastTimePeriod;
        QueueJobManager.maxNumberOfStages = this.prevmaxNumberOfStages;
        D4ScienceDistributedProcessing.maxMessagesAllowedPerJob = this.prevmaxMessages;
        this.haspostprocessed = true;
    }

    /** Closes the DB session, if any; a failure is reported but never propagated. */
    private void closeDbConnectionQuietly() {
        if (this.dbconnection == null) {
            return;
        }
        try {
            this.dbconnection.close();
        } catch (Exception e) {
            System.out.println("WARNING: could not close the DB connection: " + e.getLocalizedMessage());
        }
    }

    /** Deletes a temporary file; a failure is reported but never propagated. */
    private static void deleteTempFileQuietly(File file) {
        try {
            file.delete();
        } catch (Exception e) {
            System.out.println("WARNING: could not delete " + file + ": " + e.getLocalizedMessage());
        }
    }
}
