package org.fao.fi.comet.domain.species.tools.parsers.cli;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.ServiceLoader;
import java.util.TreeSet;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.fao.fi.comet.domain.species.InputSpeciesFactory;
import org.fao.fi.comet.domain.species.model.InputSpeciesData;
import org.fao.fi.comet.domain.species.model.ReferenceSpeciesData;
import org.fao.fi.comet.domain.species.tools.io.FileConstants;
import org.fao.fi.comet.domain.species.tools.io.readers.ParsedInputDataFileReader;
import org.fao.fi.comet.domain.species.tools.io.readers.UnstructuredInputDataFileReader;
import org.fao.fi.comet.domain.species.tools.io.support.impl.DefaultTAFReferenceDataConverter;
import org.fao.fi.comet.domain.species.tools.parsers.SpeciesNameParser;
import org.fao.fi.comet.domain.species.tools.parsers.StructuredSpeciesNameParser;
import org.fao.fi.comet.domain.species.tools.preprocess.RulesetManager;
import org.fao.fi.comet.domain.species.tools.preprocess.model.RuleTargets;
import org.fao.fi.comet.domain.species.tools.preprocess.post.PostparsingRules;
import org.fao.fi.comet.domain.species.tools.preprocess.pre.PreparsingRules;
import org.fao.vrmf.core.extensions.collections.impl.ListSet;
import org.fao.vrmf.core.helpers.singletons.lang.AssertionUtils;
import org.fao.vrmf.core.helpers.singletons.lang.objects.CollectionsUtils;
import org.fao.vrmf.core.helpers.singletons.text.StringUtils;
import org.fao.vrmf.core.helpers.singletons.text.xml.XMLBuilderUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/fao/fi/comet/domain/species/tools/parsers/cli/InputDataParser.class */
/**
 * Command-line tool that reads unstructured species names from a text file, optionally applies
 * pre-parsing rulesets to each entry, runs one of the {@link SpeciesNameParser} implementations
 * discovered through {@link ServiceLoader} and writes the parsed results (optionally transformed
 * by post-parsing rulesets) to a delimited, double-quoted text file.
 *
 * <p>Entry point is {@link #main(String[])}; run with <code>-h</code> for the list of options.</p>
 */
public class InputDataParser {
    private static final String HELP = "h";
    private static final String IN_FILE = "inFile";
    private static final String OUT_FILE = "outFile";
    private static final String PREPARSING_RULESET = "preParsingRuleset";
    private static final String PREPARSING_RULESET_FILE = "preParsingRulesetFile";
    private static final String POSTPARSING_RULESET = "postParsingRuleset";
    private static final String POSTPARSING_RULESET_FILE = "postParsingRulesetFile";
    private static final String PARSER = "parser";
    private static final String NO_HEADER = "noHeader";
    private static final String PROVIDER_ID = "providerId";
    private static final Logger LOG = LoggerFactory.getLogger(InputDataParser.class);
    private static final String VERSION = "1.2.0";
    private static final UnstructuredInputDataFileReader READER = new UnstructuredInputDataFileReader();

    /**
     * Discovers the species name parsers registered as {@link ServiceLoader} services.
     *
     * @return a lazily-loaded iterable over the available parsers
     */
    private Iterable<SpeciesNameParser> getAvailableParsers() {
        return ServiceLoader.load(SpeciesNameParser.class);
    }

    /**
     * Builds the set of command-line options understood by this tool. The descriptions of the
     * ruleset and parser options list the currently available values.
     *
     * @return the command-line options
     * @throws IOException if the available rulesets cannot be listed
     */
    private Options buildOptions() throws IOException {
        Collection<String> preparsingRulesets = listPreprocessingRulesets();
        Collection<String> postparsingRulesets = listPostprocessingRulesets();
        if (preparsingRulesets == null) {
            preparsingRulesets = new ArrayList<String>();
        }
        if (postparsingRulesets == null) {
            postparsingRulesets = new ArrayList<String>();
        }
        // A sorted set gives a stable, alphabetical listing of the parsers in the help text.
        TreeSet<String> parserDescriptions = new TreeSet<String>();
        for (SpeciesNameParser parser : getAvailableParsers()) {
            parserDescriptions.add(parser.getId() + " (" + parser.getName() + ")");
        }
        Options options = new Options();
        options.addOption(new Option(HELP, false, "Print this message"));
        options.addOption(new Option(IN_FILE, true, "Specify a path to the file containing unstructured input data (one per line)"));
        options.addOption(new Option(OUT_FILE, true, "Specify a path to the file that will contain the structured parsed results"));
        options.addOption(new Option(NO_HEADER, false, "Omit the CSV header in the produced parsed results file"));
        options.addOption(new Option(PREPARSING_RULESET, true, "Specify an embedded pre-parsing ruleset among { " + CollectionsUtils.join(preparsingRulesets, ", ") + " }"));
        options.addOption(new Option(PREPARSING_RULESET_FILE, true, "Specify a file containing a pre-parsing ruleset"));
        options.addOption(new Option(POSTPARSING_RULESET, true, "Specify an embedded post-parsing ruleset among { " + CollectionsUtils.join(postparsingRulesets, ", ") + " }"));
        options.addOption(new Option(POSTPARSING_RULESET_FILE, true, "Specify a file containing a post-parsing ruleset"));
        options.addOption(new Option(PARSER, true, "Specify one of the available input parsers among { " + CollectionsUtils.join(parserDescriptions, ", ") + " }"));
        options.addOption(new Option(PROVIDER_ID, true, "Specify the identifier for the data provider originating these input data. Defaults to 'UserProvidedData' when not set"));
        return options;
    }

    /**
     * Validates the command-line arguments, loads the requested rulesets and runs the
     * read / parse / write pipeline.
     *
     * @param commandLine the parsed command line
     * @throws Throwable if an argument is invalid or any processing step fails
     */
    private void initializeAndLaunch(CommandLine commandLine) throws Throwable {
        LOG.info("### YASMEEN - {} v{} : scientific name and authority parsing tool", InputDataParser.class.getSimpleName(), VERSION);

        String parserId = commandLine.getOptionValue(PARSER);
        AssertionUtils.$nNull(parserId, "Please specify a parser with the -{} option", PARSER);
        SpeciesNameParser selectedParser = null;
        for (SpeciesNameParser candidate : getAvailableParsers()) {
            if (candidate.getId().equals(parserId)) {
                selectedParser = candidate;
                break;
            }
        }
        AssertionUtils.$nNull(selectedParser, "Please specify a valid parser with the -{} option", PARSER);

        String inputPath = commandLine.getOptionValue(IN_FILE);
        AssertionUtils.$nNull(inputPath, "Please specify an input file with the -{} option", IN_FILE);
        File inputFile = new File(inputPath);
        AssertionUtils.$_assert(inputFile.exists(), IllegalArgumentException.class, "Unable to find input file {}", inputPath);
        AssertionUtils.$_assert(inputFile.isFile() && inputFile.canRead(), IllegalArgumentException.class, "Specified input file is not a proper file or cannot be read", new Object[0]);

        String outputPath = commandLine.getOptionValue(OUT_FILE);
        if (outputPath == null) {
            // No explicit output: derive it from the input file path.
            outputPath = inputFile.getAbsolutePath() + FileConstants.DEFAULT_PARSED_FILE_SUFFIX;
            LOG.warn("No output file specified: using {}", outputPath);
        }

        Collection<PreparsingRules> preparsingRules = new ArrayList<PreparsingRules>();
        Collection<PostparsingRules> postparsingRules = new ArrayList<PostparsingRules>();
        loadEmbeddedPreparsingRules(commandLine.getOptionValues(PREPARSING_RULESET), preparsingRules);
        loadExternalPreparsingRules(commandLine.getOptionValues(PREPARSING_RULESET_FILE), preparsingRules);
        loadEmbeddedPostparsingRules(commandLine.getOptionValues(POSTPARSING_RULESET), postparsingRules);
        loadExternalPostparsingRules(commandLine.getOptionValues(POSTPARSING_RULESET_FILE), postparsingRules);

        String providerId = commandLine.hasOption(PROVIDER_ID) ? commandLine.getOptionValue(PROVIDER_ID) : InputSpeciesFactory.DEFAULT_DATA_SOURCE_ID;
        writeParsedData(new File(outputPath), providerId, commandLine.hasOption(NO_HEADER), parseData(readData(inputFile), selectedParser, preparsingRules), postparsingRules);
    }

    /**
     * Loads the embedded pre-parsing rulesets with the given names and appends them to
     * {@code target}. A ruleset that fails to load is reported with a warning and skipped.
     *
     * @param names the requested ruleset names, or {@code null} when the option was not given
     * @param target the collection receiving the successfully loaded rulesets
     * @throws Throwable if a name does not identify a known embedded pre-parsing ruleset
     */
    private void loadEmbeddedPreparsingRules(String[] names, Collection<PreparsingRules> target) throws Throwable {
        if (names == null) {
            return;
        }
        Collection<String> knownRulesets = listPreprocessingRulesets();
        int loaded = 0;
        int totalRules = 0;
        for (String name : names) {
            AssertionUtils.$_assert(knownRulesets.contains(name), IllegalArgumentException.class, "Invalid pre-parsing ruleset '{}'", name);
            LOG.info(" * Attempting to load pre-parsing ruleset '{}'...", name);
            // Reset on every iteration so that a failed load never reports the previous ruleset.
            PreparsingRules rules = null;
            try {
                rules = RulesetManager.readEmbeddedPreparsingRules(name);
                if (rules != null) {
                    target.add(rules);
                    loaded++;
                }
            } catch (Throwable t) {
                // Best effort: a broken ruleset is reported and the remaining ones are still loaded.
                LOG.warn(" ! Pre-parsing ruleset '{}' is not well-formed [ {} ]", name, t.getMessage());
            }
            if (rules == null || rules.getRuleset() == null || rules.getRuleset().isEmpty()) {
                LOG.warn(" ! Pre-parsing ruleset '{}' does not contain any rule", name);
            } else {
                LOG.info(" * Pre-parsing ruleset '{}' contains {} rule(s)", name, Integer.valueOf(rules.getRuleset().size()));
                totalRules += rules.getRuleset().size();
            }
        }
        LOG.info("{} Pre-parsing rulesets correctly loaded, providing a total of {} pre-parsing rule(s)", Integer.valueOf(loaded), Integer.valueOf(totalRules));
    }

    /**
     * Loads pre-parsing rulesets from the given files and appends them to {@code target}.
     * A file whose content fails to load is reported with a warning and skipped.
     *
     * @param paths the ruleset file paths, or {@code null} when the option was not given
     * @param target the collection receiving the successfully loaded rulesets
     * @throws Throwable if a path does not identify an existing, readable, regular file
     */
    private void loadExternalPreparsingRules(String[] paths, Collection<PreparsingRules> target) throws Throwable {
        if (paths == null) {
            return;
        }
        int loaded = 0;
        int totalRules = 0;
        for (String path : paths) {
            File rulesetFile = new File(path);
            AssertionUtils.$_assert(rulesetFile.exists(), IllegalArgumentException.class, "Pre-parsing ruleset file '{}' does not exist", path);
            AssertionUtils.$_assert(rulesetFile.isFile(), IllegalArgumentException.class, "Pre-parsing ruleset file '{}' is not a proper file", path);
            AssertionUtils.$_assert(rulesetFile.canRead(), IllegalArgumentException.class, "Pre-parsing ruleset file '{}' cannot be read", path);
            LOG.info(" * Attempting to load pre-parsing ruleset from file '{}'...", path);
            // Reset on every iteration so that a failed load never reports the previous ruleset.
            PreparsingRules rules = null;
            try {
                rules = RulesetManager.readExternalPreparsingRules(rulesetFile);
                if (rules != null) {
                    target.add(rules);
                    loaded++;
                }
            } catch (Throwable t) {
                // Best effort: a broken ruleset is reported and the remaining ones are still loaded.
                LOG.warn(" ! Pre-parsing ruleset file '{}' is not well-formed [ {} ]", path, t.getMessage());
            }
            if (rules == null || rules.getRuleset() == null || rules.getRuleset().isEmpty()) {
                LOG.warn(" ! Pre-parsing ruleset file '{}' does not contain any rule", path);
            } else {
                LOG.info(" * Pre-parsing ruleset file '{}' contains {} rule(s)", path, Integer.valueOf(rules.getRuleset().size()));
                totalRules += rules.getRuleset().size();
            }
        }
        // Count only the files loaded here, not the embedded rulesets already in the target.
        LOG.info("{} Pre-parsing ruleset files correctly loaded, providing a total of {} pre-parsing rule(s)", Integer.valueOf(loaded), Integer.valueOf(totalRules));
    }

    /**
     * Loads the embedded post-parsing rulesets with the given names and appends them to
     * {@code target}. A ruleset that fails to load is reported with a warning and skipped.
     *
     * @param names the requested ruleset names, or {@code null} when the option was not given
     * @param target the collection receiving the successfully loaded rulesets
     * @throws Throwable if a name does not identify a known embedded post-parsing ruleset
     */
    private void loadEmbeddedPostparsingRules(String[] names, Collection<PostparsingRules> target) throws Throwable {
        if (names == null) {
            return;
        }
        Collection<String> knownRulesets = listPostprocessingRulesets();
        int loaded = 0;
        int totalRules = 0;
        for (String name : names) {
            AssertionUtils.$_assert(knownRulesets.contains(name), IllegalArgumentException.class, "Invalid post-parsing ruleset '{}'", name);
            LOG.info(" * Attempting to load post-parsing ruleset '{}'...", name);
            // Reset on every iteration so that a failed load never reports the previous ruleset.
            PostparsingRules rules = null;
            try {
                rules = RulesetManager.readEmbeddedPostparsingRules(name);
                if (rules != null) {
                    target.add(rules);
                    loaded++;
                }
            } catch (Throwable t) {
                // Best effort: a broken ruleset is reported and the remaining ones are still loaded.
                LOG.warn(" ! Post-parsing ruleset '{}' is not well-formed [ {} ]", name, t.getMessage());
            }
            if (rules == null || rules.getRuleset() == null || rules.getRuleset().isEmpty()) {
                LOG.warn(" ! Post-parsing ruleset '{}' does not contain any rule", name);
            } else {
                LOG.info(" * Post-parsing ruleset '{}' contains {} rule(s)", name, Integer.valueOf(rules.getRuleset().size()));
                totalRules += rules.getRuleset().size();
            }
        }
        LOG.info("{} Post-parsing rulesets correctly loaded, providing a total of {} post-parsing rule(s)", Integer.valueOf(loaded), Integer.valueOf(totalRules));
    }

    /**
     * Loads post-parsing rulesets from the given files and appends them to {@code target}.
     * A file whose content fails to load is reported with a warning and skipped.
     *
     * @param paths the ruleset file paths, or {@code null} when the option was not given
     * @param target the collection receiving the successfully loaded rulesets
     * @throws Throwable if a path does not identify an existing, readable, regular file
     */
    private void loadExternalPostparsingRules(String[] paths, Collection<PostparsingRules> target) throws Throwable {
        if (paths == null) {
            return;
        }
        int loaded = 0;
        int totalRules = 0;
        for (String path : paths) {
            File rulesetFile = new File(path);
            AssertionUtils.$_assert(rulesetFile.exists(), IllegalArgumentException.class, "Post-parsing ruleset file '{}' does not exist", path);
            AssertionUtils.$_assert(rulesetFile.isFile(), IllegalArgumentException.class, "Post-parsing ruleset file '{}' is not a proper file", path);
            AssertionUtils.$_assert(rulesetFile.canRead(), IllegalArgumentException.class, "Post-parsing ruleset file '{}' cannot be read", path);
            LOG.info(" * Attempting to load post-parsing ruleset from file '{}'...", path);
            // Reset on every iteration so that a failed load never reports the previous ruleset.
            PostparsingRules rules = null;
            try {
                rules = RulesetManager.readExternalPostparsingRules(rulesetFile);
                if (rules != null) {
                    target.add(rules);
                    loaded++;
                }
            } catch (Throwable t) {
                // Best effort: a broken ruleset is reported and the remaining ones are still loaded.
                LOG.warn(" ! Post-parsing ruleset file '{}' is not well-formed [ {} ]", path, t.getMessage());
            }
            if (rules == null || rules.getRuleset() == null || rules.getRuleset().isEmpty()) {
                LOG.warn(" ! Post-parsing ruleset file '{}' does not contain any rule", path);
            } else {
                LOG.info(" * Post-parsing ruleset file '{}' contains {} rule(s)", path, Integer.valueOf(rules.getRuleset().size()));
                totalRules += rules.getRuleset().size();
            }
        }
        // Count only the post-parsing files loaded here (previously the pre-parsing list size was reported).
        LOG.info("{} Post-parsing ruleset files correctly loaded, providing a total of {} post-parsing rule(s)", Integer.valueOf(loaded), Integer.valueOf(totalRules));
    }

    /**
     * Reads the raw input entries from the given file through the shared reader.
     *
     * @param file the input file
     * @return the entries read from the file
     * @throws Throwable if the reader fails
     */
    private String[] readData(File file) throws Throwable {
        return READER.read(file);
    }

    /**
     * Applies the pre-parsing rules (if any) to every input entry, parses the resulting strings
     * with the given parser and wraps each non-null result into an {@link InputSpeciesData}.
     * Entries for which the parser yields {@code null} are logged and left out of the result;
     * identifiers are assigned sequentially, starting from 1, to the entries that are kept.
     *
     * @param strArr the raw input entries
     * @param speciesNameParser the parser to use
     * @param collection the pre-parsing rulesets to apply, in order; may be {@code null} or empty
     * @return the parsed entries
     */
    private InputSpeciesData[] parseData(String[] strArr, SpeciesNameParser speciesNameParser, Collection<PreparsingRules> collection) {
        String parserId = speciesNameParser.getId();
        boolean hasPreparsingRules = collection != null && !collection.isEmpty();
        long preparsingStart = System.currentTimeMillis();
        if (hasPreparsingRules && speciesNameParser instanceof StructuredSpeciesNameParser) {
            LOG.warn(" ! Using pre-parsing rules in combination with a structured parser like '{}' can lead to input data corruption!", parserId);
        }

        // preparsed[k] is strArr[k] after all pre-parsing rulesets have been applied in sequence.
        String[] preparsed = new String[strArr.length];
        for (int idx = 0; idx < strArr.length; idx++) {
            String current = strArr[idx];
            if (hasPreparsingRules) {
                for (PreparsingRules rules : collection) {
                    current = rules.apply(current);
                }
                LOG.debug("Input '{}' has been preprocessed as '{}'", strArr[idx], current);
            }
            preparsed[idx] = current;
        }
        if (hasPreparsingRules) {
            LOG.info("Pre-parsing took {} mSec. on {} entries", Long.valueOf(System.currentTimeMillis() - preparsingStart), Integer.valueOf(strArr.length));
        }

        LOG.info("< {} > : parsing {} input entries...", parserId, Integer.valueOf(strArr.length));
        long parsingStart = System.currentTimeMillis();
        ReferenceSpeciesData[] parsed = speciesNameParser.parse(preparsed);
        Collection<InputSpeciesData> results = new ArrayList<InputSpeciesData>();
        int nextId = 1;
        for (int idx = 0; idx < strArr.length; idx++) {
            String original = strArr[idx];
            ReferenceSpeciesData entry = parsed[idx];
            String description;
            if (entry != null) {
                results.add(InputSpeciesFactory.newInstance(InputSpeciesFactory.DEFAULT_DATA_SOURCE_ID, String.valueOf(nextId++), original, preparsed[idx], entry.getScientificName(), entry.getAuthor(), entry.getScientificName(), entry.getAuthor(), speciesNameParser.getId()));
                description = "{ SCI_NAME: " + entry.getScientificName() + ", AUTH: " + entry.getAuthor() + "}";
                // NOTE(review): the message says the entry "will be skipped", yet it has already been
                // added to the results above - confirm whether skipping is expected to happen downstream.
                if (!XMLBuilderUtils.isStringValid(entry.getScientificName())) {
                    LOG.warn("< {} > : Parsed entry for {} contains characters that are considererd invalid according to XML 1.0 definition. This entry will be skipped", parserId, original);
                }
            } else {
                description = "< NULL >";
                LOG.warn("< {} > : Parsed entry for '{}' resolves to a NULL result", parserId, original);
            }
            LOG.debug("Input data {} has been parsed to: {}", original, description);
        }
        LOG.info("< {} > : overall parsing of {} entries took {} mSec.", parserId, Integer.valueOf(strArr.length), Long.valueOf(System.currentTimeMillis() - parsingStart));
        return results.toArray(new InputSpeciesData[results.size()]);
    }

    /**
     * Writes the parsed entries to the given file, UTF-8 encoded, one entry per line. Each line
     * holds, in order: parser, provider identifier, entry identifier, original input,
     * pre-parsed input, parsed scientific name, parsed authority, post-parsed scientific name and
     * post-parsed authority. Values are double-quoted (embedded quotes are doubled) and
     * {@code null} parsed values are written as empty fields.
     *
     * @param file the output file
     * @param str the data provider identifier written on every line
     * @param z {@code true} to omit the header line
     * @param inputSpeciesDataArr the parsed entries to write
     * @param collection the post-parsing rulesets to apply, in order; may be {@code null} or empty
     * @throws Throwable if the file cannot be opened or an error occurs while writing to it
     */
    private void writeParsedData(File file, String str, boolean z, InputSpeciesData[] inputSpeciesDataArr, Collection<PostparsingRules> collection) throws Throwable {
        boolean hasPostparsingRules = collection != null && !collection.isEmpty();
        int written = 0;
        long postparsingMillis = 0;
        PrintWriter writer = new PrintWriter(file, "UTF-8");
        try {
            if (!z) {
                writer.println(ParsedInputDataFileReader.PARSED_INPUT_DATA_FILE_HEADER);
            }
            for (InputSpeciesData entry : inputSpeciesDataArr) {
                writer.print("\"");
                writer.print(entry.getParser());
                writer.print("\"");
                writer.print(DefaultTAFReferenceDataConverter.SEMICOLON_SEPARATOR_REGEXP);
                printQuoted(writer, str);
                writer.print(DefaultTAFReferenceDataConverter.SEMICOLON_SEPARATOR_REGEXP);
                printQuoted(writer, entry.getId());
                writer.print(DefaultTAFReferenceDataConverter.SEMICOLON_SEPARATOR_REGEXP);
                printQuoted(writer, entry.getOriginal());
                writer.print(DefaultTAFReferenceDataConverter.SEMICOLON_SEPARATOR_REGEXP);
                printQuoted(writer, entry.getPreparsedOriginal());
                writer.print(DefaultTAFReferenceDataConverter.SEMICOLON_SEPARATOR_REGEXP);

                String parsedName = StringUtils.rawTrim(entry.getParsedScientificName());
                if (parsedName != null) {
                    printQuoted(writer, parsedName);
                }
                writer.print(DefaultTAFReferenceDataConverter.SEMICOLON_SEPARATOR_REGEXP);

                String parsedAuthority = StringUtils.rawTrim(entry.getParsedAuthority());
                if (parsedAuthority != null) {
                    printQuoted(writer, parsedAuthority);
                }
                writer.print(DefaultTAFReferenceDataConverter.SEMICOLON_SEPARATOR_REGEXP);

                // Post-parsed columns start from the same trimmed values written just above.
                String postparsedName = parsedName;
                if (postparsedName != null && hasPostparsingRules) {
                    long start = System.currentTimeMillis();
                    for (PostparsingRules rules : collection) {
                        postparsedName = rules.apply(RuleTargets.PARSED_SCIENTIFIC_NAME, postparsedName);
                    }
                    postparsingMillis += System.currentTimeMillis() - start;
                }
                if (postparsedName != null) {
                    printQuoted(writer, postparsedName);
                }
                writer.print(DefaultTAFReferenceDataConverter.SEMICOLON_SEPARATOR_REGEXP);

                String postparsedAuthority = parsedAuthority;
                if (postparsedAuthority != null && hasPostparsingRules) {
                    long start = System.currentTimeMillis();
                    for (PostparsingRules rules : collection) {
                        postparsedAuthority = rules.apply(RuleTargets.PARSED_AUTHORITY, postparsedAuthority);
                    }
                    postparsingMillis += System.currentTimeMillis() - start;
                }
                if (postparsedAuthority != null) {
                    printQuoted(writer, postparsedAuthority);
                }
                writer.println();
                written++;
            }
            // PrintWriter never throws on write failures: checkError() flushes and reports them.
            if (writer.checkError()) {
                throw new IOException("Unable to write parsed data to " + file.getAbsolutePath());
            }
        } finally {
            // Always release the file handle, even when a rule or a getter throws mid-way.
            writer.close();
        }
        if (hasPostparsingRules) {
            LOG.info("Post-parsing took {} mSec. on {} entries", Long.valueOf(postparsingMillis), Integer.valueOf(written));
        }
        LOG.info("{} parsed entries have been written to {}", Integer.valueOf(written), file.getAbsolutePath());
    }

    /**
     * Prints {@code value} enclosed in double quotes, doubling any double quote it contains.
     *
     * @param writer the destination writer
     * @param value the value to print; must not be {@code null}
     */
    private static void printQuoted(PrintWriter writer, String value) {
        writer.print("\"");
        writer.print(value.replace("\"", "\"\""));
        writer.print("\"");
    }

    /**
     * Prints the usage message and exits when help is requested; otherwise runs the tool.
     *
     * @param commandLine the parsed command line
     * @throws Throwable if processing fails
     */
    private void route(CommandLine commandLine) throws Throwable {
        if (commandLine.hasOption(HELP)) {
            // The options are only needed (and therefore only built) to print the usage message.
            HelpFormatter helpFormatter = new HelpFormatter();
            helpFormatter.setWidth(128);
            helpFormatter.printHelp(" ", buildOptions());
            System.exit(0);
        }
        initializeAndLaunch(commandLine);
    }

    /**
     * Parses the raw program arguments against the options built by {@link #buildOptions()}.
     *
     * @param strArr the raw program arguments
     * @return the parsed command line
     * @throws IOException if the options cannot be built
     * @throws ParseException if the arguments do not match the options
     */
    private CommandLine buildCommandLine(String[] strArr) throws IOException, ParseException {
        return new PosixParser().parse(buildOptions(), strArr);
    }

    /**
     * Parses the program arguments and dispatches to the requested action.
     *
     * @param strArr the raw program arguments
     * @throws Throwable if argument parsing or processing fails
     */
    private void execute(String[] strArr) throws Throwable {
        route(buildCommandLine(strArr));
    }

    /**
     * Lists the names of the embedded rulesets known for the given ruleset package.
     *
     * @param str the ruleset package
     * @return the ruleset names, or an empty collection for an unknown package
     * @throws IOException declared for callers; not thrown by the current hard-coded listing
     */
    private Collection<String> listRulesets(String str) throws IOException {
        if (RulesetManager.DEFAULT_PREPARSING_RULESET_PACKAGE.equals(str)) {
            return Arrays.asList("commonPreparsingRules", "otherPreparsingRules", "bionymPreparsingRules");
        }
        if (RulesetManager.DEFAULT_POSTPARSING_RULESET_PACKAGE.equals(str)) {
            return Arrays.asList("bionymPostparsingRules");
        }
        return new ArrayList<String>();
    }

    /**
     * @return the names of the embedded pre-parsing rulesets
     * @throws IOException if the rulesets cannot be listed
     */
    private Collection<String> listPreprocessingRulesets() throws IOException {
        return listRulesets(RulesetManager.DEFAULT_PREPARSING_RULESET_PACKAGE);
    }

    /**
     * @return the names of the embedded post-parsing rulesets
     * @throws IOException if the rulesets cannot be listed
     */
    private Collection<String> listPostprocessingRulesets() throws IOException {
        return listRulesets(RulesetManager.DEFAULT_POSTPARSING_RULESET_PACKAGE);
    }

    /**
     * Program entry point. Exits with status 0 on success and -1 when a configuration,
     * command-line parsing or unexpected error occurs (the error is logged first).
     *
     * @param strArr the raw program arguments
     * @throws Throwable never in practice: every failure is caught, logged and turned into an exit status
     */
    public static final void main(String[] strArr) throws Throwable {
        try {
            new InputDataParser().execute(strArr);
            System.exit(0);
        } catch (IllegalArgumentException e) {
            LOG.error("{}: [ configuration error ] : {}", InputDataParser.class.getSimpleName(), e.getMessage());
        } catch (ParseException e2) {
            LOG.error("{}: [ parser error ] : {}", InputDataParser.class.getSimpleName(), e2.getMessage());
        } catch (Throwable th) {
            // Trailing throwable argument makes SLF4J log the stack trace of unexpected failures.
            LOG.error("{}: [ unexpected error ] : {}", InputDataParser.class.getSimpleName(), th.getMessage(), th);
        }
        System.exit(-1);
    }
}
