package eu.dnetlib.dhp.collection.plugin.base;

import eu.dnetlib.dhp.actionmanager.Constants;
import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.collection.plugin.file.AbstractSplittedRecordPlugin;
import eu.dnetlib.dhp.common.DbClient;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.common.collection.CollectorException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.sql.SQLException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Optional;
import java.util.Set;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.Node;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:eu/dnetlib/dhp/collection/plugin/base/BaseCollectorPlugin.class */
public class BaseCollectorPlugin implements CollectorPlugin {
    private final FileSystem fs;
    private static final Logger log = LoggerFactory.getLogger(AbstractSplittedRecordPlugin.class);

    public BaseCollectorPlugin(FileSystem fileSystem) {
        this.fs = fileSystem;
    }

    @Override // eu.dnetlib.dhp.collection.plugin.CollectorPlugin
    public Stream<String> collect(ApiDescriptor apiDescriptor, AggregatorReport aggregatorReport) throws CollectorException {
        Path path = (Path) Optional.ofNullable(apiDescriptor.getBaseUrl()).map(Path::new).orElseThrow(() -> {
            return new CollectorException("missing baseUrl");
        });
        String str = (String) apiDescriptor.getParams().get("dbUrl");
        String str2 = (String) apiDescriptor.getParams().get("dbUser");
        String str3 = (String) apiDescriptor.getParams().get("dbPassword");
        String str4 = (String) apiDescriptor.getParams().get("acceptedNormTypes");
        log.info("baseUrl: {}", path);
        log.info("dbUrl: {}", str);
        log.info("dbUser: {}", str2);
        log.info("dbPassword: {}", "***");
        log.info("acceptedNormTypes: {}", str4);
        try {
            if (!this.fs.exists(path)) {
                throw new CollectorException("path does not exist: " + path);
            }
            Set<String> findAcceptedOpendoarIds = findAcceptedOpendoarIds(str, str2, str3);
            HashSet hashSet = new HashSet();
            if (StringUtils.isNotBlank(str4)) {
                for (String str5 : StringUtils.split(str4, Constants.DEFAULT_DELIMITER)) {
                    if (StringUtils.isNotBlank(str5)) {
                        hashSet.add(str5.trim());
                    }
                }
            }
            return StreamSupport.stream(Spliterators.spliteratorUnknownSize(new BaseCollectorIterator(this.fs, path, aggregatorReport), 16), false).filter(str6 -> {
                return filterXml(str6, findAcceptedOpendoarIds, hashSet);
            });
        } catch (Throwable th) {
            throw new CollectorException(th);
        }
    }

    private Set<String> findAcceptedOpendoarIds(String str, String str2, String str3) throws CollectorException {
        HashSet hashSet = new HashSet();
        try {
            DbClient dbClient = new DbClient(str, str2, str3);
            try {
                dbClient.processResults(IOUtils.toString(getClass().getResourceAsStream("/eu/dnetlib/dhp/collection/plugin/base/sql/opendoar-accepted.sql")), resultSet -> {
                    try {
                        String string = resultSet.getString("id");
                        log.info("Accepted Datasource: " + string);
                        hashSet.add(string);
                    } catch (SQLException e) {
                        log.error("Error in SQL", e);
                        throw new RuntimeException("Error in SQL", e);
                    }
                });
                dbClient.close();
                log.info("Accepted Datasources (TOTAL): " + hashSet.size());
                return hashSet;
            } finally {
            }
        } catch (IOException e) {
            log.error("Error accessong SQL", e);
            throw new CollectorException("Error accessong SQL", e);
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    public static boolean filterXml(String str, Set<String> set, Set<String> set2) {
        try {
            Document parseText = DocumentHelper.parseText(str);
            String trim = parseText.valueOf("//*[local-name()='collection']/@opendoar_id").trim();
            if (StringUtils.isBlank(trim) || !set.contains("opendoar____::" + trim)) {
                return false;
            }
            if (set2.isEmpty()) {
                return true;
            }
            Iterator it = parseText.selectNodes("//*[local-name()='typenorm']").iterator();
            while (it.hasNext()) {
                if (set2.contains(((Node) it.next()).getText().trim())) {
                    return true;
                }
            }
            return false;
        } catch (DocumentException e) {
            log.error("Error parsing document", e);
            throw new RuntimeException("Error parsing document", e);
        }
    }
}
