/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.dhp.swh;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.SparkSessionSupport;
import eu.dnetlib.dhp.schema.oaf.Result;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Objects;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Spark job that selects the distinct, non-null {@code coderepositoryurl.value} entries of the
 * {@code software} table in a Hive database and writes them as CSV.
 *
 * <p>Rows flagged {@code datainfo.deletedbyinference} or {@code datainfo.invisible} are excluded
 * by the query. The output location is overwritten on every run.
 */
public class CollectSoftwareRepositoryURLs {
    private static final Logger log = LoggerFactory.getLogger(CollectSoftwareRepositoryURLs.class);

    /** Classpath location of the JSON document describing the accepted command-line arguments. */
    private static final String ARGUMENTS_RESOURCE =
        "/eu/dnetlib/dhp/swh/input_collect_software_repository_urls.json";

    /**
     * Hive query template; the single {@code %s} placeholder receives the database name. The
     * trailing space is intentional so that an optional {@code LIMIT} clause can be appended.
     */
    private static final String QUERY_TEMPLATE =
        "SELECT distinct coderepositoryurl.value FROM %s.software WHERE coderepositoryurl.value IS NOT NULL AND datainfo.deletedbyinference = FALSE AND datainfo.invisible = FALSE ";

    /**
     * Entry point: parses the job arguments, logs them and runs the extraction inside a
     * Hive-enabled Spark session.
     *
     * <p>Arguments read from the parser: {@code isSparkSessionManaged} (defaults to
     * {@code true} when absent), {@code hiveDbName}, {@code softwareCodeRepositoryURLs} (the
     * output path), {@code hiveMetastoreUris} and {@code softwareLimit}. When
     * {@code softwareLimit} is absent no {@code LIMIT} clause is added to the query.
     *
     * @param args command-line arguments handed to {@link ArgumentApplicationParser}
     * @throws NumberFormatException if {@code softwareLimit} is present but not an integer
     * @throws Exception if the argument specification cannot be read, argument parsing fails,
     *     or the Spark job fails
     */
    public static <I extends Result> void main(String[] args) throws Exception {
        final ArgumentApplicationParser parser = new ArgumentApplicationParser(readArgumentSpecification());
        parser.parseArgument(args);

        final Boolean isSparkSessionManaged = Optional
            .ofNullable(parser.get("isSparkSessionManaged"))
            .map(Boolean::valueOf)
            .orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        final String hiveDbName = parser.get("hiveDbName");
        log.info("hiveDbName: {}", hiveDbName);

        final String outputPath = parser.get("softwareCodeRepositoryURLs");
        log.info("softwareCodeRepositoryURLs: {}", outputPath);

        final String hiveMetastoreUris = parser.get("hiveMetastoreUris");
        log.info("hiveMetastoreUris: {}", hiveMetastoreUris);

        // Parse null-safely: an absent limit means "no LIMIT clause", which is the case the
        // null check in buildQuery exists for. A present but malformed value still fails fast.
        final Integer softwareLimit = Optional
            .ofNullable(parser.get("softwareLimit"))
            .map(Integer::valueOf)
            .orElse(null);
        log.info("softwareLimit: {}", softwareLimit);

        final SparkConf conf = new SparkConf();
        conf.set("hive.metastore.uris", hiveMetastoreUris);

        SparkSessionSupport.runWithSparkHiveSession(
            conf,
            isSparkSessionManaged,
            spark -> doRun(spark, hiveDbName, softwareLimit, outputPath));
    }

    /**
     * Loads the JSON argument specification from the classpath as UTF-8 text, closing the
     * underlying stream afterwards.
     *
     * @return the content of the argument specification resource
     * @throws NullPointerException if the resource is not on the classpath
     * @throws java.io.IOException if reading the resource fails
     */
    private static String readArgumentSpecification() throws Exception {
        try (InputStream in = CollectSoftwareRepositoryURLs.class.getResourceAsStream(ARGUMENTS_RESOURCE)) {
            // getResourceAsStream returns null for a missing resource; fail with a clear message.
            Objects.requireNonNull(in, "Classpath resource not found: " + ARGUMENTS_RESOURCE);
            return IOUtils.toString(in, StandardCharsets.UTF_8);
        }
    }

    /**
     * Builds the Hive query for the given database, optionally capped by a row limit.
     *
     * @param hiveDbName name of the Hive database containing the {@code software} table
     * @param limit maximum number of rows to select, or {@code null} for no limit
     * @return the query text
     */
    private static String buildQuery(String hiveDbName, Integer limit) {
        final String query = String.format(QUERY_TEMPLATE, hiveDbName);
        return limit == null ? query : query + "LIMIT " + limit;
    }

    /**
     * Runs the query and writes the result as CSV to {@code outputPath}, replacing any
     * existing output.
     *
     * @param spark session used to execute the query
     * @param hiveDbName name of the Hive database containing the {@code software} table
     * @param limit maximum number of rows to select, or {@code null} for no limit
     * @param outputPath destination of the CSV output
     */
    private static void doRun(SparkSession spark, String hiveDbName, Integer limit, String outputPath) {
        final String query = buildQuery(hiveDbName, limit);
        log.info("Hive query to fetch software code URLs: {}", query);
        spark.sql(query).write().mode(SaveMode.Overwrite).csv(outputPath);
    }
}

