package eu.dnetlib.dhp.oa.graph.clean;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.common.SparkSessionSupport;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:eu/dnetlib/dhp/oa/graph/clean/GenerateBlacklistSparkJob.class */
/**
 * Spark job that writes the identifiers of the results hosted by ZENODO having at least one PID
 * whose value matches a DOI listed in the "zenodoWithdrawn" CSV file.
 *
 * <p>The job reads three arguments from the parser: {@code inputPath} (root folder holding one JSON
 * dump per entity type), {@code outputPath} (parquet destination, removed before writing) and
 * {@code zenodoWithdrawn} (CSV file with a header exposing the {@code doi} and {@code parent_doi}
 * columns).
 */
public class GenerateBlacklistSparkJob {
    private static final Logger log = LoggerFactory.getLogger(GenerateBlacklistSparkJob.class);

    /** Classpath location of the JSON document describing the arguments accepted by this job. */
    private static final String PARAMETERS_RESOURCE =
            "/eu/dnetlib/dhp/oa/graph/generate_blacklist_parameters.json";

    private final ArgumentApplicationParser parser;

    /**
     * Creates the job on top of an argument parser.
     *
     * @param argumentApplicationParser parser from which {@link #run(Boolean)} reads its arguments
     */
    public GenerateBlacklistSparkJob(ArgumentApplicationParser argumentApplicationParser) {
        this.parser = argumentApplicationParser;
    }

    /**
     * Command line entry point: loads the parameter definitions from the classpath, parses the
     * arguments and runs the job.
     *
     * @param strArr command line arguments
     * @throws Exception if the parameter definitions cannot be read, the arguments cannot be parsed
     *     or the job fails
     */
    public static void main(String[] strArr) throws Exception {
        final String parametersJson;
        // The stream is closed as soon as its content has been read, and decoded explicitly as
        // UTF-8 instead of relying on the platform default charset.
        try (InputStream parametersStream =
                GenerateBlacklistSparkJob.class.getResourceAsStream(PARAMETERS_RESOURCE)) {
            Objects.requireNonNull(
                    parametersStream, "missing classpath resource: " + PARAMETERS_RESOURCE);
            parametersJson = IOUtils.toString(parametersStream, StandardCharsets.UTF_8);
        }

        ArgumentApplicationParser argumentApplicationParser =
                new ArgumentApplicationParser(parametersJson);
        argumentApplicationParser.parseArgument(strArr);

        // When the argument is not provided the session is considered managed by this job.
        Boolean isSparkSessionManaged =
                Optional.ofNullable(argumentApplicationParser.get("isSparkSessionManaged"))
                        .map(Boolean::valueOf)
                        .orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        new GenerateBlacklistSparkJob(argumentApplicationParser).run(isSparkSessionManaged);
    }

    /**
     * Runs the job inside a Spark session obtained through {@link SparkSessionSupport}.
     *
     * <p>The output path is removed first; then, for every entry of {@link ModelSupport#oafTypes}
     * whose class is a {@link Result}, the matching identifiers are appended to the output path as
     * gzip-compressed parquet.
     *
     * @param bool the "isSparkSessionManaged" flag handed over to {@link SparkSessionSupport}
     * @throws ISLookUpException declared for callers; not raised directly by the code in this method
     * @throws ClassNotFoundException declared for callers; not raised directly by the code in this
     *     method
     */
    public void run(Boolean bool) throws ISLookUpException, ClassNotFoundException {
        final String inputPath = this.parser.get("inputPath");
        log.info("inputPath: {}", inputPath);
        final String outputPath = this.parser.get("outputPath");
        log.info("outputPath: {}", outputPath);
        final String zenodoWithdrawn = this.parser.get("zenodoWithdrawn");
        log.info("zenodoWithdrawn: {}", zenodoWithdrawn);

        SparkSessionSupport.runWithSparkSession(new SparkConf(), bool, spark -> {
            // Every entity type appends to the same location, hence it is cleaned up once upfront.
            HdfsSupport.remove(outputPath, spark.sparkContext().hadoopConfiguration());

            // One row per DOI: both the "doi" and the "parent_doi" columns end up in a single
            // "doi" column.
            final Dataset<?> withdrawnDois = spark.read()
                    .option("header", true)
                    .csv(zenodoWithdrawn)
                    .selectExpr("explode(array(doi,parent_doi)) as doi");

            for (Map.Entry<String, Class> oafType : ModelSupport.oafTypes.entrySet()) {
                // Only the Result subtypes are processed.
                if (!Result.class.isAssignableFrom(oafType.getValue())) {
                    continue;
                }

                spark.read()
                        .schema(Encoders.bean(Result.class).schema())
                        .json(inputPath + "/" + oafType.getKey())
                        .where("array_contains(instance.hostedby.value, 'ZENODO')")
                        .selectExpr("id", "explode(instance) as instance")
                        .selectExpr("id", "explode(instance.pid) as pid")
                        // left_semi: keeps only the rows whose PID value is among the listed DOIs,
                        // without adding columns from the DOI dataset.
                        .join(
                                withdrawnDois,
                                withdrawnDois.col("doi").equalTo(new Column("pid.value")),
                                "left_semi")
                        .select("id")
                        .distinct()
                        .write()
                        .mode("append")
                        .option("compression", "gzip")
                        .parquet(outputPath);
            }
        });
    }
}
