package eu.dnetlib.dhp.oa.graph.clean;

import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.common.SparkSessionSupport;
import eu.dnetlib.dhp.oa.graph.raw.common.OafMapperUtils;
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import java.io.InputStream;
import java.lang.invoke.SerializedLambda;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:eu/dnetlib/dhp/oa/graph/clean/CleanGraphSparkJob.class */
/**
 * Spark job that cleans one table of the graph.
 *
 * <p>The job reads a text file holding one JSON-serialized record per line, deserializes every
 * line into the class named by the {@code graphTableClassName} argument, applies the
 * vocabulary-driven cleaning rules ({@link OafCleaner}) followed by {@link #fixDefaults(Oaf)},
 * and writes the result as gzip-compressed JSON to {@code outputPath}, replacing any previous
 * content of that path.
 */
public class CleanGraphSparkJob {
    private static final Logger log = LoggerFactory.getLogger(CleanGraphSparkJob.class);
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    /** Classpath location of the JSON document describing the accepted command-line arguments. */
    private static final String PARAMETERS_RESOURCE = "/eu/dnetlib/dhp/oa/graph/input_clean_graph_parameters.json";

    /** Scheme used for every access-right qualifier this job fills in. */
    private static final String ACCESS_MODES_SCHEME = "dnet:access_modes";

    /**
     * Entry point of the job.
     *
     * <p>Arguments read: {@code isSparkSessionManaged} (optional, defaults to {@code true}),
     * {@code inputPath}, {@code outputPath}, {@code isLookupUrl} and {@code graphTableClassName}.
     *
     * @param strArr the raw command-line arguments
     * @throws Exception if the arguments cannot be parsed, the table class cannot be loaded or is
     *     not an {@link Oaf} subtype, or the Spark job fails
     */
    public static void main(String[] strArr) throws Exception {
        final String jsonConfiguration;
        // Close the resource stream deterministically and decode it with an explicit charset
        // instead of relying on the platform default.
        try (InputStream parameters = CleanGraphSparkJob.class.getResourceAsStream(PARAMETERS_RESOURCE)) {
            jsonConfiguration = IOUtils.toString(
                    Objects.requireNonNull(parameters, "Missing classpath resource: " + PARAMETERS_RESOURCE),
                    StandardCharsets.UTF_8);
        }

        final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
        parser.parseArgument(strArr);

        final Boolean isSparkSessionManaged = Optional.ofNullable(parser.get("isSparkSessionManaged"))
                .map(Boolean::valueOf)
                .orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        final String inputPath = parser.get("inputPath");
        log.info("inputPath: {}", inputPath);

        final String outputPath = parser.get("outputPath");
        log.info("outputPath: {}", outputPath);

        final String isLookupUrl = parser.get("isLookupUrl");
        log.info("isLookupUrl: {}", isLookupUrl);

        final String graphTableClassName = parser.get("graphTableClassName");
        log.info("graphTableClassName: {}", graphTableClassName);

        // asSubclass performs a checked conversion: a class name that does not denote an Oaf
        // subtype is rejected here rather than failing later inside a Spark task.
        final Class<? extends Oaf> tableClass = Class.forName(graphTableClassName).asSubclass(Oaf.class);

        final VocabularyGroup vocabularies =
                VocabularyGroup.loadVocsFromIS(ISLookupClientFactory.getLookUpService(isLookupUrl));

        SparkSessionSupport.runWithSparkSession(new SparkConf(), isSparkSessionManaged, spark -> {
            removeOutputDir(spark, outputPath);
            fixGraphTable(spark, vocabularies, inputPath, tableClass, outputPath);
        });
    }

    /**
     * Reads the table, cleans every record and stores the outcome.
     *
     * @param spark the active Spark session
     * @param vocabularies the vocabularies the cleaning rules are built from
     * @param inputPath location of the table to clean
     * @param tableClass type of the records stored in the table
     * @param outputPath location the cleaned table is written to
     * @param <T> record type
     */
    private static <T extends Oaf> void fixGraphTable(
            SparkSession spark,
            VocabularyGroup vocabularies,
            String inputPath,
            Class<T> tableClass,
            String outputPath) {
        final CleaningRuleMap rules = CleaningRuleMap.create(vocabularies);

        // The explicit MapFunction casts select the Java overload of Dataset.map; a bare lambda
        // is ambiguous with the Scala Function1 overload.
        readTableFromPath(spark, inputPath, tableClass)
                .map((MapFunction<T, T>) record -> OafCleaner.apply(record, rules), Encoders.bean(tableClass))
                .map((MapFunction<T, T>) record -> fixDefaults(record), Encoders.bean(tableClass))
                .write()
                .mode(SaveMode.Overwrite)
                .option("compression", "gzip")
                .json(outputPath);
    }

    /**
     * Fills in default values for mandatory qualifiers that are missing or blank.
     *
     * <ul>
     *   <li>{@link Datasource}, {@link Project} and {@link Relation} records are returned as-is.
     *   <li>{@link Organization}: a missing country becomes {@code UNKNOWN}.
     *   <li>{@link Result}: missing language, resource type, best access right and per-instance
     *       access right are defaulted; subjects that are null, have a blank value, or lack a
     *       qualifier with a non-blank class id are dropped.
     * </ul>
     *
     * <p>The record is modified in place.
     *
     * @param t the record to fix; may be {@code null}, in which case {@code null} is returned
     * @param <T> record type
     * @return the same instance that was passed in
     */
    public static <T extends Oaf> T fixDefaults(T t) {
        if (t instanceof Datasource || t instanceof Project) {
            return t;
        }
        if (t instanceof Organization) {
            fixOrganizationDefaults((Organization) t);
            return t;
        }
        if (t instanceof Relation) {
            return t;
        }
        if (t instanceof Result) {
            fixResultDefaults((Result) t);
        }
        return t;
    }

    /** Sets the country of the organization to {@code UNKNOWN} when it is absent or blank. */
    private static void fixOrganizationDefaults(Organization organization) {
        if (isMissing(organization.getCountry())) {
            organization.setCountry(qualifier("UNKNOWN", "Unknown", "dnet:countries"));
        }
    }

    /** Applies the defaults shared by every {@link Result} subtype. */
    private static void fixResultDefaults(Result result) {
        if (isMissing(result.getLanguage())) {
            result.setLanguage(qualifier("und", "Undetermined", "dnet:languages"));
        }
        if (Objects.nonNull(result.getSubject())) {
            result.setSubject(result.getSubject().stream()
                    .filter(Objects::nonNull)
                    .filter(subject -> StringUtils.isNotBlank(subject.getValue()))
                    .filter(subject -> Objects.nonNull(subject.getQualifier()))
                    .filter(subject -> StringUtils.isNotBlank(subject.getQualifier().getClassid()))
                    .collect(Collectors.toList()));
        }
        if (isMissing(result.getResourcetype())) {
            result.setResourcetype(qualifier("UNKNOWN", "Unknown", "dnet:dataCite_resource"));
        }
        if (isMissing(result.getBestaccessright())) {
            result.setBestaccessright(qualifier("UNKNOWN", "not available", ACCESS_MODES_SCHEME));
        }
        if (Objects.nonNull(result.getInstance())) {
            for (Instance instance : result.getInstance()) {
                // Tolerate null entries, as the subject handling above already does.
                if (Objects.isNull(instance)) {
                    continue;
                }
                if (isMissing(instance.getAccessright())) {
                    instance.setAccessright(qualifier("UNKNOWN", "not available", ACCESS_MODES_SCHEME));
                }
            }
        }
    }

    /** Returns {@code true} when the qualifier is {@code null} or its class id is blank. */
    private static boolean isMissing(Qualifier candidate) {
        return Objects.isNull(candidate) || StringUtils.isBlank(candidate.getClassid());
    }

    /**
     * Builds a qualifier whose scheme id and scheme name are both {@code scheme}.
     *
     * @param classid the class id
     * @param classname the class name
     * @param scheme the value used for both scheme id and scheme name
     * @return the new qualifier
     */
    private static Qualifier qualifier(String classid, String classname, String scheme) {
        return OafMapperUtils.qualifier(classid, classname, scheme, scheme);
    }

    /**
     * Loads a table stored as text, one JSON document per line.
     *
     * @param spark the active Spark session
     * @param inputPath location of the table
     * @param tableClass type every line is deserialized into
     * @param <T> record type
     * @return the typed dataset
     */
    private static <T extends Oaf> org.apache.spark.sql.Dataset<T> readTableFromPath(
            SparkSession spark, String inputPath, Class<T> tableClass) {
        log.info("Reading Graph table from: {}", inputPath);
        return spark.read()
                .textFile(inputPath)
                .map((MapFunction<String, T>) line -> OBJECT_MAPPER.readValue(line, tableClass),
                        Encoders.bean(tableClass));
    }

    /** Deletes {@code outputPath} using the Hadoop configuration of the given session. */
    private static void removeOutputDir(SparkSession spark, String outputPath) {
        HdfsSupport.remove(outputPath, spark.sparkContext().hadoopConfiguration());
    }
}
