package eu.dnetlib.dhp.oa.provision;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.common.SparkSessionSupport;
import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
import eu.dnetlib.dhp.oa.provision.model.ProvisionModelSupport;
import eu.dnetlib.dhp.oa.provision.utils.ContextMapper;
import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory;
import java.io.InputStream;
import java.lang.invoke.SerializedLambda;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.util.LongAccumulator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;
import scala.collection.Iterator;
import scala.collection.JavaConverters;
import scala.collection.Seq;

/* loaded from: input_file:eu/dnetlib/dhp/oa/provision/XmlConverterJob.class */
public class XmlConverterJob {
    // Class-wide SLF4J logger; used by main and convertToXml to trace the job parameters and input paths.
    private static final Logger log = LoggerFactory.getLogger(XmlConverterJob.class);
    // Shared Jackson mapper. NOTE(review): not referenced by any code visible in this class — confirm it is still needed.
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
    // Schema location string handed to the XmlRecordFactory constructor in convertToXml.
    public static final String schemaLocation = "https://www.openaire.eu/schema/1.0/oaf-1.0.xsd";

    /**
     * Job entry point.
     *
     * <p>Loads the argument definition from the classpath resource
     * {@code /eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json}, parses the command line
     * against it, logs the resolved parameters, configures Kryo serialization and then, inside the
     * Spark session provided by {@link SparkSessionSupport#runWithSparkSession}, removes the output
     * directory and runs the XML conversion.
     *
     * <p>The definition resource is read inside a try-with-resources block (the stream was previously
     * never closed) and decoded explicitly as UTF-8 instead of with the platform default charset.
     *
     * @param strArr command line arguments; the keys read are {@code isSparkSessionManaged}
     *               (optional, defaults to {@code true}), {@code inputPath}, {@code outputPath},
     *               {@code isLookupUrl} and {@code otherDsTypeId}
     * @throws IllegalStateException if the argument definition resource is not on the classpath
     * @throws Exception             on any failure while parsing arguments or running the job
     */
    public static void main(String[] strArr) throws Exception {
        final String paramsResource = "/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json";

        final String jsonConfiguration;
        try (InputStream paramsStream = XmlConverterJob.class.getResourceAsStream(paramsResource)) {
            if (paramsStream == null) {
                // Fail with an explicit message rather than an opaque NPE from deep inside IOUtils.
                throw new IllegalStateException("Missing classpath resource: " + paramsResource);
            }
            jsonConfiguration = IOUtils.toString(paramsStream, StandardCharsets.UTF_8);
        }

        final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
        parser.parseArgument(strArr);

        // An absent flag means the job owns (creates and stops) its Spark session.
        final Boolean isSparkSessionManaged = Optional
                .ofNullable(parser.get("isSparkSessionManaged"))
                .map(Boolean::valueOf)
                .orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        final String inputPath = parser.get("inputPath");
        log.info("inputPath: {}", inputPath);

        final String outputPath = parser.get("outputPath");
        log.info("outputPath: {}", outputPath);

        final String isLookupUrl = parser.get("isLookupUrl");
        log.info("isLookupUrl: {}", isLookupUrl);

        final String otherDsTypeId = parser.get("otherDsTypeId");
        log.info("otherDsTypeId: {}", otherDsTypeId);

        final SparkConf conf = new SparkConf();
        conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
        conf.registerKryoClasses(ProvisionModelSupport.getModelClasses());

        SparkSessionSupport.runWithSparkSession(conf, isSparkSessionManaged, spark -> {
            // Clear any previous output before writing the new one.
            removeOutputDir(spark, outputPath);
            convertToXml(spark, inputPath, outputPath, ContextMapper.fromIS(isLookupUrl), otherDsTypeId);
        });
    }

    /**
     * Reads the {@link JoinedEntity} records found under the input path, renders each one through an
     * {@link XmlRecordFactory} and stores the resulting (entity id, record) pairs as a gzip-compressed
     * Hadoop sequence file of {@link Text} keys and values.
     *
     * @param sparkSession  active Spark session
     * @param str           input path whose files are listed via {@link HdfsSupport#listFiles}
     * @param str2          output path handed to {@code saveAsHadoopFile}
     * @param contextMapper context mapper forwarded to the record factory
     * @param str3          last constructor argument of the record factory (main passes the
     *                      {@code otherDsTypeId} job parameter here)
     */
    private static void convertToXml(SparkSession sparkSession, String str, String str2, ContextMapper contextMapper, String str3) {
        final SparkContext sc = sparkSession.sparkContext();

        final Map<String, LongAccumulator> accumulators = prepareAccumulators(sc);
        final XmlRecordFactory recordFactory =
                new XmlRecordFactory(accumulators, contextMapper, false, schemaLocation, str3);

        final List<String> paths = HdfsSupport.listFiles(str, sc.hadoopConfiguration());
        log.info("Found paths: {}", String.join(",", paths));
        final Seq<String> inputPaths = toSeq(paths);

        sparkSession
                .read()
                .load(inputPaths)
                .as(Encoders.kryo(JoinedEntity.class))
                .map(
                        entity -> new Tuple2<>(entity.getEntity().getId(), recordFactory.build(entity)),
                        Encoders.tuple(Encoders.STRING(), Encoders.STRING()))
                .javaRDD()
                // Wrap id and record into Hadoop Text so they can be written as a sequence file.
                .mapToPair(pair -> new Tuple2<>(new Text((String) pair._1()), new Text((String) pair._2())))
                .saveAsHadoopFile(str2, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
    }

    /**
     * Removes the given path through {@link HdfsSupport#remove}, using the Hadoop configuration of
     * the session's Spark context.
     *
     * @param sparkSession active Spark session
     * @param str          path to remove
     */
    private static void removeOutputDir(SparkSession sparkSession, String str) {
        final SparkContext sc = sparkSession.sparkContext();
        HdfsSupport.remove(str, sc.hadoopConfiguration());
    }

    /**
     * Converts a Java list into a Scala {@link Seq} by wrapping the list's iterator as a Scala
     * iterator and materialising it with {@code toSeq()}.
     *
     * @param list the Java list to convert
     * @return a Scala sequence with the list's elements in iteration order
     */
    private static Seq<String> toSeq(List<String> list) {
        final Iterator<String> scalaIterator =
                JavaConverters.asScalaIteratorConverter(list.iterator()).asScala();
        return scalaIterator.toSeq();
    }

    /**
     * Registers one named {@link LongAccumulator} per relation label on the given Spark context and
     * returns them keyed by that label.
     *
     * <p>Map key and accumulator name are always the same string. The previous hand-written list
     * bound the key {@code organizationOrganization_dedup_merges} to an accumulator named
     * {@code resultProject_outcome_produces}, so two accumulators shared one name; driving both
     * from a single name list removes that mismatch.
     *
     * @param sparkContext context on which the accumulators are created
     * @return a mutable map from relation label to its accumulator
     */
    private static Map<String, LongAccumulator> prepareAccumulators(SparkContext sparkContext) {
        final String[] accumulatorNames = {
            "resultResult_similarity_isAmongTopNSimilarDocuments",
            "resultResult_similarity_hasAmongTopNSimilarDocuments",
            "resultResult_supplement_isSupplementTo",
            "resultResult_supplement_isSupplementedBy",
            "resultResult_dedup_isMergedIn",
            "resultResult_dedup_merges",
            "resultResult_publicationDataset_isRelatedTo",
            "resultResult_relationship_isRelatedTo",
            "resultProject_outcome_isProducedBy",
            "resultProject_outcome_produces",
            "resultOrganization_affiliation_isAuthorInstitutionOf",
            "resultOrganization_affiliation_hasAuthorInstitution",
            "projectOrganization_participation_hasParticipant",
            "projectOrganization_participation_isParticipant",
            "organizationOrganization_dedup_isMergedIn",
            "organizationOrganization_dedup_merges",
            "datasourceOrganization_provision_isProvidedBy",
            "datasourceOrganization_provision_provides"
        };

        final HashMap<String, LongAccumulator> accumulators = new HashMap<>();
        for (String name : accumulatorNames) {
            // Same string for the key and the accumulator name, by construction.
            accumulators.put(name, sparkContext.longAccumulator(name));
        }
        return accumulators;
    }

    /**
     * Lambda deserialization hook: inspects the given {@link SerializedLambda} and, when it matches
     * one of the two lambdas of {@code convertToXml}, returns a fresh instance of that lambda.
     *
     * NOTE(review): this looks like decompiler output of a compiler-synthesized method (see the
     * "synthetic" marker); {@code boolean z = -1} and a switch over a boolean are not valid Java
     * source. Presumably it should not be kept in hand-maintained source — confirm before relying on it.
     *
     * @param serializedLambda serialized form of the lambda to re-create
     * @return the re-created lambda
     * @throws IllegalArgumentException if the serialized form matches neither known lambda
     */
    private static /* synthetic */ Object $deserializeLambda$(SerializedLambda serializedLambda) {
        String implMethodName = serializedLambda.getImplMethodName();
        // Selector for the second switch; stays at its initial value when no method name matches.
        boolean z = -1;
        // First stage: dispatch on the hash of the implementation method name, then confirm with equals().
        switch (implMethodName.hashCode()) {
            case -1201727598:
                if (implMethodName.equals("lambda$convertToXml$d6e0c492$1")) {
                    z = false;
                    break;
                }
                break;
            case -437354333:
                if (implMethodName.equals("lambda$convertToXml$d56f0d36$1")) {
                    z = true;
                    break;
                }
                break;
        }
        // Second stage: verify the full lambda signature before re-creating it.
        switch (z) {
            case false:
                // MapFunction lambda: JoinedEntity -> Tuple2(entity id, built record); captures the XmlRecordFactory.
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/MapFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/dhp/oa/provision/XmlConverterJob") && serializedLambda.getImplMethodSignature().equals("(Leu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory;Leu/dnetlib/dhp/oa/provision/model/JoinedEntity;)Lscala/Tuple2;")) {
                    XmlRecordFactory xmlRecordFactory = (XmlRecordFactory) serializedLambda.getCapturedArg(0);
                    return joinedEntity -> {
                        return new Tuple2(joinedEntity.getEntity().getId(), xmlRecordFactory.build(joinedEntity));
                    };
                }
                break;
            case true:
                // PairFunction lambda: Tuple2 of strings -> Tuple2 of Hadoop Text; captures nothing.
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/PairFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Lscala/Tuple2;") && serializedLambda.getImplClass().equals("eu/dnetlib/dhp/oa/provision/XmlConverterJob") && serializedLambda.getImplMethodSignature().equals("(Lscala/Tuple2;)Lscala/Tuple2;")) {
                    return tuple2 -> {
                        return new Tuple2(new Text((String) tuple2._1()), new Text((String) tuple2._2()));
                    };
                }
                break;
        }
        // Neither known lambda matched the serialized form.
        throw new IllegalArgumentException("Invalid lambda deserialization");
    }
}
