package eu.dnetlib.dhp.collection;

import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.SparkSessionSupport;
import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
import eu.dnetlib.dhp.model.mdstore.Provenance;
import eu.dnetlib.message.Message;
import eu.dnetlib.message.MessageManager;
import eu.dnetlib.message.MessageType;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.lang.invoke.SerializedLambda;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.concurrent.LinkedBlockingQueue;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.util.LongAccumulator;
import org.dom4j.Document;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.class */
/**
 * Spark job that turns a sequence file of raw XML records into a store of
 * {@link MetadataRecord}s.
 *
 * <p>The job reads {@code (IntWritable, Text)} pairs from the {@code input} path, parses every
 * {@code Text} value as XML, extracts the record identifier with the configured {@code xpath},
 * drops records that cannot be parsed, de-duplicates the result and writes it as parquet to the
 * {@code output} path. Unless the {@code isTest} argument is {@code true}, progress and a final
 * report are sent through a {@link MessageManager}.
 */
public class GenerateNativeStoreSparkJob {
    private static final Logger log = LoggerFactory.getLogger(GenerateNativeStoreSparkJob.class);

    /** Classpath location of the JSON document describing the accepted command line arguments. */
    private static final String PARAMETERS_RESOURCE = "/eu/dnetlib/dhp/collection/collection_input_parameters.json";

    // SAX parser features switched off so that record content cannot make the parser
    // fetch external entities or DTDs (XXE).
    private static final String FEATURE_EXTERNAL_GENERAL_ENTITIES = "http://xml.org/sax/features/external-general-entities";
    private static final String FEATURE_EXTERNAL_PARAMETER_ENTITIES = "http://xml.org/sax/features/external-parameter-entities";
    private static final String FEATURE_LOAD_EXTERNAL_DTD = "http://apache.org/xml/features/nonvalidating/load-external-dtd";

    // NOTE: there is intentionally no hand-written $deserializeLambda$ method in this class.
    // javac synthesizes it for the serializable lambdas used below; a source-level copy would
    // hard-code synthetic lambda names that change on every recompilation.

    /**
     * Parses one XML record and wraps it in a {@link MetadataRecord}.
     *
     * @param input the XML text of the record; it is also stored as the body of the result
     * @param xpath XPath expression selecting the node whose text is the record identifier
     * @param encoding value passed through to the {@link MetadataRecord} constructor
     * @param provenance provenance passed through to the {@link MetadataRecord} constructor
     * @param dateOfCollection collection timestamp passed through to the {@link MetadataRecord}
     *     constructor
     * @param totalItems counter incremented once per call; may be {@code null}
     * @param invalidRecords counter incremented when the record is discarded; may be {@code null}
     * @return the parsed record, or {@code null} when the input cannot be parsed, the XPath
     *     selects nothing, or the selected identifier is blank
     */
    public static MetadataRecord parseRecord(String input, String xpath, String encoding, Provenance provenance, Long dateOfCollection, LongAccumulator totalItems, LongAccumulator invalidRecords) {
        increment(totalItems);
        try {
            SAXReader reader = new SAXReader();
            reader.setFeature(FEATURE_EXTERNAL_GENERAL_ENTITIES, false);
            reader.setFeature(FEATURE_EXTERNAL_PARAMETER_ENTITIES, false);
            reader.setFeature(FEATURE_LOAD_EXTERNAL_DTD, false);

            Document document = reader.read(new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8)));
            Node identifierNode = document.selectSingleNode(xpath);
            String originalId = identifierNode == null ? null : identifierNode.getText();
            if (StringUtils.isBlank(originalId)) {
                increment(invalidRecords);
                return null;
            }
            return new MetadataRecord(originalId, encoding, provenance, input, dateOfCollection);
        } catch (Exception e) {
            // Best effort: a single broken record must not fail the whole job.
            increment(invalidRecords);
            log.warn("Unable to parse record, it will be skipped", e);
            return null;
        }
    }

    /**
     * Entry point of the job.
     *
     * @param args command line arguments, interpreted according to the JSON specification
     *     stored at {@link #PARAMETERS_RESOURCE}
     * @throws Exception if the arguments cannot be parsed or the Spark job fails
     */
    public static void main(String[] args) throws Exception {
        final ArgumentApplicationParser parser = new ArgumentApplicationParser(readParameterSpecification());
        parser.parseArgument(args);

        final Provenance provenance = new ObjectMapper().readValue(parser.get("provenance"), Provenance.class);
        final long dateOfCollection = Long.parseLong(parser.get("dateOfCollection"));
        final Boolean isSparkSessionManaged = Optional.ofNullable(parser.get("isSparkSessionManaged")).map(Boolean::valueOf).orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        final Map<String, String> ongoingMap = new HashMap<>();
        final Map<String, String> reportMap = new HashMap<>();
        // parseBoolean(null) is false, so a missing "isTest" argument means "not a test".
        final boolean isTest = Boolean.parseBoolean(parser.get("isTest"));

        SparkSessionSupport.runWithSparkSession(new SparkConf(), isSparkSessionManaged, spark -> {
            final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
            final JavaPairRDD<IntWritable, Text> inputRDD = sc.sequenceFile(parser.get("input"), IntWritable.class, Text.class);
            final LongAccumulator totalItems = sc.sc().longAccumulator("TotalItems");
            final LongAccumulator invalidRecords = sc.sc().longAccumulator("InvalidRecords");
            final MessageManager manager = new MessageManager(parser.get("rabbitHost"), parser.get("rabbitUser"), parser.get("rabbitPassword"), false, false, (LinkedBlockingQueue) null);
            try {
                // Resolved once here so the closure shipped with the tasks only carries two
                // strings instead of the whole argument parser.
                final String xpath = parser.get("xpath");
                final String encoding = parser.get("encoding");
                final JavaRDD<MetadataRecord> records = inputRDD
                    .map(item -> parseRecord(item._2().toString(), xpath, encoding, provenance, dateOfCollection, totalItems, invalidRecords))
                    .filter(Objects::nonNull)
                    .distinct();

                ongoingMap.put("ongoing", "0");
                if (!isTest) {
                    manager.sendMessage(new Message(parser.get("workflowId"), "DataFrameCreation", MessageType.ONGOING, ongoingMap), parser.get("rabbitOngoingQueue"), true, false);
                }

                final Dataset<MetadataRecord> mdstore = spark.createDataset(records.rdd(), Encoders.bean(MetadataRecord.class));
                final LongAccumulator mdStoreRecords = sc.sc().longAccumulator("MDStoreRecords");
                mdStoreRecords.add(mdstore.count());

                ongoingMap.put("ongoing", String.valueOf(totalItems.value()));
                if (!isTest) {
                    manager.sendMessage(new Message(parser.get("workflowId"), "DataFrameCreation", MessageType.ONGOING, ongoingMap), parser.get("rabbitOngoingQueue"), true, false);
                }

                mdstore.write().format("parquet").save(parser.get("output"));

                reportMap.put("inputItem", String.valueOf(totalItems.value()));
                reportMap.put("invalidRecords", String.valueOf(invalidRecords.value()));
                reportMap.put("mdStoreSize", String.valueOf(mdStoreRecords.value()));
                if (!isTest) {
                    manager.sendMessage(new Message(parser.get("workflowId"), "Collection", MessageType.REPORT, reportMap), parser.get("rabbitReportQueue"), true, false);
                }
            } finally {
                // Release the manager even when the job body fails. In test mode no message is
                // ever sent and, as before, the manager is left untouched.
                if (!isTest) {
                    manager.close();
                }
            }
        });
    }

    /** Adds one to {@code counter}; a {@code null} counter is ignored. */
    private static void increment(LongAccumulator counter) {
        if (counter != null) {
            counter.add(1L);
        }
    }

    /** Loads the JSON argument specification from the classpath, decoding it as UTF-8. */
    private static String readParameterSpecification() throws IOException {
        try (InputStream in = GenerateNativeStoreSparkJob.class.getResourceAsStream(PARAMETERS_RESOURCE)) {
            if (in == null) {
                throw new IllegalStateException("Missing classpath resource: " + PARAMETERS_RESOURCE);
            }
            return IOUtils.toString(in, StandardCharsets.UTF_8);
        }
    }
}
