package eu.dnetlib.dhp.oa.provision;

import com.lucidworks.spark.util.SolrSupport;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.SparkSessionSupport;
import eu.dnetlib.dhp.oa.provision.model.SerializableSolrInputDocument;
import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient;
import eu.dnetlib.dhp.oa.provision.utils.StreamingInputDocumentFactory;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.dhp.utils.saxon.SaxonTransformerFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import java.io.InputStream;
import java.io.StringReader;
import java.io.StringWriter;
import java.lang.invoke.SerializedLambda;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Optional;
import java.util.TimeZone;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.io.Text;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:eu/dnetlib/dhp/oa/provision/XmlIndexingJob.class */
public class XmlIndexingJob {
    /** Class-wide SLF4J logger. */
    private static final Logger log = LoggerFactory.getLogger(XmlIndexingJob.class);

    /**
     * Batch size used by {@code main} when the {@code batchSize} argument is absent.
     * NOTE(review): the value is taken from {@code ProvisionConstants.MAX_AUTHOR_FULLNAME_LENGTH}, whose name
     * is unrelated to batching - possibly a decompiler constant-matching artifact; confirm the intended value.
     */
    private static final Integer DEFAULT_BATCH_SIZE = Integer.valueOf(ProvisionConstants.MAX_AUTHOR_FULLNAME_LENGTH);

    /**
     * {@link java.text.SimpleDateFormat} pattern used for the record datestamp.
     * The hour field is {@code HH} (hour of day, 00-23): the 12-hour {@code hh} field without an AM/PM marker
     * would render e.g. 03:00 and 15:00 identically, producing ambiguous timestamps.
     */
    protected static final String DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss'Z'";

    /** Path of the Hadoop sequence file read by {@code run}. */
    private final String inputPath;

    /** Metadata format name; passed to the lookup client and used to derive the collection name. */
    private final String format;

    /** Batch size handed to {@code SolrSupport.indexDocs}. */
    private final int batchSize;

    /** Selects the destination of the generated documents. */
    private final OutputFormat outputFormat;

    /** Parquet destination path, used only for {@link OutputFormat#HDFS}. */
    private final String outputPath;

    /** Spark session the job runs in. */
    private final SparkSession spark;

    /* loaded from: input_file:eu/dnetlib/dhp/oa/provision/XmlIndexingJob$OutputFormat.class */
    /**
     * Destinations supported by the indexing job for the documents it builds.
     */
    public enum OutputFormat {

        /** Feed the documents to Solr via {@code SolrSupport.indexDocs}. */
        SOLR,

        /** Store the documents as kryo-encoded parquet under the job's output path. */
        HDFS
    }

    /**
     * Command-line entry point: parses the job arguments, logs them, and runs the indexing job inside a
     * Spark session obtained through {@code SparkSessionSupport.runWithSparkSession}.
     *
     * <p>Defaults applied when an argument is absent: {@code isSparkSessionManaged} = true,
     * {@code outputPath} = null, {@code batchSize} = {@code DEFAULT_BATCH_SIZE},
     * {@code outputFormat} = {@link OutputFormat#SOLR}.
     *
     * @param strArr raw command-line arguments
     * @throws Exception if the parameter definition cannot be read, the arguments cannot be parsed, or the job fails
     */
    public static void main(String[] strArr) throws Exception {
        final String paramsResource = "/eu/dnetlib/dhp/oa/provision/input_params_update_index.json";
        final String paramsJson;
        // Read the parameter definition with an explicit charset and always close the stream;
        // fail with a descriptive message instead of a bare NPE when the resource is missing.
        try (InputStream paramsStream = XmlIndexingJob.class.getResourceAsStream(paramsResource)) {
            if (paramsStream == null) {
                throw new IllegalStateException("missing classpath resource: " + paramsResource);
            }
            paramsJson = IOUtils.toString(paramsStream, StandardCharsets.UTF_8);
        }

        final ArgumentApplicationParser parser = new ArgumentApplicationParser(paramsJson);
        parser.parseArgument(strArr);

        final Boolean isSparkSessionManaged = Optional.ofNullable(parser.get("isSparkSessionManaged"))
            .map(Boolean::valueOf)
            .orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        final String inputPath = parser.get("inputPath");
        log.info("inputPath: {}", inputPath);

        final String format = parser.get("format");
        log.info("format: {}", format);

        final String outputPath = Optional.ofNullable(parser.get("outputPath"))
            .map(StringUtils::trim)
            .orElse(null);
        log.info("outputPath: {}", outputPath);

        final Integer batchSize = Optional.ofNullable(parser.get("batchSize"))
            .map(Integer::valueOf)
            .orElse(DEFAULT_BATCH_SIZE);
        log.info("batchSize: {}", batchSize);

        final OutputFormat outputFormat = Optional.ofNullable(parser.get("outputFormat"))
            .map(OutputFormat::valueOf)
            .orElse(OutputFormat.SOLR);
        log.info("outputFormat: {}", outputFormat);

        final SparkConf sparkConf = new SparkConf();
        sparkConf.registerKryoClasses(new Class[]{SerializableSolrInputDocument.class});

        SparkSessionSupport.runWithSparkSession(sparkConf, isSparkSessionManaged, sparkSession -> {
            final String isLookupUrl = parser.get("isLookupUrl");
            log.info("isLookupUrl: {}", isLookupUrl);
            final ISLookupClient lookupClient = new ISLookupClient(ISLookupClientFactory.getLookUpService(isLookupUrl));
            new XmlIndexingJob(sparkSession, inputPath, format, batchSize, outputFormat, outputPath).run(lookupClient);
        });
    }

    /**
     * Builds a job instance from already-parsed settings.
     *
     * @param sparkSession Spark session used to read the input and write the output
     * @param str path of the input sequence file
     * @param str2 metadata format name
     * @param num batch size; must not be null because it is unboxed here
     * @param outputFormat destination of the generated documents
     * @param str3 output path used when writing parquet
     */
    public XmlIndexingJob(SparkSession sparkSession, String str, String str2, Integer num, OutputFormat outputFormat, String str3) {
        // Session and paths first, then the settings that drive the transformation.
        this.spark = sparkSession;
        this.inputPath = str;
        this.outputPath = str3;
        this.format = str2;
        this.outputFormat = outputFormat;
        // Auto-unboxing: a null batch size fails here with a NullPointerException.
        this.batchSize = num;
    }

    /**
     * Executes the job: resolves the layout, datasource id and ZooKeeper host through the lookup client,
     * derives the index-record stylesheet, turns every input record into a Solr input document and sends
     * the documents to the destination selected by the output format.
     *
     * @param iSLookupClient client used to fetch the layout source, layout transformer, dsId and zkHost
     * @throws ISLookUpException declared for the lookup calls
     * @throws TransformerException if the index-record stylesheet cannot be generated
     * @throws IllegalArgumentException if the output format is neither SOLR nor HDFS
     */
    public void run(ISLookupClient iSLookupClient) throws ISLookUpException, TransformerException {
        final String fields = iSLookupClient.getLayoutSource(this.format);
        log.info("fields: {}", fields);

        final String xslt = iSLookupClient.getLayoutTransformer();

        final String dsId = iSLookupClient.getDsId(this.format);
        log.info("dsId: {}", dsId);

        final String zkHost = iSLookupClient.getZkHost();
        log.info("zkHost: {}", zkHost);

        final String version = getRecordDatestamp();

        final String indexRecordXslt = getLayoutTransformer(this.format, fields, xslt);
        log.info("indexRecordTransformer {}", indexRecordXslt);

        // The lambdas capture only local strings (never `this`).
        // NOTE(review): a transformer is instantiated for every record; confirm whether that cost is acceptable.
        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(this.spark.sparkContext());
        final JavaRDD docs = sc
            .sequenceFile(this.inputPath, Text.class, Text.class)
            .map(pair -> ((Text) pair._2()).toString())
            .map(xml -> toIndexRecord(SaxonTransformerFactory.newInstance(indexRecordXslt), xml))
            .map(indexRecord -> new StreamingInputDocumentFactory(version, dsId).parseDocument(indexRecord));

        switch (this.outputFormat) {
            case SOLR: {
                final String collection = ProvisionConstants.getCollectionName(this.format);
                SolrSupport.indexDocs(zkHost, collection, this.batchSize, docs.rdd());
                break;
            }
            case HDFS: {
                this.spark
                    .createDataset(
                        docs.map(doc -> new SerializableSolrInputDocument(doc)).rdd(),
                        Encoders.kryo(SerializableSolrInputDocument.class))
                    .write()
                    .mode(SaveMode.Overwrite)
                    .parquet(this.outputPath);
                break;
            }
            default:
                throw new IllegalArgumentException("invalid outputFormat: " + this.outputFormat);
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Applies the given transformer to one XML record and returns the transformation output as a string.
     *
     * @param transformer transformer to apply
     * @param str the XML record to transform
     * @return the serialized result of the transformation
     * @throws IllegalArgumentException wrapping any failure raised while transforming; the offending
     *         record is logged first
     */
    public static String toIndexRecord(Transformer transformer, String str) {
        try {
            final StringWriter out = new StringWriter();
            transformer.transform(new StreamSource(new StringReader(str)), new StreamResult(out));
            return out.toString();
        } catch (Throwable failure) {
            log.error("XPathException on record: \n {}", str, failure);
            throw new IllegalArgumentException(failure);
        }
    }

    /**
     * Generates the stylesheet used to build index records by running the layout transformer over the
     * layout source, with the format name supplied as the {@code format} parameter.
     *
     * @param str metadata format name, bound to the transformer parameter {@code format}
     * @param str2 layout source (XML) used as the transformation input
     * @param str3 stylesheet handed to {@code SaxonTransformerFactory.newInstance}
     * @return the transformation output as a string
     * @throws TransformerException if the transformer cannot be created or the transformation fails
     */
    protected static String getLayoutTransformer(String str, String str2, String str3) throws TransformerException {
        final StringWriter generated = new StringWriter();
        final Transformer layoutToXslt = SaxonTransformerFactory.newInstance(str3);
        layoutToXslt.setParameter("format", str);
        layoutToXslt.transform(new StreamSource(new StringReader(str2)), new StreamResult(generated));
        return generated.toString();
    }

    /**
     * Returns the current instant formatted with {@code DATE_FORMAT}.
     *
     * <p>The pattern ends with a literal {@code Z}, the UTC designator, so the formatter is pinned to UTC;
     * formatting in the JVM default time zone would label local wall-clock time as UTC on any non-UTC host.
     *
     * @return the current UTC time rendered with {@code DATE_FORMAT}
     */
    public static String getRecordDatestamp() {
        // SimpleDateFormat is not thread-safe, so a fresh instance is created per call.
        final SimpleDateFormat formatter = new SimpleDateFormat(DATE_FORMAT);
        formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
        return formatter.format(new Date());
    }

    /**
     * Synthetic lambda-deserialization hook, reproduced here by the decompiler. It maps a
     * {@link SerializedLambda} back to one of the four lambdas used in {@code run}.
     *
     * <p>NOTE(review): this is decompiler output, not hand-written code. The selector {@code z} is declared
     * {@code boolean} but assigned {@code -1}, {@code 2} and {@code 3}, and the second switch repeats
     * {@code case true}; this is not valid Java as written (presumably an int selector with values 0..3).
     * Such a method is normally generated by the compiler and should not need to exist in source form.
     *
     * @param serializedLambda serialized form of the lambda to re-create
     * @return the re-created lambda
     * @throws IllegalArgumentException if the serialized form matches none of the known lambdas
     */
    private static /* synthetic */ Object $deserializeLambda$(SerializedLambda serializedLambda) {
        String implMethodName = serializedLambda.getImplMethodName();
        boolean z = -1;
        // Step 1: pick a selector from the implementation method name (hash switch + equals check).
        switch (implMethodName.hashCode()) {
            case -1578905030:
                if (implMethodName.equals("lambda$run$776d648c$1")) {
                    z = false;
                    break;
                }
                break;
            case -1060180155:
                if (implMethodName.equals("lambda$run$3091e2aa$1")) {
                    z = true;
                    break;
                }
                break;
            case 963386914:
                if (implMethodName.equals("lambda$run$3a771c32$1")) {
                    z = 3;
                    break;
                }
                break;
            case 1760942968:
                if (implMethodName.equals("lambda$run$636415b8$1")) {
                    z = 2;
                    break;
                }
                break;
        }
        // Step 2: verify the functional interface and implementation signature, then rebuild the lambda
        // from its captured arguments.
        switch (z) {
            case false:
                // Record string -> Solr input document; captures two strings passed to StreamingInputDocumentFactory.
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/dhp/oa/provision/XmlIndexingJob") && serializedLambda.getImplMethodSignature().equals("(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)Lorg/apache/solr/common/SolrInputDocument;")) {
                    String str = (String) serializedLambda.getCapturedArg(0);
                    String str2 = (String) serializedLambda.getCapturedArg(1);
                    return str22 -> {
                        return new StreamingInputDocumentFactory(str, str2).parseDocument(str22);
                    };
                }
                break;
            case true:
                // Tuple2 -> string value of its second element; captures nothing.
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/dhp/oa/provision/XmlIndexingJob") && serializedLambda.getImplMethodSignature().equals("(Lscala/Tuple2;)Ljava/lang/String;")) {
                    return tuple2 -> {
                        return ((Text) tuple2._2()).toString();
                    };
                }
                break;
            case true:
                // Solr input document -> SerializableSolrInputDocument wrapper; captures nothing.
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/dhp/oa/provision/XmlIndexingJob") && serializedLambda.getImplMethodSignature().equals("(Lorg/apache/solr/common/SolrInputDocument;)Leu/dnetlib/dhp/oa/provision/model/SerializableSolrInputDocument;")) {
                    return solrInputDocument -> {
                        return new SerializableSolrInputDocument(solrInputDocument);
                    };
                }
                break;
            case true:
                // Record string -> index record via toIndexRecord; captures the stylesheet string.
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/dhp/oa/provision/XmlIndexingJob") && serializedLambda.getImplMethodSignature().equals("(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;")) {
                    String str3 = (String) serializedLambda.getCapturedArg(0);
                    return str4 -> {
                        return toIndexRecord(SaxonTransformerFactory.newInstance(str3), str4);
                    };
                }
                break;
        }
        // No known lambda matched the serialized form.
        throw new IllegalArgumentException("Invalid lambda deserialization");
    }
}
