package eu.dnetlib.dhp.datacite;

import eu.dnetlib.dhp.actionmanager.Constants;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.spark.SparkContext;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.SparkSession$;
import org.apache.spark.sql.SparkSession$implicits$;
import org.apache.spark.sql.expressions.Aggregator;
import org.apache.spark.sql.functions$;
import org.json4s.DefaultFormats$;
import org.json4s.JsonAST;
import org.json4s.jackson.JsonMethods$;
import org.json4s.package$;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Predef$;
import scala.StringContext;
import scala.Tuple2;
import scala.collection.immutable.StringOps;
import scala.io.Codec$;
import scala.io.Source$;
import scala.reflect.ClassTag$;
import scala.reflect.ManifestFactory$;
import scala.reflect.api.Mirror;
import scala.reflect.api.TypeCreator;
import scala.reflect.api.TypeTags;
import scala.reflect.api.Types;
import scala.reflect.api.Universe;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.ObjectRef;
import scala.runtime.VolatileByteRef;

/* compiled from: ImportDatacite.scala */
/* loaded from: input_file:eu/dnetlib/dhp/datacite/ImportDatacite$.class */
public final class ImportDatacite$ {
    public static final ImportDatacite$ MODULE$ = null;
    private final Logger log;

    // Instantiates the singleton when the class is loaded; the private constructor
    // stores the new instance in MODULE$.
    static {
        new ImportDatacite$();
    }

    /**
     * Returns the logger created for this object in the constructor.
     *
     * @return the SLF4J logger held by this singleton
     */
    public Logger log() {
        return log;
    }

    /**
     * Builds a {@link DataciteType} from one raw JSON record.
     *
     * <p>Three values are read from the record's {@code attributes} object: the value stored
     * under {@code Constants.DOI} (lower-cased), {@code updated} (parsed with
     * {@link DateTimeFormatter#ISO_DATE_TIME}, interpreted at UTC and converted to epoch
     * seconds) and {@code isActive}. The unmodified input string is passed as the last
     * constructor argument.
     *
     * @param str the JSON text of a single record
     * @return the item built from the extracted fields and the original JSON text
     */
    public DataciteType convertAPIStringToDataciteItem(String str) {
        // Holders and flag byte backing the two lazily initialised values
        // (json4s formats and the parsed JSON tree).
        ObjectRef formatsHolder = ObjectRef.zero();
        ObjectRef jsonHolder = ObjectRef.zero();
        VolatileByteRef initFlags = VolatileByteRef.create((byte) 0);

        JsonAST.JValue attributes =
                package$.MODULE$.jvalue2monadic(json$1(str, jsonHolder, initFlags)).$bslash("attributes");

        // Identifier, normalised to lower case.
        JsonAST.JValue doiNode = package$.MODULE$.jvalue2monadic(attributes).$bslash(Constants.DOI);
        String doi = ((String) package$.MODULE$.jvalue2extractable(doiNode)
                .extract(formats$1(formatsHolder, initFlags), ManifestFactory$.MODULE$.classType(String.class)))
                .toLowerCase();

        // Last-update instant, expressed in seconds since the epoch.
        JsonAST.JValue updatedNode = package$.MODULE$.jvalue2monadic(attributes).$bslash("updated");
        String updated = (String) package$.MODULE$.jvalue2extractable(updatedNode)
                .extract(formats$1(formatsHolder, initFlags), ManifestFactory$.MODULE$.classType(String.class));
        long epochSeconds = LocalDateTime.parse(updated, DateTimeFormatter.ISO_DATE_TIME)
                .toInstant(ZoneOffset.UTC)
                .toEpochMilli() / 1000;

        JsonAST.JValue activeNode = package$.MODULE$.jvalue2monadic(attributes).$bslash("isActive");
        boolean active = BoxesRunTime.unboxToBoolean(package$.MODULE$.jvalue2extractable(activeNode)
                .extract(formats$1(formatsHolder, initFlags), ManifestFactory$.MODULE$.Boolean()));

        return new DataciteType(doi, epochSeconds, active, str);
    }

    /**
     * Job entry point: optionally downloads new records into a sequence file, then merges
     * them with the existing dump and replaces the dump with the merged result.
     *
     * <p>Arguments are described by the classpath resource
     * {@code /eu/dnetlib/dhp/datacite/import_from_api.json}; the ones read here are
     * {@code master}, {@code namenode}, {@code targetPath}, {@code dataciteDumpPath},
     * {@code blocksize} (defaults to 100 when absent) and {@code skipImport}.
     *
     * @param strArr command-line arguments handed to the argument parser
     */
    public void main(String[] strArr) {
        // Load the argument specification from the classpath and parse the command line.
        ArgumentApplicationParser argumentApplicationParser = new ArgumentApplicationParser(Source$.MODULE$.fromInputStream(getClass().getResourceAsStream("/eu/dnetlib/dhp/datacite/import_from_api.json"), Codec$.MODULE$.fallbackSystemCodec()).mkString());
        argumentApplicationParser.parseArgument(strArr);
        String str = argumentApplicationParser.get("master");
        String str2 = argumentApplicationParser.get("namenode");
        log().info(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"namenode is ", ""})).s(Predef$.MODULE$.genericWrapArray(new Object[]{str2})));
        String str3 = argumentApplicationParser.get("targetPath");
        log().info(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"targetPath is ", ""})).s(Predef$.MODULE$.genericWrapArray(new Object[]{str3})));
        String str4 = argumentApplicationParser.get("dataciteDumpPath");
        log().info(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"dataciteDump is ", ""})).s(Predef$.MODULE$.genericWrapArray(new Object[]{str4})));
        Path path = new Path(str3);
        log().info(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"hdfsTargetPath is ", ""})).s(Predef$.MODULE$.genericWrapArray(new Object[]{path})));
        // Block size handed to the importer; 100 when the argument is not supplied.
        int i = argumentApplicationParser.get("blocksize") == null ? 100 : new StringOps(Predef$.MODULE$.augmentString(argumentApplicationParser.get("blocksize"))).toInt();
        String str5 = argumentApplicationParser.get("skipImport");
        log().info(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"skipImport is ", ""})).s(Predef$.MODULE$.genericWrapArray(new Object[]{str5})));
        final SparkSession orCreate = SparkSession$.MODULE$.builder().appName(getClass().getSimpleName()).master(str).getOrCreate();
        // Hadoop configuration pointing at the given namenode; it is only passed to
        // writeSequenceFile below.
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", str2);
        configuration.set("fs.hdfs.impl", DistributedFileSystem.class.getName());
        configuration.set("fs.file.impl", LocalFileSystem.class.getName());
        SparkContext sparkContext = orCreate.sparkContext();
        sparkContext.setLogLevel("ERROR");
        // Aggregator that keeps, among the records it is given, the one with the greater
        // timestamp (the second argument wins on ties); null is the empty buffer value.
        Aggregator<DataciteType, DataciteType, DataciteType> aggregator = new Aggregator<DataciteType, DataciteType, DataciteType>(orCreate) { // from class: eu.dnetlib.dhp.datacite.ImportDatacite$$anon$1
            private final SparkSession spark$1;

            /* renamed from: zero, reason: merged with bridge method [inline-methods] */
            public DataciteType m78zero() {
                return null;
            }

            // Returns the first record only when the second is null or the first has a
            // strictly greater timestamp; otherwise returns the second.
            public DataciteType reduce(DataciteType dataciteType, DataciteType dataciteType2) {
                if (dataciteType2 == null) {
                    return dataciteType;
                }
                if (dataciteType != null && dataciteType.timestamp() > dataciteType2.timestamp()) {
                    return dataciteType;
                }
                return dataciteType2;
            }

            // Partial results are combined with the same rule as reduce.
            public DataciteType merge(DataciteType dataciteType, DataciteType dataciteType2) {
                return reduce(dataciteType, dataciteType2);
            }

            public Encoder<DataciteType> bufferEncoder() {
                Predef$ predef$ = Predef$.MODULE$;
                SparkSession$implicits$ implicits = this.spark$1.implicits();
                TypeTags universe = scala.reflect.runtime.package$.MODULE$.universe();
                return (Encoder) predef$.implicitly(implicits.newProductEncoder(universe.TypeTag().apply(scala.reflect.runtime.package$.MODULE$.universe().runtimeMirror(ImportDatacite$$anon$1.class.getClassLoader()), new TypeCreator(this) { // from class: eu.dnetlib.dhp.datacite.ImportDatacite$$anon$1$$typecreator4$1
                    public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                        mirror.universe();
                        return mirror.staticClass("eu.dnetlib.dhp.datacite.DataciteType").asType().toTypeConstructor();
                    }
                })));
            }

            public Encoder<DataciteType> outputEncoder() {
                Predef$ predef$ = Predef$.MODULE$;
                SparkSession$implicits$ implicits = this.spark$1.implicits();
                TypeTags universe = scala.reflect.runtime.package$.MODULE$.universe();
                return (Encoder) predef$.implicitly(implicits.newProductEncoder(universe.TypeTag().apply(scala.reflect.runtime.package$.MODULE$.universe().runtimeMirror(ImportDatacite$$anon$1.class.getClassLoader()), new TypeCreator(this) { // from class: eu.dnetlib.dhp.datacite.ImportDatacite$$anon$1$$typecreator5$1
                    public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                        mirror.universe();
                        return mirror.staticClass("eu.dnetlib.dhp.datacite.DataciteType").asType().toTypeConstructor();
                    }
                })));
            }

            // The buffer already is the result; nothing to convert.
            public DataciteType finish(DataciteType dataciteType) {
                return dataciteType;
            }

            {
                this.spark$1 = orCreate;
            }
        };
        // Existing dump, read from dataciteDumpPath as a Dataset of DataciteType.
        Dataset as = orCreate.read().load(str4).as(orCreate.implicits().newProductEncoder(scala.reflect.runtime.package$.MODULE$.universe().TypeTag().apply(scala.reflect.runtime.package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: eu.dnetlib.dhp.datacite.ImportDatacite$$typecreator9$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                mirror.universe();
                return mirror.staticClass("eu.dnetlib.dhp.datacite.DataciteType").asType().toTypeConstructor();
            }
        })));
        // Greatest timestamp in the existing dump; used as the starting point of the import.
        long j = ((Row) as.select(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.max("timestamp")})).first()).getLong(0);
        Predef$.MODULE$.println(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"last Timestamp is ", ""})).s(Predef$.MODULE$.genericWrapArray(new Object[]{BoxesRunTime.boxToLong(j)})));
        // When skipImport is "true" (case-insensitive) nothing is downloaded and the count is
        // forced to 1, so the merge step below still runs against whatever is at targetPath.
        long writeSequenceFile = "true".equalsIgnoreCase(str5) ? 1L : writeSequenceFile(path, j, configuration, i);
        Predef$.MODULE$.println(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"Imported from Datacite API ", " documents"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{BoxesRunTime.boxToLong(writeSequenceFile)})));
        if (writeSequenceFile > 0) {
            // Step 1: read the sequence file at targetPath, run its entries through two
            // compiler-generated closures (bodies not visible in this file) to obtain
            // DataciteType records, and save them as "<targetPath>_dataset".
            orCreate.createDataset(sparkContext.sequenceFile(str3, Integer.TYPE, Text.class).map(new ImportDatacite$$anonfun$1(), ClassTag$.MODULE$.apply(String.class)).map(new ImportDatacite$$anonfun$2(), ClassTag$.MODULE$.apply(DataciteType.class)), orCreate.implicits().newProductEncoder(scala.reflect.runtime.package$.MODULE$.universe().TypeTag().apply(scala.reflect.runtime.package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: eu.dnetlib.dhp.datacite.ImportDatacite$$typecreator13$1
                public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                    mirror.universe();
                    return mirror.staticClass("eu.dnetlib.dhp.datacite.DataciteType").asType().toTypeConstructor();
                }
            }))).write().mode(SaveMode.Overwrite).save(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"", "_dataset"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{str3})));
            // Step 2: union the existing dump with the new records, group by a String key
            // (key function is a generated closure; presumably the DOI - confirm), reduce
            // each group with the aggregator above, and save 4000 partitions as
            // "<dataciteDumpPath>_updated".
            as.union(orCreate.read().load(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"", "_dataset"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{str3}))).as(orCreate.implicits().newProductEncoder(scala.reflect.runtime.package$.MODULE$.universe().TypeTag().apply(scala.reflect.runtime.package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: eu.dnetlib.dhp.datacite.ImportDatacite$$typecreator17$1
                public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                    mirror.universe();
                    return mirror.staticClass("eu.dnetlib.dhp.datacite.DataciteType").asType().toTypeConstructor();
                }
            })))).groupByKey(new ImportDatacite$$anonfun$main$1(), orCreate.implicits().newStringEncoder()).agg(aggregator.toColumn()).map(new ImportDatacite$$anonfun$main$2(), orCreate.implicits().newProductEncoder(scala.reflect.runtime.package$.MODULE$.universe().TypeTag().apply(scala.reflect.runtime.package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: eu.dnetlib.dhp.datacite.ImportDatacite$$typecreator25$1
                public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                    mirror.universe();
                    return mirror.staticClass("eu.dnetlib.dhp.datacite.DataciteType").asType().toTypeConstructor();
                }
            }))).repartition(4000).write().mode(SaveMode.Overwrite).save(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"", "_updated"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{str4})));
            // Step 3: replace the old dump with the merged one. The file system comes from the
            // Spark context's Hadoop configuration (not the local `configuration` above).
            // NOTE(review): the boolean results of delete/rename are not checked.
            FileSystem fileSystem = FileSystem.get(sparkContext.hadoopConfiguration());
            fileSystem.delete(new Path(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"", ""})).s(Predef$.MODULE$.genericWrapArray(new Object[]{str4}))), true);
            fileSystem.rename(new Path(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"", "_updated"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{str4}))), new Path(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"", ""})).s(Predef$.MODULE$.genericWrapArray(new Object[]{str4}))));
        }
    }

    /**
     * Downloads records updated since the given timestamp and appends them to a Hadoop
     * sequence file, one record per entry.
     *
     * <p>The time range from {@code j} (seconds, converted to milliseconds) up to the moment
     * this method starts is walked in fixed windows of 100,000,000 ms; a new
     * {@link DataciteAPIImporter} is created for each window. Keys are the zero-based running
     * record index, values are the record text. The writer is flushed after every record.
     *
     * <p>This is best-effort: any failure (opening the file, fetching, appending or closing)
     * is logged with its stack trace and the number of records written so far is returned,
     * so the caller can still work with a partial import.
     *
     * @param path          location of the sequence file to create
     * @param j             lower bound of the import range, in seconds since the epoch
     * @param configuration Hadoop configuration used to create the writer
     * @param i             block size passed to each {@link DataciteAPIImporter}
     * @return number of records successfully appended
     */
    private long writeSequenceFile(Path path, long j, Configuration configuration, int i) {
        final long windowMillis = 100000000L;
        final long now = System.currentTimeMillis();
        int imported = 0;
        // try-with-resources guarantees the writer is closed exactly once on every path.
        try (SequenceFile.Writer writer = SequenceFile.createWriter(configuration, new SequenceFile.Writer.Option[]{SequenceFile.Writer.file(path), SequenceFile.Writer.keyClass(IntWritable.class), SequenceFile.Writer.valueClass(Text.class)})) {
            long batchStart = System.currentTimeMillis();
            for (long from = j * 1000; from < now; from += windowMillis) {
                DataciteAPIImporter importer = new DataciteAPIImporter(from, i, from + windowMillis);
                IntWritable key = new IntWritable(imported);
                Text value = new Text();
                while (importer.hasNext()) {
                    key.set(imported);
                    value.set(importer.m55next());
                    writer.append(key, value);
                    writer.hflush();
                    // Count the record only once it has actually been written.
                    imported++;
                    if (imported % 1000 == 0) {
                        float elapsedSeconds = (float) ((System.currentTimeMillis() - batchStart) / 1000.0d);
                        Predef$.MODULE$.println("Imported " + imported + " in " + elapsedSeconds + " seconds");
                        batchStart = System.currentTimeMillis();
                    }
                }
                Predef$.MODULE$.println("updating from value: " + from + "  -> " + (from + windowMillis));
            }
        } catch (Throwable th) {
            // Deliberately not rethrown: the caller proceeds with whatever was imported.
            log().error("Error", th);
        }
        return imported;
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v0 */
    /* JADX WARN: Type inference failed for: r0v1, types: [java.lang.Throwable] */
    /* JADX WARN: Type inference failed for: r0v7 */
    private final DefaultFormats$ formats$lzycompute$1(ObjectRef objectRef, VolatileByteRef volatileByteRef) {
        ?? r0 = this;
        synchronized (r0) {
            if (((byte) (volatileByteRef.elem & 1)) == 0) {
                objectRef.elem = DefaultFormats$.MODULE$;
                volatileByteRef.elem = (byte) (volatileByteRef.elem | 1);
            }
            BoxedUnit boxedUnit = BoxedUnit.UNIT;
            r0 = r0;
            return (DefaultFormats$) objectRef.elem;
        }
    }

    /**
     * Returns the json4s formats value, initialising it on first use.
     *
     * @param objectRef       holder of the formats instance
     * @param volatileByteRef flag byte; bit 0 set means the holder is already populated
     * @return the formats instance
     */
    private final DefaultFormats$ formats$1(ObjectRef objectRef, VolatileByteRef volatileByteRef) {
        boolean initialised = ((byte) (volatileByteRef.elem & 1)) != 0;
        if (initialised) {
            return (DefaultFormats$) objectRef.elem;
        }
        return formats$lzycompute$1(objectRef, volatileByteRef);
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v0 */
    /* JADX WARN: Type inference failed for: r0v1, types: [java.lang.Throwable] */
    /* JADX WARN: Type inference failed for: r0v7 */
    private final JsonAST.JValue json$lzycompute$1(String str, ObjectRef objectRef, VolatileByteRef volatileByteRef) {
        ?? r0 = this;
        synchronized (r0) {
            if (((byte) (volatileByteRef.elem & 2)) == 0) {
                objectRef.elem = JsonMethods$.MODULE$.parse(package$.MODULE$.string2JsonInput(str), JsonMethods$.MODULE$.parse$default$2(), JsonMethods$.MODULE$.parse$default$3());
                volatileByteRef.elem = (byte) (volatileByteRef.elem | 2);
            }
            BoxedUnit boxedUnit = BoxedUnit.UNIT;
            r0 = r0;
            return (JsonAST.JValue) objectRef.elem;
        }
    }

    /**
     * Returns the parsed JSON tree for {@code str}, parsing it on first use.
     *
     * @param str             JSON text to parse if not parsed yet
     * @param objectRef       holder of the parsed tree
     * @param volatileByteRef flag byte; bit 1 set means the holder is already populated
     * @return the parsed tree
     */
    private final JsonAST.JValue json$1(String str, ObjectRef objectRef, VolatileByteRef volatileByteRef) {
        boolean parsed = ((byte) (volatileByteRef.elem & 2)) != 0;
        if (parsed) {
            return (JsonAST.JValue) objectRef.elem;
        }
        return json$lzycompute$1(str, objectRef, volatileByteRef);
    }

    /**
     * Private constructor: publishes the new instance as {@code MODULE$} and creates the
     * logger for this class.
     */
    private ImportDatacite$() {
        MODULE$ = this;
        log = LoggerFactory.getLogger(getClass());
    }
}
