package eu.dnetlib.dhp.collection.mag;

import eu.dnetlib.dhp.application.AbstractScalaApplication;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.StringType$;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructField$;
import org.apache.spark.sql.types.StructType$;
import org.eclipse.persistence.sdo.SDOConstants;
import org.postgresql.jdbc.EscapedFunctions;
import org.slf4j.Logger;
import scala.Predef$;
import scala.StringContext;
import scala.Tuple2;
import scala.collection.immutable.C$colon$colon;
import scala.collection.immutable.Nil$;
import scala.reflect.ScalaSignature;
import scala.reflect.api.Mirror;
import scala.reflect.api.TypeCreator;
import scala.reflect.api.Types;
import scala.reflect.api.Universe;
import scala.reflect.runtime.package$;
import scala.runtime.BoxesRunTime;

/* compiled from: SparkCreateMagDenormalizedTable.scala */
@ScalaSignature(bytes = "\u0006\u0001\u00154A!\u0004\b\u00013!I\u0001\u0005\u0001B\u0001B\u0003%\u0011E\f\u0005\n_\u0001\u0011\t\u0011)A\u0005aQB\u0001\"\u000e\u0001\u0003\u0002\u0003\u0006IA\u000e\u0005\u0006}\u0001!\ta\u0010\u0005\u0006\u000b\u0002!\tE\u0012\u0005\u0006\u0015\u0002!IaS\u0004\u00063:A\tA\u0017\u0004\u0006\u001b9A\ta\u0017\u0005\u0006}!!\ta\u0018\u0005\bk!\u0011\r\u0011\"\u0001a\u0011\u0019\t\u0007\u0002)A\u0005m!)!\r\u0003C\u0001G\ny2\u000b]1sW\u000e\u0013X-\u0019;f\u001b\u0006<G)\u001a8pe6\fG.\u001b>fIR\u000b'\r\\3\u000b\u0005=\u0001\u0012aA7bO*\u0011\u0011CE\u0001\u000bG>dG.Z2uS>t'BA\n\u0015\u0003\r!\u0007\u000e\u001d\u0006\u0003+Y\tq\u0001\u001a8fi2L'MC\u0001\u0018\u0003\t)Wo\u0001\u0001\u0014\u0005\u0001Q\u0002CA\u000e\u001f\u001b\u0005a\"BA\u000f\u0013\u0003-\t\u0007\u000f\u001d7jG\u0006$\u0018n\u001c8\n\u0005}a\"\u0001G!cgR\u0014\u0018m\u0019;TG\u0006d\u0017-\u00119qY&\u001c\u0017\r^5p]\u0006a\u0001O]8qKJ$\u0018\u0010U1uQB\u0011!e\u000b\b\u0003G%\u0002\"\u0001J\u0014\u000e\u0003\u0015R!A\n\r\u0002\rq\u0012xn\u001c;?\u0015\u0005A\u0013!B:dC2\f\u0017B\u0001\u0016(\u0003\u0019\u0001&/\u001a3fM&\u0011A&\f\u0002\u0007'R\u0014\u0018N\\4\u000b\u0005):\u0013B\u0001\u0011\u001f\u0003\u0011\t'oZ:\u0011\u0007E\u0012\u0014%D\u0001(\u0013\t\u0019tEA\u0003BeJ\f\u00170\u0003\u00020=\u0005\u0019An\\4\u0011\u0005]bT\"\u0001\u001d\u000b\u0005eR\u0014!B:mMRR'\"A\u001e\u0002\u0007=\u0014x-\u0003\u0002>q\t1Aj\\4hKJ\fa\u0001P5oSRtD\u0003\u0002!C\u0007\u0012\u0003\"!\u0011\u0001\u000e\u00039AQ\u0001\t\u0003A\u0002\u0005BQa\f\u0003A\u0002ABQ!\u000e\u0003A\u0002Y\n1A];o)\u00059\u0005CA\u0019I\u0013\tIuE\u0001\u0003V]&$\u0018!H4f]\u0016\u0014\u0018\r^3e\t\u0016twN]7bY&TX\rZ'B\u000fR\u000b'\r\\3\u0015\u0007\u001dcu\u000bC\u0003N\r\u0001\u0007a*A\u0003ta\u0006\u00148\u000e\u0005\u0002P+6\t\u0001K\u0003\u0002R%\u0006\u00191/\u001d7\u000b\u00055\u001b&B\u0001+;\u0003\u0019\t\u0007/Y2iK&\u0011a\u000b\u0015\u0002\r'B\f'o[*fgNLwN\u001c\u0005\u00061\u001a\u0001\r!I\u0001\f[\u
0006<')Y:f!\u0006$\b.A\u0010Ta\u0006\u00148n\u0011:fCR,W*Y4EK:|'/\\1mSj,G\rV1cY\u0016\u0004\"!\u0011\u0005\u0014\u0005!a\u0006CA\u0019^\u0013\tqvE\u0001\u0004B]f\u0014VM\u001a\u000b\u00025V\ta'\u0001\u0003m_\u001e\u0004\u0013\u0001B7bS:$\"a\u00123\t\u000b=b\u0001\u0019\u0001\u0019")
/* loaded from: input_file:eu/dnetlib/dhp/collection/mag/SparkCreateMagDenormalizedTable.class */
/**
 * Spark job that flattens the MAG (Microsoft Academic Graph) dump found under
 * {@code magBasePath} into a single table, one row per paper, and writes it to
 * {@code <magBasePath>/mag_denormalized}.
 *
 * <p>The table is built incrementally: the {@code Papers} entity is enriched, in this order,
 * with abstracts, authors (with affiliations), conference instances, journals and URLs. Each
 * intermediate stage is cached and counted, and the stage it was derived from is unpersisted
 * afterwards.
 */
public class SparkCreateMagDenormalizedTable extends AbstractScalaApplication {
    /** Logger received through the constructor (the same instance is handed to the superclass). */
    private final Logger log;

    /**
     * Command-line entry point; delegates to the Scala companion object.
     *
     * @param strArr command-line arguments, forwarded unchanged
     */
    public static void main(String[] strArr) {
        SparkCreateMagDenormalizedTable$.MODULE$.main(strArr);
    }

    /**
     * Reads the {@code magBasePath} parameter from the argument parser, logs it and builds the
     * denormalized table.
     */
    @Override // eu.dnetlib.dhp.application.SparkScalaApplication
    public void run() {
        final String magBasePath = parser().get("magBasePath");
        this.log.info("found parameters magBasePath: {}", magBasePath);
        generatedDenormalizedMAGTable(spark(), magBasePath);
    }

    /**
     * Builds the denormalized MAG table and saves it under {@code magBasePath + "/mag_denormalized"}
     * using save mode {@code "OverWrite"}.
     *
     * @param sparkSession session used to load the MAG entities
     * @param magBasePath  base path of the MAG dump; also the parent of the output directory
     */
    private void generatedDenormalizedMAGTable(SparkSession sparkSession, String magBasePath) {
        // Stage 0: papers, with the DOI normalised to lower case.
        final Dataset<Row> papers = MagUtility$.MODULE$
            .loadMagEntity(sparkSession, "Papers", magBasePath)
            .withColumn("Doi", functions$.MODULE$.lower(functions$.MODULE$.col("Doi")));
        papers.cache();
        papers.count();

        // Stage 1: + Abstract. count() runs right after cache(), presumably so the stage is
        // materialised before the stage it depends on is released.
        final Dataset<Row> abstracts = loadAbstracts(sparkSession, magBasePath);
        final Dataset<Row> withAbstract = papers
            .join(abstracts, papers.col("PaperId").equalTo(abstracts.col("PaperId")), "left")
            .select(papers.col("*"), abstracts.col("Abstract"))
            .cache();
        withAbstract.count();
        papers.unpersist();

        // Stage 2: + authors (array of structs, one per author/affiliation row).
        final Dataset<Row> authorsByPaper = loadAuthorsByPaper(sparkSession, magBasePath);
        final Dataset<Row> withAuthors = withAbstract
            .join(authorsByPaper, withAbstract.col("PaperId").equalTo(authorsByPaper.col("PaperId")), "left")
            .select(withAbstract.col("*"), authorsByPaper.col("authors"))
            .cache();
        withAuthors.count();
        withAbstract.unpersist();

        // Stage 3: + conference instance details.
        final Dataset<Row> conferences = loadConferences(sparkSession, magBasePath);
        final Dataset<Row> withConference = withAuthors
            .join(
                conferences,
                withAuthors.col("ConferenceInstanceId").equalTo(conferences.col("ConferenceInstanceId")),
                "left")
            .select(
                withAuthors.col("*"),
                conferences.col("conferenceName"),
                conferences.col("conferenceLocation"),
                conferences.col("conferenceStartDate"),
                conferences.col("conferenceEndDate"))
            .cache();
        withConference.count();
        withAuthors.unpersist();

        // Stage 4: + journal details.
        final Dataset<Row> journals = loadJournals(sparkSession, magBasePath);
        final Dataset<Row> withJournal = withConference
            .join(journals, withConference.col("JournalId").equalTo(journals.col("JournalId")), "left")
            .select(
                withConference.col("*"),
                journals.col("journalName"),
                journals.col("journalIssn"),
                journals.col("journalPublisher"))
            .cache();
        withJournal.count();
        // Release the conference stage like every other superseded stage; it used to stay cached.
        withConference.unpersist();

        // Stage 5: + urls, then project to the output schema and write.
        final Dataset<Row> urlsByPaper = loadUrlsByPaper(sparkSession, magBasePath).cache();
        urlsByPaper.count();

        // NOTE(review): this is the only enrichment joined without an explicit join type (i.e. an
        // inner join), so papers that have no PaperUrls row are dropped from the output, while all
        // previous enrichments are left joins. Kept as-is; confirm that this is intended.
        withJournal
            .join(urlsByPaper, withJournal.col("PaperId").equalTo(urlsByPaper.col("PaperId")))
            .select(withJournal.col("*"), urlsByPaper.col("urls"))
            .select(outputColumns())
            .write()
            .mode("OverWrite")
            .save(magBasePath + "/mag_denormalized");

        withJournal.unpersist();
        // The URL aggregate was cached above and is no longer needed once the table is written.
        urlsByPaper.unpersist();
    }

    /**
     * Loads {@code PaperAbstractsInvertedIndex} and maps every row to a
     * {@code (PaperId, Abstract)} pair, where the abstract is the result of
     * {@code MagUtility.convertInvertedIndexString} applied to the second column.
     */
    @SuppressWarnings({"unchecked", "rawtypes"}) // the reflective Scala encoder below is untyped from Java
    private static Dataset<Row> loadAbstracts(SparkSession sparkSession, String magBasePath) {
        // NOTE(review): javac may report this map(...) call as ambiguous between the Scala Function1
        // and the Java MapFunction overloads; an explicit cast on the lambda would be needed then.
        return MagUtility$.MODULE$
            .loadMagEntity(sparkSession, "PaperAbstractsInvertedIndex", magBasePath)
            .map(
                row -> new Tuple2<>(
                    row.getLong(0),
                    MagUtility$.MODULE$.convertInvertedIndexString(row.getString(1))),
                // Encoder for scala.Tuple2[scala.Long, scala.Predef.String], described through a
                // Scala TypeTag exactly as the Scala compiler emitted it.
                sparkSession.implicits().newProductEncoder(
                    package$.MODULE$.universe().TypeTag().apply(
                        package$.MODULE$.universe().runtimeMirror(
                            SparkCreateMagDenormalizedTable.class.getClassLoader()),
                        new TypeCreator() {
                            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                                Universe universe = mirror.universe();
                                return universe.internal().reificationSupport().TypeRef(
                                    universe.internal().reificationSupport().ThisType(
                                        mirror.staticPackage("scala").asModule().moduleClass()),
                                    mirror.staticClass("scala.Tuple2"),
                                    new C$colon$colon(
                                        mirror.staticClass("scala.Long").asType().toTypeConstructor(),
                                        new C$colon$colon(
                                            universe.internal().reificationSupport().TypeRef(
                                                universe.internal().reificationSupport().SingleType(
                                                    mirror.staticPackage("scala").asModule().moduleClass().asType().toTypeConstructor(),
                                                    mirror.staticModule("scala.Predef")),
                                                universe.internal().reificationSupport().selectType(
                                                    mirror.staticModule("scala.Predef").asModule().moduleClass(),
                                                    "String"),
                                                Nil$.MODULE$),
                                            Nil$.MODULE$)));
                            }
                        })))
            .withColumnRenamed("_1", "PaperId")
            .withColumnRenamed("_2", "Abstract");
    }

    /**
     * Joins {@code PaperAuthorAffiliations} with {@code Authors} (inner) and {@code Affiliations}
     * (left) and collects, per {@code PaperId}, the list of author structs into column
     * {@code authors}.
     */
    private static Dataset<Row> loadAuthorsByPaper(SparkSession sparkSession, String magBasePath) {
        final Dataset<Row> authors = MagUtility$.MODULE$.loadMagEntity(sparkSession, "Authors", magBasePath);
        final Dataset<Row> affiliations =
            MagUtility$.MODULE$.loadMagEntity(sparkSession, "Affiliations", magBasePath);
        final Dataset<Row> paperAuthorAffiliations =
            MagUtility$.MODULE$.loadMagEntity(sparkSession, "PaperAuthorAffiliations", magBasePath);

        final Dataset<Row> paperAuthors = paperAuthorAffiliations
            .join(
                authors,
                paperAuthorAffiliations.col("AuthorId").equalTo(authors.col("AuthorId")),
                "inner")
            .select(
                functions$.MODULE$.col("PaperId"),
                functions$.MODULE$.col("AffiliationId"),
                functions$.MODULE$.col("AuthorSequenceNumber"),
                authors.col("DisplayName").alias("AuthorName"),
                authors.col("AuthorId"));

        return paperAuthors
            .join(
                affiliations,
                paperAuthors.col("AffiliationId").equalTo(affiliations.col("AffiliationId")),
                "left")
            .select(
                paperAuthors.col("*"),
                affiliations.col("DisplayName").alias("AffiliationName"),
                affiliations.col("GridId"))
            .groupBy("PaperId")
            .agg(
                functions$.MODULE$.collect_list(
                    functions$.MODULE$.struct(
                        "AffiliationId",
                        "AuthorSequenceNumber",
                        "AffiliationName",
                        "AuthorName",
                        "AuthorId",
                        "GridId"))
                    .alias("authors"));
    }

    /** Loads {@code ConferenceInstances}, keeping the id and the renamed descriptive columns. */
    private static Dataset<Row> loadConferences(SparkSession sparkSession, String magBasePath) {
        return MagUtility$.MODULE$
            .loadMagEntity(sparkSession, "ConferenceInstances", magBasePath)
            .select(
                sourceColumn("ConferenceInstanceId"),
                renamedColumn("DisplayName", "conferenceName"),
                renamedColumn("Location", "conferenceLocation"),
                renamedColumn("StartDate", "conferenceStartDate"),
                renamedColumn("EndDate", "conferenceEndDate"));
    }

    /** Loads {@code Journals}, keeping the id and the renamed descriptive columns. */
    private static Dataset<Row> loadJournals(SparkSession sparkSession, String magBasePath) {
        return MagUtility$.MODULE$
            .loadMagEntity(sparkSession, "Journals", magBasePath)
            .select(
                sourceColumn("JournalId"),
                renamedColumn("DisplayName", "journalName"),
                renamedColumn("Issn", "journalIssn"),
                renamedColumn("Publisher", "journalPublisher"));
    }

    /**
     * Loads {@code PaperUrls} and aggregates, per {@code PaperId}, the distinct {@code SourceUrl}
     * values into column {@code urls}, truncated with {@code slice(..., 1, 6)}.
     */
    private static Dataset<Row> loadUrlsByPaper(SparkSession sparkSession, String magBasePath) {
        return MagUtility$.MODULE$
            .loadMagEntity(sparkSession, "PaperUrls", magBasePath)
            .groupBy("PaperId")
            .agg(functions$.MODULE$.slice(functions$.MODULE$.collect_set("SourceUrl"), 1, 6).alias("urls"));
    }

    /** Final projection: every source column paired with the name it gets in the output table. */
    private static Column[] outputColumns() {
        return new Column[] {
            renamedColumn("PaperId", "paperId"),
            renamedColumn("Doi", "doi"),
            renamedColumn("DocType", "docType"),
            renamedColumn("PaperTitle", "paperTitle"),
            renamedColumn("OriginalTitle", "originalTitle"),
            renamedColumn("BookTitle", "bookTitle"),
            renamedColumn("Year", "year"),
            renamedColumn("Date", "date"),
            renamedColumn("OnlineDate", "onlineDate"),
            renamedColumn("Publisher", "publisher"),
            renamedColumn("JournalId", "journalId"),
            renamedColumn("ConferenceSeriesId", "conferenceSeriesId"),
            renamedColumn("ConferenceInstanceId", "conferenceInstanceId"),
            renamedColumn("Volume", "volume"),
            renamedColumn("Issue", "issue"),
            renamedColumn("FirstPage", "firstPage"),
            renamedColumn("LastPage", "lastPage"),
            renamedColumn("ReferenceCount", "referenceCount"),
            renamedColumn("CitationCount", "citationCount"),
            renamedColumn("EstimatedCitation", "estimatedCitation"),
            renamedColumn("OriginalVenue", "originalVenue"),
            renamedColumn("FamilyId", "familyId"),
            renamedColumn("FamilyRank", "familyRank"),
            renamedColumn("DocSubTypes", "docSubTypes"),
            renamedColumn("CreatedDate", "createdDate"),
            renamedColumn("Abstract", "abstractText"),
            renamedColumn("authors", "authors"),
            renamedColumn("conferenceName", "conferenceName"),
            renamedColumn("conferenceLocation", "conferenceLocation"),
            renamedColumn("conferenceStartDate", "conferenceStartDate"),
            renamedColumn("conferenceEndDate", "conferenceEndDate"),
            renamedColumn("journalName", "journalName"),
            renamedColumn("journalIssn", "journalIssn"),
            renamedColumn("journalPublisher", "journalPublisher"),
            sourceColumn("urls")
        };
    }

    /** Unresolved column reference by name. */
    private static Column sourceColumn(String name) {
        return functions$.MODULE$.col(name);
    }

    /** Unresolved column reference by name, exposed under {@code alias}. */
    private static Column renamedColumn(String name, String alias) {
        return functions$.MODULE$.col(name).as(alias);
    }

    /**
     * Creates the application; all three arguments are forwarded unchanged to
     * {@link AbstractScalaApplication}.
     *
     * @param propertyPath first superclass argument (presumably the location of the argument
     *                     definitions; confirm against {@code AbstractScalaApplication})
     * @param args         command-line arguments
     * @param logger       logger used by this job and by the superclass
     */
    public SparkCreateMagDenormalizedTable(String propertyPath, String[] args, Logger logger) {
        super(propertyPath, args, logger);
        this.log = logger;
    }
}
