package eu.dnetlib.dhp.provision;

import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions$;
import scala.Predef$;
import scala.runtime.BoxesRunTime;

/* compiled from: DatasetJoiner.scala */
/* loaded from: input_file:eu/dnetlib/dhp/provision/DatasetJoiner$.class */
/**
 * Java form of the Scala singleton object {@code DatasetJoiner}.
 *
 * <p>The single instance is exposed through {@link #MODULE$}. Its only operation,
 * {@link #startJoin(SparkSession, String, String)}, reads a relation dataset, counts for
 * every {@code source} how many {@code target} values fall into each of three prefix
 * groups, and writes one summary row per source.
 */
public final class DatasetJoiner$ {
    /**
     * The singleton instance.
     *
     * <p>Initialised in the declaration: a {@code static final} field cannot legally be
     * assigned from the constructor, and must never be observed as {@code null}.
     */
    public static final DatasetJoiner$ MODULE$ = new DatasetJoiner$();

    /** Not instantiable from outside; use {@link #MODULE$}. */
    private DatasetJoiner$() {
    }

    /**
     * Builds per-source relation counts and saves them.
     *
     * <p>The input must expose the columns {@code source} and {@code target}. Rows are split
     * by the prefix of {@code target}:
     * <ul>
     *   <li>{@code 50} is counted into {@code relatedPublication},</li>
     *   <li>{@code 60} is counted into {@code relatedDataset},</li>
     *   <li>{@code 70} is counted into {@code relatedUnknown}.</li>
     * </ul>
     * The three per-source counts are combined with full joins, so a source that appears in
     * only some of the groups is still emitted; its missing counts are written as {@code 0}.
     * The output columns are {@code source}, {@code relatedPublication},
     * {@code relatedDataset} and {@code relatedUnknown}.
     *
     * @param sparkSession session used to read the input
     * @param str path of the input dataset; it is loaded without an explicit format, so the
     *     reader's default format applies
     * @param str2 path the result is saved to; the save mode is {@code overwrite}
     */
    public void startJoin(SparkSession sparkSession, String str, String str2) {
        Dataset<?> relations = sparkSession.read().load(str);

        Dataset<?> publicationCounts =
                countBySource(relations, "target like '50%'", "p_source", "publication");
        Dataset<?> datasetCounts =
                countBySource(relations, "target like '60%'", "d_source", "dataset");
        Dataset<?> unknownCounts =
                countBySource(relations, "target like '70%'", "u_source", "unknown");

        // Full join keeps sources present on only one side; "id" is whichever key is non-null.
        Dataset<?> publicationAndDataset = select(
                publicationCounts.join(
                        datasetCounts,
                        col("p_source").equalTo(col("d_source")),
                        "full"),
                firstNonNull(col("p_source"), col("d_source")).alias("id"),
                col("publication"),
                col("dataset"));

        // Second full join adds the "unknown" counts; absent counts are replaced by 0.
        Dataset<?> summary = select(
                publicationAndDataset.join(
                        unknownCounts,
                        col("u_source").equalTo(col("id")),
                        "full"),
                firstNonNull(col("u_source"), col("id")).alias("source"),
                countOrZero("publication", "relatedPublication"),
                countOrZero("dataset", "relatedDataset"),
                countOrZero("unknown", "relatedUnknown"));

        summary.write().mode("overwrite").save(str2);
    }

    /**
     * Filters {@code relations} with {@code targetFilter}, groups by {@code source} and
     * counts {@code target}; the result has the columns {@code sourceAlias} and
     * {@code countAlias}.
     */
    private static Dataset<?> countBySource(
            Dataset<?> relations, String targetFilter, String sourceAlias, String countAlias) {
        Column targetCount = functions$.MODULE$.count("target").as(countAlias);
        Dataset<?> counted = relations
                .where(targetFilter)
                .groupBy("source", Predef$.MODULE$.wrapRefArray(new String[0]))
                .agg(targetCount, Predef$.MODULE$.wrapRefArray(new Column[0]));
        return select(counted, col("source").alias(sourceAlias), col(countAlias));
    }

    /** Projects {@code dataset} onto {@code columns} through the Scala {@code Seq} overload. */
    private static Dataset<?> select(Dataset<?> dataset, Column... columns) {
        return dataset.select(Predef$.MODULE$.wrapRefArray(columns));
    }

    /** Returns a column holding the first non-null value among {@code candidates}. */
    private static Column firstNonNull(Column... candidates) {
        return functions$.MODULE$.coalesce(Predef$.MODULE$.wrapRefArray(candidates));
    }

    /** Returns {@code column} with nulls replaced by the integer 0, renamed to {@code alias}. */
    private static Column countOrZero(String column, String alias) {
        return firstNonNull(col(column), functions$.MODULE$.lit(BoxesRunTime.boxToInteger(0)))
                .alias(alias);
    }

    /** Shorthand for a column reference by name. */
    private static Column col(String name) {
        return functions$.MODULE$.col(name);
    }
}
