/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.dhp.oa.dedup;

import com.google.common.collect.Lists;
import com.google.common.hash.Hashing;
import com.kwartile.lib.cc.ConnectedComponent;
import eu.dnetlib.dhp.oa.dedup.AbstractSparkAction;
import eu.dnetlib.dhp.oa.dedup.DedupUtility;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.utils.DHPUtils;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.UUID;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.api.java.UDF1;
import org.apache.spark.sql.expressions.UserDefinedFunction;
import org.apache.spark.sql.functions;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import scala.Tuple2;
import scala.Tuple3;

/**
 * Static Spark helpers for preparing relation data in the dedup workflow:
 * hashing identifiers, grouping relations into connected components, collecting
 * filtered relation pairs and deriving grouped pairs from merge relations.
 *
 * <p>All methods are static and hold no state.
 */
public class OpenorgsUtility {
    /** Prefix of the synthetic group identifiers generated by {@link #processMergeRels}. */
    public static final String GROUP_PREFIX = "group::";

    /** Separator used to pack a (source, target) pair into a single grouping key. */
    private static final String PAIR_SEPARATOR = "@@@";

    /**
     * Computes a 64-bit hash of an identifier.
     *
     * @param id the identifier to hash, encoded as UTF-8
     * @return the value of {@code asLong()} on the murmur3-128 hash of {@code id}
     */
    public static long hash(String id) {
        return Hashing.murmur3_128().hashString(id, StandardCharsets.UTF_8).asLong();
    }

    /**
     * Groups the relations of a given class into connected components.
     *
     * <p>Relations are read as JSON from {@code relationPath} and restricted to rows whose
     * {@code relClass} column equals {@code relClass}. Source and target ids are replaced by
     * their {@link #hash(String)} value to obtain numeric vertex ids, the resulting edges are
     * passed to {@code ConnectedComponent.runOnPairs}, and its output is joined back to the
     * original string ids on the {@code vertexId} column.
     *
     * @param spark        the active Spark session
     * @param relationPath location of the JSON-serialized relations
     * @param relClass     relation class to keep
     * @return the distinct rows of the join, without the {@code vertexId} column; the original
     *         string id is exposed in column {@code id}, the remaining columns are whatever
     *         {@code runOnPairs} emits (not visible from this file)
     */
    public static Dataset<Row> createFamilies(SparkSession spark, String relationPath, String relClass) {
        Dataset<Row> parentChildRels = spark
            .read()
            .schema(Encoders.bean(Relation.class).schema())
            .json(relationPath)
            .where(functions.col("relClass").equalTo(relClass))
            .select("source", "target");

        // The explicit UDF1 target type selects the Java-friendly udf(UDF1, DataType) overload.
        UserDefinedFunction hashUDF = functions.udf((UDF1<String, Long>) OpenorgsUtility::hash, DataTypes.LongType);

        // Lookup table: every id seen as source or target, paired with its hashed vertex id.
        Dataset<Row> vertexIdMap = parentChildRels
            .selectExpr("source as id")
            .union(parentChildRels.selectExpr("target as id"))
            .distinct()
            .withColumn("vertexId", hashUDF.apply(functions.col("id")));

        Dataset<Row> edges = parentChildRels
            .withColumn("source", hashUDF.apply(functions.col("source")))
            .withColumn("target", hashUDF.apply(functions.col("target")));

        // NOTE(review): 50 is presumably the iteration cap of the algorithm - confirm against ConnectedComponent.
        Dataset<Row> cliques = ConnectedComponent.runOnPairs(edges, 50, spark);

        return cliques
            .join(vertexIdMap, DHPUtils.toSeq(Collections.singletonList("vertexId")).toSeq(), "inner")
            .drop("vertexId")
            .distinct();
    }

    /**
     * Tells whether a relation has exactly the given semantics.
     *
     * <p>The comparison is null-safe: a relation with a missing field is simply not matched
     * (unless the corresponding expected value is {@code null} as well) instead of failing
     * with a {@link NullPointerException}.
     *
     * @param rel        the relation to test
     * @param relClass   expected relation class
     * @param relType    expected relation type
     * @param subRelType expected relation sub-type
     * @return {@code true} when all three fields of {@code rel} equal the expected values
     */
    public static boolean filterRels(Relation rel, String relClass, String relType, String subRelType) {
        return Objects.equals(rel.getRelClass(), relClass)
            && Objects.equals(rel.getRelType(), relType)
            && Objects.equals(rel.getSubRelType(), subRelType);
    }

    /**
     * Reads the relations stored at {@code relationPath}, keeps those accepted by
     * {@link #filterRels} and returns them as distinct ((first, second), relClass) tuples.
     *
     * @param spark        the active Spark session
     * @param relationPath location of the JSON-serialized relations
     * @param relClass     relation class to keep; also used as the label of every returned tuple
     * @param relType      relation type to keep
     * @param subRelType   relation sub-type to keep
     * @param bestAtSource when {@code false} the pair is always (source, target); when
     *                     {@code true} the pair is (source, target) only if
     *                     {@code DedupUtility.compareOpenOrgIds(source, target) < 0} and
     *                     (target, source) otherwise
     * @return the distinct labelled pairs
     */
    public static JavaRDD<Tuple2<Tuple2<String, String>, String>> collectRels(SparkSession spark, String relationPath, String relClass, String relType, String subRelType, boolean bestAtSource) {
        JavaRDD<Relation> filteredRels = spark
            .read()
            .schema(Encoders.bean(Relation.class).schema())
            .json(relationPath)
            .as(Encoders.bean(Relation.class))
            .map(AbstractSparkAction.patchRelFn(), Encoders.bean(Relation.class))
            .toJavaRDD()
            .filter(r -> filterRels(r, relClass, relType, subRelType));

        return filteredRels
            .map(rel -> orientPair(rel, relClass, bestAtSource))
            .distinct();
    }

    /**
     * Derives grouped relation pairs from the merge relations stored at {@code mergeRelsPath}.
     *
     * <p>Steps:
     * <ol>
     *   <li>load the relations, keep those with {@code relClass == 'merges'} and discard the
     *       ones whose target contains {@code "openorgsmesh"};</li>
     *   <li>group the targets by source and keep the groups with more than one target;</li>
     *   <li>for each group create a fresh group id, sort the ids with {@link #sortIds} and
     *       emit one pair (first id, id) per member, the first id included;</li>
     *   <li>union those pairs with {@code diffRels} and {@code parentChildRels} and keep only
     *       the pairs that occur exactly once overall and carry a group id, i.e. pairs that
     *       are not also present in the two other inputs.</li>
     * </ol>
     *
     * @param spark           the active Spark session
     * @param mergeRelsPath   location of the merge relations, read with the default data source
     * @param diffRels        labelled pairs that must be excluded from the result
     * @param parentChildRels labelled pairs that must be excluded from the result
     * @return (source, target, groupId) triples for the surviving pairs
     */
    public static JavaRDD<Tuple3<String, String, String>> processMergeRels(SparkSession spark, String mergeRelsPath, JavaRDD<Tuple2<Tuple2<String, String>, String>> diffRels, JavaRDD<Tuple2<Tuple2<String, String>, String>> parentChildRels) {
        JavaRDD<Tuple2<Tuple2<String, String>, String>> rawOpenorgsRels = spark
            .read()
            .load(mergeRelsPath)
            .as(Encoders.bean(Relation.class))
            .where("relClass == 'merges'")
            .toJavaRDD()
            .mapToPair(r -> new Tuple2<>(r.getSource(), r.getTarget()))
            .filter(t -> !t._2().contains("openorgsmesh"))
            .groupByKey()
            .map(g -> Lists.newArrayList(g._2()))
            .filter(l -> l.size() > 1)
            .flatMap(l -> toGroupRels(l).iterator());

        return rawOpenorgsRels
            .union(diffRels)
            .union(parentChildRels)
            // Pack the pair into one string key so equal pairs from the three inputs meet in one group.
            .mapToPair(t -> new Tuple2<>(t._1()._1() + PAIR_SEPARATOR + t._1()._2(), t._2()))
            .groupByKey()
            .map(g -> new Tuple2<String, List<String>>(
                g._1(),
                StreamSupport.stream(g._2().spliterator(), false).collect(Collectors.toList())))
            // A single label that is a group id means the pair came only from the merge relations.
            .filter(g -> g._2().size() == 1 && g._2().get(0).contains(GROUP_PREFIX))
            .map(t -> {
                String[] pair = t._1().split(PAIR_SEPARATOR);
                return new Tuple3<>(pair[0], pair[1], t._2().get(0));
            });
    }

    /**
     * Sorts the given identifiers in place using {@code DedupUtility.compareOpenOrgIds}.
     *
     * @param ids the identifiers to sort; the list itself is modified, so it must be mutable
     * @return the same list instance, now sorted
     */
    public static List<String> sortIds(List<String> ids) {
        ids.sort(DedupUtility::compareOpenOrgIds);
        return ids;
    }

    /** Builds the labelled pair for one relation, swapping source and target when required by {@code bestAtSource}. */
    private static Tuple2<Tuple2<String, String>, String> orientPair(Relation rel, String relClass, boolean bestAtSource) {
        boolean keepOrder = !bestAtSource || DedupUtility.compareOpenOrgIds(rel.getSource(), rel.getTarget()) < 0;
        Tuple2<String, String> pair = keepOrder
            ? new Tuple2<>(rel.getSource(), rel.getTarget())
            : new Tuple2<>(rel.getTarget(), rel.getSource());
        return new Tuple2<>(pair, relClass);
    }

    /** Tags every member of a group (the first one included) with a fresh group id, paired with the first sorted id. */
    private static List<Tuple2<Tuple2<String, String>, String>> toGroupRels(List<String> members) {
        String groupId = GROUP_PREFIX + UUID.randomUUID();
        List<String> ids = sortIds(members);
        String source = ids.get(0);
        List<Tuple2<Tuple2<String, String>, String>> rels = new ArrayList<>(ids.size());
        for (String target : ids) {
            rels.add(new Tuple2<>(new Tuple2<>(source, target), groupId));
        }
        return rels;
    }
}

