package eu.dnetlib;

import eu.dnetlib.graph.GraphProcessor;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.model.MapDocument;
import eu.dnetlib.pace.utils.PaceUtils;
import eu.dnetlib.reporter.SparkBlockProcessor;
import eu.dnetlib.reporter.SparkReporter;
import java.io.IOException;
import java.lang.invoke.SerializedLambda;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.graphx.Edge;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.util.LongAccumulator;
import scala.Tuple2;

/* loaded from: input_file:eu/dnetlib/SparkTest.class */
/**
 * Spark driver that runs a deduplication workflow over a set of serialized records.
 *
 * <p>The job reads the records and a {@link DedupConfig}, groups the records into blocks by
 * their grouping keys, asks {@link SparkBlockProcessor} for the similarity relations inside each
 * block, and finally hands vertices and edges to {@link GraphProcessor#findCCs} to obtain the
 * connected components. Blocks and components are written as text files and a few summary
 * counters are printed on standard output.
 *
 * <p>Note: every lambda passed to Spark below is serializable, so {@code javac} synthesises the
 * private {@code $deserializeLambda$} method for this class on its own. It must not be written by
 * hand in the source file.
 */
public class SparkTest {

    /** Iteration bound passed to {@link GraphProcessor#findCCs}. */
    private static final int MAX_ITERATIONS = 20;

    /** Attribute attached to every edge produced from a similarity relation. */
    private static final String SIMILARITY_RELATION = "similarTo";

    /**
     * Runs the deduplication job.
     *
     * @param args {@code args[0]}: location of the input records; {@code args[1]}: location of
     *     the dedup configuration; {@code args[2]}: base path, to which {@code "_groups"} and
     *     {@code "_output"} are appended to obtain the two output locations
     * @throws IOException declared by the original entry point; kept for the helper calls on
     *     {@code Utility}
     * @throws IllegalArgumentException if fewer than three arguments are supplied
     */
    public static void main(String[] args) throws IOException {
        if (args == null || args.length < 3) {
            throw new IllegalArgumentException(
                    "Expected 3 arguments: <inputPath> <dedupConfigPath> <outputBasePath>");
        }
        final String inputPath = args[0];
        final String configPath = args[1];
        final String groupsPath = args[2] + "_groups";
        final String outputPath = args[2] + "_output";

        final SparkSession spark =
                SparkSession.builder().appName("Deduplication").master("yarn").getOrCreate();
        try {
            final JavaSparkContext context = new JavaSparkContext(spark.sparkContext());

            final JavaRDD<String> records = Utility.loadDataFromHDFS(inputPath, context);
            final DedupConfig config = Utility.loadConfigFromHDFS(configPath);
            final Map<String, LongAccumulator> accumulators =
                    Utility.constructAccumulator(config, context.sc());

            // Parse every record and key it by its identifier.
            final JavaPairRDD<String, MapDocument> documents = records.mapToPair(record -> {
                final MapDocument document = PaceUtils.asMapDocument(config, record);
                return new Tuple2<String, MapDocument>(document.getIdentifier(), document);
            });

            // Graph vertices: the vertex id is the identifier's hashCode widened to long, the
            // same value used for the edge endpoints further down.
            final RDD<Tuple2<Object, MapDocument>> vertexes = documents
                    .mapToPair(entry -> new Tuple2<Object, MapDocument>(
                            Long.valueOf(entry._1().hashCode()), entry._2()))
                    .rdd();

            // Keep a single document per identifier, then emit it once per grouping key so that
            // groupByKey yields the candidate blocks.
            final JavaPairRDD<String, Iterable<MapDocument>> blocks = documents
                    .reduceByKey((first, second) -> first)
                    .flatMapToPair(entry -> {
                        final MapDocument document = entry._2();
                        return Utility.getGroupingKeys(config, document).stream()
                                .map(key -> new Tuple2<String, MapDocument>(key, document))
                                .collect(Collectors.toList())
                                .iterator();
                    })
                    .groupByKey();

            Utility.deleteIfExists(groupsPath);
            blocks.map(block -> new DocumentsBlock(block._1(), block._2()))
                    .saveAsTextFile(groupsPath);

            // Similarity relations found inside each block; the processor also updates the
            // accumulators as a side effect.
            final JavaPairRDD<String, String> relations = blocks.flatMapToPair(block -> {
                final SparkReporter reporter = new SparkReporter();
                new SparkBlockProcessor(config)
                        .process(block._1(), block._2(), reporter, accumulators);
                return reporter.getReport().iterator();
            });

            final RDD<Edge<String>> edges = relations
                    .map(relation -> new Edge<String>(
                            relation._1().hashCode(), relation._2().hashCode(), SIMILARITY_RELATION))
                    .rdd();

            // The components feed four actions (one save and three counts). Caching avoids
            // recomputing the lineage for each of them; since the accumulators are updated
            // inside a transformation, recomputation could also inflate the printed counters.
            final JavaRDD<ConnectedComponent> components =
                    GraphProcessor.findCCs(vertexes, edges, MAX_ITERATIONS).toJavaRDD().cache();
            try {
                Utility.deleteIfExists(outputPath);
                components.saveAsTextFile(outputPath);

                final JavaRDD<ConnectedComponent> duplicateGroups =
                        components.filter(component -> component.getDocs().size() > 1);
                final long nonDuplicates =
                        components.filter(component -> component.getDocs().size() == 1).count();

                System.out.println("Non duplicates: " + nonDuplicates);
                System.out.println("Duplicates: "
                        + duplicateGroups.flatMap(component -> component.getDocs().iterator()).count());
                System.out.println("Connected Components: " + duplicateGroups.count());
            } finally {
                components.unpersist();
            }

            accumulators.forEach(
                    (name, accumulator) -> System.out.println(name + " -> " + accumulator.value()));
        } finally {
            // Release the Spark resources even when the job fails half-way.
            spark.stop();
        }
    }
}
