package eu.dnetlib.dhp.oa.provision;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.common.SparkSessionSupport;
import eu.dnetlib.dhp.oa.provision.model.ProvisionModelSupport;
import eu.dnetlib.dhp.oa.provision.model.SortableRelationKey;
import eu.dnetlib.dhp.oa.provision.utils.RelationPartitioner;
import eu.dnetlib.dhp.schema.oaf.Relation;
import java.io.InputStream;
import java.lang.invoke.SerializedLambda;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Optional;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.expressions.Aggregator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;

/* loaded from: input_file:eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.class */
public class PrepareRelationsJob {
    /** Class-wide SLF4J logger; main uses it to trace every resolved job argument. */
    private static final Logger log = LoggerFactory.getLogger(PrepareRelationsJob.class);
    /** Shared Jackson mapper used by the read lambdas to turn one JSON text line into a {@link Relation}. */
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
    /** Default cap on relations; equals the fallback value main applies to sourceMaxRelations and targetMaxRelations. */
    public static final int MAX_RELS = 100;
    /** Fallback used by main for the relPartitions argument when it is not supplied. */
    public static final int DEFAULT_NUM_PARTITIONS = 3000;

    /* loaded from: input_file:eu/dnetlib/dhp/oa/provision/PrepareRelationsJob$RelationAggregator.class */
    public static class RelationAggregator extends Aggregator<Relation, RelationList, RelationList> {
        private final int maxRelations;

        public RelationAggregator(int i) {
            this.maxRelations = i;
        }

        /* renamed from: zero, reason: merged with bridge method [inline-methods] */
        public RelationList m5zero() {
            return new RelationList();
        }

        public RelationList reduce(RelationList relationList, Relation relation) {
            relationList.getRelations().add(relation);
            return getSortableRelationList(relationList);
        }

        public RelationList merge(RelationList relationList, RelationList relationList2) {
            relationList.getRelations().addAll(relationList2.getRelations());
            return getSortableRelationList(relationList);
        }

        public RelationList finish(RelationList relationList) {
            return getSortableRelationList(relationList);
        }

        private RelationList getSortableRelationList(RelationList relationList) {
            RelationList relationList2 = new RelationList();
            relationList2.setRelations((Queue) relationList.getRelations().stream().limit(this.maxRelations).collect(Collectors.toCollection(() -> {
                return new PriorityQueue(new RelationComparator());
            })));
            return relationList2;
        }

        public Encoder<RelationList> bufferEncoder() {
            return Encoders.kryo(RelationList.class);
        }

        public Encoder<RelationList> outputEncoder() {
            return Encoders.kryo(RelationList.class);
        }
    }

    /**
     * Job entry point: parses the command line against the bundled parameter specification,
     * logs the resolved settings, then removes the output directory and runs the RDD-based
     * relation preparation inside a Spark session.
     *
     * <p>Recognised arguments: {@code isSparkSessionManaged} (default {@code true}),
     * {@code inputRelationsPath}, {@code outputPath}, {@code relPartitions}
     * (default {@link #DEFAULT_NUM_PARTITIONS}), {@code relationFilter} (comma separated,
     * matched case-insensitively), {@code sourceMaxRelations} and {@code targetMaxRelations}
     * (both default {@link #MAX_RELS}).
     *
     * @param strArr raw command-line arguments
     * @throws Exception if the parameter specification cannot be read, the arguments cannot be
     *     parsed, or the Spark job fails
     */
    public static void main(String[] strArr) throws Exception {
        final String paramsResource = "/eu/dnetlib/dhp/oa/provision/input_params_prepare_relations.json";
        String jsonConfiguration;
        // Close the resource stream deterministically and decode it with an explicit charset
        // instead of the platform default.
        try (InputStream in = PrepareRelationsJob.class.getResourceAsStream(paramsResource)) {
            if (in == null) {
                throw new IllegalStateException("Missing classpath resource: " + paramsResource);
            }
            jsonConfiguration = IOUtils.toString(in, StandardCharsets.UTF_8);
        }

        ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
        parser.parseArgument(strArr);

        Boolean isSparkSessionManaged = Optional.ofNullable(parser.get("isSparkSessionManaged"))
                .map(Boolean::valueOf)
                .orElse(Boolean.TRUE);
        log.info("isSparkSessionManaged: {}", isSparkSessionManaged);

        String inputRelationsPath = parser.get("inputRelationsPath");
        log.info("inputRelationsPath: {}", inputRelationsPath);

        String outputPath = parser.get("outputPath");
        log.info("outputPath: {}", outputPath);

        int relPartitions = Optional.ofNullable(parser.get("relPartitions"))
                .map(Integer::valueOf)
                .orElse(DEFAULT_NUM_PARTITIONS);
        log.info("relPartitions: {}", relPartitions);

        // Entries are lower-cased here and compared against the lower-cased relClass later on.
        // Surrounding blanks and empty entries are dropped so that "a, b" filters both classes.
        String rawRelationFilter = parser.get("relationFilter");
        Set<String> relationFilter = new HashSet<>();
        if (rawRelationFilter != null) {
            relationFilter.addAll(Sets.newHashSet(
                    Splitter.on(",").trimResults().omitEmptyStrings().split(rawRelationFilter.toLowerCase())));
        }
        log.info("relationFilter: {}", relationFilter);

        int sourceMaxRelations = Optional.ofNullable(parser.get("sourceMaxRelations"))
                .map(Integer::valueOf)
                .orElse(MAX_RELS);
        log.info("sourceMaxRelations: {}", sourceMaxRelations);

        int targetMaxRelations = Optional.ofNullable(parser.get("targetMaxRelations"))
                .map(Integer::valueOf)
                .orElse(MAX_RELS);
        log.info("targetMaxRelations: {}", targetMaxRelations);

        SparkConf sparkConf = new SparkConf();
        sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
        sparkConf.registerKryoClasses(ProvisionModelSupport.getModelClasses());

        SparkSessionSupport.runWithSparkSession(sparkConf, isSparkSessionManaged, sparkSession -> {
            removeOutputDir(sparkSession, outputPath);
            prepareRelationsRDD(sparkSession, inputRelationsPath, outputPath, relationFilter,
                    sourceMaxRelations, targetMaxRelations, relPartitions);
        });
    }

    /**
     * RDD-based preparation: reads the relations, discards those deleted by inference and those
     * whose lower-cased relClass is listed in {@code set}, caps the relations first per source
     * and then per target, and writes the result as parquet.
     *
     * @param sparkSession active Spark session
     * @param str input path of the JSON-encoded relations
     * @param str2 output path of the parquet dataset (overwritten)
     * @param set relation classes to exclude (expected in lower case)
     * @param i maximum number of relations kept per source
     * @param i2 maximum number of relations kept per target
     * @param i3 number of partitions used for pruning and for the final write
     */
    private static void prepareRelationsRDD(SparkSession sparkSession, String str, String str2, Set<String> set, int i, int i2, int i3) {
        JavaRDD<Relation> candidates = readPathRelationRDD(sparkSession, str)
                .filter(rel -> Boolean.valueOf(!rel.getDataInfo().getDeletedbyinference().booleanValue()))
                .filter(rel -> Boolean.valueOf(!set.contains(StringUtils.lowerCase(rel.getRelClass()))));

        // Two pruning passes: the first groups by source id, the second by target id.
        JavaRDD<Relation> cappedBySource = pruneRels(candidates, i, i3, Relation::getSource);
        JavaRDD<Relation> cappedByTarget = pruneRels(cappedBySource, i2, i3, Relation::getTarget);

        sparkSession
                .createDataset(cappedByTarget.rdd(), Encoders.bean(Relation.class))
                .repartition(i3)
                .write()
                .mode(SaveMode.Overwrite)
                .parquet(str2);
    }

    /**
     * Caps the number of relations per grouping key.
     *
     * <p>Each relation is paired with a {@link SortableRelationKey} built from the id returned by
     * {@code function}; the pairs are repartitioned and sorted within partitions through
     * {@link RelationPartitioner}, grouped by key, and only the first {@code i} relations of
     * every group are emitted.
     *
     * @param javaRDD relations to prune
     * @param i maximum number of relations kept per key
     * @param i2 number of partitions handed to the partitioner
     * @param function extracts the grouping id (e.g. source or target) from a relation
     * @return the pruned relations
     */
    private static JavaRDD<Relation> pruneRels(JavaRDD<Relation> javaRDD, int i, int i2, Function<Relation, String> function) {
        JavaRDD<Iterable<Tuple2<SortableRelationKey, Relation>>> groups = javaRDD
                .mapToPair(rel -> new Tuple2<>(SortableRelationKey.create(rel, function.call(rel)), rel))
                .repartitionAndSortWithinPartitions(new RelationPartitioner(i2))
                .groupBy(pair -> pair._1())
                .map(grouped -> grouped._2());

        // Truncate every group to i entries, then unwrap the relation from its (key, relation) pair.
        return groups
                .flatMap(group -> Iterables.limit(group, i).iterator())
                .map(pair -> pair._2());
    }

    /**
     * Dataset-based preparation: reads the relations as text, discards those deleted by inference
     * and those whose relClass is listed in {@code set}, keeps at most {@code i} relations per
     * source through {@link RelationAggregator}, and writes the result as parquet.
     *
     * <p>The relClass is lower-cased before the lookup, consistently with the RDD-based variant:
     * the filter set is lower-cased when it is parsed, so a raw comparison would never match a
     * mixed-case relation class.
     *
     * @param sparkSession active Spark session
     * @param str input path of the JSON-encoded relations
     * @param str2 output path of the parquet dataset (overwritten)
     * @param set relation classes to exclude (expected in lower case)
     * @param i maximum number of relations kept per source
     * @param i2 number of partitions used when reading and when writing
     */
    private static void prepareRelationsDataset(SparkSession sparkSession, String str, String str2, Set<String> set, int i, int i2) {
        sparkSession.read().textFile(str).repartition(i2).map(str3 -> {
            return (Relation) OBJECT_MAPPER.readValue(str3, Relation.class);
        }, Encoders.kryo(Relation.class)).filter(relation -> {
            return !relation.getDataInfo().getDeletedbyinference().booleanValue();
        }).filter(relation2 -> {
            // Case-insensitive match against the (lower-cased) exclusion set.
            return !set.contains(StringUtils.lowerCase(relation2.getRelClass()));
        }).groupByKey((v0) -> {
            return v0.getSource();
        }, Encoders.STRING()).agg(new RelationAggregator(i).toColumn()).flatMap(tuple2 -> {
            return Iterables.limit(((RelationList) tuple2._2()).getRelations(), i).iterator();
        }, Encoders.bean(Relation.class)).repartition(i2).write().mode(SaveMode.Overwrite).parquet(str2);
    }

    /**
     * Loads the text file(s) at the given path and parses every line as a JSON-encoded
     * {@link Relation}.
     *
     * @param sparkSession active Spark session
     * @param str path of the text input
     * @return one relation per input line
     */
    private static JavaRDD<Relation> readPathRelationRDD(SparkSession sparkSession, String str) {
        JavaSparkContext javaContext = JavaSparkContext.fromSparkContext(sparkSession.sparkContext());
        JavaRDD<String> jsonLines = javaContext.textFile(str);
        return jsonLines.map(json -> OBJECT_MAPPER.readValue(json, Relation.class));
    }

    /**
     * Removes the given path through {@link HdfsSupport}, using the Hadoop configuration of the
     * session's Spark context.
     *
     * @param sparkSession active Spark session
     * @param str path to remove
     */
    private static void removeOutputDir(SparkSession sparkSession, String str) {
        HdfsSupport.remove(
                str,
                sparkSession.sparkContext().hadoopConfiguration());
    }

    /**
     * Synthetic hook (marked as such by the decompiler) that rebuilds a serialized lambda of this
     * class from its {@link SerializedLambda} descriptor.
     *
     * <p>It works in two steps: the implementation method name is first mapped to a numeric
     * selector (hash code switch plus an {@code equals} check), then the selector picks a branch
     * that verifies the functional interface, implementation class and signatures before
     * returning an equivalent lambda, re-binding captured arguments where the lambda has any.
     *
     * <p>NOTE(review): the decompiler rendering below is not valid Java - the selector {@code z}
     * is declared {@code boolean} yet assigned integers, and the second switch repeats
     * {@code case true}. The lambda bodies here are copies of the ones declared elsewhere in this
     * class; presumably javac regenerates this method on compilation, so it should not be
     * maintained by hand - confirm before recompiling from this source.
     *
     * @param serializedLambda descriptor of the lambda being deserialized
     * @return the re-created lambda
     * @throws IllegalArgumentException if the descriptor matches none of the known lambdas
     */
    private static /* synthetic */ Object $deserializeLambda$(SerializedLambda serializedLambda) {
        String implMethodName = serializedLambda.getImplMethodName();
        boolean z = -1;
        // Step 1: translate the implementation method name into a selector value.
        switch (implMethodName.hashCode()) {
            case -1909957208:
                if (implMethodName.equals("lambda$prepareRelationsRDD$9accc865$1")) {
                    z = false;
                    break;
                }
                break;
            case -1801021893:
                if (implMethodName.equals("lambda$prepareRelationsRDD$b3719c60$1")) {
                    z = 13;
                    break;
                }
                break;
            case -1801021892:
                if (implMethodName.equals("lambda$prepareRelationsRDD$b3719c60$2")) {
                    z = 11;
                    break;
                }
                break;
            case -846813123:
                if (implMethodName.equals("lambda$prepareRelationsDataset$bd84ba60$1")) {
                    z = 12;
                    break;
                }
                break;
            case -834827218:
                if (implMethodName.equals("lambda$prepareRelationsDataset$960a9069$1")) {
                    z = 9;
                    break;
                }
                break;
            case -757493158:
                if (implMethodName.equals("lambda$pruneRels$19031da3$1")) {
                    z = true;
                    break;
                }
                break;
            case -529101081:
                if (implMethodName.equals("lambda$prepareRelationsRDD$98ea0aa0$1")) {
                    z = 3;
                    break;
                }
                break;
            case 2994:
                if (implMethodName.equals("_1")) {
                    z = 4;
                    break;
                }
                break;
            case 2995:
                if (implMethodName.equals("_2")) {
                    z = 7;
                    break;
                }
                break;
            case 95697702:
                if (implMethodName.equals("lambda$readPathRelationRDD$af2dd0b$1")) {
                    z = 6;
                    break;
                }
                break;
            case 799509265:
                if (implMethodName.equals("getSource")) {
                    z = 10;
                    break;
                }
                break;
            case 1182533742:
                if (implMethodName.equals("iterator")) {
                    z = 5;
                    break;
                }
                break;
            case 1745515705:
                if (implMethodName.equals("lambda$prepareRelationsDataset$cdfa491c$1")) {
                    z = 8;
                    break;
                }
                break;
            case 1969860202:
                if (implMethodName.equals("lambda$pruneRels$bd06fc83$1")) {
                    z = 2;
                    break;
                }
                break;
            case 1976882667:
                if (implMethodName.equals("lambda$prepareRelationsDataset$31cd3669$1")) {
                    z = 14;
                    break;
                }
                break;
        }
        // Step 2: validate the descriptor for the selected lambda and re-create it.
        switch (z) {
            case false:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/dhp/oa/provision/PrepareRelationsJob") && serializedLambda.getImplMethodSignature().equals("(Ljava/util/Set;Leu/dnetlib/dhp/schema/oaf/Relation;)Ljava/lang/Boolean;")) {
                    Set set = (Set) serializedLambda.getCapturedArg(0);
                    return relation2 -> {
                        return Boolean.valueOf(!set.contains(StringUtils.lowerCase(relation2.getRelClass())));
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/PairFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Lscala/Tuple2;") && serializedLambda.getImplClass().equals("eu/dnetlib/dhp/oa/provision/PrepareRelationsJob") && serializedLambda.getImplMethodSignature().equals("(Lorg/apache/spark/api/java/function/Function;Leu/dnetlib/dhp/schema/oaf/Relation;)Lscala/Tuple2;")) {
                    Function function = (Function) serializedLambda.getCapturedArg(0);
                    return relation -> {
                        return new Tuple2(SortableRelationKey.create(relation, (String) function.call(relation)), relation);
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/dhp/oa/provision/PrepareRelationsJob") && serializedLambda.getImplMethodSignature().equals("(ILjava/lang/Iterable;)Ljava/lang/Iterable;")) {
                    int intValue = ((Integer) serializedLambda.getCapturedArg(0)).intValue();
                    return iterable -> {
                        return Iterables.limit(iterable, intValue);
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/dhp/oa/provision/PrepareRelationsJob") && serializedLambda.getImplMethodSignature().equals("(Leu/dnetlib/dhp/schema/oaf/Relation;)Ljava/lang/Boolean;")) {
                    return relation3 -> {
                        return Boolean.valueOf(!relation3.getDataInfo().getDeletedbyinference().booleanValue());
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 5 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("scala/Tuple2") && serializedLambda.getImplMethodSignature().equals("()Ljava/lang/Object;")) {
                    return (v0) -> {
                        return v0._1();
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 9 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/FlatMapFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/util/Iterator;") && serializedLambda.getImplClass().equals("java/lang/Iterable") && serializedLambda.getImplMethodSignature().equals("()Ljava/util/Iterator;")) {
                    return (v0) -> {
                        return v0.iterator();
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/dhp/oa/provision/PrepareRelationsJob") && serializedLambda.getImplMethodSignature().equals("(Ljava/lang/String;)Leu/dnetlib/dhp/schema/oaf/Relation;")) {
                    return str2 -> {
                        return (Relation) OBJECT_MAPPER.readValue(str2, Relation.class);
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 5 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("scala/Tuple2") && serializedLambda.getImplMethodSignature().equals("()Ljava/lang/Object;")) {
                    return (v0) -> {
                        return v0._2();
                    };
                }
                if (serializedLambda.getImplMethodKind() == 5 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("scala/Tuple2") && serializedLambda.getImplMethodSignature().equals("()Ljava/lang/Object;")) {
                    return (v0) -> {
                        return v0._2();
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/FilterFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Z") && serializedLambda.getImplClass().equals("eu/dnetlib/dhp/oa/provision/PrepareRelationsJob") && serializedLambda.getImplMethodSignature().equals("(Leu/dnetlib/dhp/schema/oaf/Relation;)Z")) {
                    return relation4 -> {
                        return !relation4.getDataInfo().getDeletedbyinference().booleanValue();
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/FilterFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Z") && serializedLambda.getImplClass().equals("eu/dnetlib/dhp/oa/provision/PrepareRelationsJob") && serializedLambda.getImplMethodSignature().equals("(Ljava/util/Set;Leu/dnetlib/dhp/schema/oaf/Relation;)Z")) {
                    Set set2 = (Set) serializedLambda.getCapturedArg(0);
                    return relation22 -> {
                        return !set2.contains(relation22.getRelClass());
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 5 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/MapFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/dhp/schema/oaf/Relation") && serializedLambda.getImplMethodSignature().equals("()Ljava/lang/String;")) {
                    return (v0) -> {
                        return v0.getSource();
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/dhp/oa/provision/PrepareRelationsJob") && serializedLambda.getImplMethodSignature().equals("(Leu/dnetlib/dhp/schema/oaf/Relation;)Ljava/lang/String;")) {
                    return relation42 -> {
                        return relation42.getTarget();
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/MapFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/dhp/oa/provision/PrepareRelationsJob") && serializedLambda.getImplMethodSignature().equals("(Ljava/lang/String;)Leu/dnetlib/dhp/schema/oaf/Relation;")) {
                    return str3 -> {
                        return (Relation) OBJECT_MAPPER.readValue(str3, Relation.class);
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/dhp/oa/provision/PrepareRelationsJob") && serializedLambda.getImplMethodSignature().equals("(Leu/dnetlib/dhp/schema/oaf/Relation;)Ljava/lang/String;")) {
                    return relation32 -> {
                        return relation32.getSource();
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/FlatMapFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/util/Iterator;") && serializedLambda.getImplClass().equals("eu/dnetlib/dhp/oa/provision/PrepareRelationsJob") && serializedLambda.getImplMethodSignature().equals("(ILscala/Tuple2;)Ljava/util/Iterator;")) {
                    int intValue2 = ((Integer) serializedLambda.getCapturedArg(0)).intValue();
                    return tuple2 -> {
                        return Iterables.limit(((RelationList) tuple2._2()).getRelations(), intValue2).iterator();
                    };
                }
                break;
        }
        // No known lambda matched the descriptor.
        throw new IllegalArgumentException("Invalid lambda deserialization");
    }
}
