package eu.dnetlib.dhp.oa.dedup;

import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.common.EntityType;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import eu.dnetlib.pace.util.MapDocumentUtil;
import java.io.IOException;
import java.lang.invoke.SerializedLambda;
import java.net.URI;
import java.util.List;
import java.util.Map;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.Optional;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.dom4j.DocumentException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
import scala.Tuple2;

/* loaded from: input_file:eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity.class */
public class SparkUpdateEntity extends AbstractSparkAction {
    /** Logger shared by all instances of this job. */
    private static final Logger log = LoggerFactory.getLogger(SparkUpdateEntity.class);
    /** JSONPath expression used to pull the {@code id} field out of a serialized entity record. */
    private static final String IDJSONPATH = "$.id";

    /**
     * Creates the action; both arguments are handed unchanged to the superclass constructor.
     *
     * @param argumentApplicationParser parsed job arguments
     * @param sparkSession Spark session the job runs on
     */
    public SparkUpdateEntity(ArgumentApplicationParser argumentApplicationParser, SparkSession sparkSession) {
        super(argumentApplicationParser, sparkSession);
    }

    /**
     * Command-line entry point: loads the parameter definitions bundled on the classpath, parses the
     * arguments, configures Kryo serialization for the OAF model classes and runs the job.
     *
     * @param strArr command-line arguments, parsed against {@code updateEntity_parameters.json}
     * @throws IllegalStateException if the parameter definition resource is not on the classpath
     * @throws Exception if argument parsing, service lookup or the job itself fails
     */
    public static void main(String[] strArr) throws Exception {
        final String parametersResource = "/eu/dnetlib/dhp/oa/dedup/updateEntity_parameters.json";
        final String parametersJson;
        // Close the classpath stream deterministically and decode it as UTF-8 rather than
        // relying on the platform default charset.
        try (java.io.InputStream parametersStream = SparkUpdateEntity.class.getResourceAsStream(parametersResource)) {
            if (parametersStream == null) {
                throw new IllegalStateException("Missing classpath resource: " + parametersResource);
            }
            parametersJson = IOUtils.toString(parametersStream, "UTF-8");
        }

        final ArgumentApplicationParser argumentApplicationParser = new ArgumentApplicationParser(parametersJson);
        argumentApplicationParser.parseArgument(strArr);

        final SparkConf sparkConf = new SparkConf();
        sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
        sparkConf.registerKryoClasses(ModelSupport.getOafModelClasses());

        final ISLookUpService lookUpService = ISLookupClientFactory.getLookUpService(argumentApplicationParser.get("isLookUpUrl"));
        new SparkUpdateEntity(argumentApplicationParser, getSparkSession(sparkConf)).run(lookUpService);
    }

    /**
     * Rewrites every entity type of the graph into {@code dedupGraphPath}.
     *
     * <p>For each entry of {@code ModelSupport.entityTypes} the output directory
     * {@code dedupGraphPath/<type>} is removed first. If the entity input path exists, its records
     * are read as text lines. When merge relations exist for the type:
     * <ul>
     *   <li>records whose id is the target of a {@code merges} relation (with source != target)
     *       are re-serialized with {@code deletedbyinference} set;</li>
     *   <li>for organizations, records whose id is the source of a {@code merges} relation are
     *       dropped beforehand;</li>
     *   <li>the dedup records found under the working path are appended.</li>
     * </ul>
     * The result is saved as gzip-compressed text.
     *
     * @param iSLookUpService lookup service required by the superclass contract; not used here
     * @throws IOException if probing the file system for inputs or merge relations fails
     */
    @Override // eu.dnetlib.dhp.oa.dedup.AbstractSparkAction
    public void run(ISLookUpService iSLookUpService) throws IOException {
        final String graphBasePath = this.parser.get("graphBasePath");
        final String workingPath = this.parser.get("workingPath");
        final String dedupGraphPath = this.parser.get("dedupGraphPath");
        log.info("graphBasePath:  '{}'", graphBasePath);
        log.info("workingPath:    '{}'", workingPath);
        log.info("dedupGraphPath: '{}'", dedupGraphPath);

        final JavaSparkContext sc = JavaSparkContext.fromSparkContext(this.spark.sparkContext());

        for (Map.Entry<?, ?> entry : ModelSupport.entityTypes.entrySet()) {
            final EntityType entityType = (EntityType) entry.getKey();
            // The registry's value type is not visible from this file; the cast only narrows the
            // static type so the class can be passed to updateDeletedByInference.
            @SuppressWarnings("unchecked")
            final Class<? extends OafEntity> entityClass = (Class<? extends OafEntity>) entry.getValue();
            final String entityName = entityType.toString();

            final String outputPath = dedupGraphPath + "/" + entityType;
            removeOutputDir(this.spark, outputPath);

            final String entityPath = DedupUtility.createEntityPath(graphBasePath, entityName);
            if (!HdfsSupport.exists(entityPath, sc.hadoopConfiguration())) {
                // Nothing to copy for this entity type.
                continue;
            }

            JavaRDD<String> entities = sc.textFile(entityPath);

            if (mergeRelExists(workingPath, entityName)) {
                final String mergeRelPath = DedupUtility.createMergeRelPath(workingPath, "*", entityName);
                final String dedupRecordPath = DedupUtility.createDedupRecordPath(workingPath, "*", entityName);

                final Dataset<Relation> mergeRels = this.spark.read().load(mergeRelPath).as(Encoders.bean(Relation.class));

                // Ids that were merged into another record; the "d" value is only a join marker.
                final JavaPairRDD<String, String> mergedIds = mergeRels
                        .where("relClass == 'merges'")
                        .where("source != target")
                        .select(mergeRels.col("target"))
                        .distinct()
                        .toJavaRDD()
                        .mapToPair(row -> new Tuple2<>(row.getString(0), "d"));

                JavaPairRDD<String, String> entitiesById = entities
                        .mapToPair(json -> new Tuple2<>(MapDocumentUtil.getJPathString(IDJSONPATH, json), json));

                if (entityType == EntityType.organization) {
                    entitiesById = excludeRootOrgs(entitiesById, mergeRels);
                }

                final JavaRDD<String> flagged = entitiesById
                        .leftOuterJoin(mergedIds)
                        .map(joined -> {
                            final String json = joined._2()._1();
                            if (joined._2()._2().isPresent()) {
                                return updateDeletedByInference(json, entityClass);
                            }
                            return json;
                        });

                entities = flagged.union(sc.textFile(dedupRecordPath));
            }

            entities.saveAsTextFile(outputPath, GzipCodec.class);
        }
    }

    /**
     * Tells whether at least one sub-directory of the working path contains merge relations for
     * the given entity.
     *
     * @param str working path whose direct children are inspected
     * @param str2 entity name used to build each candidate merge-relation path
     * @return {@code true} as soon as one child directory holds an existing merge-relation path,
     *         {@code false} if none does
     * @throws IOException if the working path cannot be listed or a path cannot be probed
     */
    public boolean mergeRelExists(String str, String str2) throws IOException {
        // FileSystem.get hands out a shared, cached instance, so it is deliberately not closed here.
        final FileSystem fileSystem = FileSystem.get(URI.create(str), new Configuration());
        for (FileStatus child : fileSystem.listStatus(new Path(str))) {
            if (!child.isDirectory()) {
                continue;
            }
            final Path mergeRelPath = new Path(DedupUtility.createMergeRelPath(str, child.getPath().getName(), str2));
            if (fileSystem.exists(mergeRelPath)) {
                // One hit is enough; skip the remaining file-system probes.
                return true;
            }
        }
        return false;
    }

    /**
     * Parses a serialized entity, sets its {@code deletedbyinference} flag and serializes it back.
     * A {@link DataInfo} is created when the entity has none.
     *
     * @param str JSON representation of the entity
     * @param cls concrete entity class to deserialize into
     * @param <T> entity type
     * @return the JSON of the flagged entity
     * @throws RuntimeException wrapping the {@link IOException} raised while reading or writing JSON
     */
    private static <T extends OafEntity> String updateDeletedByInference(String str, Class<T> cls) {
        try {
            final Oaf entity = (Oaf) OBJECT_MAPPER.readValue(str, cls);
            final boolean missingDataInfo = entity.getDataInfo() == null;
            if (missingDataInfo) {
                entity.setDataInfo(new DataInfo());
            }
            entity.getDataInfo().setDeletedbyinference(true);
            final String flaggedJson = OBJECT_MAPPER.writeValueAsString(entity);
            return flaggedJson;
        } catch (IOException jsonFailure) {
            throw new RuntimeException("Unable to convert json", jsonFailure);
        }
    }

    /**
     * Drops every record whose key appears as the source of a {@code merges} relation.
     *
     * @param javaPairRDD records keyed by id
     * @param dataset relations from which the {@code merges} sources are taken
     * @return the (id, record) pairs whose id is not the source of any {@code merges} relation
     */
    private static JavaPairRDD<String, String> excludeRootOrgs(JavaPairRDD<String, String> javaPairRDD, Dataset<Relation> dataset) {
        // Distinct source ids of "merges" relations; the "root" value is only a join marker.
        final JavaPairRDD<String, String> rootIds = dataset
                .where("relClass == 'merges'")
                .select(dataset.col("source"))
                .distinct()
                .toJavaRDD()
                .mapToPair(row -> new Tuple2<>(row.getString(0), "root"));

        return javaPairRDD
                .leftOuterJoin(rootIds)
                // Keep only the records that found no match among the root ids.
                .filter(joined -> !joined._2()._2().isPresent())
                .mapToPair(joined -> new Tuple2<>(joined._1(), joined._2()._1()));
    }

    /**
     * Returns the configurations obtained from the superclass; this override adds no logic of
     * its own and simply forwards both arguments.
     *
     * <p>NOTE(review): the bridge/synthetic markers suggest a compiler-generated method surfaced by
     * decompilation - presumably redundant in hand-written source; confirm before removing.
     *
     * @param iSLookUpService lookup service forwarded to the superclass
     * @param str second argument forwarded unchanged to the superclass
     * @return whatever the superclass implementation returns
     */
    @Override // eu.dnetlib.dhp.oa.dedup.AbstractSparkAction
    public /* bridge */ /* synthetic */ List getConfigurations(ISLookUpService iSLookUpService, String str) throws ISLookUpException, DocumentException, IOException, SAXException {
        return super.getConfigurations(iSLookUpService, str);
    }

    /**
     * Maps a {@link SerializedLambda} back to one of the six lambdas declared in this class.
     *
     * <p>The first switch selects a branch from the implementation method name (hash code first,
     * then an {@code equals} check). The second switch verifies the recorded metadata - method kind
     * 6 ({@code REF_invokeStatic}), functional interface class, method name and signature,
     * implementing class and implementation signature - before returning the matching lambda.
     *
     * <p>NOTE(review): presumably this is the method javac generates for serializable lambdas,
     * surfaced here by decompilation. As written it is not valid Java: the selector {@code z} is
     * declared {@code boolean} yet assigned {@code -1} and {@code 2..5}, and the second switch
     * repeats {@code case true}. Confirm whether it should be dropped from the source entirely.
     *
     * @param serializedLambda serialized form of the lambda to restore
     * @return the restored lambda
     * @throws IllegalArgumentException if the serialized form matches none of the known lambdas
     */
    private static /* synthetic */ Object $deserializeLambda$(SerializedLambda serializedLambda) {
        String implMethodName = serializedLambda.getImplMethodName();
        boolean z = -1;
        // Step 1: translate the implementation method name into a branch selector.
        switch (implMethodName.hashCode()) {
            case -1906786653:
                if (implMethodName.equals("lambda$excludeRootOrgs$e95cfeed$1")) {
                    z = 4;
                    break;
                }
                break;
            case -1847979920:
                if (implMethodName.equals("lambda$run$acbee9b6$1")) {
                    z = 3;
                    break;
                }
                break;
            case -564736546:
                if (implMethodName.equals("lambda$run$e4cb5b$1")) {
                    z = true;
                    break;
                }
                break;
            case -167426712:
                if (implMethodName.equals("lambda$excludeRootOrgs$dcb83d21$1")) {
                    z = 5;
                    break;
                }
                break;
            case 947301508:
                if (implMethodName.equals("lambda$excludeRootOrgs$8170ac5b$1")) {
                    z = false;
                    break;
                }
                break;
            case 969887190:
                if (implMethodName.equals("lambda$run$54027191$1")) {
                    z = 2;
                    break;
                }
                break;
        }
        // Step 2: validate the recorded metadata for the selected branch and rebuild the lambda.
        switch (z) {
            case false:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity") && serializedLambda.getImplMethodSignature().equals("(Lscala/Tuple2;)Ljava/lang/Boolean;")) {
                    return tuple2 -> {
                        return Boolean.valueOf(!((Optional) ((Tuple2) tuple2._2())._2()).isPresent());
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/PairFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Lscala/Tuple2;") && serializedLambda.getImplClass().equals("eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity") && serializedLambda.getImplMethodSignature().equals("(Lorg/apache/spark/sql/Row;)Lscala/Tuple2;")) {
                    return row -> {
                        return new Tuple2(row.getString(0), "d");
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/PairFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Lscala/Tuple2;") && serializedLambda.getImplClass().equals("eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity") && serializedLambda.getImplMethodSignature().equals("(Ljava/lang/String;)Lscala/Tuple2;")) {
                    return str5 -> {
                        return new Tuple2(MapDocumentUtil.getJPathString(IDJSONPATH, str5), str5);
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity") && serializedLambda.getImplMethodSignature().equals("(Ljava/lang/Class;Lscala/Tuple2;)Ljava/lang/String;")) {
                    // This lambda captured the entity class, so it is restored from the captured arguments.
                    Class cls = (Class) serializedLambda.getCapturedArg(0);
                    return tuple22 -> {
                        return ((Optional) ((Tuple2) tuple22._2())._2()).isPresent() ? updateDeletedByInference((String) ((Tuple2) tuple22._2())._1(), cls) : (String) ((Tuple2) tuple22._2())._1();
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/PairFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Lscala/Tuple2;") && serializedLambda.getImplClass().equals("eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity") && serializedLambda.getImplMethodSignature().equals("(Lorg/apache/spark/sql/Row;)Lscala/Tuple2;")) {
                    return row2 -> {
                        return new Tuple2(row2.getString(0), "root");
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/PairFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Lscala/Tuple2;") && serializedLambda.getImplClass().equals("eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity") && serializedLambda.getImplMethodSignature().equals("(Lscala/Tuple2;)Lscala/Tuple2;")) {
                    return tuple222 -> {
                        return new Tuple2((String) tuple222._1(), (String) ((Tuple2) tuple222._2())._1());
                    };
                }
                break;
        }
        throw new IllegalArgumentException("Invalid lambda deserialization");
    }
}
