package eu.dnetlib;

import com.google.common.collect.Sets;
import eu.dnetlib.pace.clustering.BlacklistAwareClusteringCombiner;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.model.MapDocument;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.util.LongAccumulator;

/* loaded from: input_file:eu/dnetlib/Utility.class */
/**
 * Static helpers shared by the deduplication Spark jobs: accumulator setup, reading
 * text and configuration from a Hadoop file system or the classpath, and computing
 * the grouping keys of a document.
 */
public class Utility {

    /** Pattern used to build accumulator names: {@code <entityType>::<label>}. */
    private static final String ACCUMULATOR_NAME_FORMAT = "%s::%s";

    /**
     * Creates the named long accumulators used to count dedup events, keyed by their name.
     *
     * <p>Every name has the form {@code <entityType>::<label>}, where the entity type, order
     * field, group max size and threshold are read from {@code dedupConfig.getWf()}.
     *
     * @param dedupConfig  the dedup configuration providing the workflow settings
     * @param sparkContext the Spark context on which the accumulators are created
     * @return a mutable map from accumulator name to the accumulator created under that name
     */
    public static Map<String, LongAccumulator> constructAccumulator(DedupConfig dedupConfig, SparkContext sparkContext) {
        Map<String, LongAccumulator> accumulators = new HashMap<>();
        registerAccumulator(accumulators, sparkContext, dedupConfig, "records per hash key = 1");
        registerAccumulator(accumulators, sparkContext, dedupConfig, "missing " + dedupConfig.getWf().getOrderField());
        registerAccumulator(accumulators, sparkContext, dedupConfig,
                String.format("Skipped records for count(%s) >= %s", dedupConfig.getWf().getOrderField(), dedupConfig.getWf().getGroupMaxSize()));
        registerAccumulator(accumulators, sparkContext, dedupConfig, "skip list");
        registerAccumulator(accumulators, sparkContext, dedupConfig, "dedupSimilarity (x2)");
        registerAccumulator(accumulators, sparkContext, dedupConfig, "d < " + dedupConfig.getWf().getThreshold());
        return accumulators;
    }

    /** Builds the {@code <entityType>::<label>} name, creates the accumulator and stores it under that name. */
    private static void registerAccumulator(Map<String, LongAccumulator> accumulators, SparkContext sparkContext, DedupConfig dedupConfig, String label) {
        String name = String.format(ACCUMULATOR_NAME_FORMAT, dedupConfig.getWf().getEntityType(), label);
        accumulators.put(name, sparkContext.longAccumulator(name));
    }

    /**
     * Reads the text file(s) at the given path as an RDD of lines.
     *
     * @param str              the path handed to {@link JavaSparkContext#textFile(String)}
     * @param javaSparkContext the Spark context used to read the data
     * @return the RDD returned by {@code textFile}
     */
    public static JavaRDD<String> loadDataFromHDFS(String str, JavaSparkContext javaSparkContext) {
        return javaSparkContext.textFile(str);
    }

    /**
     * Recursively deletes the given path if it exists on the default-configured file system.
     *
     * @param str the path to delete
     * @throws IOException if the file system cannot be obtained or the check/delete fails
     */
    public static void deleteIfExists(String str) throws IOException {
        // The FileSystem handle is deliberately not closed here: FileSystem.get may hand out
        // a shared instance, and the original code did not close it either.
        FileSystem fileSystem = FileSystem.get(new Configuration());
        Path target = new Path(str);
        if (fileSystem.exists(target)) {
            fileSystem.delete(target, true);
        }
    }

    /**
     * Loads a {@link DedupConfig} from a file on the default-configured file system.
     * The file content is decoded as UTF-8 and passed to {@link DedupConfig#load}.
     *
     * @param str the path of the configuration file
     * @return the parsed configuration
     * @throws IOException if the file cannot be opened or read
     */
    public static DedupConfig loadConfigFromHDFS(String str) throws IOException {
        FileSystem fileSystem = FileSystem.get(new Configuration());
        // try-with-resources so the opened stream is released even when reading or parsing fails.
        try (FSDataInputStream in = fileSystem.open(new Path(str))) {
            return DedupConfig.load(IOUtils.toString(in, StandardCharsets.UTF_8.name()));
        }
    }

    /**
     * Reads a classpath resource, resolved relative to {@code cls}, into a string.
     * The content is decoded as UTF-8, matching {@link #loadConfigFromHDFS(String)}.
     *
     * <p>NOTE(review): decompiler output indicates this method was originally package-private.
     *
     * @param str the resource name passed to {@link Class#getResourceAsStream(String)}
     * @param cls the class used to locate the resource
     * @return the full content of the resource
     * @throws RuntimeException if the resource does not exist or cannot be read
     */
    public static <T> String readFromClasspath(String str, Class<T> cls) {
        try (InputStream in = cls.getResourceAsStream(str)) {
            if (in == null) {
                // getResourceAsStream signals "not found" with null; fail with a clear message
                // instead of a NullPointerException from the copy routine.
                throw new RuntimeException("cannot load resource from classpath: " + str);
            }
            return IOUtils.toString(in, StandardCharsets.UTF_8.name());
        } catch (IOException e) {
            // Keep the underlying cause so the actual I/O failure is visible in the stack trace.
            throw new RuntimeException("cannot load resource from classpath: " + str, e);
        }
    }

    /**
     * Returns the grouping keys of a document as a set, i.e. the result of
     * {@link BlacklistAwareClusteringCombiner#filterAndCombine} with duplicates removed.
     *
     * <p>NOTE(review): decompiler output indicates this method was originally package-private.
     *
     * @param dedupConfig the dedup configuration passed to the combiner
     * @param mapDocument the document whose keys are computed
     * @return a new mutable set holding the keys
     */
    public static Set<String> getGroupingKeys(DedupConfig dedupConfig, MapDocument mapDocument) {
        return Sets.newHashSet(BlacklistAwareClusteringCombiner.filterAndCombine(mapDocument, dedupConfig));
    }
}
