package eu.dnetlib.jobs;

import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.support.ArgumentApplicationParser;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

/* loaded from: input_file:eu/dnetlib/jobs/AbstractSparkJob.class */
/**
 * Base class for the Spark jobs in this package.
 *
 * <p>Holds the parsed command-line arguments and the {@link SparkSession} a job works with, and
 * provides static helpers for creating a session, writing a {@link Dataset} as gzip-compressed
 * JSON, and reading text from the Hadoop file system or from the classpath.
 */
public abstract class AbstractSparkJob implements Serializable {
    /** Partition count constant made available to subclasses; not referenced in this class. */
    protected static final int NUM_PARTITIONS = 1000;

    /** Shared JSON mapper configured to ignore properties that have no matching field. */
    protected static final ObjectMapper OBJECT_MAPPER = new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);

    /** Parsed job arguments; {@code null} when the no-arg constructor was used. */
    public ArgumentApplicationParser parser;

    /** Spark session used by the job; {@code null} when the no-arg constructor was used. */
    public SparkSession spark;

    /** Creates a job with no parser and no session assigned. */
    public AbstractSparkJob() {
    }

    /**
     * Creates a job bound to the given arguments and Spark session.
     *
     * @param argumentApplicationParser parsed job arguments
     * @param sparkSession session the job runs against
     */
    public AbstractSparkJob(ArgumentApplicationParser argumentApplicationParser, SparkSession sparkSession) {
        this.parser = argumentApplicationParser;
        this.spark = sparkSession;
    }

    /**
     * Executes the job.
     *
     * @throws IOException if the job fails on an I/O operation
     */
    abstract void run() throws IOException;

    /**
     * Returns the active Spark session for the given configuration, creating one if none exists.
     *
     * @param sparkConf configuration applied to the session builder
     * @return the existing or newly created session
     */
    public static SparkSession getSparkSession(SparkConf sparkConf) {
        return SparkSession.builder().config(sparkConf).getOrCreate();
    }

    /**
     * Writes a dataset as gzip-compressed JSON.
     *
     * @param dataset data to write
     * @param str output path
     * @param saveMode how to treat data already present at the output path
     * @param <T> element type of the dataset
     */
    protected static <T> void save(Dataset<T> dataset, String str, SaveMode saveMode) {
        dataset.write().option("compression", "gzip").mode(saveMode).json(str);
    }

    /**
     * Loads a {@link DedupConfig} from the text of the file at the given path.
     *
     * @param str path of the configuration file, read through {@link #readFileFromHDFS(String)}
     * @return the configuration produced by {@link DedupConfig#load}
     * @throws IOException if the file cannot be read
     */
    public static DedupConfig loadDedupConfig(String str) throws IOException {
        return DedupConfig.load(readFileFromHDFS(str));
    }

    /**
     * Reads a UTF-8 text file through the Hadoop {@link FileSystem} obtained from a default
     * {@link Configuration}.
     *
     * <p>The lines of the file are concatenated with no separator, so line terminators are not
     * part of the returned string.
     *
     * @param str path of the file to read
     * @return the file's lines joined together without line breaks
     * @throws IOException if the file cannot be opened or closed
     */
    public static String readFileFromHDFS(String str) throws IOException {
        // The FileSystem handle itself is deliberately left open; only the stream opened on it is closed.
        FileSystem fileSystem = FileSystem.get(new Configuration());
        // try-with-resources closes the reader on every path and keeps the primary exception
        // if close() fails as well (the close failure is attached as suppressed).
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(fileSystem.open(new Path(str)), StandardCharsets.UTF_8))) {
            return reader.lines().collect(Collectors.joining());
        }
    }

    /**
     * Reads a classpath resource as UTF-8 text.
     *
     * @param str resource name, resolved by {@link Class#getResourceAsStream(String)} on {@code cls}
     * @param cls class whose class loader and package are used to locate the resource
     * @return the full content of the resource
     * @throws IOException if the resource does not exist or cannot be read
     */
    public static String readResource(String str, Class<? extends AbstractSparkJob> cls) throws IOException {
        // A null resource is permitted in try-with-resources: nothing is closed in that case.
        try (InputStream resourceStream = cls.getResourceAsStream(str)) {
            if (resourceStream == null) {
                throw new IOException("Resource not found: " + str + " (resolved against " + cls.getName() + ")");
            }
            // Explicit charset keeps decoding independent of the JVM default and consistent
            // with readFileFromHDFS.
            return IOUtils.toString(resourceStream, StandardCharsets.UTF_8);
        }
    }
}
