package eu.dnetlib.dhp.collection.orcid;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:eu/dnetlib/dhp/collection/orcid/ORCIDExtractor.class */
public class ORCIDExtractor extends Thread {
    private static final Logger log = LoggerFactory.getLogger(ORCIDExtractor.class);
    private final FileSystem fileSystem;
    private final String id;
    private final Path sourcePath;
    private final String baseOutputPath;

    public ORCIDExtractor(FileSystem fileSystem, String str, Path path, String str2) {
        this.fileSystem = fileSystem;
        this.id = str;
        this.sourcePath = path;
        this.baseOutputPath = str2;
    }

    private Map<String, SequenceFile.Writer> createMap() {
        try {
            log.info("Thread {} Creating sequence files starting from this input Path {}", this.id, this.sourcePath.getName());
            HashMap hashMap = new HashMap();
            if (this.sourcePath.getName().contains("summaries")) {
                String format = String.format("%s/summaries_%s", this.baseOutputPath, this.id);
                SequenceFile.Writer createWriter = SequenceFile.createWriter(this.fileSystem.getConf(), new SequenceFile.Writer.Option[]{SequenceFile.Writer.file(new Path(format)), SequenceFile.Writer.keyClass(Text.class), SequenceFile.Writer.valueClass(Text.class)});
                log.info("Thread {} Creating only summary path here {}", this.id, format);
                hashMap.put("summary", createWriter);
                return hashMap;
            }
            String format2 = String.format("%s/employments_%s", this.baseOutputPath, this.id);
            hashMap.put("employments", SequenceFile.createWriter(this.fileSystem.getConf(), new SequenceFile.Writer.Option[]{SequenceFile.Writer.file(new Path(format2)), SequenceFile.Writer.keyClass(Text.class), SequenceFile.Writer.valueClass(Text.class)}));
            log.info("Thread {} Creating employments path here {}", this.id, format2);
            String format3 = String.format("%s/works_%s", this.baseOutputPath, this.id);
            hashMap.put("works", SequenceFile.createWriter(this.fileSystem.getConf(), new SequenceFile.Writer.Option[]{SequenceFile.Writer.file(new Path(format3)), SequenceFile.Writer.keyClass(Text.class), SequenceFile.Writer.valueClass(Text.class)}));
            log.info("Thread {} Creating works path here {}", this.id, format3);
            return hashMap;
        } catch (Throwable th) {
            throw new RuntimeException(th);
        }
    }

    @Override // java.lang.Thread, java.lang.Runnable
    public void run() {
        CompressionCodec codec = new CompressionCodecFactory(this.fileSystem.getConf()).getCodec(this.sourcePath);
        if (codec == null) {
            System.err.println("No codec found for " + this.sourcePath.getName());
            System.exit(1);
        }
        InputStream inputStream = null;
        try {
            try {
                inputStream = codec.createInputStream(this.fileSystem.open(this.sourcePath));
                iterateTar(createMap(), inputStream);
                log.info("Closing gzip stream");
                IOUtils.closeStream(inputStream);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        } catch (Throwable th) {
            log.info("Closing gzip stream");
            IOUtils.closeStream(inputStream);
            throw th;
        }
    }

    private SequenceFile.Writer retrieveFile(Map<String, SequenceFile.Writer> map, String str) {
        if (this.sourcePath.getName().contains("summaries")) {
            return map.get("summary");
        }
        if (str.contains("works")) {
            return map.get("works");
        }
        if (str.contains("employments")) {
            return map.get("employments");
        }
        return null;
    }

    private void iterateTar(Map<String, SequenceFile.Writer> map, InputStream inputStream) throws IOException {
        int i = 0;
        try {
            TarArchiveInputStream tarArchiveInputStream = new TarArchiveInputStream(inputStream);
            Throwable th = null;
            while (true) {
                try {
                    try {
                        TarArchiveEntry nextTarEntry = tarArchiveInputStream.getNextTarEntry();
                        if (nextTarEntry == null) {
                            break;
                        }
                        if (nextTarEntry.isFile()) {
                            SequenceFile.Writer retrieveFile = retrieveFile(map, nextTarEntry.getName());
                            if (retrieveFile != null) {
                                retrieveFile.append(new Text(nextTarEntry.getName()), new Text(org.apache.commons.io.IOUtils.toString(new BufferedReader(new InputStreamReader(tarArchiveInputStream)))));
                                i++;
                                if (i % 100000 == 0) {
                                    log.info("Thread {}: Extracted {} items", this.id, Integer.valueOf(i));
                                }
                            }
                        }
                    } finally {
                    }
                } finally {
                }
            }
            if (tarArchiveInputStream != null) {
                if (0 != 0) {
                    try {
                        tarArchiveInputStream.close();
                    } catch (Throwable th2) {
                        th.addSuppressed(th2);
                    }
                } else {
                    tarArchiveInputStream.close();
                }
            }
        } finally {
            for (SequenceFile.Writer writer : map.values()) {
                log.info("Thread {}: Completed processed {} items", this.id, Integer.valueOf(i));
                writer.hflush();
                writer.close();
            }
        }
    }
}
