package eu.dnetlib.data.collector.plugins.archive.targz;

import eu.dnetlib.data.collector.ThreadSafeIterator;
import java.io.BufferedInputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.zip.GZIPInputStream;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/* loaded from: input_file:WEB-INF/lib/dnet-data-services-2.0.1-SAXONHE-SOLR772-20240527.145923-20.jar:eu/dnetlib/data/collector/plugins/archive/targz/TarGzIterator.class */
/**
 * Iterates over the entries of a gzip-compressed TAR file and yields the content of each
 * accepted entry as a {@link String}.
 *
 * <p>An entry is accepted when {@link TarArchiveEntry#isFile()} is true and either the extension
 * set is null/empty or it contains the text following the last {@code '.'} of the entry name.
 *
 * <p>The iterator reads one entry ahead: the next accepted entry is loaded fully into memory
 * before it is handed out. Opening and reading problems are logged, not thrown; after such a
 * problem the iterator simply reports that it has no more elements. The underlying stream is
 * closed as soon as the archive is exhausted or a read error occurs.
 */
public class TarGzIterator extends ThreadSafeIterator {
    private static final Log log = LogFactory.getLog(TarGzIterator.class);

    /** Largest entry size that can safely be held in a single byte array. */
    private static final int MAX_ENTRY_SIZE = Integer.MAX_VALUE - 8;

    /** Open archive stream, or {@code null} when opening failed or the stream was already closed. */
    private TarArchiveInputStream tarInputStream;

    /** Content of the look-ahead entry, or {@code null} when there is nothing left to return. */
    private String current;

    /** Accepted file extensions (without the dot); null or empty means "accept every file". */
    private final Set<String> extensionSet;

    /**
     * Opens the tar.gz file at the given path and positions the iterator on the first accepted entry.
     *
     * @param str path of the tar.gz file
     * @param set accepted file extensions (without the dot); null or empty accepts every file entry
     */
    public TarGzIterator(String str, Set<String> set) {
        this(new File(str), str, set);
    }

    /**
     * Opens the given tar.gz file and positions the iterator on the first accepted entry.
     *
     * @param file the tar.gz file
     * @param set accepted file extensions (without the dot); null or empty accepts every file entry
     */
    public TarGzIterator(File file, Set<String> set) {
        this(file, file.getAbsolutePath(), set);
    }

    /** Shared constructor logic; {@code label} is the file description used in log messages. */
    private TarGzIterator(File file, String label, Set<String> set) {
        this.extensionSet = set;
        this.tarInputStream = open(file, label);
        this.current = findNext();
    }

    /**
     * Opens the archive, logging (not throwing) any failure.
     *
     * @return the open stream, or {@code null} if the file is missing or cannot be read as gzip
     */
    private static TarArchiveInputStream open(File file, String label) {
        FileInputStream raw;
        try {
            raw = new FileInputStream(file);
        } catch (FileNotFoundException e) {
            log.error("Tar.gz file not found: " + label, e);
            return null;
        }
        try {
            return new TarArchiveInputStream(new BufferedInputStream(new GZIPInputStream(raw)));
        } catch (IOException e) {
            log.error("Problem opening tar.gz file " + label, e);
            // The GZIP header check failed: release the file handle that was already opened.
            closeQuietly(raw);
            return null;
        }
    }

    @Override // eu.dnetlib.data.collector.ThreadSafeIterator
    public boolean doHasNext() {
        return this.current != null;
    }

    /**
     * Returns the content of the look-ahead entry and loads the following one.
     *
     * @throws NoSuchElementException if there is no further entry
     */
    @Override // eu.dnetlib.data.collector.ThreadSafeIterator
    public String doNext() {
        if (this.current == null) {
            throw new NoSuchElementException("No more entries in tar.gz archive");
        }
        String result = this.current;
        this.current = findNext();
        return result;
    }

    /**
     * Advances to the next accepted entry and returns its content.
     *
     * @return the entry content, or {@code null} when the archive is exhausted, was never opened,
     *         or a read error occurred (the error is logged and the stream is closed)
     */
    private synchronized String findNext() {
        if (this.tarInputStream == null) {
            return null;
        }
        TarArchiveEntry entry;
        try {
            entry = this.tarInputStream.getNextTarEntry();
            while (entry != null && !accept(entry)) {
                log.debug("Skipping TAR entry " + entry.getName());
                entry = this.tarInputStream.getNextTarEntry();
            }
        } catch (IOException e) {
            // Stop instead of retrying: a broken stream would otherwise fail forever in a loop.
            log.error("Error during tar.gz extraction", e);
            closeStream();
            return null;
        }
        if (entry == null) {
            closeStream();
            return null;
        }
        log.debug("Extracting " + entry.getName());
        try {
            return readEntry(entry);
        } catch (IOException e) {
            log.error("Impossible to extract file " + entry.getName(), e);
            closeStream();
            return null;
        }
    }

    /**
     * Reads the whole content of the current entry.
     *
     * @throws IOException if the entry is too large for one array, ends prematurely, or cannot be read
     */
    private String readEntry(TarArchiveEntry entry) throws IOException {
        long size = entry.getSize();
        if (size < 0 || size > MAX_ENTRY_SIZE) {
            throw new IOException("Unsupported TAR entry size: " + size + " bytes");
        }
        byte[] content = new byte[(int) size];
        int filled = 0;
        // A single read() may return fewer bytes than requested, so keep reading until full.
        while (filled < content.length) {
            int count = this.tarInputStream.read(content, filled, content.length - filled);
            if (count < 0) {
                throw new IOException("Unexpected end of TAR entry after " + filled + " of " + size + " bytes");
            }
            filled += count;
        }
        // NOTE(review): decoding uses the platform default charset, as before; if the archived
        // files are known to be UTF-8 this should be made explicit - confirm with data owners.
        return new String(content);
    }

    /** Tells whether the entry is a file whose extension is accepted by the configured filter. */
    private boolean accept(TarArchiveEntry tarArchiveEntry) {
        if (!tarArchiveEntry.isFile()) {
            return false;
        }
        if (this.extensionSet == null || this.extensionSet.isEmpty()) {
            return true;
        }
        return this.extensionSet.contains(StringUtils.substringAfterLast(tarArchiveEntry.getName(), "."));
    }

    /** Closes the archive stream (if still open) and forgets it so it is not used again. */
    private void closeStream() {
        closeQuietly(this.tarInputStream);
        this.tarInputStream = null;
    }

    /** Closes the resource, logging instead of propagating any failure; null is ignored. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            log.warn("Error closing tar.gz stream", e);
        }
    }
}
