package eu.dnetlib.data.collector.plugins.parthenos.ehri;

import com.ximpleware.AutoPilot;
import com.ximpleware.VTDGen;
import com.ximpleware.VTDNav;
import com.ximpleware.XMLModifier;
import eu.dnetlib.data.collector.ThreadSafeIterator;
import eu.dnetlib.rmi.data.CollectorServiceRuntimeException;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/* loaded from: input_file:eu/dnetlib/data/collector/plugins/parthenos/ehri/EHRIIterator.class */
/**
 * Iterator that downloads one document per identifier and returns it as a string.
 *
 * <p>For every identifier supplied by the wrapped iterator a URL of the form
 * {@code baseUrl + "/" + identifier + "/" + suffix} is fetched as UTF-8 text and the
 * {@code xmlns} attribute of the root {@code ead} element is removed before the
 * document is returned.
 *
 * <p>Download failures are logged and skipped; once more than {@link #MAX_FAILED}
 * downloads have failed over the lifetime of this iterator a
 * {@link CollectorServiceRuntimeException} is thrown.
 */
public class EHRIIterator extends ThreadSafeIterator {
    private static final Log log = LogFactory.getLog(EHRIIterator.class);

    /** Number of failed downloads tolerated before the iteration is aborted. */
    private static final int MAX_FAILED = 100;

    private Iterator<String> identifiers;
    private String baseUrl;
    private String suffix;

    /** Total number of downloads that failed so far (not reset after a success). */
    private int failed = 0;

    /**
     * Creates the iterator.
     *
     * @param it   source of the identifiers to download
     * @param str  base URL; the identifier is appended after a {@code "/"}
     * @param str2 suffix appended after the identifier and a further {@code "/"}
     */
    public EHRIIterator(Iterator<String> it, String str, String str2) {
        this.identifiers = it;
        this.baseUrl = str;
        this.suffix = str2;
    }

    /**
     * Reports whether the wrapped identifier iterator has more elements.
     *
     * @return {@code true} if another identifier is available
     */
    public boolean doHasNext() {
        return this.identifiers.hasNext();
    }

    /**
     * Downloads the document of the next identifier and strips the default namespace
     * declaration from its {@code ead} root element.
     *
     * <p>If the download fails with an {@link IOException} the failure is logged and
     * counted, and the next identifier is tried instead. When no identifier is left
     * after a failure, the empty string is returned.
     *
     * @return the downloaded document without the {@code /ead/@xmlns} attribute, or
     *         {@code ""} if the last remaining download failed
     * @throws CollectorServiceRuntimeException if more than {@link #MAX_FAILED}
     *         downloads failed, or if the namespace removal fails
     */
    public String doNext() {
        String url = this.baseUrl + "/" + this.identifiers.next() + "/" + this.suffix;
        log.debug("Getting " + url);
        try {
            return removeDefaultEADNamespace(IOUtils.toString(new URL(url), "UTF-8"));
        } catch (IOException e) {
            log.error("Unable to get " + url, e);
            this.failed++;
            if (this.failed > MAX_FAILED) {
                throw new CollectorServiceRuntimeException(
                        "Could not download more than " + MAX_FAILED + " documents from EHRI. Stopping.");
            }
            // Skip the failed document: go through hasNext()/next() (not declared in this
            // class, presumably inherited from ThreadSafeIterator) so the next identifier
            // is tried. The recursion depth is bounded by MAX_FAILED.
            return hasNext() ? next() : "";
        }
    }

    /**
     * Removes the {@code xmlns} attribute from the root {@code ead} element.
     *
     * <p>The document is parsed without namespace awareness so that {@code xmlns} can be
     * selected as an ordinary attribute via the XPath {@code /ead/@xmlns}.
     *
     * @param str the XML document as a string
     * @return the document with the {@code /ead/@xmlns} attribute removed
     * @throws CollectorServiceRuntimeException if the document cannot be parsed or modified
     */
    protected String removeDefaultEADNamespace(String str) {
        try {
            VTDGen generator = new VTDGen();
            // Encode explicitly as UTF-8: the document was decoded as UTF-8 when it was
            // downloaded, and relying on the platform default charset would corrupt
            // non-ASCII content on JVMs whose default is not UTF-8.
            generator.setDoc(str.getBytes(StandardCharsets.UTF_8));
            generator.parse(false);
            VTDNav nav = generator.getNav();
            AutoPilot autoPilot = new AutoPilot(nav);
            XMLModifier modifier = new XMLModifier(nav);
            autoPilot.selectXPath("/ead/@xmlns");
            while (autoPilot.evalXPath() != -1) {
                modifier.remove();
            }
            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
            modifier.output(buffer);
            // Decode with the same charset that was used to encode the input above.
            return new String(buffer.toByteArray(), StandardCharsets.UTF_8);
        } catch (Exception e) {
            log.error("Cannot remove default namespace from ead element: " + str, e);
            throw new CollectorServiceRuntimeException("Cannot remove default namespace from ead element", e);
        }
    }

    /** @return the iterator supplying the identifiers to download */
    public Iterator<String> getIdentifiers() {
        return this.identifiers;
    }

    /** @param it the iterator supplying the identifiers to download */
    public void setIdentifiers(Iterator<String> it) {
        this.identifiers = it;
    }

    /** @return the base URL that precedes the identifier */
    public String getBaseUrl() {
        return this.baseUrl;
    }

    /** @param str the base URL that precedes the identifier */
    public void setBaseUrl(String str) {
        this.baseUrl = str;
    }

    /** @return the suffix that follows the identifier */
    public String getSuffix() {
        return this.suffix;
    }

    /** @param str the suffix that follows the identifier */
    public void setSuffix(String str) {
        this.suffix = str;
    }
}
