package eu.dnetlib.data.collector.plugins.httpfilename;

import com.lowagie.text.html.HtmlTags;
import com.lowagie.text.xml.xmp.PdfSchema;
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
import eu.dnetlib.data.mdstore.modular.mongodb.MDStoreDaoImpl;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.TimeUnit;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.json.JSONObject;
import org.json.XML;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;

/* loaded from: input_file:WEB-INF/lib/dnet-collector-plugins-1.6.3.jar:eu/dnetlib/data/collector/plugins/httpfilename/HTTPWithFileNameCollectorIterable.class */
/**
 * Iterable over metadata records harvested by crawling an HTTP directory listing.
 *
 * <p>Constructing an instance starts a background thread that walks the listing beginning at the
 * given URL, downloads every {@code .json}/{@code .xml} file it finds, decorates each record with
 * a {@code downloadFileUrl} entry and pushes the resulting string into a bounded queue. When
 * nothing is left to crawl the producer appends
 * {@code HttpWithFileNameCollectorIterator.TERMINATOR} to the queue.
 *
 * <p>Every iterator returned by {@link #iterator()} reads from the same queue, so records are
 * consumed once overall, not once per iterator.
 */
public class HTTPWithFileNameCollectorIterable implements Iterable<String> {
    private static final Log log = LogFactory.getLog(HTTPWithFileNameCollectorIterable.class);
    /** Placeholder record emitted when a JSON document cannot be converted to XML. */
    private static final String JUNK = "<resource><url>%s</url><DOI>JUNK</DOI></resource>";
    public static final String APP_JSON = "application/json";
    public static final String APP_XML = "application/xml";
    public static final String TEXT_HTML = "text/html";
    private static final String JSON_EXTENSION = ".json";
    private static final String XML_EXTENSION = ".xml";
    private static final String DOCTYPE = "<!DOCTYPE";

    /** Semicolon-separated substrings; a record containing any of them is dropped. May be null. */
    private final String filterParam;
    /** Hand-off buffer between the crawler thread and the iterators (capacity 100). */
    private final ArrayBlockingQueue<String> queue = new ArrayBlockingQueue<>(100);
    /** Number of non-empty metadata files downloaded by the crawler (filtered ones included). */
    int total = 0;
    /** Number of downloaded metadata files rejected by the filter. */
    int filtered = 0;

    /**
     * Producer task: crawls the listing and feeds the enclosing instance's queue.
     *
     * <p>Pending metadata files are always processed before pending folder URLs.
     */
    private class FillMetaQueue implements Runnable {
        final Connector c = new Connector();
        /** URLs of metadata files (.json/.xml) still to be downloaded. */
        private final List<String> metas = Collections.synchronizedList(new ArrayList<String>());
        /** URLs (folders or direct documents) still to be visited. */
        private final List<String> urls = Collections.synchronizedList(new ArrayList<String>());
        /** Set when an InterruptedException was caught, so the status can be restored on exit. */
        private boolean wasInterrupted;

        /**
         * @param str URL the crawl starts from; {@code null} or empty means there is nothing to
         *            crawl and only the terminator will be produced
         */
        public FillMetaQueue(String str) {
            if (str != null && !str.isEmpty()) {
                this.urls.add(str);
            }
        }

        /**
         * Drains the pending work lists, then signals completion with the terminator.
         *
         * <p>An interruption while enqueueing a record drops that record and the crawl goes on
         * (best effort); the thread's interrupt status is re-asserted when this method exits so
         * the request is not lost.
         *
         * @throws IllegalStateException if interrupted while enqueueing the terminator
         */
        public void fillQueue() {
            try {
                while (!this.metas.isEmpty() || !this.urls.isEmpty()) {
                    if (log.isDebugEnabled()) {
                        log.debug("metas.size() = " + this.metas.size() + " urls.size() = " + this.urls.size() + " queue.size() = " + queue.size());
                    }
                    if (!this.metas.isEmpty()) {
                        collectMetadata(this.metas.remove(0));
                    } else {
                        visitUrl(this.urls.remove(0));
                    }
                }
                try {
                    queue.put(HttpWithFileNameCollectorIterator.TERMINATOR);
                } catch (InterruptedException e) {
                    this.wasInterrupted = true;
                    // put() has no timeout, so report the interruption rather than a time limit.
                    throw new IllegalStateException("interrupted while adding the terminator to the queue", e);
                }
            } finally {
                if (this.wasInterrupted) {
                    Thread.currentThread().interrupt();
                }
            }
        }

        /**
         * Issues the request for {@code url}.
         *
         * @return {@code true} only if the request completed and the connector reports an OK
         *         status; a failed request returns {@code false} so that connector state left
         *         over from an earlier request is never attributed to this URL
         */
        private boolean fetch(String url) {
            try {
                this.c.get(url);
            } catch (CollectorServiceException e) {
                log.info("Impossible to collect url: " + url + " error: " + e.getMessage());
                return false;
            }
            return this.c.isStatusOk();
        }

        /** Downloads one metadata file, applies the filter and enqueues the decorated record. */
        private void collectMetadata(String url) {
            if (!fetch(url)) {
                return;
            }
            String response = this.c.getResponse();
            if (response == null || response.isEmpty()) {
                return;
            }
            if (containsFilter(response)) {
                filtered++;
            } else if (!enqueue(addFilePath(response, url, url.endsWith(JSON_EXTENSION)), url)) {
                // Record was not delivered: do not count it.
                return;
            }
            total++;
        }

        /**
         * Visits a pending URL: an HTML response is parsed as a folder listing, a JSON or XML
         * response is enqueued directly as a record. Other content types are ignored.
         */
        private void visitUrl(String url) {
            if (!fetch(url)) {
                return;
            }
            String response = this.c.getResponse();
            if (response == null) {
                // Nothing to parse; a null element would also be rejected by the queue.
                return;
            }
            if (this.c.responseTypeContains(TEXT_HTML)) {
                recurFolder(response, url);
                return;
            }
            boolean json = this.c.responseTypeContains(APP_JSON);
            if (json || this.c.responseTypeContains(APP_XML)) {
                enqueue(addFilePath(response, url, json), url);
            }
        }

        /**
         * Blocks until {@code record} fits into the queue.
         *
         * @return {@code false} if the thread was interrupted and the record was dropped
         */
        private boolean enqueue(String record, String url) {
            try {
                queue.put(record);
                return true;
            } catch (InterruptedException e) {
                this.wasInterrupted = true;
                log.info("not inserted in queue element associate to url " + url + " error: " + e.getMessage());
                return false;
            }
        }

        /**
         * @return {@code true} if {@code content} contains at least one of the non-empty,
         *         semicolon-separated tokens of the filter; {@code false} when no filter is set
         */
        private boolean containsFilter(String content) {
            if (filterParam == null || filterParam.isEmpty()) {
                return false;
            }
            for (String token : filterParam.split(";")) {
                // An empty token (e.g. from ";a" or "a;;b") would match every document.
                if (!token.isEmpty() && content.contains(token)) {
                    return true;
                }
            }
            return false;
        }

        /**
         * Adds a {@code downloadFileUrl} entry, derived from the record's own URL, to the record.
         *
         * <p>The file URL is the record URL with its {@code .json}/{@code .xml} suffix replaced by
         * {@code .pdf}, after substituting {@code MDStoreDaoImpl.METADATA_NAME} with
         * {@code PdfSchema.DEFAULT_XPATH_ID}.
         * NOTE(review): those two constants look like decompiler substitutions for plain string
         * literals (presumably "metadata" and "pdf") - confirm against the original sources.
         *
         * <p>If the record cannot be decorated it is passed on undecorated.
         *
         * @param meta   record content as downloaded
         * @param url    URL the record was downloaded from
         * @param isJson whether {@code meta} is JSON (it is then converted to XML) or XML
         * @return the record as an XML string, or the JUNK placeholder if JSON conversion fails
         */
        private String addFilePath(String meta, String url, boolean isJson) {
            String fileUrl = url.replace(MDStoreDaoImpl.METADATA_NAME, PdfSchema.DEFAULT_XPATH_ID);
            if (isJson) {
                int extension = fileUrl.indexOf(JSON_EXTENSION);
                // Splice before the closing brace; tolerates trailing whitespace after it.
                int closingBrace = meta.lastIndexOf('}');
                if (extension >= 0 && closingBrace >= 0) {
                    meta = meta.substring(0, closingBrace) + ",'downloadFileUrl':'" + fileUrl.substring(0, extension) + ".pdf'}";
                } else {
                    log.info("not file with extension .json or .xml");
                }
                return jsonToXml(meta, url);
            }
            if (meta.contains(DOCTYPE)) {
                // Drop everything up to and including the end of the DOCTYPE declaration.
                String fromDoctype = meta.substring(meta.indexOf(DOCTYPE));
                meta = fromDoctype.substring(fromDoctype.indexOf(">") + 1);
            }
            int extension = fileUrl.indexOf(XML_EXTENSION);
            int lastClosingTag = meta.lastIndexOf("</");
            if (extension >= 0 && lastClosingTag >= 0) {
                // Insert the new element just before the last closing tag (the root's).
                meta = meta.substring(0, lastClosingTag) + "<downloadFileUrl>" + fileUrl.substring(0, extension) + ".pdf</downloadFileUrl>" + meta.substring(lastClosingTag);
            } else {
                log.info("not file with extension .json or .xml");
            }
            return meta;
        }

        /** Wraps {@code json} in a {@code resource} object and renders it as XML. */
        private String jsonToXml(String json, String url) {
            try {
                return XML.toString(new JSONObject("{'resource':" + json + "}"));
            } catch (Exception e) {
                log.fatal("Impossible to transform json object to xml \n" + json + "\n " + e.getMessage() + "\n" + url);
                String junk = String.format(JUNK, url);
                log.warn("returning " + junk);
                return junk;
            }
        }

        /**
         * Schedules every link of a folder listing: {@code .json}/{@code .xml} links as metadata
         * files, everything else as URLs to visit. Links are resolved by plain concatenation
         * onto {@code baseUrl}.
         *
         * @param html    body of the listing page
         * @param baseUrl URL the listing was fetched from
         */
        private void recurFolder(String html, String baseUrl) {
            for (Element anchor : Jsoup.parse(html).select(HtmlTags.ANCHOR)) {
                if ("../".equals(anchor.text())) {
                    continue; // parent-folder link
                }
                String href = anchor.attr("href");
                if (href.isEmpty() || href.startsWith("#")) {
                    // Points back at this very page; following it would never terminate.
                    continue;
                }
                if (href.endsWith(JSON_EXTENSION) || href.endsWith(XML_EXTENSION)) {
                    this.metas.add(baseUrl + href);
                } else {
                    this.urls.add(baseUrl + href);
                }
            }
        }

        @Override // java.lang.Runnable
        public void run() {
            fillQueue();
        }
    }

    /**
     * Creates the iterable and immediately starts the background crawl.
     *
     * @param str  URL the crawl starts from; {@code null} or empty yields no records
     * @param str2 semicolon-separated substrings; downloaded metadata files containing any of
     *             them are skipped. {@code null} or empty disables filtering
     */
    public HTTPWithFileNameCollectorIterable(String str, String str2) {
        this.filterParam = str2;
        new Thread(new FillMetaQueue(str)).start();
    }

    /** Returns an iterator reading from the queue shared by all iterators of this instance. */
    @Override // java.lang.Iterable
    public Iterator<String> iterator() {
        return new HttpWithFileNameCollectorIterator(this.queue);
    }
}
