package org.gcube.resource.discovery.api;

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.concurrent.LinkedBlockingQueue;
import net.matuschek.http.DownloadRuleSet;
import net.matuschek.http.HttpException;
import net.matuschek.spider.WebRobot;
import org.gcube.resource.discovery.crawler.Crawler;
import org.gcube.resource.discovery.urlFilter.UrlFilter;
import org.gcube.resource.discovery.util.StringUtils;

/* loaded from: input_file:org/gcube/resource/discovery/api/ExecutorThread.class */
/**
 * Worker thread that tries to resolve one input document URL to a resource whose
 * MIME type is one of the requested types.
 *
 * <p>The input URL is HTML-unescaped and its redirections are resolved. If the
 * resulting URL already has a requested MIME type it is the result; otherwise the
 * links reported by the crawler for that URL are inspected in order and the first
 * one with a requested MIME type is the result. Exactly one {@link DocumentInfos}
 * is appended to the shared result queue, carrying either the resource URL or the
 * marker {@code "not_found"} as its output URI. Nothing is appended when the
 * document's own host is blacklisted or when processing fails with an exception.
 *
 * <p>{@link #setBlacklistedHosts(Collection)} is not synchronized; call it before
 * {@link #start()}.
 */
public class ExecutorThread extends Thread {
    /** Output URI stored in the result when no matching resource was located. */
    private static final String NOT_FOUND = "not_found";

    /*
     * Numeric arguments passed to every DownloadRuleSet.addRule call.
     * NOTE(review): presumably the minimum and maximum download size in bytes
     * (1 byte .. 100 MiB) -- confirm against the DownloadRuleSet API.
     */
    private static final int RULE_LOWER_BOUND = 1;
    private static final int RULE_UPPER_BOUND = 104857600;

    /** Connect timeout, in milliseconds, used by {@link #isURL(String)}. */
    private static final int CONNECT_TIMEOUT_MILLIS = 120000;

    private String documentURL;
    int maxDepth;
    DocumentInfos documentInfos;
    private LinkedBlockingQueue<DocumentInfos> pdfURIs;
    private List<String> mimeTypes;
    private boolean ignoreRobotsTxt = false;

    /*
     * Blacklisted hosts, stored as lower-cased "protocol://host" strings.
     * Strings are used instead of java.net.URL because URL.equals/hashCode
     * resolve host names: that blocks on DNS and makes distinct hosts that
     * share an IP address compare as equal.
     */
    private Set<String> blacklistedHosts = new HashSet<String>();

    /**
     * Creates a worker for a single document.
     *
     * @param documentInfos       source of the input URI and the reference URI
     * @param i                   maximum crawl depth handed to the crawler
     * @param list                MIME types (e.g. {@code "application/pdf"}) that count as a hit
     * @param linkedBlockingQueue shared queue that receives the result
     */
    public ExecutorThread(DocumentInfos documentInfos, int i, List<String> list, LinkedBlockingQueue<DocumentInfos> linkedBlockingQueue) {
        this.documentURL = documentInfos.getInputURI();
        this.documentInfos = documentInfos;
        this.pdfURIs = linkedBlockingQueue;
        this.maxDepth = i;
        this.mimeTypes = list;
    }

    /**
     * Resolves the document and publishes the outcome to the result queue.
     * Any exception is printed and ends the run without publishing a result.
     */
    @Override // java.lang.Thread, java.lang.Runnable
    public void run() {
        System.out.println("I am a thread! " + this.documentURL);
        // Captured up front: documentURL is rewritten below, but the published
        // result must carry the URI exactly as it was handed in.
        final String inputURI = this.documentURL;
        final String referenceURI = this.documentInfos.getReferenceURI();
        try {
            Crawler crawler = setupCrawler();
            this.documentURL = StringUtils.unescapeHTML(this.documentURL);
            URL url = new URL(this.documentURL);
            if (isBlacklisted(url)) {
                System.out.println("Host " + url.getHost() + " is blacklisted. Ignoring");
                // NOTE(review): no result is queued on this path; confirm that
                // consumers of the queue do not wait for one entry per thread.
                return;
            }
            System.out.println("Thread resolving redirections: " + this.documentURL);
            this.documentURL = UrlFilter.resolveRedirections(this.documentURL);
            System.out.println("success " + this.documentURL);

            String resource;
            if (this.mimeTypes.contains(UrlFilter.getMimeType(this.documentURL))) {
                // The document itself already has a requested type.
                resource = this.documentURL;
            } else {
                resource = findLinkedResource(crawler);
            }
            System.out.println("Finished with resources");
            publish(inputURI, referenceURI, resource != null ? resource : NOT_FOUND);
        } catch (Exception e) {
            // Thread boundary: report and finish. The interrupt flag is left
            // untouched on purpose -- setting it here would only poison the
            // calling thread when run() is invoked directly or on a pool worker.
            e.printStackTrace();
        }
    }

    /**
     * Walks the links the crawler reports for the current document URL and
     * returns the first one whose MIME type is requested.
     *
     * @return the matching link, or {@code null} if none was found
     */
    private String findLinkedResource(Crawler crawler) throws Exception {
        Iterator<String> links = crawler.getLinks(this.documentURL).iterator();
        while (links.hasNext()) {
            String link = links.next();
            System.out.println("Looking for resources in " + link);
            URL linkUrl = new URL(link);
            if (isBlacklisted(linkUrl)) {
                System.out.println("Host " + linkUrl.getHost() + " is blacklisted. Ignoring");
                // NOTE(review): this stops the whole scan, not just this link;
                // kept as in the original -- confirm whether skipping was intended.
                return null;
            }
            if (this.mimeTypes.contains(UrlFilter.getMimeType(link))) {
                System.out.println("Resource found in " + link);
                return link;
            }
            System.out.println("Adding not found resource");
        }
        return null;
    }

    /** Builds the result record and appends it to the shared queue. */
    private void publish(String inputURI, String referenceURI, String outputURI) {
        DocumentInfos result = new DocumentInfos();
        result.setInputURI(inputURI);
        result.setOutputURI(outputURI);
        result.setReferenceURI(referenceURI);
        // The queue is thread-safe by itself; the monitor is kept so that the
        // add and the size report stay atomic with respect to any other code
        // that locks on the queue.
        synchronized (this.pdfURIs) {
            this.pdfURIs.add(result);
            System.out.println("The size is: " + this.pdfURIs.size());
        }
    }

    /**
     * Replaces the set of blacklisted hosts. Only the protocol and host of each
     * URL are retained; entries that are not valid URLs are reported and skipped.
     *
     * @param collection URLs whose hosts must not be contacted
     */
    public void setBlacklistedHosts(Collection<String> collection) {
        this.blacklistedHosts.clear();
        for (String str : collection) {
            try {
                this.blacklistedHosts.add(hostKey(new URL(str)));
            } catch (MalformedURLException e) {
                System.out.println("Error using url " + str);
                e.printStackTrace();
            }
        }
    }

    /** Returns whether the protocol/host pair of {@code url} is blacklisted. */
    private boolean isBlacklisted(URL url) {
        return this.blacklistedHosts.contains(hostKey(url));
    }

    /** Reduces a URL to its lower-cased {@code protocol://host} form (port, path and query dropped). */
    private static String hostKey(URL url) {
        String host = url.getHost();
        String normalizedHost = host == null ? "" : host.toLowerCase(java.util.Locale.ROOT);
        return url.getProtocol() + "://" + normalizedHost;
    }

    /**
     * Creates a crawler configured with this thread's depth limit and robots.txt
     * policy. Download rules are added for HTML, XHTML and every requested MIME
     * type with a {@code true} flag, followed by a catch-all rule with a
     * {@code false} flag.
     */
    private Crawler setupCrawler() throws IOException, HttpException {
        Crawler crawler = new Crawler();
        WebRobot robot = crawler.getCrawler();
        robot.setMaxDepth(this.maxDepth);
        robot.setIgnoreRobotsTxt(this.ignoreRobotsTxt);
        DownloadRuleSet rules = new DownloadRuleSet();
        rules.addRule("text", "html", RULE_LOWER_BOUND, RULE_UPPER_BOUND, true);
        rules.addRule("application", "xhtml+xml", RULE_LOWER_BOUND, RULE_UPPER_BOUND, true);
        for (String mimeType : this.mimeTypes) {
            String[] parts = mimeType.split("/");
            if (parts.length < 2) {
                System.out.println("WARNING: Requested mimetype " + mimeType + " seems to be malformed");
            } else {
                rules.addRule(parts[0], parts[1], RULE_LOWER_BOUND, RULE_UPPER_BOUND, true);
            }
        }
        rules.addRule("*", "*", RULE_LOWER_BOUND, RULE_UPPER_BOUND, false);
        robot.setDownloadRuleSet(rules);
        return crawler;
    }

    /**
     * Probes whether {@code str} is a usable URL. For HTTP(S) URLs the connection
     * is opened and a first chunk of the response body is read; for other
     * protocols only the connection object is created.
     *
     * @return {@code true} if the probe completed, {@code false} on any exception
     */
    private boolean isURL(String str) {
        HttpURLConnection httpConnection = null;
        try {
            URLConnection connection = new URL(str).openConnection();
            connection.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);
            if (connection instanceof HttpURLConnection) {
                httpConnection = (HttpURLConnection) connection;
                httpConnection.connect();
                InputStream body = httpConnection.getInputStream();
                try {
                    body.read(new byte[128]);
                } finally {
                    // Close on every path, including an empty response body.
                    body.close();
                }
            }
            return true;
        } catch (Exception e) {
            // Any failure simply means the URL is not usable.
            return false;
        } finally {
            // Only HTTP connections can be disconnected; for every other
            // protocol httpConnection stays null.
            if (httpConnection != null) {
                httpConnection.disconnect();
            }
        }
    }
}
