/*
 * Decompiled with CFR 0.152.
 */
package org.gcube.resource.discovery.api;

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.Vector;
import java.util.concurrent.LinkedBlockingQueue;
import net.matuschek.http.DownloadRuleSet;
import net.matuschek.http.HttpException;
import net.matuschek.spider.WebRobot;
import org.gcube.resource.discovery.api.DocumentInfos;
import org.gcube.resource.discovery.crawler.Crawler;
import org.gcube.resource.discovery.urlFilter.UrlFilter;
import org.gcube.resource.discovery.util.StringUtils;

/**
 * Worker thread that tries to locate one resource of an accepted MIME type for
 * a single input document and publishes the outcome on a shared queue.
 *
 * <p>The input URI is HTML-unescaped and its redirections are resolved. If the
 * resolved URL itself has one of the accepted MIME types it is the result;
 * otherwise the links returned by the crawler for that URL are inspected in
 * order. On normal completion exactly one {@link DocumentInfos} is added to the
 * queue, whose output URI is either the resource found or the literal
 * {@code "not_found"}. Nothing is queued when the crawler cannot be set up,
 * when the input URL's host is blacklisted, or when the lookup throws.
 */
public class ExecutorThread
extends Thread {
    /** Output URI used when no matching resource could be located. */
    private static final String NOT_FOUND = "not_found";
    /** Lower size bound passed to every download rule. */
    private static final int MIN_SIZE = 1;
    /**
     * Upper size bound passed to every download rule: 0x6400000 = 104857600
     * (100 * 1024 * 1024). Presumably a byte count -- confirm against DownloadRuleSet.
     */
    private static final int MAX_SIZE = 0x6400000;

    private String documentURL;
    int maxDepth;
    DocumentInfos documentInfos;
    private LinkedBlockingQueue<DocumentInfos> pdfURIs;
    private List<String> mimeTypes = null;
    private boolean ignoreRobotsTxt = false;
    /**
     * Blacklisted hosts, stored as lower-cased "protocol://host" keys.
     * Strings are used instead of URL objects because URL.equals/hashCode
     * resolve host names (blocking) and consider different host names that
     * share an IP address to be equal.
     */
    private Set<String> blacklistedHosts = new HashSet<String>();

    /**
     * Creates a worker for one document.
     *
     * @param docInfo   document to process; its input URI is the starting URL and
     *                  its reference URI is copied into the result
     * @param maxDepth  maximum depth handed to the underlying web robot
     * @param mimeTypes accepted MIME types in "type/subtype" form; {@code null}
     *                  is treated as an empty list
     * @param queue     queue that receives the result
     */
    public ExecutorThread(DocumentInfos docInfo, int maxDepth, List<String> mimeTypes, LinkedBlockingQueue<DocumentInfos> queue) {
        this.documentURL = docInfo.getInputURI();
        this.documentInfos = docInfo;
        this.pdfURIs = queue;
        this.maxDepth = maxDepth;
        // A null list used to surface only later as a NullPointerException inside the running thread.
        this.mimeTypes = mimeTypes != null ? mimeTypes : new ArrayList<String>();
    }

    /**
     * Performs the lookup described on the class and queues its result.
     */
    @Override
    public void run() {
        System.out.println("I am a thread! " + this.documentURL);
        Crawler crawler;
        try {
            crawler = this.setupCrawler();
        }
        catch (IOException e) {
            this.abort(e);
            return;
        }
        catch (HttpException e) {
            this.abort(e);
            return;
        }
        // Remember the values as they were before redirections rewrite documentURL.
        String initialURL = this.documentURL;
        String reference = this.documentInfos.getReferenceURI();
        String resource = null;
        // The lock is the String instance the thread was created with; documentURL
        // itself is reassigned inside the block.
        Object urlLock = this.documentURL;
        synchronized (urlLock) {
            try {
                this.documentURL = StringUtils.unescapeHTML(this.documentURL);
                URL start = new URL(this.documentURL);
                if (this.isBlacklisted(start)) {
                    System.out.println("Host " + start.getHost() + " is blacklisted. Ignoring");
                    return;
                }
                System.out.println("Thread resolving redirections: " + this.documentURL);
                this.documentURL = UrlFilter.resolveRedirections(this.documentURL);
                System.out.println("success " + this.documentURL);
                if (this.mimeTypes.contains(UrlFilter.getMimeType(this.documentURL))) {
                    // The document itself already is a resource of an accepted type.
                    resource = this.documentURL;
                } else {
                    Vector<String> links = crawler.getLinks(this.documentURL);
                    for (String link : links) {
                        System.out.println("Looking for resources in " + link);
                        URL linkUrl = new URL(link);
                        if (this.isBlacklisted(linkUrl)) {
                            System.out.println("Host " + linkUrl.getHost() + " is blacklisted. Ignoring");
                            // NOTE(review): this stops scanning the remaining links rather than
                            // skipping only this one -- confirm that is intended.
                            break;
                        }
                        if (this.mimeTypes.contains(UrlFilter.getMimeType(link))) {
                            System.out.println("Resource found in " + link);
                            resource = link;
                            break;
                        }
                        System.out.println("Adding not found resource");
                    }
                }
            }
            catch (Exception e) {
                this.abort(e);
                return;
            }
        }
        System.out.println("Finished with resources");
        DocumentInfos result = new DocumentInfos();
        result.setInputURI(initialURL);
        result.setReferenceURI(reference);
        result.setOutputURI(resource != null ? resource : NOT_FOUND);
        synchronized (this.pdfURIs) {
            this.pdfURIs.add(result);
            System.out.println("The size is: " + this.pdfURIs.size());
        }
        // NOTE(review): kept from the original; sets the interrupt flag of the thread
        // executing run() just before it returns -- confirm whether anything relies on it.
        Thread.currentThread().interrupt();
    }

    /**
     * Replaces the set of blacklisted hosts.
     *
     * <p>Only the protocol and host of each entry are kept. Entries that cannot
     * be parsed as a URL are reported on standard output and skipped.
     *
     * @param hosts URLs whose hosts must not be visited; {@code null} clears the
     *              blacklist
     */
    public void setBlacklistedHosts(Collection<String> hosts) {
        this.blacklistedHosts.clear();
        if (hosts == null) {
            return;
        }
        for (String host : hosts) {
            try {
                this.blacklistedHosts.add(ExecutorThread.hostKey(new URL(host)));
            }
            catch (MalformedURLException e) {
                System.out.println("Error using url " + host);
                e.printStackTrace();
            }
        }
    }

    /** Prints the failure and flags the current thread as interrupted, as the original error paths did. */
    private void abort(Exception cause) {
        cause.printStackTrace();
        Thread.currentThread().interrupt();
    }

    /** Tells whether the protocol/host pair of the given URL is on the blacklist. */
    private boolean isBlacklisted(URL url) {
        return this.blacklistedHosts.contains(ExecutorThread.hostKey(url));
    }

    /**
     * Builds the blacklist key "protocol://host" for a URL. Port, path and query
     * are ignored; the host is lower-cased so the comparison is case-insensitive.
     */
    private static String hostKey(URL url) {
        String host = url.getHost();
        String normalizedHost = host == null ? "" : host.toLowerCase(java.util.Locale.ENGLISH);
        return url.getProtocol() + "://" + normalizedHost;
    }

    /**
     * Creates a crawler whose robot uses this worker's depth and robots.txt
     * settings and a download rule set built from the accepted MIME types.
     *
     * @return the configured crawler
     * @throws IOException   propagated from crawler creation/configuration
     * @throws HttpException propagated from crawler creation/configuration
     */
    private Crawler setupCrawler() throws IOException, HttpException {
        Crawler crawler = new Crawler();
        WebRobot robot = crawler.getCrawler();
        robot.setMaxDepth(this.maxDepth);
        robot.setIgnoreRobotsTxt(this.ignoreRobotsTxt);
        DownloadRuleSet rules = new DownloadRuleSet();
        // HTML pages are always allowed, in addition to the requested types.
        rules.addRule("text", "html", MIN_SIZE, MAX_SIZE, true);
        rules.addRule("application", "xhtml+xml", MIN_SIZE, MAX_SIZE, true);
        for (String mimeType : this.mimeTypes) {
            String[] parts = mimeType.split("/");
            if (parts.length < 2) {
                System.out.println("WARNING: Requested mimetype " + mimeType + " seems to be malformed");
                continue;
            }
            rules.addRule(parts[0], parts[1], MIN_SIZE, MAX_SIZE, true);
        }
        // Catch-all rule added last with allow=false.
        rules.addRule("*", "*", MIN_SIZE, MAX_SIZE, false);
        robot.setDownloadRuleSet(rules);
        return crawler;
    }

    /**
     * Checks whether a string is a URL that can be opened.
     *
     * <p>For HTTP(S) URLs a connection is made and up to 128 bytes of the
     * response are read; other protocols are only parsed and opened.
     *
     * @param val candidate URL
     * @return {@code true} if no error occurred, {@code false} otherwise
     */
    private boolean isURL(String val) {
        HttpURLConnection httpConnection = null;
        try {
            URLConnection connection = new URL(val).openConnection();
            connection.setConnectTimeout(120000);
            if (connection instanceof HttpURLConnection) {
                httpConnection = (HttpURLConnection)connection;
                httpConnection.connect();
                InputStream is = httpConnection.getInputStream();
                try {
                    is.read(new byte[128]);
                }
                finally {
                    // Always release the stream, including on an empty body or a failed read.
                    is.close();
                }
            }
            return true;
        }
        catch (Exception e) {
            return false;
        }
        finally {
            // Guard on the HTTP connection itself: it stays null for non-HTTP URLs.
            if (httpConnection != null) {
                httpConnection.disconnect();
            }
        }
    }
}

