/*
 * Decompiled with CFR 0.152.
 */
package org.gcube.resource.discovery.api;

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.Vector;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import net.matuschek.http.DownloadRuleSet;
import net.matuschek.http.HttpException;
import net.matuschek.spider.WebRobot;
import org.apache.log4j.BasicConfigurator;
import org.apache.log4j.Logger;
import org.gcube.resource.discovery.api.DocumentInfos;
import org.gcube.resource.discovery.api.ExecutorThread;
import org.gcube.resource.discovery.crawler.Crawler;
import org.gcube.resource.discovery.urlFilter.UrlFilter;

/**
 * Crawls a set of seed URLs and collects the links whose mime type is one of
 * the requested mime types.
 *
 * <p>Two modes are offered: sequential crawling ({@link #getResources()},
 * {@link #getResourcesInitial()}), which works on the configured URL list, and
 * parallel crawling ({@link #getResourcesParallel()}), which works on the
 * configured {@link DocumentInfos} list and hands every document to an
 * {@link ExecutorThread}.
 *
 * <p>Instances hold mutable configuration and are not synchronized.
 */
public class ResourceDiscoveryAPI {
    /** Size of the thread pool used by {@link #getResourcesParallel()}. */
    private static final int THREAD_POOL_SIZE = 50;
    /** Smallest document size (in bytes) accepted by the download rules. */
    private static final int MIN_DOWNLOAD_BYTES = 1;
    /** Largest document size (in bytes) accepted by the download rules (100 MiB). */
    private static final int MAX_DOWNLOAD_BYTES = 100 * 1024 * 1024;
    /** Upper bound, in seconds, on how long a parallel run is awaited (5 hours). */
    private static final long PARALLEL_TIMEOUT_SECONDS = 18000L;
    /** Connect timeout, in milliseconds, used when probing a URL. */
    private static final int CONNECT_TIMEOUT_MILLIS = 120000;
    /** Number of bytes read from a URL to prove that it actually serves content. */
    private static final int PROBE_BUFFER_BYTES = 128;

    private Logger logger = Logger.getLogger(ResourceDiscoveryAPI.class);
    private int maxDepth = 1;
    private List<String> mimeTypes = null;
    private List<String> urls = null;
    private ArrayList<DocumentInfos> documentInfos = new ArrayList<DocumentInfos>();
    ExecutorService executor = Executors.newFixedThreadPool(THREAD_POOL_SIZE);
    private boolean ignoreRobotsTxt = false;

    /**
     * Creates an unconfigured instance; URLs / document infos and mime types
     * have to be supplied through the setters before crawling.
     */
    public ResourceDiscoveryAPI() {
    }

    /**
     * Creates an instance that crawls a single seed URL.
     *
     * @param url the seed URL
     * @param mimeTypes the mime types (e.g. {@code application/pdf}) to look for
     */
    public ResourceDiscoveryAPI(String url, List<String> mimeTypes) {
        this.mimeTypes = mimeTypes;
        this.urls = new ArrayList<String>();
        this.urls.add(url);
    }

    /**
     * Creates an instance that crawls several seed URLs.
     *
     * @param urls the seed URLs; the list is stored as is, not copied
     * @param mimeTypes the mime types (e.g. {@code application/pdf}) to look for
     */
    public ResourceDiscoveryAPI(List<String> urls, List<String> mimeTypes) {
        this.mimeTypes = mimeTypes;
        this.urls = urls;
    }

    /**
     * Sets the documents processed by {@link #getResourcesParallel()} and
     * replaces the URL list with the input URIs of those documents.
     *
     * @param docInfos the documents to process; {@code null} is treated as an
     *        empty list
     */
    public void setDocumentInfos(ArrayList<DocumentInfos> docInfos) {
        // A null list used to be stored and then dereferenced immediately.
        this.documentInfos = docInfos != null ? docInfos : new ArrayList<DocumentInfos>();
        this.urls = new ArrayList<String>();
        for (DocumentInfos doc : this.documentInfos) {
            this.urls.add(doc.getInputURI());
        }
    }

    /**
     * @return the documents processed by {@link #getResourcesParallel()}
     */
    public ArrayList<DocumentInfos> getDocumentInfos() {
        return this.documentInfos;
    }

    /**
     * Sequentially crawls every configured URL that can actually be opened and
     * returns the links whose mime type is one of the requested mime types.
     *
     * <p>Unlike {@link #getResources()}, this variant probes each URL before
     * crawling it, never returns a seed URL itself, and builds its crawler
     * without the {@code application/xhtml+xml} rule and without applying the
     * robots.txt setting.
     *
     * @return the matching links, possibly empty, never {@code null}
     * @throws Exception if the crawler or the URL filter fails
     */
    public List<String> getResourcesInitial() throws Exception {
        ArrayList<String> resources = new ArrayList<String>();
        Crawler crawler = new Crawler();
        WebRobot robot = crawler.getCrawler();
        robot.setMaxDepth(this.maxDepth);
        robot.setDownloadRuleSet(this.buildRuleSet(false));
        if (this.urls == null) {
            return resources;
        }
        List<String> accepted = this.acceptedMimeTypes();
        for (String url : this.urls) {
            url = UrlFilter.resolveRedirections(url);
            if (!this.isURL(url)) {
                continue;
            }
            this.logger.debug("Looking for resources in " + url);
            this.collectMatchingLinks(crawler, url, accepted, resources);
        }
        return resources;
    }

    /**
     * Sequentially processes every configured URL. A URL whose own mime type is
     * requested is returned directly; otherwise the URL is crawled and every
     * link with a requested mime type is returned.
     *
     * @return the matching URLs and links, possibly empty, never {@code null}
     * @throws Exception if the crawler or the URL filter fails
     */
    public List<String> getResources() throws Exception {
        ArrayList<String> resources = new ArrayList<String>();
        Crawler crawler = this.setupCrawler();
        if (this.urls == null) {
            return resources;
        }
        List<String> accepted = this.acceptedMimeTypes();
        for (String url : this.urls) {
            url = UrlFilter.resolveRedirections(url);
            this.logger.debug("Looking for resources in " + url);
            if (accepted.contains(UrlFilter.getMimeType(url))) {
                resources.add(url);
                continue;
            }
            this.collectMatchingLinks(crawler, url, accepted, resources);
        }
        return resources;
    }

    /**
     * Processes every configured document that has a non-empty input URI by
     * submitting one {@link ExecutorThread} per document to the thread pool,
     * waits for the pool to finish (at most {@value #PARALLEL_TIMEOUT_SECONDS}
     * seconds) and returns whatever the workers put on the shared result queue.
     *
     * <p>The pool is shut down at the end of every call; a fresh pool is created
     * on the next call so the method can be invoked repeatedly.
     *
     * @return the documents produced by the workers, possibly empty
     * @throws Exception if waiting for the workers is interrupted or the
     *         restricted-sites list cannot be read
     */
    public List<DocumentInfos> getResourcesParallel() throws Exception {
        ResourceDiscoveryAPI.configureLoggingIfNeeded();
        LinkedBlockingQueue<DocumentInfos> docQueue = new LinkedBlockingQueue<DocumentInfos>();
        ArrayList<DocumentInfos> resources = new ArrayList<DocumentInfos>();
        long start = System.currentTimeMillis();
        int counter = 0;
        int urlCount = this.urls == null ? 0 : this.urls.size();
        System.out.println("Resource Discovery Initial number of URIs: " + urlCount + " " + this.documentInfos.size());
        String blackListFileName = System.getenv("GLOBUS_LOCATION") + "/restrictedSites.txt";
        ArrayList<String> blackList = ResourceDiscoveryAPI.getRestrictedSites(blackListFileName);
        ExecutorService pool = this.usableExecutor();
        for (DocumentInfos doc : this.documentInfos) {
            String inputUri = doc.getInputURI();
            if (inputUri == null || inputUri.equals("")) {
                System.out.println("it is null");
                continue;
            }
            System.out.println("Not null");
            ExecutorThread executioner = new ExecutorThread(doc, this.maxDepth, this.mimeTypes, docQueue);
            executioner.setBlacklistedHosts(blackList);
            // NOTE(review): the worker is handed to the pool rather than started
            // itself, so this flag presumably has no effect on the pool's own
            // threads - confirm against ExecutorThread.
            executioner.setDaemon(true);
            pool.execute(executioner);
            ++counter;
        }
        System.out.println("Executor shutting down");
        pool.shutdown();
        try {
            pool.awaitTermination(PARALLEL_TIMEOUT_SECONDS, TimeUnit.SECONDS);
        } finally {
            // Also runs when the wait is interrupted, so no worker is left behind.
            pool.shutdownNow();
        }
        System.out.println("Executor done");
        long end = System.currentTimeMillis();
        long diff = end - start;
        System.out.println("Resource Discovery time needed: " + diff);
        System.out.println("Resource Discovery URIs checked: " + counter);
        System.out.println("Number of resources in queue: " + docQueue.size());
        resources.addAll(docQueue);
        System.out.println("Returning: " + resources.size());
        return resources;
    }

    /**
     * Returns the list of restricted sites.
     *
     * <p>The current implementation does not read {@code path}; it always
     * returns a new empty list.
     *
     * @param path location of the restricted-sites file (currently unused)
     * @return a new, empty, mutable list
     * @throws IOException never thrown by the current implementation; kept in
     *         the signature for callers that already handle it
     */
    public static ArrayList<String> getRestrictedSites(String path) throws IOException {
        return new ArrayList<String>();
    }

    /**
     * Installs the basic log4j configuration unless the root logger already has
     * an appender, so repeated calls do not stack duplicate console appenders.
     */
    private static void configureLoggingIfNeeded() {
        if (!Logger.getRootLogger().getAllAppenders().hasMoreElements()) {
            BasicConfigurator.configure();
        }
    }

    /**
     * Returns the executor field, replacing it first when it is missing or has
     * already been shut down by an earlier parallel run.
     */
    private ExecutorService usableExecutor() {
        if (this.executor == null || this.executor.isShutdown()) {
            this.executor = Executors.newFixedThreadPool(THREAD_POOL_SIZE);
        }
        return this.executor;
    }

    /**
     * Returns the configured mime types, or an empty list when none were set.
     */
    private List<String> acceptedMimeTypes() {
        return this.mimeTypes != null ? this.mimeTypes : new ArrayList<String>();
    }

    /**
     * Crawls {@code url} and appends every link with an accepted mime type to
     * {@code resources}.
     */
    private void collectMatchingLinks(Crawler crawler, String url, List<String> accepted, List<String> resources) throws Exception {
        Vector<String> links = crawler.getLinks(url);
        for (String link : links) {
            this.logger.debug("Looking for resources in " + link);
            if (!accepted.contains(UrlFilter.getMimeType(link))) {
                continue;
            }
            this.logger.debug("Resource found in " + link);
            resources.add(link);
        }
    }

    /**
     * Builds the download rules: HTML (and optionally XHTML) plus every
     * well-formed requested mime type is allowed, everything else is denied.
     *
     * @param includeXhtml whether {@code application/xhtml+xml} is allowed too
     */
    private DownloadRuleSet buildRuleSet(boolean includeXhtml) throws IOException, HttpException {
        DownloadRuleSet rules = new DownloadRuleSet();
        rules.addRule("text", "html", MIN_DOWNLOAD_BYTES, MAX_DOWNLOAD_BYTES, true);
        if (includeXhtml) {
            rules.addRule("application", "xhtml+xml", MIN_DOWNLOAD_BYTES, MAX_DOWNLOAD_BYTES, true);
        }
        for (String mimeType : this.acceptedMimeTypes()) {
            String[] parts = mimeType.split("/");
            if (parts.length < 2) {
                this.logger.error("WARNING: Requested mimetype " + mimeType + " seems to be malformed");
                continue;
            }
            rules.addRule(parts[0], parts[1], MIN_DOWNLOAD_BYTES, MAX_DOWNLOAD_BYTES, true);
        }
        // Catch-all deny rule; added last, after all the allow rules.
        rules.addRule("*", "*", MIN_DOWNLOAD_BYTES, MAX_DOWNLOAD_BYTES, false);
        return rules;
    }

    /**
     * Creates a crawler configured with the current depth, robots.txt setting
     * and download rules (including XHTML).
     */
    private Crawler setupCrawler() throws IOException, HttpException {
        Crawler crawler = new Crawler();
        WebRobot robot = crawler.getCrawler();
        robot.setMaxDepth(this.maxDepth);
        robot.setIgnoreRobotsTxt(this.ignoreRobotsTxt);
        robot.setDownloadRuleSet(this.buildRuleSet(true));
        return crawler;
    }

    /**
     * Checks that {@code val} is a well-formed URL whose connection can be
     * opened. For HTTP(S) URLs the connection is established and a few bytes
     * are read; for other protocols only the connection object is created.
     *
     * @param val the candidate URL
     * @return {@code true} if the checks pass, {@code false} on any failure
     */
    private boolean isURL(String val) {
        HttpURLConnection httpConnection = null;
        try {
            URLConnection connection = new URL(val).openConnection();
            connection.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);
            if (connection instanceof HttpURLConnection) {
                httpConnection = (HttpURLConnection) connection;
                httpConnection.connect();
                InputStream is = httpConnection.getInputStream();
                try {
                    is.read(new byte[PROBE_BUFFER_BYTES]);
                } finally {
                    // Close unconditionally; the stream used to leak on an empty body.
                    is.close();
                }
            }
            return true;
        } catch (Exception e) {
            return false;
        } finally {
            // Guard on the HTTP handle itself: for non-HTTP URLs it stays null.
            if (httpConnection != null) {
                httpConnection.disconnect();
            }
        }
    }

    /**
     * @return the maximum crawl depth
     */
    public int getMaxDepth() {
        return this.maxDepth;
    }

    /**
     * @param maxDepth the maximum crawl depth
     */
    public void setMaxDepth(int maxDepth) {
        this.maxDepth = maxDepth;
    }

    /**
     * @return the requested mime types, or {@code null} if none were set
     */
    public List<String> getMimeTypes() {
        return this.mimeTypes;
    }

    /**
     * @param mimeTypes the mime types (e.g. {@code application/pdf}) to look for
     */
    public void setMimeTypes(List<String> mimeTypes) {
        this.mimeTypes = mimeTypes;
    }

    /**
     * @return the seed URLs, or {@code null} if none were set
     */
    public List<String> getUrls() {
        return this.urls;
    }

    /**
     * @param urls the seed URLs; the list is stored as is, not copied
     */
    public void setUrls(List<String> urls) {
        this.urls = urls;
    }

    /**
     * Manual smoke test: runs a parallel discovery for one hard-coded document
     * and prints the output URI of every result.
     *
     * @param args ignored
     * @throws Exception if the discovery fails
     */
    public static void main(String[] args) throws Exception {
        BasicConfigurator.configure();
        DocumentInfos docInfo = new DocumentInfos();
        docInfo.setInputURI("http://ads.ccsd.cnrs.fr/ads-00103531/en/");
        docInfo.setReferenceURI("lala");
        ArrayList<DocumentInfos> list = new ArrayList<DocumentInfos>();
        list.add(docInfo);
        ArrayList<String> mimetypes = new ArrayList<String>();
        mimetypes.add("application/pdf");
        mimetypes.add("image/jpeg");
        ResourceDiscoveryAPI api = new ResourceDiscoveryAPI();
        api.setMimeTypes(mimetypes);
        api.setDocumentInfos(list);
        List<DocumentInfos> results = api.getResourcesParallel();
        for (DocumentInfos result : results) {
            System.out.println("Result: [" + result.getOutputURI() + "]");
        }
    }
}

