package org.gcube.resource.discovery.api;

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import net.matuschek.http.DownloadRuleSet;
import net.matuschek.http.HttpException;
import net.matuschek.spider.WebRobot;
import org.apache.log4j.BasicConfigurator;
import org.apache.log4j.Logger;
import org.gcube.resource.discovery.crawler.Crawler;
import org.gcube.resource.discovery.urlFilter.UrlFilter;

/* loaded from: input_file:org/gcube/resource/discovery/api/ResourceDiscoveryAPI.class */
/**
 * Discovers resources of a set of requested MIME types, starting from a list
 * of seed URLs (or {@link DocumentInfos} entries) and following links with a
 * {@link Crawler}.
 *
 * <p>Three entry points are offered: {@link #getResourcesInitial()},
 * {@link #getResources()} (both sequential, returning matching URLs) and
 * {@link #getResourcesParallel()} (one {@code ExecutorThread} task per
 * document, returning {@link DocumentInfos}).
 *
 * <p>Instances hold mutable configuration and a mutable executor reference
 * without any synchronization of their own.
 */
public class ResourceDiscoveryAPI {
    private static final Logger LOGGER = Logger.getLogger(ResourceDiscoveryAPI.class);

    /** Number of worker threads used by {@link #getResourcesParallel()}. */
    private static final int THREAD_POOL_SIZE = 50;

    /** Upper bound on how long {@link #getResourcesParallel()} waits for its tasks. */
    private static final long PARALLEL_TIMEOUT_SECONDS = 18000L;

    /** Connect timeout applied by {@link #isURL(String)}. */
    private static final int CONNECT_TIMEOUT_MILLIS = 120000;

    // Third and fourth arguments of DownloadRuleSet.addRule.
    // NOTE(review): presumably the minimum and maximum allowed document size
    // in bytes (1 byte .. 100 MiB) -- confirm against the DownloadRuleSet API.
    private static final int RULE_MIN_SIZE = 1;
    private static final int RULE_MAX_SIZE = 104857600;

    private int maxDepth = 1;
    private List<String> mimeTypes;
    private List<String> urls;
    private ArrayList<DocumentInfos> documentInfos = new ArrayList<>();
    ExecutorService executor = Executors.newFixedThreadPool(THREAD_POOL_SIZE);
    // No setter exists in this class, so this is always false here.
    private boolean ignoreRobotsTxt = false;

    /**
     * Creates an instance with depth 1, no URLs and no MIME types. Configure
     * it through the setters before running a discovery.
     */
    public ResourceDiscoveryAPI() {
    }

    /**
     * Creates an instance for a single seed URL.
     *
     * @param str  the seed URL
     * @param list the MIME types to look for, in {@code type/subtype} form
     */
    public ResourceDiscoveryAPI(String str, List<String> list) {
        this();
        this.mimeTypes = list;
        this.urls = new ArrayList<>();
        this.urls.add(str);
    }

    /**
     * Creates an instance for several seed URLs.
     *
     * @param list  the seed URLs (stored by reference)
     * @param list2 the MIME types to look for, in {@code type/subtype} form
     */
    public ResourceDiscoveryAPI(List<String> list, List<String> list2) {
        this();
        this.mimeTypes = list2;
        this.urls = list;
    }

    /**
     * Sets the documents processed by {@link #getResourcesParallel()} and
     * rebuilds the URL list from their input URIs.
     *
     * @param arrayList the documents; {@code null} is treated as an empty list
     */
    public void setDocumentInfos(ArrayList<DocumentInfos> arrayList) {
        // A null argument used to be stored and then dereferenced (NPE);
        // normalise it so the instance stays usable.
        this.documentInfos = (arrayList != null) ? arrayList : new ArrayList<DocumentInfos>();
        this.urls = new ArrayList<>();
        for (DocumentInfos info : this.documentInfos) {
            this.urls.add(info.getInputURI());
        }
    }

    /** @return the documents last passed to {@link #setDocumentInfos(ArrayList)} */
    public ArrayList<DocumentInfos> getDocumentInfos() {
        return this.documentInfos;
    }

    /**
     * Crawls every reachable seed URL and returns the links whose MIME type
     * is one of the requested types.
     *
     * <p>Unlike {@link #getResources()}, each seed is first probed with
     * {@link #isURL(String)} and skipped when the probe fails, the seed itself
     * is never added to the result, and the crawler is configured without the
     * XHTML rule and without the robots.txt setting.
     *
     * @return the matching links, in discovery order; never {@code null}
     * @throws Exception if redirect resolution, crawling or MIME detection fails
     */
    public List<String> getResourcesInitial() throws Exception {
        List<String> found = new ArrayList<>();
        Crawler crawler = new Crawler();
        WebRobot robot = crawler.getCrawler();
        robot.setMaxDepth(this.maxDepth);
        robot.setDownloadRuleSet(buildDownloadRules(false));

        List<String> wanted = orEmpty(this.mimeTypes);
        for (String seed : orEmpty(this.urls)) {
            String resolved = UrlFilter.resolveRedirections(seed);
            if (isURL(resolved)) {
                LOGGER.debug("Looking for resources in " + resolved);
                collectMatchingLinks(crawler, resolved, wanted, found);
            }
        }
        return found;
    }

    /**
     * Returns, for every seed URL, either the seed itself (when its MIME type
     * is already one of the requested types) or the links found from it whose
     * MIME type matches.
     *
     * @return the matching URLs, in discovery order; never {@code null}
     * @throws Exception if redirect resolution, crawling or MIME detection fails
     */
    public List<String> getResources() throws Exception {
        List<String> found = new ArrayList<>();
        Crawler crawler = setupCrawler();

        List<String> wanted = orEmpty(this.mimeTypes);
        for (String seed : orEmpty(this.urls)) {
            String resolved = UrlFilter.resolveRedirections(seed);
            LOGGER.debug("Looking for resources in " + resolved);
            if (wanted.contains(UrlFilter.getMimeType(resolved))) {
                found.add(resolved);
            } else {
                collectMatchingLinks(crawler, resolved, wanted, found);
            }
        }
        return found;
    }

    /**
     * Processes every document that has a non-empty input URI on a thread
     * pool, waits for the tasks to finish (up to
     * {@value #PARALLEL_TIMEOUT_SECONDS} seconds) and returns whatever the
     * tasks placed on the shared result queue.
     *
     * <p>The pool is shut down at the end of each call and recreated on the
     * next one, so the method can be invoked repeatedly on the same instance.
     *
     * @return the queued results; never {@code null}
     * @throws Exception if the wait is interrupted or a task cannot be created
     */
    // The queue is kept raw because ExecutorThread's constructor signature is
    // declared outside this file; a raw queue is accepted by any
    // parameterisation it may use.
    @SuppressWarnings({"rawtypes", "unchecked"})
    public List<DocumentInfos> getResourcesParallel() throws Exception {
        configureLoggingIfNeeded();
        LinkedBlockingQueue queue = new LinkedBlockingQueue();
        long startMillis = System.currentTimeMillis();
        int submitted = 0;

        // urls is null until a constructor or setter provides it.
        int urlCount = (this.urls == null) ? 0 : this.urls.size();
        System.out.println("Resource Discovery Initial number of URIs: " + urlCount + " " + this.documentInfos.size());
        ArrayList<String> restrictedSites = getRestrictedSites(System.getenv("GLOBUS_LOCATION") + "/restrictedSites.txt");

        // A previous call leaves the pool shut down; submitting to it again
        // would be rejected, so start a fresh one when needed.
        if (this.executor == null || this.executor.isShutdown()) {
            this.executor = Executors.newFixedThreadPool(THREAD_POOL_SIZE);
        }
        ExecutorService pool = this.executor;
        try {
            for (DocumentInfos info : this.documentInfos) {
                String inputUri = info.getInputURI();
                if (inputUri == null || inputUri.equals("")) {
                    System.out.println("it is null");
                } else {
                    System.out.println("Not null");
                    ExecutorThread task = new ExecutorThread(info, this.maxDepth, this.mimeTypes, queue);
                    task.setBlacklistedHosts(restrictedSites);
                    // NOTE(review): the task is handed to the pool rather than
                    // started directly, so this flag presumably has no effect
                    // on the pool's worker threads -- confirm.
                    task.setDaemon(true);
                    pool.execute(task);
                    submitted++;
                }
            }
            System.out.println("Executor shutting down");
            pool.shutdown();
            pool.awaitTermination(PARALLEL_TIMEOUT_SECONDS, TimeUnit.SECONDS);
        } finally {
            // Also reached on interruption or a failed submission, so the
            // pool's threads are never left running.
            pool.shutdownNow();
        }
        System.out.println("Executor done");
        System.out.println("Resource Discovery time needed: " + (System.currentTimeMillis() - startMillis));
        System.out.println("Resource Discovery URIs checked: " + submitted);
        System.out.println("Number of resources in queue: " + queue.size());
        List<DocumentInfos> results = new ArrayList<DocumentInfos>(queue);
        System.out.println("Returning: " + results.size());
        return results;
    }

    /**
     * Returns the list of restricted sites. The current implementation
     * ignores its argument and always returns a new empty list.
     *
     * @param str path of the restricted-sites file (currently unused)
     * @return a new, empty, mutable list
     * @throws IOException declared for callers; never thrown by this implementation
     */
    public static ArrayList<String> getRestrictedSites(String str) throws IOException {
        return new ArrayList<>();
    }

    /**
     * Creates a crawler configured with this instance's depth, robots.txt
     * setting and download rules (HTML, XHTML and the requested MIME types).
     */
    private Crawler setupCrawler() throws IOException, HttpException {
        Crawler crawler = new Crawler();
        WebRobot robot = crawler.getCrawler();
        robot.setMaxDepth(this.maxDepth);
        robot.setIgnoreRobotsTxt(this.ignoreRobotsTxt);
        robot.setDownloadRuleSet(buildDownloadRules(true));
        return crawler;
    }

    /**
     * Builds the download rules: HTML (and optionally XHTML) plus every
     * well-formed requested MIME type with a {@code true} flag, followed by a
     * catch-all rule with a {@code false} flag.
     *
     * @param includeXhtml whether to add the {@code application/xhtml+xml} rule
     */
    private DownloadRuleSet buildDownloadRules(boolean includeXhtml) throws IOException, HttpException {
        DownloadRuleSet rules = new DownloadRuleSet();
        rules.addRule("text", "html", RULE_MIN_SIZE, RULE_MAX_SIZE, true);
        if (includeXhtml) {
            rules.addRule("application", "xhtml+xml", RULE_MIN_SIZE, RULE_MAX_SIZE, true);
        }
        for (String mimeType : orEmpty(this.mimeTypes)) {
            String[] parts = mimeType.split("/");
            if (parts.length < 2) {
                LOGGER.error("WARNING: Requested mimetype " + mimeType + " seems to be malformed");
            } else {
                rules.addRule(parts[0], parts[1], RULE_MIN_SIZE, RULE_MAX_SIZE, true);
            }
        }
        rules.addRule("*", "*", RULE_MIN_SIZE, RULE_MAX_SIZE, false);
        return rules;
    }

    /**
     * Appends to {@code found} every link obtained from {@code url} whose
     * MIME type is contained in {@code wanted}.
     */
    private void collectMatchingLinks(Crawler crawler, String url, List<String> wanted, List<String> found)
            throws Exception {
        for (String link : crawler.getLinks(url)) {
            LOGGER.debug("Looking for resources in " + link);
            if (wanted.contains(UrlFilter.getMimeType(link))) {
                LOGGER.debug("Resource found in " + link);
                found.add(link);
            }
        }
    }

    /**
     * Installs log4j's basic console configuration only when the root logger
     * has no appender yet, so repeated calls do not stack duplicate appenders.
     */
    private static void configureLoggingIfNeeded() {
        if (!Logger.getRootLogger().getAllAppenders().hasMoreElements()) {
            BasicConfigurator.configure();
        }
    }

    /** Returns {@code list}, or a new empty list when {@code list} is {@code null}. */
    private static <T> List<T> orEmpty(List<T> list) {
        return (list != null) ? list : new ArrayList<T>();
    }

    /**
     * Checks whether {@code str} is a URL that can be opened.
     *
     * <p>For HTTP(S) URLs the connection is established and the start of the
     * response body is read. Other URL schemes are only opened, not probed,
     * and yield {@code true}.
     *
     * @param str the candidate URL
     * @return {@code true} if the URL could be opened (and, for HTTP, read);
     *         {@code false} if any exception occurred
     */
    private boolean isURL(String str) {
        HttpURLConnection httpConnection = null;
        try {
            URLConnection connection = new URL(str).openConnection();
            connection.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);
            if (connection instanceof HttpURLConnection) {
                httpConnection = (HttpURLConnection) connection;
                httpConnection.connect();
                // Reading a few bytes forces the request to complete; the
                // stream is closed whether or not any data was returned.
                try (InputStream body = httpConnection.getInputStream()) {
                    body.read(new byte[128]);
                }
            }
            return true;
        } catch (Exception e) {
            LOGGER.debug("URL check failed for " + str, e);
            return false;
        } finally {
            // Only HTTP connections can be disconnected; for any other scheme
            // httpConnection is still null here.
            if (httpConnection != null) {
                httpConnection.disconnect();
            }
        }
    }

    /** @return the maximum crawl depth */
    public int getMaxDepth() {
        return this.maxDepth;
    }

    /** @param i the maximum crawl depth */
    public void setMaxDepth(int i) {
        this.maxDepth = i;
    }

    /** @return the requested MIME types, or {@code null} if none were set */
    public List<String> getMimeTypes() {
        return this.mimeTypes;
    }

    /** @param list the MIME types to look for, in {@code type/subtype} form */
    public void setMimeTypes(List<String> list) {
        this.mimeTypes = list;
    }

    /** @return the seed URLs, or {@code null} if none were set */
    public List<String> getUrls() {
        return this.urls;
    }

    /** @param list the seed URLs (stored by reference) */
    public void setUrls(List<String> list) {
        this.urls = list;
    }

    /**
     * Manual smoke test: runs a parallel discovery for one hard-coded
     * document and prints the output URI of every result.
     */
    public static void main(String[] strArr) throws Exception {
        BasicConfigurator.configure();
        DocumentInfos document = new DocumentInfos();
        document.setInputURI("http://ads.ccsd.cnrs.fr/ads-00103531/en/");
        document.setReferenceURI("lala");
        ArrayList<DocumentInfos> documents = new ArrayList<>();
        documents.add(document);

        List<String> wantedTypes = new ArrayList<>();
        wantedTypes.add("application/pdf");
        wantedTypes.add("image/jpeg");
        // NOTE(review): duplicate entry kept from the original; it looks
        // redundant but the list is handed to code outside this file.
        wantedTypes.add("application/pdf");

        ResourceDiscoveryAPI api = new ResourceDiscoveryAPI();
        api.setMimeTypes(wantedTypes);
        api.setDocumentInfos(documents);
        for (DocumentInfos result : api.getResourcesParallel()) {
            System.out.println("Result: [" + result.getOutputURI() + "]");
        }
    }
}
