/*
 * Decompiled with CFR 0.152.
 */
package gr.forth.ics.isl.xsearch.pagesretriever;

import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfTextExtractor;
import gr.forth.ics.isl.xsearch.IOSLog;
import gr.forth.ics.isl.xsearch.SearchResult;
import gr.forth.ics.isl.xsearch.util.HTMLTag;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Locale;

public class GetPagesContent
extends Thread {
    private ArrayList<SearchResult> pages;
    public boolean finish = false;

    public GetPagesContent(ArrayList<SearchResult> pages) {
        this.pages = new ArrayList<SearchResult>(pages);
    }

    @Override
    public void run() {
        for (int i = 0; i < this.pages.size(); ++i) {
            String url = ((SearchResult)this.pages.get(i)).getUrl().toLowerCase();
            if (url.endsWith(".ppt") || url.endsWith(".doc") || url.endsWith(".pptx") || url.endsWith(".docx")) continue;
            ((SearchResult)this.pages.get(i)).addContent(this.getContent(url));
        }
        this.finish = true;
    }

    private String getContent(String url) {
        String source = "";
        try {
            URL the_url = new URL(url);
            URLConnection urlConn = the_url.openConnection();
            if (urlConn.getContentType().equalsIgnoreCase("application/pdf")) {
                System.out.println("# Reading PDF file!");
                PdfReader reader = new PdfReader(url);
                int n = reader.getNumberOfPages();
                for (int i = 1; i <= n; ++i) {
                    source = source + PdfTextExtractor.getTextFromPage((PdfReader)reader, (int)i) + "\n";
                }
                reader.close();
                System.out.println("# PDF file was successfully read!");
            } else {
                HTMLTag tagger = new HTMLTag(the_url);
                source = tagger.getSourceCode();
            }
            if (source == null) {
                return "";
            }
            source = source.replace("<?xml ", "<html ");
        }
        catch (Exception ex) {
            IOSLog.writeErrorToLog((Exception)ex, (String)"GetPagesContent");
            System.out.println("*** ERROR RETRIEVING CONTENT OF: " + url);
            return "";
        }
        return source;
    }

    public ArrayList<SearchResult> getPages() {
        return this.pages;
    }

    public void setPages(ArrayList<SearchResult> pages) {
        this.pages = pages;
    }
}

