package gr.forth.ics.isl.xsearch.pagesretriever;

import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfTextExtractor;
import gr.forth.ics.isl.xsearch.IOSLog;
import gr.forth.ics.isl.xsearch.SearchResult;
import gr.forth.ics.isl.xsearch.util.HTMLTag;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Locale;

/* loaded from: input_file:WEB-INF/classes/gr/forth/ics/isl/xsearch/pagesretriever/GetPagesContent.class */
/**
 * Worker thread that downloads the content of a list of search results and
 * attaches it to each result via {@code SearchResult.addContent}.
 *
 * <p>Results whose URL ends in {@code .ppt}, {@code .doc}, {@code .pptx} or
 * {@code .docx} are skipped. Resources served as {@code application/pdf} are
 * converted to plain text with iText; everything else is read through
 * {@link HTMLTag}.
 *
 * <p>Callers may poll {@link #finish} to find out when {@link #run()} has
 * completed.
 */
public class GetPagesContent extends Thread {
    /** URL suffixes (lower case) whose content is never downloaded. */
    private static final String[] SKIPPED_EXTENSIONS = {".ppt", ".doc", ".pptx", ".docx"};

    /** MIME type prefix that selects the PDF text-extraction path. */
    private static final String PDF_CONTENT_TYPE = "application/pdf";

    private ArrayList<SearchResult> pages;

    /**
     * Becomes {@code true} once {@link #run()} has ended, normally or not.
     * Declared {@code volatile} so a thread polling the flag is guaranteed to
     * observe the update written by this worker thread.
     */
    public volatile boolean finish = false;

    /**
     * Creates a worker over a shallow copy of the given list, so later
     * structural changes to the caller's list do not affect this worker.
     *
     * @param arrayList the search results whose content should be retrieved
     */
    public GetPagesContent(ArrayList<SearchResult> arrayList) {
        this.pages = new ArrayList<>(arrayList);
    }

    /**
     * Retrieves the content of every non-skipped page and adds it to the
     * corresponding result. {@link #finish} is set even when an unexpected
     * runtime exception aborts the loop, so waiting callers are never left
     * hanging.
     */
    @Override // java.lang.Thread, java.lang.Runnable
    public void run() {
        try {
            for (int i = 0; i < this.pages.size(); i++) {
                SearchResult page = this.pages.get(i);
                String address = page == null ? null : page.getUrl();
                if (address == null || hasSkippedExtension(address)) {
                    continue;
                }
                // Fetch with the ORIGINAL casing: URL paths and queries may be
                // case-sensitive, lower-casing is only valid for the suffix test.
                page.addContent(getContent(address));
            }
        } finally {
            this.finish = true;
        }
    }

    /**
     * Tells whether the address ends with one of the skipped office-document
     * extensions, compared case-insensitively and independently of the
     * default locale.
     */
    private static boolean hasSkippedExtension(String address) {
        String lowered = address.toLowerCase(Locale.ROOT);
        for (String extension : SKIPPED_EXTENSIONS) {
            if (lowered.endsWith(extension)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Downloads the content behind the given address.
     *
     * @param str the address to fetch
     * @return the extracted PDF text or the page source with every
     *         {@code "<?xml "} replaced by {@code "<html "}; an empty string
     *         when the source is {@code null} or any error occurs (the error
     *         is logged, never propagated)
     */
    private String getContent(String str) {
        try {
            URL url = new URL(str);
            String content;
            if (isPdf(url)) {
                System.out.println("# Reading PDF file!");
                content = readPdfText(str);
                System.out.println("# PDF file was successfully read!");
            } else {
                content = new HTMLTag(url).getSourceCode();
            }
            return content == null ? "" : content.replace("<?xml ", "<html ");
        } catch (Exception e) {
            // Best effort: one unreachable page must not abort the whole batch.
            IOSLog.writeErrorToLog(e, "GetPagesContent");
            System.out.println("*** ERROR RETRIEVING CONTENT OF: " + str);
            return "";
        }
    }

    /**
     * Probes the resource's content type. A missing content type is treated
     * as "not a PDF" instead of failing, and parameters after the MIME type
     * (e.g. {@code "application/pdf; charset=binary"}) are tolerated.
     */
    private static boolean isPdf(URL url) throws IOException {
        URLConnection connection = url.openConnection();
        try {
            String contentType = connection.getContentType();
            return contentType != null
                    && contentType.trim().toLowerCase(Locale.ROOT).startsWith(PDF_CONTENT_TYPE);
        } finally {
            // The probe connection is not reused; release it right away.
            if (connection instanceof HttpURLConnection) {
                ((HttpURLConnection) connection).disconnect();
            }
        }
    }

    /**
     * Extracts the text of every page of the PDF at the given location, each
     * page followed by a newline. The reader is closed even if extraction
     * fails part-way through.
     */
    private static String readPdfText(String location) throws IOException {
        PdfReader reader = new PdfReader(location);
        try {
            StringBuilder text = new StringBuilder();
            int pageCount = reader.getNumberOfPages();
            // iText page numbers are 1-based.
            for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++) {
                text.append(PdfTextExtractor.getTextFromPage(reader, pageNumber)).append('\n');
            }
            return text.toString();
        } finally {
            reader.close();
        }
    }

    /**
     * Returns the list this worker operates on (the internal list itself,
     * not a copy).
     */
    public ArrayList<SearchResult> getPages() {
        return this.pages;
    }

    /**
     * Replaces the list this worker operates on; the given list is stored
     * as-is, without copying.
     */
    public void setPages(ArrayList<SearchResult> arrayList) {
        this.pages = arrayList;
    }
}
