package gr.forth.ics.isl.stellaclustering;

import gr.forth.ics.isl.stellaclustering.lexicalanalyzer.LexicalAnalyzer;
import gr.forth.ics.isl.stellaclustering.lexicalanalyzer.LexicalAnalyzerProperties;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Locale;

/* loaded from: input_file:gr/forth/ics/isl/stellaclustering/Input.class */
/**
 * Input container for a clustering run: a map of document ids, a map of
 * {@link Snippet}s keyed by integer id, an optional term-to-idf map, and the
 * flags that drive {@link #preprocesing(String, String)}.
 */
public class Input {
    /**
     * Number of leading characters dropped from titles starting with
     * "Microsoft Word" (the length of {@code "Microsoft Word - "}; presumably
     * that is the decoration being removed - confirm against real titles).
     */
    private static final int WORD_PREFIX_LENGTH = 17;

    /**
     * Number of leading characters dropped from titles starting with
     * "Microsoft PowerPoint" (the length of {@code "Microsoft PowerPoint - "}).
     */
    private static final int POWERPOINT_PREFIX_LENGTH = 23;

    /** Snippets keyed by an integer id, in insertion order. */
    private LinkedHashMap<Integer, Snippet> snippets;

    /** Document ids mapped to a float value (presumably a ranking score - confirm with callers). */
    private LinkedHashMap<Integer, Float> docIds;

    /** Forwarded to {@code LexicalAnalyzerProperties.setBlockStopWords}. */
    private boolean blockStopWords;

    /** Forwarded to {@code LexicalAnalyzerProperties.setUseStemmer}. */
    private boolean withStemming;

    /** When {@code false}, {@link #preprocesing(String, String)} returns {@code null}. */
    private boolean sentenceSeparation;

    /** When {@code true}, the title passed to {@link #preprocesing(String, String)} is analyzed too. */
    private boolean useTitles;

    /** Term-to-idf map; {@code null} until {@link #setIdfs(HashMap)} is called. */
    private HashMap<String, Float> idfs;

    /**
     * Creates an empty input with stop-word blocking, sentence separation and
     * title usage enabled and stemming disabled.
     *
     * @throws Exception never thrown by this implementation; kept for source compatibility
     */
    public Input() throws Exception {
        this.docIds = new LinkedHashMap<>();
        this.snippets = new LinkedHashMap<>();
        this.blockStopWords = true;
        this.withStemming = false;
        this.sentenceSeparation = true;
        this.useTitles = true;
        this.idfs = null;
    }

    /**
     * Creates an input over the given document ids with stop-word blocking and
     * title usage enabled, and stemming and sentence separation disabled.
     *
     * @param linkedHashMap the document-id map, stored by reference
     * @throws Exception never thrown by this implementation; kept for source compatibility
     */
    public Input(LinkedHashMap<Integer, Float> linkedHashMap) throws Exception {
        this(linkedHashMap, true, false, false, true);
    }

    /**
     * Creates an input over the given document ids with explicit flags.
     *
     * @param linkedHashMap the document-id map, stored by reference
     * @param z  whether stop words are blocked
     * @param z2 whether stemming is used
     * @param z3 whether sentence separation is enabled
     * @param z4 whether titles are analyzed
     */
    public Input(LinkedHashMap<Integer, Float> linkedHashMap, boolean z, boolean z2, boolean z3, boolean z4) {
        this.docIds = new LinkedHashMap<>();
        this.snippets = new LinkedHashMap<>();
        this.blockStopWords = z;
        this.withStemming = z2;
        this.sentenceSeparation = z3;
        this.useTitles = z4;
        this.idfs = null;
        setDocIds(linkedHashMap);
    }

    /** Replaces the document-id map (stored by reference, no copy). */
    public void setDocIds(LinkedHashMap<Integer, Float> linkedHashMap) {
        this.docIds = linkedHashMap;
    }

    /** Replaces the snippet map (stored by reference, no copy). */
    public void setSnippets(LinkedHashMap<Integer, Snippet> linkedHashMap) {
        this.snippets = linkedHashMap;
    }

    /** Replaces the term-to-idf map (stored by reference, no copy). */
    public void setIdfs(HashMap<String, Float> hashMap) {
        this.idfs = hashMap;
    }

    /** @return the live document-id map */
    public LinkedHashMap<Integer, Float> getDocIds() {
        return this.docIds;
    }

    /** @return the live snippet map */
    public LinkedHashMap<Integer, Snippet> getSnippets() {
        return this.snippets;
    }

    /** @return the live term-to-idf map, or {@code null} if none was set */
    public HashMap<String, Float> getIdfs() {
        return this.idfs;
    }

    /**
     * Splits the text on every literal "..." (three dots).
     *
     * @param str the text to split; must not be {@code null}
     * @return the fragments between the "..." separators, in order
     */
    private ArrayList<String> splitInto2Sentences(String str) {
        ArrayList<String> fragments = new ArrayList<>();
        for (String fragment : str.split("(\\.\\.\\.)")) {
            fragments.add(fragment);
        }
        return fragments;
    }

    /**
     * Splits a query on single spaces.
     *
     * @param str the query text; must not be {@code null}
     * @return the space-separated tokens, in order
     */
    private ArrayList<String> getQueryAsVector(String str) {
        ArrayList<String> tokens = new ArrayList<>();
        for (String token : str.split(" ")) {
            tokens.add(token);
        }
        return tokens;
    }

    /** @return the number of stored snippets */
    public int snippetsSize() {
        return this.snippets.size();
    }

    /**
     * Prints every term and its idf to standard output, one per line.
     * Prints nothing when no idf map has been set.
     */
    public void printIDFs() {
        if (this.idfs == null) {
            // idfs is null until setIdfs is called; nothing to print.
            return;
        }
        for (String term : this.idfs.keySet()) {
            System.out.println("term=" + term + " idf=" + this.idfs.get(term).floatValue());
        }
    }

    /**
     * Returns at most the first {@code i} ids of the given list.
     *
     * @param arrayList the ids, in rank order
     * @param i the maximum number of ids to keep
     * @return {@code null} (after printing a message) when the list is
     *         {@code null} or empty; the same list instance when it holds at
     *         most {@code i} elements; otherwise a new list with the first
     *         {@code i} elements (empty when {@code i} is not positive)
     */
    public static ArrayList<Integer> getTopLFilesIds(ArrayList<Integer> arrayList, int i) {
        if (arrayList == null || arrayList.isEmpty()) {
            System.out.println("Documents' List is empty.");
            return null;
        }
        if (arrayList.size() <= i) {
            return arrayList;
        }
        // Here size > i, so the upper bound is in range; clamp negatives to 0.
        return new ArrayList<>(arrayList.subList(0, Math.max(i, 0)));
    }

    /**
     * Turns a title and a snippet text into a {@link Snippet}.
     *
     * <p>The title (when title usage is enabled and the title is non-empty) and
     * the snippet text are each cut into sentences and passed through a
     * {@link LexicalAnalyzer} configured from this object's flags. The snippet
     * text is first split on "..." and each fragment is then cut into
     * sentences. A {@code null} title or text is treated as empty.
     *
     * @param str the title of the document
     * @param str2 the snippet text of the document
     * @return the snippet built from the analyzed title and text, or
     *         {@code null} when sentence separation is disabled
     * @throws Exception if the lexical analyzer fails
     */
    public Snippet preprocesing(String str, String str2) throws Exception {
        LexicalAnalyzerProperties properties = new LexicalAnalyzerProperties();
        properties.setBlockStopWords(this.blockStopWords);
        properties.setUseStemmer(this.withStemming);
        properties.setStoreTxt(false);
        properties.setBlockTermsWithPunctuation(true);
        // Built before the flag check, as before, so analyzer construction
        // failures surface regardless of the sentence-separation setting.
        LexicalAnalyzer analyzer = new LexicalAnalyzer(properties);
        if (!this.sentenceSeparation) {
            return null;
        }

        String title = (str == null) ? "" : str;
        String text = (str2 == null) ? "" : str2;
        BreakIterator sentenceIterator = BreakIterator.getSentenceInstance(Locale.US);

        ArrayList<String> titleTerms = new ArrayList<>();
        if (this.useTitles && !title.isEmpty()) {
            ArrayList<String> titleSentences = extractTitleSentences(sentenceIterator, title);
            if (!titleSentences.isEmpty()) {
                titleTerms = analyzer.stringAnalyze(titleSentences);
            }
        }

        ArrayList<String> textSentences = new ArrayList<>();
        for (String fragment : splitInto2Sentences(text)) {
            if (!fragment.isEmpty()) {
                appendSentences(sentenceIterator, fragment, textSentences);
            }
        }
        ArrayList<String> textTerms = new ArrayList<>();
        if (!textSentences.isEmpty()) {
            textTerms = analyzer.stringAnalyze(textSentences);
        }
        return new Snippet(titleTerms, textTerms);
    }

    /**
     * Cleans a title and returns the sentences to analyze: nothing for titles
     * that start with "http:", "https:" or "untitled"; a single underscore-free
     * name for Microsoft Word/PowerPoint titles; the title's sentences otherwise.
     */
    private static ArrayList<String> extractTitleSentences(BreakIterator sentenceIterator, String title) {
        ArrayList<String> sentences = new ArrayList<>();
        if (title.startsWith("http:") || title.startsWith("https:") || title.startsWith("untitled")) {
            return sentences;
        }

        String officeName = null;
        if (title.startsWith("Microsoft Word")) {
            officeName = stripOfficeDecoration(title, WORD_PREFIX_LENGTH, ".doc", ".DOC");
        } else if (title.startsWith("Microsoft PowerPoint")) {
            officeName = stripOfficeDecoration(title, POWERPOINT_PREFIX_LENGTH, ".ppt", ".PPT");
        }

        if (officeName == null) {
            appendSentences(sentenceIterator, title, sentences);
        } else {
            // File-name style titles use '_' as a word separator.
            String cleaned = officeName.replace('_', ' ').trim();
            if (!cleaned.isEmpty()) {
                sentences.add(cleaned);
            }
        }
        return sentences;
    }

    /**
     * Drops the first {@code prefixLength} characters and a trailing extension
     * (either spelling) from the title; yields "" when the title is not longer
     * than the prefix instead of failing.
     */
    private static String stripOfficeDecoration(String title, int prefixLength, String lowerExt, String upperExt) {
        String name = title.length() > prefixLength ? title.substring(prefixLength) : "";
        if (name.endsWith(lowerExt) || name.endsWith(upperExt)) {
            name = name.substring(0, name.length() - lowerExt.length());
        }
        return name;
    }

    /** Appends every non-empty sentence of {@code text}, in order, to {@code out}. */
    private static void appendSentences(BreakIterator sentenceIterator, String text, ArrayList<String> out) {
        sentenceIterator.setText(text);
        int start = sentenceIterator.first();
        for (int end = sentenceIterator.next(); end != BreakIterator.DONE; end = sentenceIterator.next()) {
            String sentence = text.substring(start, end);
            if (!sentence.isEmpty()) {
                out.add(sentence);
            }
            start = end;
        }
    }
}
