package gr.forth.ics.isl.stellaclustering.lexicalanalyzer;

import gr.forth.ics.isl.stellaclustering.stemmer.Stemmer;
import gr.forth.ics.isl.stellaclustering.util.Pair;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Pattern;

/* loaded from: input_file:gr/forth/ics/isl/stellaclustering/lexicalanalyzer/FileTerms.class */
/**
 * Collects the terms of a single text source together with their weights
 * ("TF") and, optionally, their positions.
 *
 * <p>Each accepted term is lower-cased, filtered through the rules configured
 * in {@link LexicalAnalyzerProperties}, optionally stemmed, and then stored in
 * a map from term to a pair of (accumulated weight, list of positions). The
 * accepted (unstemmed) terms are also appended, space separated, to an internal
 * text buffer available through {@link #getText()}.
 *
 * <p>This class keeps mutable state in plain collections and performs no
 * synchronization.
 */
public class FileTerms {
    /** First code point of the Unicode "Greek and Coptic" block (U+0370). */
    private static final int GREEK_BLOCK_FIRST = 0x0370;
    /** Last code point of the Unicode "Greek and Coptic" block (U+03FF). */
    private static final int GREEK_BLOCK_LAST = 0x03FF;

    /** term -> (accumulated weight, positions of the term in the accumulated text). */
    private HashMap<String, Pair<Float, ArrayList<Integer>>> wordsMap;
    private LexicalAnalyzerProperties properties;
    // Kept package-visible and non-final exactly as before, in case other
    // classes of this package read or replace them.
    Pattern patternTermsWithDigits = Pattern.compile("\\d");
    Pattern patternTermsWithPunct = Pattern.compile("\\p{Punct}");
    /** Largest accumulated weight seen so far for any single term. */
    private float maxTF = 0.0f;
    /** Number of terms successfully added through {@link #addToMap(String, int)}. */
    private int numberOfWords = 0;
    /** Accepted lower-cased terms, each followed by a single space. */
    private StringBuilder sb = new StringBuilder();

    /**
     * Creates an empty term collection.
     *
     * @param lexicalAnalyzerProperties filtering and stemming configuration used
     *        by every other method of this class
     */
    public FileTerms(LexicalAnalyzerProperties lexicalAnalyzerProperties) {
        this.wordsMap = new HashMap<>();
        this.properties = lexicalAnalyzerProperties;
    }

    /**
     * Applies the configured filters to a candidate term.
     *
     * <p>Note that both length bounds are exclusive: a term whose length equals
     * the configured minimum or maximum is rejected.
     *
     * @param str candidate term (callers lower-case it first where required)
     * @return {@code true} if the term passes every enabled filter
     */
    private boolean validTermToAdd(String str) {
        if (str == null || str.length() == 0) {
            return false;
        }
        int length = str.length();
        if (length <= this.properties.getMinWord() || length >= this.properties.getMaxWord()) {
            return false;
        }
        if (this.properties.getBlockStopWords() && this.properties.isStopWord(str)) {
            return false;
        }
        if (this.properties.getBlockTermsStartingWithNumbers() && Character.isDigit(str.charAt(0))) {
            return false;
        }
        if (this.properties.getBlockTermsWithPunctuation() && this.patternTermsWithPunct.matcher(str).find()) {
            return false;
        }
        if (this.properties.getBlockTermsWithNumbers() && this.patternTermsWithDigits.matcher(str).find()) {
            return false;
        }
        if (this.properties.getBlockGreekTerms()) {
            // Only the first character is inspected to decide whether a term is Greek.
            char firstChar = str.charAt(0);
            if (firstChar >= GREEK_BLOCK_FIRST && firstChar <= GREEK_BLOCK_LAST) {
                return false;
            }
        }
        return !(this.properties.getBlockTermsWithManySameCharacters() && hasManySameCharacters(str));
    }

    /**
     * @return the live (not copied) map from term to (weight, positions)
     */
    public HashMap<String, Pair<Float, ArrayList<Integer>>> getWordsMap() {
        return this.wordsMap;
    }

    /**
     * @return all accepted, lower-cased, unstemmed terms in insertion order,
     *         each followed by one space
     */
    public String getText() {
        return this.sb.toString();
    }

    /**
     * Adds one occurrence of a term with the given weight.
     *
     * <p>The term is lower-cased and validated; rejected terms are silently
     * ignored. Accepted terms are appended to the text buffer, stemmed if the
     * stemmer is enabled, and their weight is added to the entry of the
     * (possibly stemmed) term. When position storing is enabled, the 1-based
     * character offset of the term inside {@link #getText()} is recorded.
     *
     * @param str raw term; {@code null} and empty strings are ignored
     * @param i weight to add to the term's accumulated weight
     */
    public void addToMap(String str, int i) {
        if (str == null || str.length() == 0) {
            return;
        }
        String unstemmed = str.toLowerCase();
        if (!validTermToAdd(unstemmed)) {
            return;
        }
        // sb.length() avoids copying the whole accumulated text for every term.
        int position = this.sb.length() + 1;
        this.sb.append(unstemmed).append(' ');

        String term = toIndexTerm(unstemmed);
        if (term == null) {
            return;
        }

        Pair<Float, ArrayList<Integer>> existing = this.wordsMap.get(term);
        this.numberOfWords++;
        float previousWeight;
        ArrayList<Integer> positions;
        if (existing == null) {
            previousWeight = 0.0f;
            positions = new ArrayList<>();
        } else {
            previousWeight = existing.getFirst().floatValue();
            positions = existing.getSecond();
        }
        float weight = previousWeight + i;
        if (weight > this.maxTF) {
            this.maxTF = weight;
        }
        if (this.properties.getStoreTermPos()) {
            positions.add(Integer.valueOf(position));
        }
        Pair<Float, ArrayList<Integer>> updated = Pair.from(Float.valueOf(weight), positions);
        this.wordsMap.put(term, updated);
    }

    /**
     * @return how many terms have been stored through {@link #addToMap(String, int)}
     */
    public int getNumberOfWords() {
        return this.numberOfWords;
    }

    /**
     * Prints every entry of the given map to standard output, one term per line,
     * as the term, its weight and (if position storing is enabled) its positions.
     *
     * @param map map to print; {@code null} prints nothing
     */
    public void printMap(Map<String, Pair<Float, ArrayList<Integer>>> map) {
        if (map == null) {
            return;
        }
        boolean showPositions = this.properties.getStoreTermPos();
        for (Map.Entry<String, Pair<Float, ArrayList<Integer>>> entry : map.entrySet()) {
            Pair<Float, ArrayList<Integer>> pair = entry.getValue();
            System.out.print(entry.getKey() + "\t");
            System.out.print("TF: " + pair.getFirst() + "\tPositions:");
            if (showPositions) {
                for (Integer position : pair.getSecond()) {
                    System.out.print(position);
                    System.out.print(",");
                }
            }
            System.out.println("");
        }
    }

    /**
     * Tells whether the string contains a run of identical consecutive
     * characters whose length reaches the configured limit.
     *
     * @param str string to inspect
     * @return {@code true} if such a run exists; {@code false} for {@code null}
     *         or for strings shorter than the limit
     */
    private boolean hasManySameCharacters(String str) {
        int limit = this.properties.getMaxSameCharacters();
        if (str == null || str.length() < limit) {
            return false;
        }
        int runLength = 1;
        char previous = str.charAt(0);
        for (int index = 1; index < str.length(); index++) {
            char current = str.charAt(index);
            if (current == previous) {
                runLength++;
                if (runLength == limit) {
                    return true;
                }
            } else {
                previous = current;
                runLength = 1;
            }
        }
        return false;
    }

    /**
     * Divides every stored weight by the largest accumulated weight seen so far.
     *
     * <p>Nothing happens while that maximum is not positive, which avoids
     * turning the stored weights into NaN or infinite values. The recorded
     * maximum is not reset, so calling this method again rescales the weights
     * once more.
     */
    public void normalizeTF() {
        if (!(this.maxTF > 0.0f)) {
            return;
        }
        float scale = 1.0f / this.maxTF;
        for (Pair<Float, ArrayList<Integer>> pair : this.wordsMap.values()) {
            pair.setFirst(Float.valueOf(pair.getFirst().floatValue() * scale));
        }
    }

    /**
     * Counts one occurrence of an unstemmed form under its stem in the table
     * returned by {@code properties.getUnstemmedWords()}.
     *
     * @param str the unstemmed form
     * @param str2 the stem that the unstemmed form was reduced to
     */
    private void addUnstemmed(String str, String str2) {
        TreeMap<String, TreeMap<String, Integer>> unstemmedWords = this.properties.getUnstemmedWords();
        TreeMap<String, Integer> variants = unstemmedWords.get(str2);
        if (variants == null) {
            variants = new TreeMap<>();
            unstemmedWords.put(str2, variants);
        }
        Integer count = variants.get(str);
        int updatedCount = (count == null) ? 1 : count.intValue() + 1;
        variants.put(str, Integer.valueOf(updatedCount));
    }

    /**
     * Validates a token and returns the form under which it would be stored.
     *
     * <p>Unlike {@link #addToMap(String, int)} the token is not lower-cased and
     * nothing is added to the term map or the text buffer; the unstemmed-form
     * table is still updated when the stemmer and that option are enabled.
     *
     * @param str candidate token
     * @return the token (stemmed if the stemmer is enabled), or {@code null} if
     *         it is rejected or stems to an empty string
     */
    public String getValidToken(String str) {
        if (!validTermToAdd(str)) {
            return null;
        }
        return toIndexTerm(str);
    }

    /**
     * Converts an already validated term to the form used as map key, stemming
     * it when the stemmer is enabled and recording the (unstemmed, stem) pair
     * when that option is enabled.
     *
     * @param unstemmed validated term before stemming
     * @return the key form, or {@code null} (after reporting on standard output)
     *         if the result is null or empty
     */
    private String toIndexTerm(String unstemmed) {
        boolean useStemmer = this.properties.getUseStemmer();
        String term = useStemmer ? Stemmer.Stem(unstemmed) : unstemmed;
        if (term == null || term.isEmpty()) {
            System.out.println("STEMMER ERROR! Trying to insert stemmed empty string. Unstemmed: " + unstemmed);
            return null;
        }
        // Recorded only after the stem is known to be usable, so that no empty
        // key ends up in the shared unstemmed-forms table.
        if (useStemmer && this.properties.getHoldBestUnstemmed()) {
            addUnstemmed(unstemmed, term);
        }
        return term;
    }
}
