package net.olivo.lc4j;

import cern.colt.GenericSorting;
import cern.colt.Swapper;
import cern.colt.function.IntComparator;
import gnu.getopt.Getopt;
import gnu.getopt.LongOpt;
import it.unimi.dsi.fastutil.bytes.ByteArrayList;
import it.unimi.dsi.fastutil.io.FastBufferedInputStream;
import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/* loaded from: input_file:net/olivo/lc4j/LanguageCategorization.class */
/**
 * Guesses the language of a byte sequence by comparing a ranked n-gram profile of the
 * input against ranked n-gram profiles ("language models") stored as files in a directory.
 *
 * <p>The input is split into words at separator bytes (space, tab, CR, LF and ASCII
 * digits). Every word is wrapped in {@code '_'} markers and all of its 1- to 5-byte
 * n-grams are counted, keyed by the hash code of the n-gram's bytes. The most frequent
 * n-grams form the model of the input, which is then ranked against each stored model by
 * the sum of rank displacements (see {@link #calcDistance}).
 *
 * <p>Stored models are read once per instance and cached; see {@link #loadLanguages}.
 */
public class LanguageCategorization {
    /** Buffer size, in bytes, used for every buffered stream opened by this class. */
    private static final int BUFFER_SIZE = 16384;
    /** Longest n-gram, in bytes, that is counted when building a model. */
    private static final int MAX_NGRAM_LENGTH = 5;
    /** Marker placed before and after every word before its n-grams are extracted. */
    private static final byte WORD_BOUNDARY = (byte) '_';
    /** Result entry reported when too many languages match to give a useful answer. */
    private static final String UNKNOWN_LANGUAGE = "UNKNOWN";

    private int maxLanguages = 10;
    private int numCharsToExamine = 1000;
    private int useTopmostNgrams = 400;
    private float unknownThreshold = 1.01f;
    private String languageModelsDir = "models/";
    /** Cached models, parallel to {@link #languageName}; {@code null} until first loaded. */
    private LanguageModel[] language = null;
    /** File names of the cached models, parallel to {@link #language}. */
    private String[] languageName = null;
    /** Bytes that terminate a word. */
    private final ByteArrayList wordSeparators = new ByteArrayList();

    /** Creates a categorizer with default settings and the default word separators. */
    public LanguageCategorization() {
        wordSeparators.add((byte) ' ');
        wordSeparators.add((byte) '\t');
        wordSeparators.add((byte) '\r');
        wordSeparators.add((byte) '\n');
        for (char digit = '0'; digit <= '9'; digit++) {
            wordSeparators.add((byte) digit);
        }
    }

    /**
     * Sets the maximum number of candidate languages {@link #findLanguage} may report
     * before it answers {@code "UNKNOWN"} instead.
     */
    public void setMaxLanguages(int i) {
        this.maxLanguages = i;
    }

    /**
     * Sets how many leading bytes of the input are examined when building a model.
     * A non-positive value means the whole input is examined.
     */
    public void setNumCharsToExamine(int i) {
        this.numCharsToExamine = i;
    }

    /**
     * Sets how many of the most frequent n-grams are kept per model. A non-positive value
     * means no limit. Models that were already loaded and cached are not reloaded.
     */
    public void setUseTopmostNgrams(int i) {
        this.useTopmostNgrams = i;
    }

    /**
     * Sets the factor applied to the best distance; languages whose distance is below
     * {@code factor * bestDistance} are reported as candidates by {@link #findLanguage}.
     */
    public void setUnknownThreshold(float f) {
        this.unknownThreshold = f;
    }

    /**
     * Sets the directory {@link #findLanguage} loads models from. Has no effect once
     * models have been loaded, because they are cached for the lifetime of the instance.
     */
    public void setLanguageModelsDir(String str) {
        this.languageModelsDir = str;
    }

    /** Returns the maximum number of candidate languages reported. */
    public int getMaxLanguages() {
        return this.maxLanguages;
    }

    /** Returns the number of leading input bytes examined (non-positive: all). */
    public int getNumCharsToExamine() {
        return this.numCharsToExamine;
    }

    /** Returns the number of topmost n-grams kept per model (non-positive: all). */
    public int getUseTopmostNgrams() {
        return this.useTopmostNgrams;
    }

    /** Returns the threshold factor used to select candidate languages. */
    public float getUnknownThreshold() {
        return this.unknownThreshold;
    }

    /** Returns the directory models are loaded from. */
    public String getLanguageModelsDir() {
        return this.languageModelsDir;
    }

    /**
     * Builds a language model from the given bytes.
     *
     * <p>The input is processed as if it were followed by a single space, so that the last
     * word is terminated; the list itself is not modified. When the examination limit cuts
     * the input in the middle of a word, that unfinished word is not counted.
     *
     * @param byteArrayList the raw input bytes; left untouched
     * @return a model holding the most frequent n-gram hashes and their counts
     */
    public LanguageModel createLanguageModel(ByteArrayList byteArrayList) {
        long start = System.currentTimeMillis();
        IncrementalInt2IntMap ngramCounts = new IncrementalInt2IntMap();
        LanguageModel model = new LanguageModel();

        // One extra position stands for the implicit trailing space.
        int inputSize = byteArrayList.size();
        int virtualSize = inputSize + 1;
        int limit = numCharsToExamine > 0 ? Math.min(virtualSize, numCharsToExamine) : virtualSize;

        ByteArrayList word = new ByteArrayList();
        word.add(WORD_BOUNDARY);
        for (int i = 0; i < limit; i++) {
            byte b = i < inputSize ? byteArrayList.getByte(i) : (byte) ' ';
            if (wordSeparators.indexOf(b) == -1) {
                word.add(b);
            } else {
                word.add(WORD_BOUNDARY);
                countNgrams(word, ngramCounts);
                word.clear();
                word.add(WORD_BOUNDARY);
            }
        }

        int[] keys = ngramCounts.getOrderedKeysByScore();
        int kept = useTopmostNgrams > 0 ? Math.min(keys.length, useTopmostNgrams) : keys.length;
        for (int i = 0; i < kept; i++) {
            try {
                model.add(keys[i], ngramCounts.get(keys[i]));
            } catch (IllegalArgumentException e) {
                System.err.println(e);
                System.err.println("WARNING: resulting language-model will be very likely invalid!");
            }
        }
        System.err.println("time taken to create language-model from input: "
                + ((System.currentTimeMillis() - start) / 1000.0d) + "s");
        return model;
    }

    /**
     * Counts every n-gram of length 1..{@link #MAX_NGRAM_LENGTH} contained in the word.
     * At each start offset the longest n-gram is counted first, then the shorter ones.
     */
    private static void countNgrams(ByteArrayList word, IncrementalInt2IntMap counts) {
        // The backing array may be longer than size(); only [0, size) is ever addressed.
        byte[] bytes = word.elements();
        int size = word.size();
        for (int start = 0; start < size; start++) {
            int longest = Math.min(MAX_NGRAM_LENGTH, size - start);
            for (int length = longest; length >= 1; length--) {
                counts.inc(new ByteArrayList(bytes, start, length).hashCode(), 1);
            }
        }
    }

    /**
     * Computes the distance between two models as the sum, over every n-gram of the first
     * model, of the absolute difference between its position in both models. An n-gram
     * missing from the second model contributes a fixed penalty: the topmost-n-grams
     * setting, or the size of the larger model when that setting is non-positive.
     *
     * @param languageModel  the model whose n-grams are looked up
     * @param languageModel2 the model they are looked up in
     * @return the accumulated distance; smaller means more similar
     */
    public int calcDistance(LanguageModel languageModel, LanguageModel languageModel2) {
        int size = languageModel.size();
        int missingPenalty = useTopmostNgrams > 0
                ? useTopmostNgrams
                : Math.max(size, languageModel2.size());
        int distance = 0;
        for (int rank = 0; rank < size; rank++) {
            int otherRank = languageModel2.getPos(languageModel.getNgram(rank));
            distance += otherRank != -1 ? Math.abs(otherRank - rank) : missingPenalty;
        }
        return distance;
    }

    /**
     * Loads every model file found directly inside the given directory, unless models
     * have already been loaded by this instance. Entries that are not regular files are
     * skipped. The cache is only populated when all files were read successfully.
     *
     * @param str path of the directory containing the model files
     * @throws FileNotFoundException if the path cannot be listed as a directory, or a
     *                               model file cannot be opened
     * @throws IOException           if reading a model file fails
     */
    public void loadLanguages(String str) throws IOException, FileNotFoundException {
        if (language == null) {
            long start = System.currentTimeMillis();
            File[] entries = new File(str).listFiles();
            if (entries == null) {
                // listFiles() yields null when the path is missing or not a directory.
                throw new FileNotFoundException("language-model directory cannot be listed: " + str);
            }

            File[] modelFiles = new File[entries.length];
            int count = 0;
            for (int i = 0; i < entries.length; i++) {
                if (entries[i].isFile()) {
                    modelFiles[count++] = entries[i];
                }
            }
            if (count == 0) {
                System.err.println("WARNING: no language-model files were found in the specified path ("
                        + str + "). Please check.");
            }

            // Fill local arrays first so a failure cannot leave a half-initialised cache.
            LanguageModel[] models = new LanguageModel[count];
            String[] names = new String[count];
            for (int i = 0; i < count; i++) {
                models[i] = readModel(modelFiles[i]);
                names[i] = modelFiles[i].getName();
            }
            language = models;
            languageName = names;

            System.err.println("time taken to load all available language-models: "
                    + ((System.currentTimeMillis() - start) / 1000.0d) + "s");
        }
        if (language.length == 0) {
            System.err.println("No language-model loaded.");
        }
    }

    /**
     * Reads one model file: a sequence of (n-gram hash, frequency) int pairs. Reading
     * stops at end of file or after the topmost-n-grams limit, whichever comes first; a
     * non-positive limit reads the whole file.
     */
    private LanguageModel readModel(File file) throws IOException {
        LanguageModel model = new LanguageModel();
        DataInputStream in = new DataInputStream(
                new FastBufferedInputStream(new FileInputStream(file), BUFFER_SIZE));
        try {
            for (int i = 0; useTopmostNgrams <= 0 || i < useTopmostNgrams; i++) {
                int ngram;
                int frequency;
                try {
                    ngram = in.readInt();
                    frequency = in.readInt();
                } catch (EOFException endOfModel) {
                    // The file holds fewer pairs than the limit (a trailing partial pair is dropped).
                    break;
                }
                try {
                    model.add(ngram, frequency);
                } catch (IllegalArgumentException e) {
                    System.err.println(e);
                }
            }
        } finally {
            in.close();
        }
        return model;
    }

    /**
     * Determines the most probable language(s) of the given bytes.
     *
     * @param byteArrayList the raw input bytes; left untouched
     * @return the names (model file names) of the best matching languages ordered from
     *         best to worst; a single {@code "UNKNOWN"} entry when more than the allowed
     *         number of languages match; or {@code null} when no model is available or
     *         an error occurs
     */
    public List findLanguage(ByteArrayList byteArrayList) {
        LanguageModel inputModel = createLanguageModel(byteArrayList);
        try {
            loadLanguages(languageModelsDir);
            if (language.length == 0) {
                // Nothing to compare against; loadLanguages has already reported it.
                return null;
            }
            long start = System.currentTimeMillis();
            List candidates = rankLanguages(inputModel);
            System.err.println("time taken to effectively determine the language: "
                    + ((System.currentTimeMillis() - start) / 1000.0d) + "s");
            return candidates;
        } catch (Exception e) {
            // Callers are told about any failure through the null result.
            System.err.println("An exception was thrown when trying to load languages. Returning null.");
            e.printStackTrace(System.err);
            return null;
        }
    }

    /**
     * Ranks all cached models against the input model and selects the candidates whose
     * distance is below {@code unknownThreshold * bestDistance} (or is zero). Requires at
     * least one cached model.
     */
    private List rankLanguages(LanguageModel inputModel) {
        int count = language.length;
        final int[] distance = new int[count];
        final int[] modelIndex = new int[count];
        for (int i = 0; i < count; i++) {
            distance[i] = calcDistance(inputModel, language[i]);
            modelIndex[i] = i;
        }

        // Sort both parallel arrays by ascending distance.
        GenericSorting.mergeSort(0, count, new IntComparator() {
            public int compare(int a, int b) {
                if (distance[a] > distance[b]) {
                    return 1;
                }
                if (distance[a] < distance[b]) {
                    return -1;
                }
                return 0;
            }
        }, new Swapper() {
            public void swap(int a, int b) {
                int tmpDistance = distance[a];
                distance[a] = distance[b];
                distance[b] = tmpDistance;
                int tmpIndex = modelIndex[a];
                modelIndex[a] = modelIndex[b];
                modelIndex[b] = tmpIndex;
            }
        });

        List candidates = new ArrayList();
        int best = distance[0];
        for (int i = 0; i < count && (distance[i] < unknownThreshold * best || distance[i] == 0); i++) {
            candidates.add(languageName[modelIndex[i]]);
        }
        if (candidates.size() > maxLanguages) {
            // Too many near-equal matches to give a meaningful answer.
            candidates.clear();
            candidates.add(UNKNOWN_LANGUAGE);
        }
        return candidates;
    }

    /**
     * Command-line entry point. Reads standard input and either prints the probable
     * language(s) to standard output or, with {@code -c}, writes a new binary language
     * model built from the input to standard output.
     *
     * @param strArr command-line arguments; run with {@code -h} for the list of options
     * @throws IOException if reading standard input or writing the model fails
     */
    public static void main(String[] strArr) throws IOException {
        boolean createModel = false;
        LanguageCategorization categorizer = new LanguageCategorization();
        LongOpt[] longOptions = new LongOpt[] {
            new LongOpt("help", LongOpt.NO_ARGUMENT, (StringBuffer) null, 'h'),
            new LongOpt("max-languages", LongOpt.REQUIRED_ARGUMENT, (StringBuffer) null, 'm'),
            new LongOpt("num-chars-to-examine", LongOpt.REQUIRED_ARGUMENT, (StringBuffer) null, 'n'),
            new LongOpt("use-topmost-ngrams", LongOpt.REQUIRED_ARGUMENT, (StringBuffer) null, 't'),
            new LongOpt("unknown-threshold", LongOpt.REQUIRED_ARGUMENT, (StringBuffer) null, 'u'),
            new LongOpt("languageModel-dir", LongOpt.REQUIRED_ARGUMENT, (StringBuffer) null, 'd'),
            new LongOpt("create-new-languageModel", LongOpt.NO_ARGUMENT, (StringBuffer) null, 'c')
        };
        Getopt getopt = new Getopt("LanguageCategorization", strArr, "m:n:u:t:d:ch", longOptions);
        getopt.setOpterr(true);

        int option;
        while ((option = getopt.getopt()) != -1) {
            switch (option) {
                case '?':
                    // Invalid option; Getopt has already printed a diagnostic.
                    return;
                case 'c':
                    createModel = true;
                    break;
                case 'd':
                    categorizer.setLanguageModelsDir(getopt.getOptarg());
                    break;
                case 'h':
                    printUsage(categorizer);
                    return;
                case 'm':
                    categorizer.setMaxLanguages(Integer.parseInt(getopt.getOptarg()));
                    break;
                case 'n':
                    categorizer.setNumCharsToExamine(Integer.parseInt(getopt.getOptarg()));
                    break;
                case 't':
                    categorizer.setUseTopmostNgrams(Integer.parseInt(getopt.getOptarg()));
                    break;
                case 'u':
                    categorizer.setUnknownThreshold(Float.parseFloat(getopt.getOptarg()));
                    break;
                default:
                    break;
            }
        }

        ByteArrayList input = readStandardInput();
        if (createModel) {
            writeModel(categorizer.createLanguageModel(input));
        } else {
            System.out.println("probable language(s): " + categorizer.findLanguage(input));
        }
    }

    /** Prints the command-line help to standard error, showing the current defaults. */
    private static void printUsage(LanguageCategorization categorizer) {
        System.err.println("Usage: LanguageCategorization [OPTIONS]");
        System.err.println("Determines the language in which stdin text is written.");
        System.err.println("");
        System.err.println("Optional arguments:");
        System.err.println("  -m, --max-languages            the maximum number of languages to be determined (default: "
                + categorizer.getMaxLanguages() + ")");
        System.err.println("  -n, --num-chars-to-examine     the number of characters to examine in the input (default: "
                + categorizer.getNumCharsToExamine() + ")");
        System.err.println("  -t, --use-topmost-ngrams       the number of most frequent n-grams to use from each language-model (default: "
                + categorizer.getUseTopmostNgrams() + ")");
        System.err.println("  -u, --unknown-threshold        determines how much worse result must be in order not to be mentioned as an alternative (default: "
                + categorizer.getUnknownThreshold() + ")");
        System.err.println("  -d, --languageModel-dir        use the given folder as the directory where to store/retrieve language-model files");
        System.err.println("  -c, --create-new-languageModel creates a new language-model using the input text. Output goes to stdout");
        System.err.println("");
        System.err.println("Help:");
        System.err.println("  -h, --help                     print this help screen");
        System.err.println("");
    }

    /** Reads standard input until end of stream, then closes it. */
    private static ByteArrayList readStandardInput() throws IOException {
        ByteArrayList input = new ByteArrayList();
        FastBufferedInputStream in = new FastBufferedInputStream(System.in, BUFFER_SIZE);
        try {
            int b;
            while ((b = in.read()) != -1) {
                input.add((byte) b);
            }
        } finally {
            in.close();
        }
        return input;
    }

    /**
     * Writes the model to standard output as (n-gram hash, frequency) int pairs, the same
     * layout {@link #readModel} expects, then closes the stream.
     */
    private static void writeModel(LanguageModel model) throws IOException {
        DataOutputStream out = new DataOutputStream(new FastBufferedOutputStream(System.out, BUFFER_SIZE));
        try {
            for (int i = 0; i < model.size(); i++) {
                out.writeInt(model.getNgram(i));
                out.writeInt(model.getFreq(i));
            }
        } finally {
            out.close();
        }
    }
}
