package opennlp.tools.sentdetect;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import opennlp.maxent.GIS;
import opennlp.maxent.GISModel;
import opennlp.model.MaxentModel;
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.sentdetect.lang.Factory;
import opennlp.tools.util.HashSumEventStream;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
import opennlp.tools.util.StringUtil;
import opennlp.tools.util.model.BaseModel;
import opennlp.tools.util.model.ModelUtil;

/* loaded from: input_file:WEB-INF/lib/marytts-d4science-5.0.0.jar:opennlp/tools/sentdetect/SentenceDetectorME.class */
/**
 * Sentence detector driven by a maximum-entropy model.
 *
 * <p>An {@link EndOfSentenceScanner} proposes candidate end-of-sentence
 * positions, an {@link SDContextGenerator} turns each candidate into a feature
 * context, and the model decides whether the candidate is a real boundary
 * (best outcome equal to {@link #SPLIT}).
 *
 * <p>The probabilities of the most recent {@link #sentPosDetect(String)} call
 * are kept in an instance field that every call clears and refills, so
 * concurrent calls on one instance would interfere with each other.
 */
public class SentenceDetectorME implements SentenceDetector {
    /** Model outcome meaning "a sentence ends at this candidate". */
    public static final String SPLIT = "s";
    /** Model outcome meaning "no sentence boundary at this candidate". */
    public static final String NO_SPLIT = "n";
    /** Probability recorded for spans that were not decided by the model. */
    private static final Double ONE = Double.valueOf(1.0d);
    private final MaxentModel model;
    private final SDContextGenerator cgen;
    private final EndOfSentenceScanner scanner;
    /** Probabilities belonging to the spans of the last sentPosDetect call. */
    private final List<Double> sentProbs;
    /** When true, a boundary is placed after the token containing the candidate. */
    protected boolean useTokenEnd;

    /**
     * Creates a detector for the given model using a default {@link Factory}.
     *
     * @param sentenceModel the trained sentence model
     */
    public SentenceDetectorME(SentenceModel sentenceModel) {
        this(sentenceModel, new Factory());
    }

    /**
     * Creates a detector for the given model.
     *
     * @param sentenceModel the trained sentence model; supplies the maxent
     *                      model, the language and the useTokenEnd flag
     * @param factory       creates the context generator and end-of-sentence
     *                      scanner for the model's language
     */
    public SentenceDetectorME(SentenceModel sentenceModel, Factory factory) {
        this.sentProbs = new ArrayList<Double>();
        this.model = sentenceModel.getMaxentModel();
        this.cgen = factory.createSentenceContextGenerator(sentenceModel.getLanguage());
        this.scanner = factory.createEndOfSentenceScanner(sentenceModel.getLanguage());
        this.useTokenEnd = sentenceModel.useTokenEnd();
    }

    /**
     * Splits the text into sentences.
     *
     * @param str the text to split
     * @return the text covered by each span of {@link #sentPosDetect(String)};
     *         an empty array when no span was found
     */
    @Override // opennlp.tools.sentdetect.SentenceDetector
    public String[] sentDetect(String str) {
        Span[] spans = sentPosDetect(str);
        String[] sentences = new String[spans.length];
        for (int i = 0; i < spans.length; i++) {
            sentences[i] = spans[i].getCoveredText(str).toString();
        }
        return sentences;
    }

    /** Returns the index of the first whitespace character at or after {@code i}, or the text length. */
    private int getFirstWS(String str, int i) {
        while (i < str.length() && !StringUtil.isWhitespace(str.charAt(i))) {
            i++;
        }
        return i;
    }

    /** Returns the index of the first non-whitespace character at or after {@code i}, or the text length. */
    private int getFirstNonWS(String str, int i) {
        while (i < str.length() && StringUtil.isWhitespace(str.charAt(i))) {
            i++;
        }
        return i;
    }

    /**
     * Detects sentence spans in the text and records one probability per
     * returned span (see {@link #getSentenceProbabilities()}).
     *
     * @param str the text to split
     * @return the sentence spans in text order; empty when the text contains
     *         only whitespace
     */
    @Override // opennlp.tools.sentdetect.SentenceDetector
    public Span[] sentPosDetect(String str) {
        this.sentProbs.clear();
        StringBuffer text = new StringBuffer(str);
        List<Integer> candidates = this.scanner.getPositions(str);
        int candidateCount = candidates.size();
        List<Integer> boundaries = new ArrayList<Integer>(candidateCount);
        int previousBreak = 0;
        for (int c = 0; c < candidateCount; c++) {
            int candidate = candidates.get(c).intValue();
            int tokenEnd = getFirstWS(str, candidate + 1);
            // Only the last candidate inside a whitespace-delimited token is evaluated.
            if (c + 1 < candidateCount && candidates.get(c + 1).intValue() < tokenEnd) {
                continue;
            }
            double[] outcomeProbs = this.model.eval(this.cgen.getContext(text, candidate));
            String bestOutcome = this.model.getBestOutcome(outcomeProbs);
            if (SPLIT.equals(bestOutcome) && isAcceptableBreak(str, previousBreak, candidate)) {
                if (previousBreak != candidate) {
                    if (this.useTokenEnd) {
                        boundaries.add(Integer.valueOf(getFirstNonWS(str, getFirstWS(str, candidate + 1))));
                    } else {
                        boundaries.add(Integer.valueOf(getFirstNonWS(str, candidate)));
                    }
                    this.sentProbs.add(Double.valueOf(outcomeProbs[this.model.getIndex(bestOutcome)]));
                }
                previousBreak = candidate + 1;
            }
        }

        int[] starts = new int[boundaries.size()];
        for (int b = 0; b < starts.length; b++) {
            starts[b] = boundaries.get(b).intValue();
        }

        if (starts.length == 0) {
            // No boundary found: the whole text, trimmed of whitespace, is one sentence.
            int begin = 0;
            int end = str.length();
            while (begin < str.length() && Character.isWhitespace(str.charAt(begin))) {
                begin++;
            }
            while (end > 0 && Character.isWhitespace(str.charAt(end - 1))) {
                end--;
            }
            if (end - begin > 0) {
                // Keep the probabilities aligned with the returned spans.
                this.sentProbs.add(ONE);
                return new Span[]{new Span(begin, end)};
            }
            return new Span[0];
        }

        // Text after the last boundary forms one more sentence.
        boolean hasTrailingSentence = starts[starts.length - 1] != str.length();
        Span[] spans = new Span[hasTrailingSentence ? starts.length + 1 : starts.length];
        for (int s = 0; s < starts.length; s++) {
            int begin;
            if (s == 0) {
                begin = 0;
                // Skip leading whitespace; bounded by the first boundary so the
                // scan can neither leave the text nor pass the span end.
                while (begin < starts[0] && Character.isWhitespace(str.charAt(begin))) {
                    begin++;
                }
            } else {
                begin = starts[s - 1];
            }
            int end = starts[s];
            // Trim trailing whitespace without moving the end before the start.
            while (end > begin && Character.isWhitespace(str.charAt(end - 1))) {
                end--;
            }
            spans[s] = new Span(begin, end);
        }
        if (hasTrailingSentence) {
            spans[spans.length - 1] = new Span(starts[starts.length - 1], str.length());
            this.sentProbs.add(ONE);
        }
        return spans;
    }

    /**
     * Returns the probabilities recorded by the most recent call to
     * {@link #sentPosDetect(String)}, one per returned span. Spans that were
     * not decided by the model (the trailing remainder, or the whole text when
     * no boundary was found) have probability 1.
     *
     * @return a fresh array of probabilities
     */
    public double[] getSentenceProbabilities() {
        double[] probabilities = new double[this.sentProbs.size()];
        for (int i = 0; i < probabilities.length; i++) {
            probabilities[i] = this.sentProbs.get(i).doubleValue();
        }
        return probabilities;
    }

    /**
     * Hook that lets subclasses veto a boundary the model proposed. This
     * implementation accepts every break.
     *
     * @param str the text being processed
     * @param i   the position following the previously accepted break (0 at the start)
     * @param i2  the candidate end-of-sentence position
     * @return {@code true} when the break may be used
     */
    protected boolean isAcceptableBreak(String str, int i, int i2) {
        return true;
    }

    /**
     * Trains a sentence model with a cutoff of 5 and 100 iterations.
     *
     * @param str          the language code
     * @param objectStream the training samples
     * @param z            flag passed through to the {@link SentenceModel} constructor
     *                     (presumably useTokenEnd; confirm against SentenceModel)
     * @param dictionary   dictionary passed through to the {@link SentenceModel}
     *                     constructor; {@code main} passes {@code null}
     * @return the trained model
     * @throws IOException declared by the six-argument {@code train} overload
     */
    public static SentenceModel train(String str, ObjectStream<SentenceSample> objectStream, boolean z, Dictionary dictionary) throws IOException {
        return train(str, objectStream, z, dictionary, 5, 100);
    }

    /**
     * Trains a sentence model.
     *
     * @param str          the language code
     * @param objectStream the training samples
     * @param z            flag passed through to the {@link SentenceModel} constructor
     *                     (presumably useTokenEnd; confirm against SentenceModel)
     * @param dictionary   dictionary passed through to the {@link SentenceModel}
     *                     constructor; {@code main} passes {@code null}
     * @param i            the feature cutoff
     * @param i2           the number of training iterations
     * @return the trained model, whose manifest records cutoff, iterations and
     *         a hash sum of the training events
     * @throws IOException declared by this method; presumably raised while
     *                     reading the training events
     */
    public static SentenceModel train(String str, ObjectStream<SentenceSample> objectStream, boolean z, Dictionary dictionary, int i, int i2) throws IOException {
        HashMap<String, String> manifestEntries = new HashMap<String, String>();
        ModelUtil.addCutoffAndIterations(manifestEntries, i, i2);
        Factory factory = new Factory();
        HashSumEventStream events = new HashSumEventStream(new SDEventStream(objectStream, factory.createSentenceContextGenerator(str), factory.createEndOfSentenceScanner(str)));
        // GIS.trainModel takes (events, iterations, cutoff), i.e. the reverse of this method's order.
        GISModel trainedModel = GIS.trainModel(events, i2, i);
        manifestEntries.put(BaseModel.TRAINING_EVENTHASH_PROPERTY, events.calculateHashSum().toString(16));
        return new SentenceModel(str, trainedModel, z, dictionary, manifestEntries);
    }

    /** Prints the command-line help to standard error and exits with status 1. */
    private static void usage() {
        System.err.println("Usage: SentenceDetectorME -encoding charset -lang language trainData modelName [cutoff iterations]");
        System.err.println("-encoding charset specifies the encoding which should be used ");
        System.err.println("                  for reading and writing text.");
        System.err.println("-lang language    specifies the language which ");
        System.err.println("                  is being processed.");
        System.err.println("trainData         specifies the name of the input training file");
        System.err.println("                  to train the resulting model.");
        System.err.println("modelName         specifies the resulting saved model after");
        System.err.println("                  training.");
        System.exit(1);
    }

    /**
     * Command-line trainer: reads the training file, trains a model and writes
     * it to the model file.
     *
     * @param strArr {@code -encoding charset -lang language trainData modelName [cutoff iterations]}
     * @throws IOException if the training data cannot be read or the model cannot be written
     */
    public static void main(String[] strArr) throws IOException {
        int argIndex = 0;
        String encoding = null;
        String language = null;
        // Consume "-option value" pairs; the bounds check stops the loop when
        // no positional arguments follow the options.
        while (argIndex < strArr.length && strArr[argIndex].startsWith("-")) {
            boolean isEncoding = strArr[argIndex].equals("-encoding");
            boolean isLanguage = strArr[argIndex].equals("-lang");
            if ((!isEncoding && !isLanguage) || argIndex + 1 >= strArr.length) {
                usage();
                return; // not reached: usage() exits
            }
            if (isEncoding) {
                encoding = strArr[argIndex + 1];
            } else {
                language = strArr[argIndex + 1];
            }
            argIndex += 2;
        }

        int positionalCount = strArr.length - argIndex;
        if (language == null || encoding == null || positionalCount < 2) {
            usage();
            return; // not reached: usage() exits
        }
        File trainingFile = new File(strArr[argIndex]);
        File modelFile = new File(strArr[argIndex + 1]);

        // Optional "[cutoff iterations]" pair; defaults match the four-argument train overload.
        int cutoff = 5;
        int iterations = 100;
        if (positionalCount >= 4) {
            try {
                cutoff = Integer.parseInt(strArr[argIndex + 2]);
                iterations = Integer.parseInt(strArr[argIndex + 3]);
            } catch (NumberFormatException e) {
                usage();
                return; // not reached: usage() exits
            }
        }

        SentenceModel trainedModel;
        FileInputStream trainingIn = new FileInputStream(trainingFile);
        try {
            trainedModel = train(language, new SentenceSampleStream(new PlainTextByLineStream(new InputStreamReader(trainingIn, encoding))), true, null, cutoff, iterations);
        } finally {
            trainingIn.close();
        }

        System.out.println("Saving the model as: " + modelFile);
        FileOutputStream modelOut = new FileOutputStream(modelFile);
        try {
            trainedModel.serialize(modelOut);
        } finally {
            modelOut.close();
        }
    }
}
