/*
 * Decompiled with CFR 0.152.
 */
package opennlp.tools.namefind;

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import opennlp.maxent.GIS;
import opennlp.maxent.GISModel;
import opennlp.model.EventStream;
import opennlp.model.MaxentModel;
import opennlp.model.TwoPassDataIndexer;
import opennlp.tools.namefind.BigramNameFeatureGenerator;
import opennlp.tools.namefind.DefaultNameContextGenerator;
import opennlp.tools.namefind.NameContextGenerator;
import opennlp.tools.namefind.NameFinderEventStream;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.namefind.NameSampleDataStream;
import opennlp.tools.namefind.TokenNameFinder;
import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.util.BeamSearch;
import opennlp.tools.util.HashSumEventStream;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Sequence;
import opennlp.tools.util.SequenceValidator;
import opennlp.tools.util.Span;
import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator;
import opennlp.tools.util.featuregen.AdditionalContextFeatureGenerator;
import opennlp.tools.util.featuregen.CachedFeatureGenerator;
import opennlp.tools.util.featuregen.OutcomePriorFeatureGenerator;
import opennlp.tools.util.featuregen.PreviousMapFeatureGenerator;
import opennlp.tools.util.featuregen.SentenceFeatureGenerator;
import opennlp.tools.util.featuregen.TokenClassFeatureGenerator;
import opennlp.tools.util.featuregen.TokenFeatureGenerator;
import opennlp.tools.util.featuregen.WindowFeatureGenerator;
import opennlp.tools.util.model.ModelUtil;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
public class NameFinderME
implements TokenNameFinder {
    /** Zero-length additional context passed along when {@link #find(String[])} is called without one. */
    private static final String[][] EMPTY = new String[0][0];
    /** Default beam size (3) for the beam search. */
    public static final int DEFAULT_BEAM_SIZE = 3;
    /** Matches outcomes of the form {@code <type>-<suffix>}; group 1 captures the type part. */
    private static final Pattern typedOutcomePattern = Pattern.compile("(.+)-\\w+");
    /** Outcome suffix that opens a new name span in {@code find}. */
    public static final String START = "start";
    /** Outcome suffix that extends the currently open name span in {@code find}. */
    public static final String CONTINUE = "cont";
    /** Outcome suffix that closes the currently open name span (if any) in {@code find}. */
    public static final String OTHER = "other";
    /** Model handed to the beam search. */
    protected MaxentModel model;
    /** Context generator handed to the beam search; also receives adaptive-data updates. */
    protected NameContextGenerator contextGenerator;
    /** Sequence produced by the most recent {@code find} call; {@code null} until {@code find} has run. */
    private Sequence bestSequence;
    /** Beam search created by the constructors and used by {@code find}. */
    private BeamSearch<String> beam;
    /** Receives the caller-supplied additional context at the start of every {@code find} call. */
    private final AdditionalContextFeatureGenerator additionalContextFeatureGenerator = new AdditionalContextFeatureGenerator();

    /**
     * Creates a name finder for the given model with a beam size of
     * {@link #DEFAULT_BEAM_SIZE} and the built-in feature generator.
     *
     * @param model the model package to decode with
     */
    public NameFinderME(TokenNameFinderModel model) {
        this(model, DEFAULT_BEAM_SIZE);
    }

    public NameFinderME(TokenNameFinderModel model, AdaptiveFeatureGenerator generator, int beamSize) {
        this.model = model.getNameFinderModel();
        this.contextGenerator = generator != null ? new DefaultNameContextGenerator(generator) : new DefaultNameContextGenerator(NameFinderME.createFeatureGenerator());
        this.contextGenerator.addFeatureGenerator(new WindowFeatureGenerator((AdaptiveFeatureGenerator)this.additionalContextFeatureGenerator, 8, 8));
        this.beam = new BeamSearch<String>(beamSize, this.contextGenerator, this.model, new NameFinderSequenceValidator(), beamSize);
    }

    /**
     * Creates a name finder for the given model using the built-in feature generator.
     *
     * @param model    the model package to decode with
     * @param beamSize beam size for the search
     */
    public NameFinderME(TokenNameFinderModel model, int beamSize) {
        // A null generator makes the delegate constructor pick the built-in one.
        this(model, (AdaptiveFeatureGenerator) null, beamSize);
    }

    /**
     * Creates a name finder directly from a maxent model, with a fresh
     * {@code DefaultNameContextGenerator} and a beam size of {@link #DEFAULT_BEAM_SIZE}.
     *
     * @param mod the maxent model to decode with
     * @deprecated the constructors taking a {@code TokenNameFinderModel} are the
     *             non-deprecated alternative
     */
    @Deprecated
    public NameFinderME(MaxentModel mod) {
        this(mod, new DefaultNameContextGenerator(), DEFAULT_BEAM_SIZE);
    }

    /**
     * Creates a name finder from a maxent model and a context generator, with a
     * beam size of {@link #DEFAULT_BEAM_SIZE}.
     *
     * @param mod the maxent model to decode with
     * @param cg  the context generator to use
     * @deprecated the constructors taking a {@code TokenNameFinderModel} are the
     *             non-deprecated alternative
     */
    @Deprecated
    public NameFinderME(MaxentModel mod, NameContextGenerator cg) {
        this(mod, cg, DEFAULT_BEAM_SIZE);
    }

    @Deprecated
    public NameFinderME(MaxentModel mod, NameContextGenerator cg, int beamSize) {
        this.model = mod;
        this.contextGenerator = cg;
        this.contextGenerator.addFeatureGenerator(new WindowFeatureGenerator((AdaptiveFeatureGenerator)this.additionalContextFeatureGenerator, 8, 8));
        this.beam = new BeamSearch<String>(beamSize, cg, mod, new NameFinderSequenceValidator(), beamSize);
    }

    /**
     * Builds the feature generator used when the caller does not supply one: a
     * {@code CachedFeatureGenerator} wrapping token and token-class windows (2 tokens
     * on each side), outcome prior, previous map, bigram and sentence generators.
     *
     * @return a newly created feature generator
     */
    private static AdaptiveFeatureGenerator createFeatureGenerator() {
        AdaptiveFeatureGenerator tokenWindow =
                new WindowFeatureGenerator((AdaptiveFeatureGenerator) new TokenFeatureGenerator(), 2, 2);
        AdaptiveFeatureGenerator tokenClassWindow =
                new WindowFeatureGenerator((AdaptiveFeatureGenerator) new TokenClassFeatureGenerator(true), 2, 2);
        AdaptiveFeatureGenerator outcomePrior = new OutcomePriorFeatureGenerator();
        AdaptiveFeatureGenerator previousMap = new PreviousMapFeatureGenerator();
        AdaptiveFeatureGenerator bigramName = new BigramNameFeatureGenerator();
        AdaptiveFeatureGenerator sentence = new SentenceFeatureGenerator(true, false);

        return new CachedFeatureGenerator(tokenWindow, tokenClassWindow, outcomePrior, previousMap, bigramName, sentence);
    }

    /**
     * Finds names in the given tokens without any additional context.
     *
     * @param tokens the tokens of one sentence
     * @return the spans of the detected names, as token indices
     */
    @Override
    public Span[] find(String[] tokens) {
        String[][] noAdditionalContext = EMPTY;
        return find(tokens, noAdditionalContext);
    }

    /**
     * Finds names in the given tokens, decoding the best outcome sequence with the
     * beam search and converting runs of start/cont outcomes into spans.
     *
     * <p>Side effects: stores the decoded sequence for the {@code probs} methods and
     * feeds the outcomes to the context generator's adaptive data.
     *
     * @param tokens            the tokens of one sentence
     * @param additionalContext additional per-token context made available to the
     *                          feature generation
     * @return the detected name spans as token indices (end exclusive); each span is
     *         typed from the last outcome that belongs to it
     */
    public Span[] find(String[] tokens, String[][] additionalContext) {
        this.additionalContextFeatureGenerator.setCurrentContext(additionalContext);
        this.bestSequence = this.beam.bestSequence((String[])tokens, (Object[])additionalContext);

        List<String> outcomes = this.bestSequence.getOutcomes();
        this.contextGenerator.updateAdaptiveData(tokens, outcomes.toArray(new String[outcomes.size()]));

        // start == -1 means no name is currently open.
        int start = -1;
        int end = -1;
        List<Span> spans = new ArrayList<Span>(tokens.length);

        for (int li = 0; li < outcomes.size(); ++li) {
            String chunkTag = outcomes.get(li);
            if (chunkTag.endsWith(START)) {
                if (start != -1) {
                    // A new name begins right after an open one: close the open name with
                    // the type of ITS last tag (li - 1), not with the type of the new name.
                    spans.add(new Span(start, end, NameFinderME.extractNameType(outcomes.get(li - 1))));
                }
                start = li;
                end = li + 1;
            } else if (chunkTag.endsWith(CONTINUE)) {
                end = li + 1;
            } else if (chunkTag.endsWith(OTHER) && start != -1) {
                spans.add(new Span(start, end, NameFinderME.extractNameType(outcomes.get(li - 1))));
                start = -1;
                end = -1;
            }
        }

        // A name that runs to the end of the sentence is still open here.
        if (start != -1) {
            spans.add(new Span(start, end, NameFinderME.extractNameType(outcomes.get(outcomes.size() - 1))));
        }
        return spans.toArray(new Span[spans.size()]);
    }

    /**
     * Clears the adaptive data held by the context generator by delegating to its
     * {@code clearAdaptiveData()}.
     */
    @Override
    public void clearAdaptiveData() {
        contextGenerator.clearAdaptiveData();
    }

    /**
     * Hands the given array to the most recently decoded sequence's
     * {@code getProbs(double[])}. Requires a prior {@code find} call.
     *
     * @param probs the array passed to the sequence
     */
    public void probs(double[] probs) {
        bestSequence.getProbs(probs);
    }

    /**
     * Returns the probabilities reported by the most recently decoded sequence.
     * Requires a prior {@code find} call.
     *
     * @return the result of the sequence's {@code getProbs()}
     */
    public double[] probs() {
        return bestSequence.getProbs();
    }

    /**
     * Computes one probability per span as the product of the most recently decoded
     * sequence's probabilities over the span's indices {@code [start, end)}.
     * Requires a prior {@code find} call.
     *
     * @param spans the spans to score
     * @return an array parallel to {@code spans}; an empty span yields {@code 1.0}
     */
    public double[] probs(Span[] spans) {
        double[] spanProbs = new double[spans.length];
        double[] tokenProbs = this.bestSequence.getProbs();

        for (int spanIndex = 0; spanIndex < spans.length; spanIndex++) {
            double product = 1.0;
            int tokenIndex = spans[spanIndex].getStart();
            while (tokenIndex < spans[spanIndex].getEnd()) {
                product *= tokenProbs[tokenIndex];
                tokenIndex++;
            }
            spanProbs[spanIndex] = product;
        }
        return spanProbs;
    }

    /**
     * Trains a name finder model with GIS from the given samples.
     *
     * @param languageCode language code stored in the resulting model
     * @param type         name type handed to the {@code NameFinderEventStream}
     * @param samples      the training samples
     * @param generator    feature generator to use, or {@code null} for the built-in one
     * @param resources    resources stored in the resulting model
     * @param iterations   number of iterations passed to {@code GIS.trainModel}
     * @param cutoff       cutoff passed to the {@code TwoPassDataIndexer}
     * @return the trained model; its manifest also records the cutoff, the iterations
     *         and a hash sum of the training events
     * @throws IOException declared for failures while reading samples or training
     */
    public static TokenNameFinderModel train(String languageCode, String type, ObjectStream<NameSample> samples, AdaptiveFeatureGenerator generator, Map<String, Object> resources, int iterations, int cutoff) throws IOException {
        Map<String, String> manifestInfoEntries = new HashMap<String, String>();
        ModelUtil.addCutoffAndIterations(manifestInfoEntries, cutoff, iterations);

        AdaptiveFeatureGenerator featureGenerator = generator;
        if (featureGenerator == null) {
            featureGenerator = NameFinderME.createFeatureGenerator();
        }

        NameFinderEventStream events =
                new NameFinderEventStream(samples, type, new DefaultNameContextGenerator(featureGenerator));
        // Wrapped so a hash sum of the consumed events can be recorded after training.
        HashSumEventStream hashedEvents = new HashSumEventStream(events);

        TwoPassDataIndexer indexer = new TwoPassDataIndexer(hashedEvents, cutoff);
        GISModel nameFinderModel = GIS.trainModel(iterations, indexer);

        manifestInfoEntries.put("Training-Eventhash", hashedEvents.calculateHashSum().toString(16));
        return new TokenNameFinderModel(languageCode, nameFinderModel, resources, manifestInfoEntries);
    }

    /**
     * Trains a name finder model using the built-in feature generator.
     *
     * @param languageCode language code stored in the resulting model
     * @param type         name type handed to the training event stream
     * @param samples      the training samples
     * @param resources    resources stored in the resulting model
     * @param iterations   number of training iterations
     * @param cutoff       cutoff used when indexing the training events
     * @return the trained model
     * @throws IOException declared for failures while reading samples or training
     */
    public static TokenNameFinderModel train(String languageCode, String type, ObjectStream<NameSample> samples, Map<String, Object> resources, int iterations, int cutoff) throws IOException {
        // A null generator makes the full overload fall back to the built-in one.
        AdaptiveFeatureGenerator builtInGenerator = null;
        return train(languageCode, type, samples, builtInGenerator, resources, iterations, cutoff);
    }

    /**
     * Trains a name finder model with 100 iterations and a cutoff of 5, using the
     * built-in feature generator.
     *
     * @param languageCode language code stored in the resulting model
     * @param type         name type handed to the training event stream
     * @param samples      the training samples
     * @param resources    resources stored in the resulting model
     * @return the trained model
     * @throws IOException declared for failures while reading samples or training
     */
    public static TokenNameFinderModel train(String languageCode, String type, ObjectStream<NameSample> samples, Map<String, Object> resources) throws IOException {
        final int defaultIterations = 100;
        final int defaultCutoff = 5;
        return train(languageCode, type, samples, resources, defaultIterations, defaultCutoff);
    }

    /**
     * Trains a bare GIS model from an event stream.
     *
     * @param es         the training events
     * @param iterations number of iterations passed to {@code GIS.trainModel}
     * @param cut        cutoff passed to the {@code TwoPassDataIndexer}
     * @return the trained GIS model
     * @throws IOException declared for failures while reading events or training
     * @deprecated the {@code train} overloads returning a {@code TokenNameFinderModel}
     *             are the non-deprecated alternative
     */
    @Deprecated
    public static GISModel train(EventStream es, int iterations, int cut) throws IOException {
        TwoPassDataIndexer indexer = new TwoPassDataIndexer(es, cut);
        return GIS.trainModel(iterations, indexer);
    }

    /**
     * Extracts the name type from an outcome of the form {@code <type>-<suffix>}.
     *
     * @param outcome the outcome to inspect, e.g. {@code "person-start"}
     * @return the part captured before the final {@code -<suffix>}, or {@code null}
     *         when the outcome does not match that form (e.g. {@code "start"})
     */
    private static final String extractNameType(String outcome) {
        Matcher typedOutcome = typedOutcomePattern.matcher(outcome);
        return typedOutcome.matches() ? typedOutcome.group(1) : null;
    }

    /**
     * Returns the given spans in their natural order with overlapping spans removed.
     * After sorting, a span is dropped when it intersects the most recently kept span.
     * The input array itself is not modified.
     *
     * @param spans the spans to filter; must not be {@code null}
     * @return a new, sorted array containing only the spans that were kept
     */
    public static Span[] dropOverlappingSpans(Span[] spans) {
        // Typed collections: with raw types toArray(...) yields Object[], which does
        // not match the Span[] return type.
        List<Span> sortedSpans = new ArrayList<Span>(spans.length);
        Collections.addAll(sortedSpans, spans);
        Collections.sort(sortedSpans);

        Span lastKept = null;
        for (Iterator<Span> it = sortedSpans.iterator(); it.hasNext();) {
            Span span = it.next();
            if (lastKept != null && lastKept.intersects(span)) {
                // Overlaps the previously kept span: drop it and keep comparing against lastKept.
                it.remove();
            } else {
                lastKept = span;
            }
        }
        return sortedSpans.toArray(new Span[sortedSpans.size()]);
    }

    /**
     * Trains a model from a training file and writes it to a model file.
     *
     * <p>Expects exactly four arguments: {@code args[1]} is the charset name of the
     * training file, {@code args[2]} the training file and {@code args[3]} the model
     * output file; {@code args[0]} is not read. With any other argument count the
     * method does nothing.
     *
     * @param args the command line arguments as described above
     * @throws IOException if reading the training data or writing the model fails
     * @deprecated kept as a legacy entry point
     */
    @Deprecated
    public static void main(String[] args) throws IOException {
        if (args.length != 4) {
            return;
        }

        InputStream trainingData = new FileInputStream(args[2]);
        try {
            NameSampleDataStream sampleStream = new NameSampleDataStream(new PlainTextByLineStream(new InputStreamReader(trainingData, args[1])));
            TokenNameFinderModel model = NameFinderME.train("x-unspecified", "default", sampleStream, new HashMap<String, Object>());

            // With four arguments the last valid index is 3; args[4] always threw
            // ArrayIndexOutOfBoundsException.
            FileOutputStream modelOut = new FileOutputStream(args[3]);
            try {
                model.serialize(modelOut);
            } finally {
                modelOut.close();
            }
        } finally {
            trainingData.close();
        }
    }

    /*
     * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
     */
    private static class NameFinderSequenceValidator
    implements SequenceValidator<String> {
        private NameFinderSequenceValidator() {
        }

        public boolean validSequence(int i, String[] inputSequence, String[] outcomesSequence, String outcome) {
            if (outcome.endsWith(NameFinderME.CONTINUE)) {
                int li = outcomesSequence.length - 1;
                if (li == -1) {
                    return false;
                }
                if (outcomesSequence[li].endsWith(NameFinderME.OTHER)) {
                    return false;
                }
                if (outcomesSequence[li].endsWith(NameFinderME.CONTINUE)) {
                    String previousNameType = NameFinderME.extractNameType(outcomesSequence[li]);
                    String nameType = NameFinderME.extractNameType(outcome);
                    if (previousNameType != null || nameType != null) {
                        return nameType != null && nameType.equals(previousNameType);
                    }
                }
            }
            return true;
        }
    }
}

