/*
 * Decompiled with CFR 0.152.
 */
package opennlp.tools.tokenize;

import java.io.IOException;
import java.io.ObjectStreamException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Pattern;
import opennlp.maxent.GIS;
import opennlp.maxent.GISModel;
import opennlp.model.MaxentModel;
import opennlp.model.TwoPassDataIndexer;
import opennlp.tools.tokenize.AbstractTokenizer;
import opennlp.tools.tokenize.DefaultTokenContextGenerator;
import opennlp.tools.tokenize.TokSpanEventStream;
import opennlp.tools.tokenize.TokenContextGenerator;
import opennlp.tools.tokenize.TokenSample;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.tokenize.WhitespaceTokenizer;
import opennlp.tools.util.HashSumEventStream;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Span;
import opennlp.tools.util.model.ModelUtil;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Tokenizer that refines whitespace-separated chunks with a maximum-entropy model.
 *
 * <p>The input is first cut at whitespace by {@link WhitespaceTokenizer}. Every
 * resulting chunk of two or more characters is then scanned position by position,
 * and the model decides at each interior position whether a token boundary
 * belongs there.
 *
 * <p>The token and probability buffers are instance fields that are cleared and
 * refilled on every {@link #tokenizePos(String)} call, so
 * {@link #getTokenProbabilities()} always describes the most recent call, and
 * concurrent calls on one instance would share these buffers.
 */
public class TokenizerME extends AbstractTokenizer {
    /** Model outcome label that marks a token boundary. */
    public static final String SPLIT = "T";
    /** Counterpart outcome label to {@link #SPLIT}; not referenced inside this class. */
    public static final String NO_SPLIT = "F";
    /** Matches a string made up of one or more ASCII letters or digits only. */
    public static final Pattern alphaNumeric = Pattern.compile("^[A-Za-z0-9]+$");

    /** Model evaluated at every candidate split position. */
    private MaxentModel model;
    /** Produces the model features for a position inside a chunk. */
    private final TokenContextGenerator cg = new DefaultTokenContextGenerator();
    /** When true, purely alphanumeric chunks are accepted without consulting the model. */
    private boolean useAlphaNumericOptimization;
    /** Probability of each token produced by the latest tokenizePos call. */
    private List<Double> tokProbs;
    /** Token spans produced by the latest tokenizePos call. */
    private List<Span> newTokens;

    /**
     * Creates a tokenizer backed by the given trained model.
     *
     * @param model supplies the maxent model and the alphanumeric-optimization flag
     */
    public TokenizerME(TokenizerModel model) {
        this.model = model.getMaxentModel();
        this.useAlphaNumericOptimization = model.useAlphaNumericOptimization();
        this.tokProbs = new ArrayList<Double>(50);
        this.newTokens = new ArrayList<Span>();
    }

    /**
     * Returns the probabilities recorded during the most recent
     * {@link #tokenizePos(String)} call, one entry per returned span and in the
     * same order.
     *
     * @return a freshly allocated array of token probabilities
     */
    public double[] getTokenProbabilities() {
        double[] result = new double[tokProbs.size()];
        int idx = 0;
        for (Double probability : tokProbs) {
            result[idx++] = probability;
        }
        return result;
    }

    /**
     * Splits the text into token spans.
     *
     * <p>Chunks shorter than two characters, and (when the alphanumeric
     * optimization is enabled) chunks matching {@link #alphaNumeric}, are kept
     * whole with probability 1.0. For every other chunk the model is evaluated
     * at each interior position; a token's probability is the product of the
     * best-outcome probabilities of all decisions taken since the previous
     * boundary.
     *
     * @param d the text to tokenize
     * @return the token spans, expressed as offsets into {@code d}
     */
    @Override
    public Span[] tokenizePos(String d) {
        // Tokenize on whitespace before touching the buffers, so a failure here
        // leaves the results of the previous call intact.
        Span[] whitespaceSpans = WhitespaceTokenizer.INSTANCE.tokenizePos(d);
        newTokens.clear();
        tokProbs.clear();

        for (Span chunk : whitespaceSpans) {
            int chunkStart = chunk.getStart();
            int chunkEnd = chunk.getEnd();
            String text = d.substring(chunkStart, chunkEnd);

            boolean keepWhole = text.length() < 2
                    || (useAlphaNumericOptimization() && alphaNumeric.matcher(text).matches());
            if (keepWhole) {
                newTokens.add(chunk);
                tokProbs.add(1.0);
                continue;
            }

            int tokenStart = chunkStart;
            double confidence = 1.0;
            // offset is the position inside the chunk; chunkStart + offset is the
            // matching position in the full text.
            for (int offset = 1; chunkStart + offset < chunkEnd; offset++) {
                double[] outcomeProbs = model.eval(cg.getContext(text, offset));
                String decision = model.getBestOutcome(outcomeProbs);
                confidence *= outcomeProbs[model.getIndex(decision)];
                if (decision.equals(SPLIT)) {
                    int boundary = chunkStart + offset;
                    newTokens.add(new Span(tokenStart, boundary));
                    tokProbs.add(confidence);
                    tokenStart = boundary;
                    confidence = 1.0;
                }
            }
            // Whatever follows the last boundary (or the whole chunk) is the final token.
            newTokens.add(new Span(tokenStart, chunkEnd));
            tokProbs.add(confidence);
        }
        return newTokens.toArray(new Span[newTokens.size()]);
    }

    /**
     * Trains a tokenizer model from the given samples.
     *
     * <p>The cutoff and iteration count are recorded in the model manifest, together
     * with a base-16 hash sum of the training events under the key
     * {@code Training-Eventhash}.
     *
     * @param languageCode language code stored in the resulting model
     * @param samples training samples
     * @param useAlphaNumericOptimization passed to the event stream and stored in the model
     * @param cutoff cutoff handed to the data indexer
     * @param iterations number of GIS training iterations
     * @return the trained tokenizer model
     * @throws IOException declared for failures while training
     */
    public static TokenizerModel train(String languageCode, ObjectStream<TokenSample> samples, boolean useAlphaNumericOptimization, int cutoff, int iterations) throws IOException {
        HashMap<String, String> manifest = new HashMap<String, String>();
        ModelUtil.addCutoffAndIterations(manifest, cutoff, iterations);

        // Wrap the events so their hash sum can be read once training has consumed them.
        HashSumEventStream hashedEvents =
                new HashSumEventStream(new TokSpanEventStream(samples, useAlphaNumericOptimization));
        GISModel trained = GIS.trainModel(iterations, new TwoPassDataIndexer(hashedEvents, cutoff));

        String eventHash = hashedEvents.calculateHashSum().toString(16);
        manifest.put("Training-Eventhash", eventHash);
        return new TokenizerModel(languageCode, trained, useAlphaNumericOptimization, manifest);
    }

    /**
     * Trains a tokenizer model with a cutoff of 5 and 100 iterations.
     *
     * @param languageCode language code stored in the resulting model
     * @param samples training samples
     * @param useAlphaNumericOptimization passed to the event stream and stored in the model
     * @return the trained tokenizer model
     * @throws IOException declared for failures while training
     * @throws ObjectStreamException declared by this overload's signature
     */
    public static TokenizerModel train(String languageCode, ObjectStream<TokenSample> samples, boolean useAlphaNumericOptimization) throws IOException, ObjectStreamException {
        final int defaultCutoff = 5;
        final int defaultIterations = 100;
        return train(languageCode, samples, useAlphaNumericOptimization, defaultCutoff, defaultIterations);
    }

    /**
     * Reports whether purely alphanumeric chunks bypass the model.
     *
     * @return the flag taken from the model at construction time
     */
    public boolean useAlphaNumericOptimization() {
        return useAlphaNumericOptimization;
    }
}

