/*
 * Decompiled with CFR 0.152.
 */
package opennlp.tools.cmdline.tokenizer;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import opennlp.tools.cmdline.CmdLineTool;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.TerminateToolException;
import opennlp.tools.cmdline.tokenizer.TrainingParameters;
import opennlp.tools.tokenize.TokenSample;
import opennlp.tools.tokenize.TokenSampleStream;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Command-line tool that trains a tokenizer model from a training data file
 * and writes the resulting model to disk.
 *
 * <p>Expected arguments are the training parameters understood by
 * {@link TrainingParameters} plus {@code -data trainingData} and
 * {@code -model model}.
 */
public final class TokenizerTrainerTool
implements CmdLineTool {
    /**
     * Returns the name under which this tool is invoked.
     *
     * @return the constant tool name {@code "TokenizerTrainer"}
     */
    @Override
    public String getName() {
        return "TokenizerTrainer";
    }

    /**
     * Returns a one-line summary of what this tool does.
     *
     * @return the short description text
     */
    @Override
    public String getShortDescription() {
        return "trainer for the learnable tokenizer";
    }

    /**
     * Builds the usage message: the invocation line (tool name, the training
     * parameter usage and the {@code -data}/{@code -model} flags) followed by
     * the training parameter description.
     *
     * @return the usage/help text printed on invalid invocation
     */
    @Override
    public String getHelp() {
        return "Usage: opennlp " + this.getName() + TrainingParameters.getParameterUsage() + " -data trainingData -model model\n" + TrainingParameters.getDescription();
    }

    /**
     * Opens a sample data file as a stream of {@link TokenSample}s.
     *
     * <p>The file is first checked with {@code CmdLineUtil.checkInputFile},
     * then opened, and its lines are decoded using the given charset.
     *
     * @param sampleDataName label used when checking the input file; the
     *                       suffix {@code " Data"} is appended to it
     * @param sampleDataFile the file to read samples from
     * @param encoding       charset used to decode the file's lines
     * @return a sample stream over the file; the caller is responsible for closing it
     */
    static ObjectStream<TokenSample> openSampleData(String sampleDataName, File sampleDataFile, Charset encoding) {
        CmdLineUtil.checkInputFile(sampleDataName + " Data", sampleDataFile);
        FileInputStream sampleDataIn = CmdLineUtil.openInFile(sampleDataFile);
        PlainTextByLineStream lineStream = new PlainTextByLineStream(sampleDataIn.getChannel(), encoding);
        return new TokenSampleStream(lineStream);
    }

    /**
     * Runs the trainer: validates the arguments, trains a tokenizer model on
     * the {@code -data} file and writes it to the {@code -model} file.
     *
     * @param args command-line arguments for this tool
     * @throws TerminateToolException with code {@code 1} when the arguments are
     *         too few, invalid, or lack {@code -data}/{@code -model}; with code
     *         {@code -1} when training fails with an {@link IOException}
     */
    @Override
    public void run(String[] args) {
        TokenizerModel model;
        if (args.length < 6) {
            this.failWithUsage();
        }
        TrainingParameters parameters = new TrainingParameters(args);
        if (!parameters.isValid()) {
            this.failWithUsage();
        }
        // Look the paths up before building File objects: new File((String) null)
        // throws NullPointerException, which would surface as a raw stack trace
        // instead of the usage message.
        // NOTE(review): assumes CmdLineUtil.getParameter returns null for an
        // absent flag - confirm against CmdLineUtil.
        String trainingDataPath = CmdLineUtil.getParameter("-data", args);
        String modelOutPath = CmdLineUtil.getParameter("-model", args);
        if (trainingDataPath == null || modelOutPath == null) {
            this.failWithUsage();
        }
        File trainingDataInFile = new File(trainingDataPath);
        File modelOutFile = new File(modelOutPath);
        CmdLineUtil.checkOutputFile("tokenizer model", modelOutFile);
        ObjectStream<TokenSample> sampleStream = TokenizerTrainerTool.openSampleData("Training", trainingDataInFile, parameters.getEncoding());
        try {
            model = TokenizerME.train(parameters.getLanguage(), sampleStream, parameters.isAlphaNumericOptimizationEnabled(), parameters.getCutoff(), parameters.getNumberOfIterations());
        }
        catch (IOException e) {
            CmdLineUtil.printTrainingIoError(e);
            throw new TerminateToolException(-1);
        }
        finally {
            try {
                sampleStream.close();
            }
            catch (IOException ignored) {
                // Best-effort close: the stream was only read from, so a failure
                // here must not mask the training result or the training error.
            }
        }
        CmdLineUtil.writeModel("tokenizer", modelOutFile, model);
    }

    /** Prints the usage message to standard output and aborts the tool with exit code 1. */
    private void failWithUsage() {
        System.out.println(this.getHelp());
        throw new TerminateToolException(1);
    }
}

