/*
 * Decompiled with CFR 0.152.
 */
package marytts.modules;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import marytts.datatypes.MaryData;
import marytts.datatypes.MaryDataType;
import marytts.modules.InternalModule;
import marytts.server.MaryProperties;
import marytts.util.MaryUtils;
import marytts.util.dom.MaryDomUtils;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTaggerME;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.traversal.NodeIterator;
import org.w3c.dom.traversal.TreeWalker;

/**
 * MaryTTS module that adds a {@code pos} attribute to every token ({@code t} element)
 * of every sentence ({@code s} element) in the input document, using an OpenNLP
 * {@link POSTaggerME}.
 *
 * <p>Configuration is read from two properties built from the property prefix given
 * to the constructor:
 * <ul>
 *   <li>{@code <prefix>model} - the OpenNLP POS model (requested via
 *       {@code MaryProperties.needStream});</li>
 *   <li>{@code <prefix>posMap} - an optional UTF-8 text file mapping tagger output
 *       tags to replacement tags, one whitespace-separated pair per line; blank lines
 *       and lines starting with {@code #} are ignored.</li>
 * </ul>
 */
public class OpenNLPPosTagger
extends InternalModule {
    /** Property prefix, always ending in {@code "."}. */
    private String propertyPrefix;
    /** Tagger created in {@link #startup()}; all access is serialized on {@code this}. */
    private POSTaggerME tagger;
    /** Optional tag replacement table; {@code null} when no {@code posMap} stream is configured. */
    private Map<String, String> posMapper = null;

    /**
     * Creates the module for the given locale.
     *
     * @param locale         locale string, converted with {@code MaryUtils.string2locale}
     * @param propertyPrefix prefix of the {@code model} / {@code posMap} properties; a
     *                       trailing {@code "."} is appended if missing
     * @throws Exception if the superclass constructor fails
     */
    public OpenNLPPosTagger(String locale, String propertyPrefix) throws Exception {
        super("OpenNLPPosTagger", MaryDataType.WORDS, MaryDataType.PARTSOFSPEECH, MaryUtils.string2locale(locale));
        if (!propertyPrefix.endsWith(".")) {
            propertyPrefix = propertyPrefix + ".";
        }
        this.propertyPrefix = propertyPrefix;
    }

    /**
     * Loads the POS model and, if configured, the POS mapping table.
     *
     * <p>Both streams are closed even when loading fails.
     *
     * @throws IllegalArgumentException if a non-comment line of the POS map has fewer
     *                                  than two whitespace-separated fields
     * @throws Exception                on any other failure while starting the superclass
     *                                  or reading the model or the POS map
     */
    @Override
    public void startup() throws Exception {
        super.startup();
        InputStream modelStream = MaryProperties.needStream(this.propertyPrefix + "model");
        try {
            this.tagger = new POSTaggerME(new POSModel(modelStream));
        } finally {
            if (modelStream != null) {
                modelStream.close();
            }
        }
        InputStream posMapperStream = MaryProperties.getStream(this.propertyPrefix + "posMap");
        if (posMapperStream != null) {
            try {
                // Publish the table only once it has been read completely.
                this.posMapper = OpenNLPPosTagger.readPosMap(posMapperStream);
            } finally {
                posMapperStream.close();
            }
        }
    }

    /**
     * Parses a POS mapping table from a UTF-8 stream. The caller keeps ownership of
     * the stream and is responsible for closing it.
     *
     * @param in stream to read from
     * @return map from the first field of each entry line to its second field
     * @throws IllegalArgumentException if an entry line has fewer than two fields
     * @throws Exception                if reading from the stream fails
     */
    private static Map<String, String> readPosMap(InputStream in) throws Exception {
        Map<String, String> mapping = new HashMap<String, String>();
        BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"));
        String line;
        while ((line = reader.readLine()) != null) {
            // Trim first so that indented comments are recognised as comments.
            String trimmed = line.trim();
            if (trimmed.length() == 0 || trimmed.startsWith("#")) {
                continue;
            }
            StringTokenizer fields = new StringTokenizer(trimmed);
            if (fields.countTokens() < 2) {
                throw new IllegalArgumentException("Malformed POS map line (expected '<pos> <mapped pos>'): '" + line + "'");
            }
            String pos = fields.nextToken();
            String gpos = fields.nextToken();
            mapping.put(pos, gpos);
        }
        return mapping;
    }

    /**
     * Tags every token of every sentence in the document of {@code d} and stores the
     * result in the token's {@code pos} attribute. If a POS map is loaded, mapped tags
     * are written instead of the raw tagger output; unmapped tags are written unchanged
     * and a warning is logged.
     *
     * <p>The input document is modified in place and reused for the returned data.
     *
     * @param d input data whose document contains {@code s} and {@code t} elements
     * @return new data of this module's output type, carrying the same document
     * @throws IllegalStateException if the tagger returns fewer tags than tokens
     * @throws Exception             on any other processing failure
     */
    @Override
    public MaryData process(MaryData d) throws Exception {
        Document doc = d.getDocument();
        NodeIterator sentenceIt = MaryDomUtils.createNodeIterator(doc, doc, "s");
        Element sentence;
        while ((sentence = (Element)sentenceIt.nextNode()) != null) {
            TreeWalker tokenIt = MaryDomUtils.createTreeWalker(sentence, "t");
            // Remember the elements alongside their text so tags can be written back
            // to exactly the tokens that were handed to the tagger.
            List<Element> tokenElements = new ArrayList<Element>();
            List<String> tokens = new ArrayList<String>();
            Element t;
            while ((t = (Element)tokenIt.nextNode()) != null) {
                tokenElements.add(t);
                tokens.add(MaryDomUtils.tokenText(t));
            }
            List<String> partsOfSpeech;
            // The tagger instance is shared by all callers of this module, so tagging
            // is serialized (presumably because POSTaggerME keeps per-call state).
            synchronized (this) {
                partsOfSpeech = this.tagger.tag(tokens);
            }
            // Explicit check instead of an assert: asserts are disabled by default.
            if (partsOfSpeech.size() < tokenElements.size()) {
                throw new IllegalStateException("POS tagger returned " + partsOfSpeech.size() + " tags for " + tokenElements.size() + " tokens");
            }
            for (int i = 0; i < tokenElements.size(); i++) {
                String pos = partsOfSpeech.get(i);
                if (this.posMapper != null) {
                    String gpos = this.posMapper.get(pos);
                    if (gpos == null) {
                        this.logger.warn("POS map file incomplete: do not know how to map '" + pos + "'");
                    } else {
                        pos = gpos;
                    }
                }
                tokenElements.get(i).setAttribute("pos", pos);
            }
        }
        MaryData output = new MaryData(this.outputType(), d.getLocale());
        output.setDocument(doc);
        return output;
    }
}

