package gate.creole;

import gate.Annotation;
import gate.AnnotationSet;
import gate.creole.gazetteer.Gazetteer;
import gate.creole.gazetteer.GazetteerList;
import gate.creole.gazetteer.GazetteerNode;
import gate.creole.gazetteer.Lookup;
import gate.creole.orthomatcher.OrthoMatcherRule;
import gate.util.GateRuntimeException;
import gate.util.InvalidOffsetException;
import gate.util.Out;
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;

/* loaded from: input_file:gate/creole/GazetteerListsCollector.class */
/**
 * Language analyser that harvests the text covered by selected annotation
 * types and feeds it back into a {@link Gazetteer}.
 *
 * <p>For every configured annotation type the analyser
 * <ol>
 *   <li>counts how often each annotated string (and, for strings that split
 *       into several tokens, each token) occurs and writes these counts to
 *       a file named {@code <type>.stats.lst};</li>
 *   <li>adds every string that the gazetteer does not already know to the
 *       gazetteer and to the gazetteer list whose URL ends in
 *       {@code <type>.lst};</li>
 *   <li>stores every gazetteer list that reports itself as modified.</li>
 * </ol>
 *
 * <p>NOTE(review): {@code OrthoMatcherRule.description} is used throughout
 * as the "not set" marker for strings. It is presumably the empty string;
 * confirm against {@code OrthoMatcherRule}.
 */
public class GazetteerListsCollector extends AbstractLanguageAnalyser {
    /** Annotation type whose multi-token strings are additionally added token by token. */
    private static final String PERSON_ANNOT_NAME = "PER";

    /** Characters on which an annotated string is split into tokens (note: space is not one of them). */
    private static final String TOKEN_DELIMITERS = "\n\r.|();-?!\t";

    /** Tokens shorter than this are never added to the gazetteer on their own. */
    private static final int MIN_TOKEN_LENGTH = 3;

    /** Annotation set selected by {@link #execute()} from the current document. */
    private AnnotationSet allAnnots;

    /** Names of the annotation types to harvest; elements are cast to {@code String}. */
    private List annotationTypes;

    /** Gazetteer that is queried for, and extended with, the harvested strings. */
    private Gazetteer gazetteer;

    /** Name of the annotation set to read; the "not set" marker or {@code null} selects the default set. */
    private String markupSetName = OrthoMatcherRule.description;

    /** Language value passed to every {@link Lookup} created by this analyser. */
    private String theLanguage = OrthoMatcherRule.description;

    /** Occurrence counters: annotation type -> (string -> count). Reset on every {@link #execute()}. */
    private Map<String, Map<String, Integer>> statsPerType = new HashMap<String, Map<String, Integer>>();

    /**
     * Runs the collector on the current document.
     *
     * <p>Returns early, after printing a warning, when no annotation types
     * are configured or the selected annotation set is empty.
     *
     * @throws ExecutionException if no document or no gazetteer has been set
     * @throws GateRuntimeException if an annotation has invalid offsets or a
     *         modified gazetteer list cannot be stored
     * @throws RuntimeException if a statistics file cannot be written
     */
    @Override // gate.creole.AbstractProcessingResource, gate.Executable
    public void execute() throws ExecutionException {
        this.statsPerType = new HashMap<String, Map<String, Integer>>();
        if (this.document == null) {
            throw new ExecutionException("No document to process!");
        }
        if (this.gazetteer == null) {
            throw new ExecutionException("No gazetteer set!");
        }
        if (this.annotationTypes == null || this.annotationTypes.isEmpty()) {
            Out.prln("Gazetteer Lists Collector Warning: No annotation types given for processing");
            return;
        }
        if (this.markupSetName == null || this.markupSetName.equals(OrthoMatcherRule.description)) {
            this.allAnnots = this.document.getAnnotations();
        } else {
            this.allAnnots = this.document.getAnnotations(this.markupSetName);
        }
        if (this.allAnnots == null || this.allAnnots.isEmpty()) {
            Out.prln("Gazetteer Lists Collector Warning: No annotations found for processing");
            return;
        }

        for (int i = 0; i < this.annotationTypes.size(); i++) {
            String type = (String) this.annotationTypes.get(i);
            AnnotationSet annotationsOfType = this.allAnnots.get(type);
            if (annotationsOfType != null && !annotationsOfType.isEmpty()) {
                this.statsPerType.put(type, new HashMap<String, Integer>());
                collectLists(annotationsOfType, type);
            }
        }
        printStats();

        // Persist every list that was changed while collecting.
        Map listsByNode = this.gazetteer.getLinearDefinition().getListsByNode();
        for (Object value : listsByNode.values()) {
            GazetteerList list = (GazetteerList) value;
            try {
                if (list.isModified()) {
                    list.store();
                }
            } catch (ResourceInstantiationException e) {
                throw asGateRuntimeException(e);
            }
        }
    }

    /** Sets the name of the annotation set to read annotations from. */
    public void setMarkupASName(String str) {
        this.markupSetName = str;
    }

    /** Returns the name of the annotation set annotations are read from. */
    public String getMarkupASName() {
        return this.markupSetName;
    }

    /** Returns the list of annotation type names to harvest. */
    public List getAnnotationTypes() {
        return this.annotationTypes;
    }

    /** Sets the list of annotation type names to harvest. */
    public void setAnnotationTypes(List list) {
        this.annotationTypes = list;
    }

    /** Returns the gazetteer that is extended by this analyser. */
    public Gazetteer getGazetteer() {
        return this.gazetteer;
    }

    /** Sets the gazetteer that is extended by this analyser. */
    public void setGazetteer(Gazetteer gazetteer) {
        this.gazetteer = gazetteer;
    }

    /** Sets the language value recorded in newly created lookups. */
    public void setTheLanguage(String str) {
        this.theLanguage = str;
    }

    /** Returns the language value recorded in newly created lookups. */
    public String getTheLanguage() {
        return this.theLanguage;
    }

    /**
     * Updates the occurrence counters for one annotation type and adds
     * strings the gazetteer does not know yet to the gazetteer and to the
     * gazetteer list belonging to that type.
     *
     * <p>If no gazetteer list URL ends in {@code <str>.lst}, a warning is
     * printed and only the counters are updated. Previously the entries
     * ended up in an unrelated list, or the method failed with a
     * {@code NullPointerException} when the gazetteer had no lists.
     *
     * @param annotationSet the annotations of type {@code str} to process
     * @param str the annotation type name
     * @throws GateRuntimeException if an annotation has invalid offsets
     */
    protected void collectLists(AnnotationSet annotationSet, String str) {
        GazetteerList targetList = findListFor(str);
        String listUrl = null;
        if (targetList == null) {
            Out.prln("Gazetteer Lists Collector Warning: No gazetteer list found for annotation type "
                    + str + "; collecting statistics only");
        } else {
            listUrl = targetList.getURL().toExternalForm();
        }

        // execute() registers the counter map up front; create it on demand
        // so that a direct call from a subclass does not fail.
        Map<String, Integer> counts = this.statsPerType.get(str);
        if (counts == null) {
            counts = new HashMap<String, Integer>();
            this.statsPerType.put(str, counts);
        }

        for (Annotation annotation : annotationSet) {
            String covered;
            try {
                covered = this.document.getContent()
                        .getContent(annotation.getStartNode().getOffset(), annotation.getEndNode().getOffset())
                        .toString();
            } catch (InvalidOffsetException e) {
                throw asGateRuntimeException(e);
            }

            List<String> tokens = new ArrayList<String>();
            StringTokenizer tokenizer = new StringTokenizer(covered, TOKEN_DELIMITERS, false);
            while (tokenizer.hasMoreTokens()) {
                tokens.add(tokenizer.nextToken());
            }
            // The gazetteer entry is the covered text flattened onto one line.
            String entry = covered.replace('\r', ' ').replace('\n', ' ').replace('\t', ' ');

            increment(counts, entry);
            if (tokens.size() > 1) {
                for (String token : tokens) {
                    increment(counts, token);
                }
            }

            if (targetList == null) {
                continue;
            }
            Set<?> known = this.gazetteer.lookup(entry);
            if (known != null && !known.isEmpty()) {
                continue;
            }
            this.gazetteer.add(entry, new Lookup(listUrl, str, "inferred", this.theLanguage));
            targetList.add(new GazetteerNode(entry));

            // For person annotations the individual tokens are added as well.
            if (str.equals(PERSON_ANNOT_NAME) && tokens.size() > 1) {
                for (String token : tokens) {
                    Set<?> knownToken = this.gazetteer.lookup(token);
                    boolean unknown = knownToken == null || knownToken.isEmpty();
                    if (unknown && token.length() >= MIN_TOKEN_LENGTH) {
                        this.gazetteer.add(token, new Lookup(listUrl, str, "inferred", this.theLanguage));
                        targetList.add(new GazetteerNode(token));
                    }
                }
            }
        }
    }

    /**
     * Writes the counters of every configured annotation type that has any
     * to a UTF-8 file named {@code <type>.stats.lst}, one
     * {@code string$count} pair per line. The file name is relative, so it
     * is resolved against the process working directory.
     *
     * @throws RuntimeException if a statistics file cannot be written
     */
    protected void printStats() {
        for (int i = 0; i < this.annotationTypes.size(); i++) {
            Object type = this.annotationTypes.get(i);
            Map<String, Integer> counts = this.statsPerType.get(type);
            if (counts == null) {
                continue;
            }
            // try-with-resources closes the file even when a write fails.
            try (FileOutputStream out = new FileOutputStream(type + ".stats.lst");
                    BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"))) {
                for (Map.Entry<String, Integer> stat : counts.entrySet()) {
                    writer.write(stat.getKey());
                    writer.write("$");
                    writer.write(stat.getValue().toString());
                    writer.newLine();
                }
            } catch (IOException e) {
                throw new RuntimeException(e.getMessage(), e);
            }
        }
    }

    /**
     * Placeholder that always reports {@code false}; nothing in this class
     * calls it.
     *
     * @param str the string to check (ignored)
     * @return always {@code false}
     */
    protected boolean alreadyPresentInGazetteer(String str) {
        return false;
    }

    /** Returns the first gazetteer list whose URL ends in {@code <annotationType>.lst}, or {@code null}. */
    private GazetteerList findListFor(String annotationType) {
        String suffix = annotationType + ".lst";
        Iterator it = this.gazetteer.getLinearDefinition().getListsByNode().values().iterator();
        while (it.hasNext()) {
            GazetteerList candidate = (GazetteerList) it.next();
            if (candidate.getURL().toExternalForm().endsWith(suffix)) {
                return candidate;
            }
        }
        return null;
    }

    /** Adds one to the counter stored under {@code key}, starting at one for a new key. */
    private static void increment(Map<String, Integer> counts, String key) {
        Integer current = counts.get(key);
        counts.put(key, Integer.valueOf(current == null ? 1 : current.intValue() + 1));
    }

    /**
     * Wraps a checked exception in a {@link GateRuntimeException} carrying
     * the same message. The cause is attached through {@code initCause} so
     * that only the message-only constructor is relied upon.
     */
    private static GateRuntimeException asGateRuntimeException(Exception cause) {
        GateRuntimeException wrapped = new GateRuntimeException(cause.getMessage());
        wrapped.initCause(cause);
        return wrapped;
    }
}
