/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.data.mapreduce.hbase.dedup.experiment;

import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import eu.dnetlib.data.mapreduce.hbase.dedup.experiment.CsvEntry;
import eu.dnetlib.data.mapreduce.hbase.dedup.experiment.SubjectParser;
import eu.dnetlib.data.mapreduce.hbase.dedup.experiment.Subjects;
import eu.dnetlib.data.mapreduce.hbase.dedup.experiment.SubjectsMap;
import eu.dnetlib.data.proto.TypeProtos;
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions;
import eu.dnetlib.pace.model.Person;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

public class JoinPersonGroupMapper
extends Mapper<Text, Text, Text, Text> {
    private static final String SUBJECT_PREFIX = "subject.";
    private static final String COAUTHOR_PREFIX = "coauthor.";
    public static final String PERSON = "person";
    private static final int MAX_TOKENS = 5;
    private static final int MIN_FEATURES = 10;
    private Text outKey;
    private Text outValue;
    private SubjectParser sp;

    protected void setup(Mapper.Context context) throws IOException, InterruptedException {
        this.outKey = new Text();
        this.outValue = new Text();
        this.sp = new SubjectParser();
    }

    protected void map(Text key, Text value, Mapper.Context context) throws IOException, InterruptedException {
        SAXReader r = new SAXReader();
        try {
            Document doc = r.read((Reader)new StringReader(value.toString()));
            SubjectsMap sm = this.sp.parse(doc);
            CsvEntry entry = new CsvEntry();
            for (Subjects subs : sm.values()) {
                for (String subject : subs) {
                    String s = SUBJECT_PREFIX + this.cleanup(subject);
                    entry.addFeature("\"" + s + "\"");
                }
            }
            List<Person> authors = this.getAuthors(doc);
            String title = this.getTitle(doc);
            String pubId = this.getId(doc);
            for (Person p1 : authors) {
                context.getCounter(PERSON, "accurate " + p1.isAccurate()).increment(1L);
                Set<String> hashes = this.getOutKeys(p1);
                context.getCounter(PERSON, String.format("accurate %s keys", p1.isAccurate())).increment((long)hashes.size());
                for (String s1 : hashes) {
                    CsvEntry c = new CsvEntry(s1, entry.getFeatures());
                    for (Person p2 : authors) {
                        String s2 = this.normalize(p2.getSurnameString());
                        if (!p1.isAccurate() || !p2.isAccurate() || p1.getSurnameString().equalsIgnoreCase(p2.getSurnameString())) continue;
                        c.addFeature("\"coauthor." + s2.replaceAll("\"", "").replaceAll("\\s+", "_") + "\"");
                    }
                    String prefix = StringUtils.substringBefore((String)pubId, (String)"::");
                    String originalId = StringUtils.substringAfter((String)pubId, (String)"::");
                    c.setId(this.getId(prefix, originalId, p1.getOriginal()));
                    c.setOriginalName(p1.getOriginal());
                    c.setTitle(title);
                    c.getFeatures().remove(s1);
                    if (s1.length() <= 3) {
                        context.getCounter(PERSON, "key size <= 3").increment(1L);
                        return;
                    }
                    if (c.getFeatures().size() < 10) {
                        context.getCounter(PERSON, "features < 10").increment(1L);
                        return;
                    }
                    this.outKey.set(s1);
                    this.outValue.set(c.toString());
                    context.write((Object)this.outKey, (Object)this.outValue);
                }
            }
        }
        catch (Throwable e) {
            System.out.println("GOT EX " + e);
            e.printStackTrace(System.err);
            context.getCounter(PERSON, e.getClass().toString()).increment(1L);
        }
    }

    protected String getId(String nsPrefix, String originalId, String name) {
        String localId = name.replaceAll("\\s+", " ").trim();
        return AbstractDNetXsltFunctions.oafId((String)TypeProtos.Type.person.toString(), (String)nsPrefix, (String)(originalId + "::" + localId));
    }

    private String cleanup(String s) {
        return s.replaceAll(" ", "_").replaceAll("\\.", "_").replaceAll("\"", "");
    }

    private String getId(Document doc) {
        return doc.valueOf("//*[local-name() = 'objIdentifier']/text()");
    }

    private List<Person> getAuthors(Document doc) {
        List creatorNodes = doc.selectNodes("//*[local-name() = 'creator']");
        ArrayList authors = Lists.newArrayList();
        for (int i = 0; i < creatorNodes.size(); ++i) {
            Element e = (Element)creatorNodes.get(i);
            authors.add(new Person(e.getText(), false));
        }
        return authors;
    }

    private String getTitle(Document doc) {
        List titleNodes = doc.selectNodes("//*[local-name() = 'title']");
        if (titleNodes != null && titleNodes.size() > 0) {
            Element titleNode = (Element)titleNodes.get(0);
            return titleNode.getText().replaceAll(",", "");
        }
        return "";
    }

    private Set<String> getOutKeys(Person p1) {
        HashSet hashes = Sets.newHashSet();
        if (p1.isAccurate()) {
            for (String name : p1.getName()) {
                hashes.add(this.normalize(p1.getSurnameString() + this.firstLC(name)));
            }
        } else {
            String s = this.normalize(p1.getOriginal());
            for (String token1 : this.tokens(s)) {
                for (String token2 : this.tokens(s)) {
                    if (token1.equals(token2)) continue;
                    hashes.add(this.firstLC(token1) + token2);
                }
            }
        }
        return hashes;
    }

    private String normalize(Person p) {
        String s = p.getSurnameString() + this.firstLC(p.getNameString());
        return this.normalize(s);
    }

    private String normalize(String s) {
        return s.replaceAll("[^a-zA-Z ]", "").toLowerCase().trim();
    }

    private Iterable<String> tokens(String s) {
        return Iterables.limit((Iterable)Splitter.on((String)" ").omitEmptyStrings().trimResults().split((CharSequence)s), (int)5);
    }

    private String firstLC(String s) {
        return StringUtils.substring((String)s, (int)0, (int)1).toLowerCase();
    }
}

