/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.data.mapreduce.hbase.dedup.experiment;

import com.google.common.base.Function;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import eu.dnetlib.data.mapreduce.hbase.dedup.experiment.CsvEntry;
import eu.dnetlib.data.mapreduce.hbase.dedup.experiment.CsvSerialiser;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class JoinPersonGroupReducer
extends Reducer<Text, Text, Text, Text> {
    private static final Log log = LogFactory.getLog(JoinPersonGroupReducer.class);
    private Text tKey;
    private Text tValue;
    private static final int MIN_ENTRIES_THRESHOLD = 1;
    private int minEntriesThreshold;
    private static final int MAX_ENTRIES_THRESHOLD = Integer.MAX_VALUE;
    private int maxEntriesThreshold;
    private static final int MAX_FEATURES_THRESHOLD = Integer.MAX_VALUE;
    private int maxFeaturesThreshold;
    private Set<String> knownHashValues = Sets.newHashSet();
    private boolean passAll = false;

    protected void setup(Reducer.Context context) throws IOException, InterruptedException {
        super.setup(context);
        this.tKey = new Text("");
        this.tValue = new Text();
        this.minEntriesThreshold = context.getConfiguration().getInt("min.entries.threshold", 1);
        this.maxEntriesThreshold = context.getConfiguration().getInt("max.entries.threshold", Integer.MAX_VALUE);
        this.maxFeaturesThreshold = context.getConfiguration().getInt("max.features.threshold", Integer.MAX_VALUE);
        String hashCsv = context.getConfiguration().get("hash.values.csv", "");
        log.info((Object)("hash csv: " + hashCsv));
        if (hashCsv.contains("ALL")) {
            this.passAll = true;
        }
        for (String hash : Splitter.on((String)",").omitEmptyStrings().trimResults().split((CharSequence)hashCsv)) {
            this.knownHashValues.add(hash);
        }
    }

    protected void reduce(Text key, Iterable<Text> values, Reducer.Context context) throws IOException, InterruptedException {
        CsvSerialiser csvSerialiser = new CsvSerialiser(this.maxEntriesThreshold, this.maxFeaturesThreshold);
        String outKey = key.toString().replaceAll("[^a-zA-Z ]", "").toLowerCase();
        if (!this.passAll && !this.knownHashValues.contains(outKey)) {
            return;
        }
        if (StringUtils.isBlank((String)outKey)) {
            context.getCounter("person", "blank key").increment(1L);
            return;
        }
        ArrayList entries = Lists.newArrayList((Iterable)Iterables.transform(values, (Function)new Function<Text, CsvEntry>(){

            public CsvEntry apply(Text t) {
                return CsvEntry.fromJson(t.toString());
            }
        }));
        this.trackPersonInfo(entries.size(), context, "person");
        if (entries.size() < this.minEntriesThreshold || entries.size() > this.maxEntriesThreshold) {
            return;
        }
        if (!this.passAll) {
            context.getCounter("person hash", outKey).increment((long)entries.size());
        }
        this.tValue.set(csvSerialiser.asCSV(entries));
        context.write((Object)this.tKey, (Object)this.tValue);
        context.getCounter("person", "csv").increment(1L);
    }

    private void trackPersonInfo(int count, Reducer.Context context, String counterName) {
        if (count > 0 && count <= 10) {
            context.getCounter(counterName, count + "").increment(1L);
            return;
        }
        if (count > 10 && count <= 20) {
            context.getCounter(counterName, "[10, 20)").increment(1L);
            return;
        }
        if (count > 20 && count <= 30) {
            context.getCounter(counterName, "[20, 30)").increment(1L);
            return;
        }
        if (count > 30 && count <= 40) {
            context.getCounter(counterName, "[30, 40)").increment(1L);
            return;
        }
        if (count > 40 && count <= 50) {
            context.getCounter(counterName, "[40, 50)").increment(1L);
            return;
        }
        if (count > 50 && count <= 70) {
            context.getCounter(counterName, "[50, 70)").increment(1L);
            return;
        }
        if (count > 70 && count <= 100) {
            context.getCounter(counterName, "[70, 100)").increment(1L);
            return;
        }
        if (count > 100 && count <= 150) {
            context.getCounter(counterName, "[100, 150)").increment(1L);
            return;
        }
        if (count > 150 && count <= 200) {
            context.getCounter(counterName, "[150, 200)").increment(1L);
            return;
        }
        if (count > 200) {
            context.getCounter(counterName, "[200, *)").increment(1L);
            return;
        }
    }

    public void cleanup(Reducer.Context context) throws IOException, InterruptedException {
        super.cleanup(context);
    }
}

