/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.data.mapreduce.hbase.dedup;

import com.google.protobuf.GeneratedMessage;
import eu.dnetlib.data.mapreduce.util.DedupUtils;
import eu.dnetlib.data.mapreduce.util.OafDecoder;
import eu.dnetlib.data.proto.OafProtos;
import eu.dnetlib.data.proto.TypeProtos;
import eu.dnetlib.pace.clustering.BlacklistAwareClusteringCombiner;
import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.config.WfConfig;
import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.model.MapDocument;
import eu.dnetlib.pace.model.ProtoDocumentBuilder;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobTracker;
import org.apache.hadoop.mapreduce.Mapper;

public class DedupMapper
extends TableMapper<Text, ImmutableBytesWritable> {
    private static final Log log = LogFactory.getLog(DedupMapper.class);
    private DedupConfig dedupConf;
    private Text outKey;
    private ImmutableBytesWritable ibw;

    protected void setup(Mapper.Context context) throws IOException, InterruptedException {
        String dedupConfJson = context.getConfiguration().get("dedup.conf");
        log.info((Object)("pace conf: " + dedupConfJson));
        this.dedupConf = DedupConfig.load((String)dedupConfJson);
        log.debug((Object)("wf conf: " + this.dedupConf.toString()));
        this.outKey = new Text();
        this.ibw = new ImmutableBytesWritable();
    }

    protected void map(ImmutableBytesWritable keyIn, Result result, Mapper.Context context) throws IOException, InterruptedException {
        WfConfig wf = this.dedupConf.getWf();
        byte[] body = result.getValue(wf.getEntityType().getBytes(), DedupUtils.BODY_B);
        if (body != null) {
            OafDecoder decoder = OafDecoder.decode((byte[])body);
            if (decoder.getOaf().getDataInfo().getDeletedbyinference()) {
                context.getCounter(wf.getEntityType(), "deleted by inference").increment(1L);
                return;
            }
            OafProtos.OafEntity entity = decoder.getEntity();
            context.getCounter(entity.getType().toString(), "decoded").increment(1L);
            if (entity.getType().equals((Object)TypeProtos.Type.valueOf((String)wf.getEntityType()))) {
                MapDocument doc = ProtoDocumentBuilder.newInstance((String)Bytes.toString((byte[])keyIn.copyBytes()), (GeneratedMessage)entity, (List)this.dedupConf.getPace().getModel());
                context.getCounter(entity.getType().toString(), "converted as MapDocument").increment(1L);
                if (wf.hasSubType()) {
                    Map fields = doc.getFieldMap();
                    if (!fields.containsKey(wf.getSubEntityType())) {
                        throw new JobTracker.IllegalStateException(String.format("model map does not contain field %s", wf.getSubEntityType()));
                    }
                    String subType = ((Field)fields.get(wf.getSubEntityType())).stringValue();
                    if (wf.getSubEntityValue().equalsIgnoreCase(subType)) {
                        context.getCounter(subType, "converted as MapDocument").increment(1L);
                        this.emitNGrams(context, doc, BlacklistAwareClusteringCombiner.filterAndCombine((MapDocument)doc, (Config)this.dedupConf));
                    } else {
                        context.getCounter(subType, "ignored").increment(1L);
                    }
                } else {
                    this.emitNGrams(context, doc, BlacklistAwareClusteringCombiner.filterAndCombine((MapDocument)doc, (Config)this.dedupConf));
                }
            }
        } else {
            context.getCounter(wf.getEntityType(), "missing body").increment(1L);
        }
    }

    private void emitNGrams(Mapper.Context context, MapDocument doc, Collection<String> ngrams) throws IOException, InterruptedException {
        for (String ngram : ngrams) {
            this.outKey.set(ngram);
            this.ibw.set(doc.toByteArray());
            context.write((Object)this.outKey, (Object)this.ibw);
        }
    }
}

