/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.data.mapreduce.hbase.dedup;

import com.google.common.collect.Maps;
import com.google.protobuf.GeneratedMessage;
import eu.dnetlib.data.mapreduce.util.DedupUtils;
import eu.dnetlib.data.mapreduce.util.OafDecoder;
import eu.dnetlib.data.proto.OafProtos;
import eu.dnetlib.data.proto.TypeProtos;
import eu.dnetlib.pace.clustering.BlacklistAwareClusteringCombiner;
import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.model.MapDocument;
import eu.dnetlib.pace.model.ProtoDocumentBuilder;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class DedupMapper
extends TableMapper<Text, ImmutableBytesWritable> {
    private static final Log log = LogFactory.getLog(DedupMapper.class);
    private DedupConfig dedupConf;
    private Map<String, List<String>> blackListMap = Maps.newHashMap();
    private Text outKey;
    private ImmutableBytesWritable ibw;

    protected void setup(Mapper.Context context) throws IOException, InterruptedException {
        String dedupConfJson = context.getConfiguration().get("dedup.conf");
        log.info((Object)"pace conf strings");
        log.info((Object)("pace conf: " + dedupConfJson));
        this.dedupConf = DedupConfig.load((String)dedupConfJson);
        this.blackListMap = this.dedupConf.getPace().getBlacklists();
        this.outKey = new Text();
        this.ibw = new ImmutableBytesWritable();
        log.info((Object)"pace conf");
        log.info((Object)("entity type: " + this.dedupConf.getWf().getEntityType()));
        log.info((Object)("clustering: " + this.dedupConf.getPace().getClustering()));
        log.info((Object)("conditions: " + this.dedupConf.getPace().getConditions()));
        log.info((Object)("fields: " + this.dedupConf.getPace().getModel()));
        log.info((Object)("blacklists: " + this.blackListMap));
        log.info((Object)("wf conf: " + this.dedupConf.toString()));
    }

    protected void map(ImmutableBytesWritable keyIn, Result result, Mapper.Context context) throws IOException, InterruptedException {
        byte[] body = result.getValue(this.dedupConf.getWf().getEntityType().getBytes(), DedupUtils.BODY_B);
        if (body != null) {
            OafDecoder decoder = OafDecoder.decode((byte[])body);
            if (decoder.getOaf().getDataInfo().getDeletedbyinference()) {
                context.getCounter(this.dedupConf.getWf().getEntityType(), "deleted by inference").increment(1L);
                return;
            }
            OafProtos.OafEntity entity = decoder.getEntity();
            context.getCounter(entity.getType().toString(), "decoded").increment(1L);
            if (entity.getType().equals((Object)TypeProtos.Type.valueOf((String)this.dedupConf.getWf().getEntityType())) && entity.getType().equals((Object)TypeProtos.Type.result) && entity.getResult().getMetadata().getResulttype().getClassid().equals("publication")) {
                MapDocument doc = ProtoDocumentBuilder.newInstance((String)Bytes.toString((byte[])keyIn.copyBytes()), (GeneratedMessage)entity, (List)this.dedupConf.getPace().getModel());
                this.emitNGrams(context, doc, BlacklistAwareClusteringCombiner.filterAndCombine((MapDocument)doc, (Config)this.dedupConf, this.blackListMap));
            }
        } else {
            context.getCounter(this.dedupConf.getWf().getEntityType(), "missing body").increment(1L);
        }
    }

    private void emitNGrams(Mapper.Context context, MapDocument doc, Collection<String> ngrams) throws IOException, InterruptedException {
        for (String ngram : ngrams) {
            this.outKey.set(ngram);
            this.ibw.set(doc.toByteArray());
            context.write((Object)this.outKey, (Object)this.ibw);
        }
    }
}

