package eu.dnetlib.data.mapreduce.hbase.dedup;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;

import eu.dnetlib.data.mapreduce.JobParams;
import eu.dnetlib.data.mapreduce.util.DedupUtils;
import eu.dnetlib.data.proto.TypeProtos.Type;
import eu.dnetlib.pace.config.DedupConfig;

/**
 * Mapper that expands every scanned row into pairwise relations between the column qualifiers found in that row.
 * <p>
 * For each ordered pair of cells {@code (a, b)} of the input row whose qualifiers differ, a {@link Put} is emitted with
 * <ul>
 * <li>row key = qualifier of {@code a} (also used as the map output key),</li>
 * <li>column family = the similarity column family resolved from the dedup configuration,</li>
 * <li>column qualifier = qualifier of {@code b},</li>
 * <li>an empty cell value.</li>
 * </ul>
 * Both directions {@code (a, b)} and {@code (b, a)} are written. Every emitted Put increments the counter
 * {@link #COUNTER_GROUP} / {@link #COUNTER_NAME}.
 * <p>
 * NOTE(review): all cells of the incoming {@link Result} are considered, whatever their column family; this presumably
 * relies on the job's scan being restricted to the relevant family - confirm against the job configuration.
 */
public class DedupGrouperMapper extends TableMapper<Text, Put> {

	/** Counter group under which the number of emitted Puts is reported. */
	public static final String COUNTER_GROUP = "dedup.grouper";

	/** Counter name under which the number of emitted Puts is reported. */
	public static final String COUNTER_NAME = "written.rels";

	/** Value stored in every emitted cell: the bytes of the empty string. */
	private static final byte[] EMPTY_VALUE = Bytes.toBytes("");

	/** Reusable output key, overwritten before each write. */
	private Text rowKey;

	/** Dedup configuration loaded from the job parameter {@link JobParams#DEDUP_CONF}. */
	private DedupConfig dedupConf;

	/** Column family the emitted Puts are written to; resolved once per task from the configured entity type. */
	private byte[] similarityFamily;

	/**
	 * Loads the dedup configuration and resolves the target column family once per task, since neither changes between
	 * rows.
	 *
	 * @param context
	 *            the task context, providing the job configuration
	 * @throws IOException
	 *             declared by the overridden method
	 * @throws InterruptedException
	 *             declared by the overridden method
	 */
	@Override
	protected void setup(final Context context) throws IOException, InterruptedException {
		rowKey = new Text();

		dedupConf = DedupConfig.load(context.getConfiguration().get(JobParams.DEDUP_CONF));
		similarityFamily = DedupUtils.getSimilarityCFBytes(Type.valueOf(dedupConf.getWf().getEntityType()));
	}

	/**
	 * Emits one {@link Put} for every ordered pair of cells of the row whose qualifiers differ.
	 *
	 * @param keyIn
	 *            the key of the scanned row (not used)
	 * @param value
	 *            the scanned row; rows without cells are skipped
	 * @param context
	 *            the task context the Puts and the counter updates are written to
	 * @throws IOException
	 *             if writing to the context fails
	 * @throws InterruptedException
	 *             if writing to the context is interrupted
	 */
	@Override
	protected void map(final ImmutableBytesWritable keyIn, final Result value, final Context context) throws IOException, InterruptedException {

		final List<KeyValue> kvList = value.list();
		// Result.list() yields null (not an empty list) when the row holds no cells.
		if (kvList == null || kvList.isEmpty()) {
			return;
		}

		// getQualifier() returns a fresh copy on each call: extract every qualifier once instead of twice per pair.
		final byte[][] qualifiers = new byte[kvList.size()][];
		int idx = 0;
		for (final KeyValue kv : kvList) {
			qualifiers[idx++] = kv.getQualifier();
		}

		for (final byte[] source : qualifiers) {
			for (final byte[] target : qualifiers) {

				// Skip self-pairs as well as distinct cells sharing the same qualifier bytes.
				if (Arrays.equals(source, target)) {
					continue;
				}

				final Put put = new Put(source).add(similarityFamily, target, EMPTY_VALUE);
				put.setDurability(Durability.USE_DEFAULT);
				rowKey.set(source);
				context.write(rowKey, put);

				context.getCounter(COUNTER_GROUP, COUNTER_NAME).increment(1);
			}
		}
	}

}
