package eu.dnetlib.data.mapreduce.hbase.dedup;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Map;

import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;

import com.google.protobuf.InvalidProtocolBufferException;
import com.googlecode.protobuf.format.JsonFormat;

import eu.dnetlib.data.mapreduce.JobParams;
import eu.dnetlib.data.mapreduce.util.DedupUtils;
import eu.dnetlib.data.mapreduce.util.OafDecoder;
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder;
import eu.dnetlib.data.proto.OafProtos.Oaf;
import eu.dnetlib.data.proto.PersonProtos.Person;
import eu.dnetlib.data.proto.TypeProtos.Type;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.model.gt.GTAuthor;

/**
 * Table mapper that, for every person row carrying similarity relations, emits the row's author representation keyed by
 * the identifier derived from the smallest row key found among the row itself and its similar rows.
 *
 * <p>
 * Rows whose key does not decode to {@link Type#person} are counted as "skipped" and ignored. Person rows that have no
 * entries in the similarity column family are counted as "row not in similarity mesh" and produce no output.
 * </p>
 */
public class DedupFindRootsPersonMapper extends TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {

	/** Dedup workflow configuration, loaded once per task in {@link #setup(Context)}. */
	private DedupConfig dedupConf;

	/** Reusable output key holder, avoids allocating a new writable per emitted record. */
	private ImmutableBytesWritable outKey;

	/** Reusable output value holder, avoids allocating a new writable per emitted record. */
	private ImmutableBytesWritable outValue;

	/**
	 * Loads the dedup configuration from the job parameter {@link JobParams#DEDUP_CONF} and allocates the reusable
	 * output writables.
	 *
	 * @param context
	 *            the task context providing the job configuration
	 */
	@Override
	protected void setup(final Context context) throws IOException, InterruptedException {
		dedupConf = DedupConfig.load(context.getConfiguration().get(JobParams.DEDUP_CONF));
		System.out.println("dedup findRoots mapper\nwf conf: " + dedupConf.toString());

		outKey = new ImmutableBytesWritable();
		outValue = new ImmutableBytesWritable();
	}

	/**
	 * Processes a single table row.
	 *
	 * @param rowkey
	 *            the row key of the current row
	 * @param value
	 *            the row content
	 * @param context
	 *            the task context used for counters and output
	 */
	@Override
	protected void map(final ImmutableBytesWritable rowkey, final Result value, final Context context) throws IOException, InterruptedException {
		// Copy the key bytes once and reuse the copy below instead of copying for every use.
		final byte[] rowBytes = rowkey.copyBytes();

		final OafRowKeyDecoder rkd = OafRowKeyDecoder.decode(rowBytes);

		if (!Type.person.equals(rkd.getType())) {
			context.getCounter(rkd.getType().toString(), "skipped").increment(1);
			// The row is counted as skipped, so it must not be processed any further.
			return;
		}

		final String entityType = dedupConf.getWf().getEntityType();
		final Type type = Type.valueOf(entityType);
		final Map<byte[], byte[]> similarRels = value.getFamilyMap(DedupUtils.getSimilarityCFBytes(type));

		if ((similarRels == null) || similarRels.isEmpty()) {
			context.getCounter(entityType, "row not in similarity mesh").increment(1);
			return;
		}

		final ByteBuffer min = findMin(rowBytes, similarRels.keySet());
		final byte[] groupingKey = DedupUtils.newIdBytes(min, dedupConf.getWf().getDedupRun());

		final GTAuthor gta = asGTA(rowBytes, value.getValue(Bytes.toBytes(entityType), DedupUtils.BODY_B));

		emitBody(context, groupingKey, gta);
	}

	/**
	 * Builds the {@link GTAuthor} for a row from its serialized body.
	 *
	 * @param rowBytes
	 *            the row key bytes, used as the identifier of both the GTAuthor and its nested author
	 * @param input
	 *            the serialized body read from the row
	 * @return the author representation carrying the row key as id
	 */
	private GTAuthor asGTA(final byte[] rowBytes, final byte[] input) {
		final Oaf oaf = OafDecoder.decode(input).getOaf();
		final Person person = oaf.getEntity().getPerson();

		final GTAuthor gta = GTAuthor.fromOafJson(JsonFormat.printToString(person));

		// Bytes.toString decodes as UTF-8 regardless of the JVM default charset, unlike new String(byte[]).
		final String id = Bytes.toString(rowBytes);
		gta.setId(id);
		gta.getAuthor().setId(id);
		return gta;
	}

	/**
	 * Finds the smallest key among the given key and the candidates, using the ordering of
	 * {@link ByteBuffer#compareTo(ByteBuffer)}.
	 *
	 * @param key
	 *            the initial candidate
	 * @param keys
	 *            the other candidates
	 * @return a buffer wrapping the smallest key; wraps {@code key} itself when no candidate is smaller
	 */
	private static ByteBuffer findMin(final byte[] key, final Iterable<byte[]> keys) {
		ByteBuffer smallest = ByteBuffer.wrap(key);
		for (final byte[] candidate : keys) {
			final ByteBuffer wrapped = ByteBuffer.wrap(candidate);
			if (smallest.compareTo(wrapped) > 0) {
				smallest = wrapped;
			}
		}
		return smallest;
	}

	/**
	 * Writes the serialized author under the given grouping key and increments the "in" counter of the configured
	 * entity type.
	 *
	 * @param context
	 *            the task context used for output and counters
	 * @param row
	 *            the output key bytes
	 * @param gta
	 *            the author to serialize as output value
	 */
	private void emitBody(final Context context, final byte[] row, final GTAuthor gta) throws InvalidProtocolBufferException, IOException, InterruptedException {
		outKey.set(row);
		outValue.set(toOafByteArray(gta));

		context.write(outKey, outValue);
		context.getCounter(dedupConf.getWf().getEntityType(), "in").increment(1);
	}

	/**
	 * Serializes the given author.
	 *
	 * <p>
	 * Despite the method name, the result is the byte encoding of {@code gta.toString()} and not a serialized Oaf
	 * message.
	 * </p>
	 *
	 * @param gta
	 *            the author to serialize
	 * @return the bytes of the author's string form, as produced by {@link Bytes#toBytes(String)}
	 */
	public byte[] toOafByteArray(final GTAuthor gta) {
		return Bytes.toBytes(gta.toString());
	}

}
