package eu.dnetlib.data.mapreduce.hbase.dedup;

import java.io.IOException;
import java.util.*;

import com.google.common.base.Function;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.protobuf.InvalidProtocolBufferException;
import eu.dnetlib.data.mapreduce.JobParams;
import eu.dnetlib.data.mapreduce.util.DedupUtils;
import eu.dnetlib.data.mapreduce.util.StreamUtils;
import eu.dnetlib.data.proto.OafProtos;
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
import eu.dnetlib.data.proto.TypeProtos.Type;
import eu.dnetlib.pace.clustering.NGramUtils;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.config.WfConfig;
import eu.dnetlib.pace.distance.PaceDocumentDistance;
import eu.dnetlib.pace.distance.eval.ScoreResult;
import eu.dnetlib.pace.model.*;
import eu.dnetlib.pace.util.BlockProcessor;
import eu.dnetlib.pace.util.Reporter;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;

import javax.annotation.Nullable;

/**
 * Reduce step of the dedup job. For every key it decodes the incoming values into {@link MapDocument}s, hands them
 * to a {@link BlockProcessor} configured with the job's {@link DedupConfig}, and writes one HBase {@link Put} per
 * direction for every pair the processor reports through {@link Reporter#emit(String, String, String)}.
 */
public class DedupReducer extends TableReducer<Text, ImmutableBytesWritable, ImmutableBytesWritable> {

	private static final Log log = LogFactory.getLog(DedupReducer.class);

	/** Dedup configuration, loaded in {@link #setup(Context)} from the {@link JobParams#DEDUP_CONF} job property. */
	private DedupConfig dedupConf;

	/** Output key holder, allocated once in {@link #setup(Context)} and re-pointed at the row key before each write. */
	private ImmutableBytesWritable ibw;

	/**
	 * Loads the dedup configuration from the job configuration and allocates the reusable output key.
	 *
	 * @param context
	 *            the reducer context giving access to the job configuration
	 * @throws IOException
	 *             declared by the overridden method
	 * @throws InterruptedException
	 *             declared by the overridden method
	 */
	@Override
	protected void setup(final Context context) throws IOException, InterruptedException {

		dedupConf = DedupConfig.load(context.getConfiguration().get(JobParams.DEDUP_CONF));
		ibw = new ImmutableBytesWritable();

		log.info("dedup reduce phase \npace conf: " + dedupConf.toString());
	}

	/**
	 * Processes the documents grouped under one key and writes the relations reported by the {@link BlockProcessor}.
	 *
	 * @param key
	 *            the grouping key; its string form is passed to the block processor
	 * @param values
	 *            the serialized documents grouped under the key
	 * @param context
	 *            the reducer context used for counters and output
	 */
	@Override
	protected void reduce(final Text key, final Iterable<ImmutableBytesWritable> values, final Context context) {

		// Lazy view: each value is decoded when the block processor iterates over it. copyBytes() yields a private
		// copy of the value's bytes for the decoder.
		final Iterable<MapDocument> docs = Iterables.transform(values, new Function<ImmutableBytesWritable, MapDocument>() {
			@Nullable
			@Override
			public MapDocument apply(@Nullable ImmutableBytesWritable b) {
				return MapDocumentSerializer.decode(b.copyBytes());
			}
		});

		new BlockProcessor(dedupConf).process(key.toString(), docs, new Reporter() {

			@Override
			public void incrementCounter(String counterGroup, String counterName, long delta) {
				context.getCounter(counterGroup, counterName).increment(delta);
			}

			/** Writes the reported relation in both directions: from -> to and to -> from. */
			@Override
			public void emit(String type, String from, String to) {

				emitRel(context, type, from, to);
				emitRel(context, type, to, from);
			}

			/**
			 * Writes a single Put on row {@code from}, with column qualifier {@code to} and an empty value, in the
			 * column family returned by {@link DedupUtils#getSimilarityCFBytes(String)} for {@code type}.
			 *
			 * A failed write is logged and does not abort the reduce call; if the thread was interrupted, its
			 * interrupt status is restored so the surrounding framework can still observe it.
			 */
			private void emitRel(final Context context, final String type, final String from, final String to) {
				// Encode the row key once; it is used both for the Put and for the output key.
				final byte[] rowKey = Bytes.toBytes(from);
				final Put put = new Put(rowKey).add(DedupUtils.getSimilarityCFBytes(type), Bytes.toBytes(to), Bytes.toBytes(""));
				put.setDurability(Durability.SKIP_WAL);
				ibw.set(rowKey);
				try {
					context.write(ibw, put);
				} catch (IOException e) {
					log.error("unable to write relation, type: " + type + ", from: " + from + ", to: " + to, e);
				} catch (InterruptedException e) {
					// Do not lose the interruption: re-set the flag that was cleared when the exception was thrown.
					Thread.currentThread().interrupt();
					log.error("interrupted while writing relation, type: " + type + ", from: " + from + ", to: " + to, e);
				}
			}
		});
	}

}
