package eu.dnetlib.data.mapreduce.hbase.dedup.experiment;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;

import eu.dnetlib.data.mapreduce.util.DedupUtils;
import eu.dnetlib.data.mapreduce.util.OafDecoder;
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
import eu.dnetlib.data.proto.ResultProtos;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.NullWritable;

/**
 * Table mapper that inspects the rows it is given and reports what it finds exclusively through job counters; it never
 * writes an output record (both output types are {@link NullWritable}).
 *
 * <p>
 * Every counter belongs to the {@value #RESULT} group. For each row exactly one of the following paths is taken:
 * </p>
 * <ul>
 * <li>{@code roots}: the row key contains {@code dedup_wf}; the row is not inspected further.</li>
 * <li>{@code missing body}: the row has no body cell in the {@value #RESULT} column family.</li>
 * <li>{@code dataset}: the decoded result has the result type class id {@code dataset}.</li>
 * <li>{@code publication}: any other decoded result. For these rows two more counters are updated:
 * {@code abstracts: N} (or {@code abstracts > 50} when there are more than 50 descriptions) and
 * {@code empty abstract: true|false}, which is {@code true} when every description is blank, including the case where
 * there is no description at all.</li>
 * </ul>
 *
 * Created by claudio on 22/04/16.
 */
public class PublicationAnalysisMapper extends TableMapper<NullWritable, NullWritable> {

	/** Name of the counter group; the same string also names the column family the body is read from. */
	public static final String RESULT = "result";

	/** Description counts above this threshold are merged into a single counter. */
	private static final int MAX_DESCRIPTIONS = 50;

	/**
	 * Column family bytes, encoded once with an explicit charset instead of once per row with the platform default.
	 */
	private static final byte[] RESULT_FAMILY = RESULT.getBytes(StandardCharsets.UTF_8);

	/** Row keys containing this marker are counted as roots and skipped. */
	private static final String ROOT_KEY_MARKER = "dedup_wf";

	/** Result type class id of the records that are counted separately and excluded from the abstract statistics. */
	private static final String DATASET_CLASSID = "dataset";

	/**
	 * Classifies one row and increments the matching counters; see the class documentation for the counter names.
	 *
	 * @param key
	 *            the row key
	 * @param value
	 *            the row content, expected to hold the serialized body in the {@value #RESULT} column family
	 * @param context
	 *            the task context, used only to access the counters
	 * @throws IOException
	 *             declared by the overridden method
	 * @throws InterruptedException
	 *             declared by the overridden method
	 */
	@Override
	protected void map(final ImmutableBytesWritable key, final Result value, final Context context) throws IOException, InterruptedException {

		// Decode with a fixed charset so that the match does not depend on the JVM default of the task node.
		final String rowKey = new String(key.copyBytes(), StandardCharsets.UTF_8);
		if (rowKey.contains(ROOT_KEY_MARKER)) {
			context.getCounter(RESULT, "roots").increment(1);
			return;
		}

		final byte[] body = value.getValue(RESULT_FAMILY, DedupUtils.BODY_B);
		if (body == null) {
			context.getCounter(RESULT, "missing body").increment(1);
			return;
		}

		final OafDecoder decoder = OafDecoder.decode(body);
		final ResultProtos.Result result = decoder.getEntity().getResult();
		if (DATASET_CLASSID.equals(result.getMetadata().getResulttype().getClassid())) {
			context.getCounter(RESULT, "dataset").increment(1);
			return;
		}
		context.getCounter(RESULT, "publication").increment(1);

		// One counter per distinct description count, capped so that outliers share a single bucket.
		final int descriptionCount = result.getMetadata().getDescriptionCount();
		if (descriptionCount > MAX_DESCRIPTIONS) {
			context.getCounter(RESULT, "abstracts > " + MAX_DESCRIPTIONS).increment(1);
		} else {
			context.getCounter(RESULT, "abstracts: " + descriptionCount).increment(1);
		}

		final boolean empty = allBlank(result.getMetadata().getDescriptionList());
		context.getCounter(RESULT, "empty abstract: " + empty).increment(1);
	}

	/**
	 * Tells whether none of the given descriptions carries any text.
	 *
	 * @param descriptions
	 *            the descriptions to check, possibly empty
	 * @return {@code true} when the list is empty or every value is blank, {@code false} as soon as one non-blank value is
	 *         found
	 */
	private static boolean allBlank(final List<StringField> descriptions) {
		for (final StringField description : descriptions) {
			if (!StringUtils.isBlank(description.getValue())) {
				return false;
			}
		}
		return true;
	}
}
