package eu.dnetlib.data.mapreduce.hbase.bulktag;

import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import eu.dnetlib.data.bulktag.CommunityConfiguration;
import eu.dnetlib.data.bulktag.CommunityConfigurationFactory;
import eu.dnetlib.data.proto.FieldTypeProtos;
import eu.dnetlib.data.proto.OafProtos.Oaf;
import eu.dnetlib.data.proto.ResultProtos;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Writable;

import java.io.IOException;
import java.util.List;
import java.util.Map;

/**
 * HBase table mapper that runs the bulk tagging step over the {@code result:body} cell of each row.
 *
 * <p>For every row carrying a body, the serialized {@link Oaf} is parsed and handed to
 * {@link ResultTagger#enrichContext}. When the enriched record contains at least one context data info
 * whose inference provenance is {@code "bulktagging"}, the record is written back to the same row and
 * column (only if {@code tagging.enabled} is true).
 *
 * <p>Job configuration properties read in {@link #setup}:
 * <ul>
 * <li>{@code tagging.conf} - JSON community configuration, mandatory;</li>
 * <li>{@code tagging.enabled} - when false (the default) nothing is written, counters are still updated;</li>
 * <li>{@code bulktagging.trust} - trust value passed to the tagger, defaults to {@code "0.85"}.</li>
 * </ul>
 */
public class BulkTaggingMapper extends TableMapper<ImmutableBytesWritable, Writable> {

	/** Column family holding the serialized record. */
	private static final byte[] RESULT_FAMILY = Bytes.toBytes("result");

	/** Column qualifier holding the serialized record. */
	private static final byte[] BODY_QUALIFIER = Bytes.toBytes("body");

	/** Counter group shared by all the counters updated by this mapper. */
	private static final String COUNTER_GROUP = "Bulk Tagging";

	/** Value of the inference provenance marking a context as added by the bulk tagging. */
	private static final String BULKTAGGING_PROVENANCE = "bulktagging";

	private CommunityConfiguration cc;

	private ResultTagger tagger;
	private boolean enabled;

	/**
	 * Reads the tagging configuration from the job configuration and initialises the tagger.
	 *
	 * @param context the mapper context giving access to the job configuration
	 * @throws IllegalArgumentException when the {@code tagging.conf} property is missing or blank
	 */
	@Override
	protected void setup(final Context context) throws IOException, InterruptedException {
		super.setup(context);

		final String conf = context.getConfiguration().get("tagging.conf");
		enabled = context.getConfiguration().getBoolean("tagging.enabled",false);
		if (StringUtils.isBlank(conf)) {
			throw new IllegalArgumentException("missing bulk tagging configuration");
		}
		System.out.println("conf = " + conf);
		cc = CommunityConfigurationFactory.fromJson(conf);
		tagger = new ResultTagger();
		tagger.setTrust(context.getConfiguration().get("bulktagging.trust", "0.85"));
	}

	/**
	 * Tags a single row and, when new bulk tagging contexts were produced, writes the enriched record back.
	 *
	 * @param key the row key
	 * @param value the row content, the record is expected in {@code result:body}
	 * @param context the mapper context used for counters and output
	 */
	@Override
	protected void map(final ImmutableBytesWritable key, final Result value, final Context context) throws IOException, InterruptedException {

		// getFamilyMap may return null (e.g. for an empty Result): treat it like a missing body instead of failing with a NPE
		final Map<byte[], byte[]> resultMap = value.getFamilyMap(RESULT_FAMILY);
		final byte[] body = resultMap != null ? resultMap.get(BODY_QUALIFIER) : null;

		if (body == null) {
			context.getCounter(COUNTER_GROUP, " null body ").increment(1);
			return;
		}
		context.getCounter(COUNTER_GROUP, "not null body ").increment(1);

		final Oaf oaf = tagger.enrichContext(Oaf.parseFrom(body), cc, context);
		if (oaf == null) {
			// the tagger returned no record for this row: nothing to count or write
			return;
		}

		final long tagged = oaf.getEntity().getResult().getMetadata().getContextList().stream()
				.flatMap(c -> c.getDataInfoList().stream())
				.map(FieldTypeProtos.DataInfo::getInferenceprovenance)
				.filter(BULKTAGGING_PROVENANCE::equals)
				.count();
		context.getCounter(COUNTER_GROUP, " bulktagged ").increment(tagged);

		if (tagged > 0) {
			if (enabled) {
				// serialize the record only when it is actually going to be written
				final Put put = new Put(key.copyBytes()).add(RESULT_FAMILY, BODY_QUALIFIER, oaf.toByteArray());
				context.write(key, put);
			}
			// counted also when writing is disabled, so that a dry run reports the writes it would perform
			context.getCounter(COUNTER_GROUP, " write op ").increment(1);
		}
	}

}
