package eu.dnetlib.data.mapreduce.hbase.broker.enrich;

import java.io.IOException;
import java.math.BigDecimal;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.Function;

import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.protobuf.InvalidProtocolBufferException;
import eu.dnetlib.data.mapreduce.Algorithms;
import eu.dnetlib.data.mapreduce.JobParams;
import eu.dnetlib.data.mapreduce.hbase.broker.model.Event;
import eu.dnetlib.data.mapreduce.hbase.broker.model.EventWrapper;
import eu.dnetlib.data.proto.OafProtos.Oaf;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.distance.PaceDocumentDistance;
import eu.dnetlib.pace.distance.eval.ScoreResult;
import eu.dnetlib.pace.model.MapDocument;
import eu.dnetlib.pace.model.ProtoDocumentBuilder;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.math.util.MathUtils;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.FilterList.Operator;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import static eu.dnetlib.data.mapreduce.util.OafHbaseUtils.getPropertyValues;

/**
 * Base class for the broker enrichment reducers.
 *
 * <p>During {@link #setup(Context)} it loads the datasource white/black lists, the datasource typology map
 * (scanned from the HBase input table), the dedup configuration and the per-result-type base URLs from the
 * job configuration. It also offers helpers to emit broker events, to compute the similarity between two
 * {@link Oaf} records and to deserialise {@link Oaf} objects.</p>
 *
 * Created by claudio on 20/02/2017.
 */
public abstract class AbstractEnrichmentReducer extends Reducer<ImmutableBytesWritable, ImmutableBytesWritable, Text, Text> {

	/** Dedup configuration, parsed in {@link #setup(Context)} from the {@code JobParams.DEDUP_CONF} job property. */
	protected DedupConfig dedupConf;

	/** Limit made available to subclasses; this class only prints it during setup. */
	protected static final int LIMIT = 1000;

	/** Number of decimals kept by {@link #scale(double)}. */
	protected static final int SCORE_DECIMALS = 2;

	/** Maps a datasource id to its typology; only whitelisted typologies are present. */
	protected Map<String, String> dsTypeMap = Maps.newHashMap();

	/** Values of the {@code broker.datasource.id.whitelist} property. */
	protected Set<String> dsWhitelist = Sets.newHashSet();

	/** Values of the {@code broker.datasource.id.blacklist} property. */
	protected Set<String> dsBlacklist = Sets.newHashSet();

	/**
	 * Values of the {@code broker.datasource.untrusted.oa.list} property.
	 * Introduced for EuropePMC: it exposes OA abstracts, while we want to identify real OA publications.
	 */
	protected Set<String> untrustedOaDsList = Sets.newHashSet();

	/** White list for datasource typologies ({@code broker.datasource.type.whitelist}). */
	protected Set<String> dsTypeWhitelist = Sets.newHashSet();

	/** Output key shared by all the emitted events (always the empty string). */
	protected Text tKey = new Text("");

	/**
	 * lower bound for trust scaling: the dedup workflow threshold minus 0.01.
	 */
	protected double scaleLB;

	/**
	 * @return the name of the Hadoop counter group used when counting the emitted events.
	 */
	protected abstract String counterGroup();

	/**
	 * Base URL by result type ("publication", "dataset", "software", "other").
	 * Each value defaults to {@code "%s"} when the corresponding property is not set
	 * (presumably a format template to be filled by subclasses - confirm against the callers).
	 */
	protected Map<String, String> baseUrlMap = Maps.newHashMap();

	/**
	 * Reads the job configuration and initialises the state shared by the enrichment reducers.
	 *
	 * @param context the reducer context giving access to the job configuration
	 * @throws IOException          if the datasource typology map cannot be loaded from HBase
	 * @throws InterruptedException propagated from the parent setup
	 */
	@Override
	protected void setup(final Context context) throws IOException, InterruptedException {
		super.setup(context);

		System.out.println("LIMIT: " + LIMIT);

		dsWhitelist.addAll(getPropertyValues(context, "broker.datasource.id.whitelist"));
		dsBlacklist.addAll(getPropertyValues(context, "broker.datasource.id.blacklist"));
		dsTypeWhitelist.addAll(getPropertyValues(context, "broker.datasource.type.whitelist"));
		untrustedOaDsList.addAll(getPropertyValues(context, "broker.datasource.untrusted.oa.list"));

		dsTypeMap = getDsTypeMap(context, dsTypeWhitelist);

		System.out.println("datasource whitelist: " + dsWhitelist);
		System.out.println("datasource blacklist: " + dsBlacklist);
		System.out.println("datasource OA list: " + untrustedOaDsList);

		System.out.println("datasource type whitelist: " + dsTypeWhitelist);

		final String dedupConfJson = context.getConfiguration().get(JobParams.DEDUP_CONF);

		System.out.println("got dedup conf: " + dedupConfJson);

		dedupConf = DedupConfig.load(dedupConfJson);

		System.out.println("parsed dedup conf: " + dedupConf.toString());

		scaleLB = dedupConf.getWf().getThreshold() - 0.01;

		baseUrlMap.put("publication", context.getConfiguration().get("broker.baseurl.publication", "%s"));

		// The dataset key was historically misspelled ("datset"): prefer the correctly spelled property,
		// but keep honouring the legacy one so that existing job configurations continue to work.
		final String legacyDatasetBaseUrl = context.getConfiguration().get("broker.baseurl.datset", "%s");
		baseUrlMap.put("dataset", context.getConfiguration().get("broker.baseurl.dataset", legacyDatasetBaseUrl));

		baseUrlMap.put("software", context.getConfiguration().get("broker.baseurl.software", "%s"));
		baseUrlMap.put("other", context.getConfiguration().get("broker.baseurl.other", "%s"));
	}

	/**
	 * Scans the HBase table named by the {@code hbase.mapred.inputtable} property and builds the map
	 * datasource id -> datasource typology, restricted to the whitelisted typologies.
	 *
	 * <p>Only the rows whose key starts with "10" are scanned (presumably the datasource rows - confirm),
	 * reading the "body" qualifier of the "datasource" column family.</p>
	 *
	 * @param context         the reducer context giving access to the job configuration
	 * @param dsTypeWhitelist the typologies to keep
	 * @return the map from datasource id (the part of the entity id following the first '|') to its typology
	 * @throws IOException if the table cannot be opened or scanned, or a body cannot be parsed
	 */
	protected Map<String, String> getDsTypeMap(final Context context, final Set<String> dsTypeWhitelist) throws IOException {
		System.out.println("loading datasource typology mapping");

		final Map<String, String> typeByDsId = Maps.newHashMap();

		// Hoisted out of the scan loop: these byte arrays do not change between rows.
		final byte[] dsFamily = Bytes.toBytes("datasource");
		final byte[] bodyQualifier = Bytes.toBytes("body");

		final Scan scan = new Scan();
		final FilterList fl = new FilterList(Operator.MUST_PASS_ALL);
		fl.addFilter(new PrefixFilter(Bytes.toBytes("10")));
		scan.setFilter(fl);
		scan.addFamily(dsFamily);

		final String tableName = context.getConfiguration().get("hbase.mapred.inputtable");

		System.out.println(String.format("table name: '%s'", tableName));

		try (final HTable table = new HTable(context.getConfiguration(), tableName); final ResultScanner res = table.getScanner(scan)) {

			for (final Result r : res) {
				final byte[] body = r.getValue(dsFamily, bodyQualifier);
				if (body == null) {
					// rows without a datasource body carry no typology information
					continue;
				}
				final Oaf oaf = Oaf.parseFrom(body);
				final String dsId = StringUtils.substringAfter(oaf.getEntity().getId(), "|");
				final String dsType = oaf.getEntity().getDatasource().getMetadata().getDatasourcetype().getClassid();

				if (dsTypeWhitelist.contains(dsType)) {
					typeByDsId.put(dsId, dsType);
				}
			}
		}
		System.out.println("datasource type map size: " + typeByDsId.size());
		return typeByDsId;
	}

	/**
	 * Writes the given events to the output as JSON, skipping the null elements, and increments for each of them
	 * the counter named by {@code EventWrapper#getCounterName()} in the {@link #counterGroup()} group.
	 *
	 * @param events  the events to emit; null elements are ignored
	 * @param context the reducer context used to write the output and to update the counters
	 * @throws RuntimeException wrapping any exception raised while serialising or writing an event
	 */
	protected void emit(final List<EventWrapper> events, final Context context) {
		events.stream().filter(Objects::nonNull).forEach(eventWrapper -> {
			try {
				final Event event = eventWrapper.asBrokerEvent();
				final String json = event.toJson();
				final Text valueout = new Text(json);
				context.write(tKey, valueout);
				context.getCounter(counterGroup(), eventWrapper.getCounterName()).increment(1);
			} catch (final Exception e) {
				if (e instanceof InterruptedException) {
					// do not lose the interruption: restore the flag before wrapping the exception
					Thread.currentThread().interrupt();
				}
				throw new RuntimeException(e);
			}
		});
	}

	/**
	 * Computes the similarity score between two records according to the dedup configuration.
	 *
	 * @param oa the first record
	 * @param ob the second record
	 * @return the score computed by {@link PaceDocumentDistance}
	 */
	protected double similarity(final Oaf oa, final Oaf ob) {
		final MapDocument a = toMapDocument(oa);
		final MapDocument b = toMapDocument(ob);

		final ScoreResult sr = new PaceDocumentDistance().between(a, b, dedupConf);
		return sr.getScore();
	}

	/** Builds the pace document for the entity of the given record, using the model of the dedup configuration. */
	private MapDocument toMapDocument(final Oaf oaf) {
		return ProtoDocumentBuilder.newInstance(oaf.getEntity().getId(), oaf.getEntity(), dedupConf.getPace().getModel());
	}

	/**
	 * Rescales a similarity score with {@code Algorithms.scale(d, scaleLB, 1, 0, 1)}
	 * (presumably mapping the range [scaleLB, 1] onto [0, 1] - confirm against Algorithms)
	 * and rounds the result to {@link #SCORE_DECIMALS} decimals, half down.
	 *
	 * @param d the score to rescale
	 * @return the rescaled and rounded score
	 */
	protected float scale(final double d) {
		final float score = (float) Algorithms.scale(d, scaleLB, 1, 0, 1);

		return MathUtils.round(score, SCORE_DECIMALS, BigDecimal.ROUND_HALF_DOWN);
	}

	/**
	 * @return a function deserialising an {@link Oaf} from an {@link ImmutableBytesWritable}; it behaves
	 * exactly like {@link #toOaf(ImmutableBytesWritable)}.
	 */
	public static Function<ImmutableBytesWritable, Oaf> oafDeserialiser() {
		// single implementation of the parsing logic, shared with toOaf
		return AbstractEnrichmentReducer::toOaf;
	}

	/**
	 * Deserialises an {@link Oaf} from the bytes wrapped by the given writable.
	 *
	 * @param p the writable wrapping the serialised protocol buffer
	 * @return the parsed {@link Oaf}
	 * @throws IllegalArgumentException if the bytes are not a valid {@link Oaf} message
	 */
	public static Oaf toOaf(ImmutableBytesWritable p) {
		try {
			return Oaf.parseFrom(p.copyBytes());
		} catch (final InvalidProtocolBufferException e) {
			throw new IllegalArgumentException(e);
		}
	}

}
