package eu.dnetlib.data.mapreduce.hbase.broker.enrich;

import java.io.IOException;
import java.util.List;
import java.util.stream.Collectors;

import com.google.common.collect.Lists;
import com.google.common.collect.Streams;
import eu.dnetlib.data.mapreduce.hbase.broker.*;
import eu.dnetlib.data.mapreduce.hbase.broker.model.EventWrapper;
import eu.dnetlib.data.mapreduce.util.DedupUtils;
import eu.dnetlib.data.proto.OafProtos.Oaf;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.dom4j.DocumentException;

import static eu.dnetlib.data.mapreduce.util.OafHbaseUtils.getKey;

/**
 * Reducer that compares the records received for one key against each other and emits the
 * enrichment events produced for the pairs whose similarity reaches the configured threshold.
 *
 * Created by claudio on 08/07/16.
 */
public class EnrichmentReducer extends AbstractEnrichmentReducer {

	/**
	 * @return the name of the counter group used for this reducer's own statistics
	 */
	@Override
	protected String counterGroup() {
		return "Broker Enrichment";
	}

	/**
	 * Decodes at most {@code LIMIT} of the incoming values into {@link Oaf} objects and generates
	 * the events for that list.
	 *
	 * @param key     the reduce key (not read by this method)
	 * @param values  the serialized records grouped under the key
	 * @param context the reducer context, used for counters and for emitting events
	 * @throws RuntimeException wrapping any {@link DocumentException} raised during event generation
	 */
	@Override
	protected void reduce(final ImmutableBytesWritable key, final Iterable<ImmutableBytesWritable> values, final Context context) throws IOException,
			InterruptedException {

		try {
			final List<Oaf> decoded = Streams.stream(values)
					.limit(LIMIT)
					.map(EnrichmentReducer::toOaf)
					.collect(Collectors.toList());
			generateEvents(decoded, context);
		} catch (final DocumentException e) {
			throw new RuntimeException(e);
		}
	}

	/**
	 * Walks the list once per record: counts its result type, filters it by datasource, and emits
	 * the events obtained by comparing it with the other records of the list.
	 *
	 * @param oafList the records of the current group
	 * @param context the reducer context, used for counters and for emitting events
	 */
	private void generateEvents(final List<Oaf> oafList, final Context context) throws IOException, InterruptedException, DocumentException {

		for (final Oaf current : oafList) {

			final String resultType = current.getEntity().getResult().getMetadata().getResulttype().getClassid();
			context.getCounter(counterGroup(), "entity type: " + resultType).increment(1);

			// The datasource id is whatever follows the first '|' in the collectedfrom key.
			final String dsKey = getKey(current.getEntity().getCollectedfromList());
			final String dsId = StringUtils.substringAfter(dsKey, "|");
			final String dsType = dsTypeMap.get(dsId);

			// A datasource without a known type is only accepted when explicitly whitelisted.
			if (StringUtils.isBlank(dsType) && !dsWhitelist.contains(dsId)) {
				context.getCounter("events skipped", "datasource type excluded").increment(1);
				continue;
			}

			if (dsBlacklist.contains(dsId)) {
				context.getCounter("events skipped", "datasource blacklisted").increment(1);
				continue;
			}

			emit(collectEventsFor(current, oafList, context), context);
		}
	}

	/**
	 * Builds the events for one record by comparing it with every record of the list.
	 *
	 * @param current the record the events are generated for
	 * @param oafList the records of the current group, {@code current} included
	 * @param context the reducer context, used for counters and handed to the subject event factory
	 * @return the events collected for {@code current}, possibly empty
	 */
	private List<EventWrapper> collectEventsFor(final Oaf current, final List<Oaf> oafList, final Context context)
			throws IOException, InterruptedException, DocumentException {

		final String currentId = current.getEntity().getId();
		final List<EventWrapper> collected = Lists.newArrayList();

		for (final Oaf candidate : oafList) {

			final String candidateId = candidate.getEntity().getId();

			if (currentId.equals(candidateId)) {
				// Same id: only a group made of a single record yields (subject) events on its own.
				if (oafList.size() == 1) {
					collected.addAll(SubjectEventFactory.process(context, current));
				}
				continue;
			}

			final double similarity = similarity(current, candidate);

			// Kept as a positive ">=" test so that a NaN similarity is counted as below threshold.
			final boolean similarEnough = similarity >= dedupConf.getWf().getThreshold();
			if (!similarEnough) {
				context.getCounter(counterGroup(), "d < " + dedupConf.getWf().getThreshold()).increment(1);
				continue;
			}

			final float trust = scale(similarity);

			// These factories are skipped as soon as either side of the pair is a root record.
			final boolean anyRoot = DedupUtils.isRoot(currentId) || DedupUtils.isRoot(candidateId);
			if (!anyRoot) {
				collected.addAll(PIDEventFactory.process(current, candidate, trust));
				collected.addAll(OAVersionEventFactory.process(current, candidate, trust, untrustedOaDsList));
				collected.addAll(AbstractEventFactory.process(current, candidate, trust));
				collected.addAll(PublicationDateEventFactory.process(current, candidate, trust));
				collected.addAll(OrcidEventFactory.process(current, candidate, trust));
			}

			collected.addAll(SubjectEventFactory.process(context, current, candidate, trust));
		}

		return collected;
	}

}
