package eu.dnetlib.data.mapreduce.hbase.openorgs;

import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import com.google.protobuf.InvalidProtocolBufferException;

import eu.dnetlib.data.proto.OafProtos.Oaf;
import eu.dnetlib.data.proto.OafProtos.OafEntity;

/**
 * Reducer that receives a group of serialized {@link Oaf} payloads and, for every distinct original id starting with
 * {@link OpenOrgsCommon#OPENORGS_MAIN_PREFIX} found in the group, writes one TSV row per entity of the group that does
 * not already list that id among its original ids.
 *
 * Groups with fewer than two entities produce no output. Progress is tracked through counters of the
 * {@value #COUNTER_GROUP} counter group.
 */
public class GenerateSimilaritiesReducer extends Reducer<ImmutableBytesWritable, ImmutableBytesWritable, NullWritable, Text> {

	/** Name of the counter group used by every counter of this reducer. */
	private static final String COUNTER_GROUP = "organization";

	/** Reused output value, overwritten before each write. */
	private final Text valueOut = new Text();

	/**
	 * Parses the entities of the group and emits the similarity rows.
	 *
	 * @param key
	 *            the group key (not used to build the output)
	 * @param values
	 *            the serialized {@link Oaf} payloads of the group
	 * @param context
	 *            the reducer context used for counters and output
	 * @throws IOException
	 *             if writing to the context fails
	 * @throws InterruptedException
	 *             if writing to the context is interrupted
	 * @throws RuntimeException
	 *             if one of the payloads cannot be parsed
	 */
	@Override
	protected void reduce(final ImmutableBytesWritable key, final Iterable<ImmutableBytesWritable> values, final Context context)
			throws IOException, InterruptedException {

		final List<OafEntity> entities = parseEntities(values);

		if (entities.size() < 2) { return; }

		final List<String> mainIds = findMainIds(OpenOrgsCommon.OPENORGS_MAIN_PREFIX, entities);

		if (mainIds.size() > 10) {
			context.getCounter(COUNTER_GROUP, "groups with n of openOrgsID > 10").increment(1);
		} else {
			context.getCounter(COUNTER_GROUP, "groups with n of openOrgsID = " + mainIds.size()).increment(1);
		}

		for (final String mainId : mainIds) {
			for (final OafEntity o : entities) {
				// An entity that already carries the main id needs no relation to it.
				if (o.getOriginalIdList().contains(mainId)) {
					continue;
				}
				// Without an original id there is nothing to put in the row; count and skip instead of failing.
				if (o.getOriginalIdCount() == 0) {
					context.getCounter(COUNTER_GROUP, "entities without originalId (skipped)").increment(1);
					continue;
				}
				context.getCounter(COUNTER_GROUP, "relations to " + OpenOrgsCommon.OPENORGS_MAIN_PREFIX + "*").increment(1);
				emit(newSimilarity(mainId, o), context);
			}
		}
	}

	/**
	 * Deserializes every payload of the group into its {@link OafEntity}.
	 *
	 * @param values
	 *            the serialized {@link Oaf} payloads
	 * @return the parsed entities, in iteration order
	 * @throws RuntimeException
	 *             wrapping the {@link InvalidProtocolBufferException} if a payload cannot be parsed
	 */
	private List<OafEntity> parseEntities(final Iterable<ImmutableBytesWritable> values) {
		final List<OafEntity> entities = new ArrayList<>();
		try {
			for (final ImmutableBytesWritable ibw : values) {
				entities.add(Oaf.parseFrom(ibw.get()).getEntity());
			}
		} catch (final InvalidProtocolBufferException e) {
			throw new RuntimeException("Unable to parse an Oaf payload of the reduce group", e);
		}
		return entities;
	}

	/**
	 * Collects the distinct, non-blank original ids that start with the given prefix.
	 *
	 * @param idPrefix
	 *            the prefix an original id must start with
	 * @param list
	 *            the entities whose original ids are inspected
	 * @return the matching ids without duplicates, in order of first occurrence
	 */
	private List<String> findMainIds(final String idPrefix, final List<OafEntity> list) {
		// A set avoids emitting the same relation several times when multiple entities share a main id.
		final Set<String> valids = new LinkedHashSet<>();
		for (final OafEntity e : list) {
			for (final String id : e.getOriginalIdList()) {
				if (StringUtils.isNotBlank(id) && id.startsWith(idPrefix)) {
					valids.add(id);
				}
			}
		}
		return new ArrayList<>(valids);
	}

	/**
	 * Writes the TSV form of the similarity to the context and increments the total relations counter.
	 *
	 * @param simrel
	 *            the similarity to write
	 * @param context
	 *            the reducer context
	 * @throws IOException
	 *             if the write fails
	 * @throws InterruptedException
	 *             if the write is interrupted
	 */
	private void emit(final Similarity simrel, final Context context) throws IOException, InterruptedException {
		valueOut.set(simrel.toTsv());
		context.getCounter(COUNTER_GROUP, "relations (total)").increment(1);
		context.write(NullWritable.get(), valueOut);
	}

	/**
	 * Builds a similarity between an OpenOrgs id and an entity, copying the organization metadata of the entity.
	 *
	 * @param openOrgsId
	 *            the OpenOrgs id
	 * @param oafEntity
	 *            the entity; it must have at least one original id
	 * @return the populated similarity
	 */
	private Similarity newSimilarity(final String openOrgsId, final OafEntity oafEntity) {
		final Similarity s = new Similarity();
		s.setOpenOrgID(openOrgsId);
		s.setOpenaireOriginalId(oafEntity.getOriginalId(0));
		s.setName(oafEntity.getOrganization().getMetadata().getLegalname().getValue());
		s.setAcronym(oafEntity.getOrganization().getMetadata().getLegalshortname().getValue());
		s.setCountry(oafEntity.getOrganization().getMetadata().getCountry().getClassid());
		s.setUrl(oafEntity.getOrganization().getMetadata().getWebsiteurl().getValue());
		// Guard the indexed access: an entity without collectedfrom entries gets an empty value.
		s.setCollectedFrom(oafEntity.getCollectedfromCount() > 0 ? oafEntity.getCollectedfrom(0).getValue() : "");
		return s;
	}

}
