package eu.dnetlib.data.mapreduce.hbase.dataexport;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import eu.dnetlib.data.mapreduce.util.DedupUtils;
import eu.dnetlib.data.mapreduce.util.OafDecoder;
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
import eu.dnetlib.data.proto.OafProtos.OafEntity;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;

/**
 * Table mapper that exports the identifiers of each row read from the {@code result} column family.
 *
 * <p>For every row that has a body, the mapper decodes it, fills an {@code OpenaireEntityId} with the entity id,
 * the deleted-by-inference flag, the pid values grouped by qualifier classid and (when present) the sorted ids of
 * the entity children, and emits {@code OpenaireEntityId.toString()} as the output value under an empty key.
 * The serialized form is described as json by the original author; it is defined by {@code OpenaireEntityId}.</p>
 *
 * @author claudio
 */
public class ExportResultIdentifiersMapper extends TableMapper<Text, Text> {

	/**
	 * logger.
	 */
	private static final Log log = LogFactory.getLog(ExportResultIdentifiersMapper.class); // NOPMD by marko on 11/24/08 5:02 PM

	/**
	 * Column family holding the body to export; also used as the counter group for rows without a body.
	 */
	private static final String CF = "result";

	/**
	 * Byte form of {@link #CF}, computed once instead of on every {@code map} invocation.
	 */
	private static final byte[] CF_B = Bytes.toBytes(CF);

	/**
	 * Output key, reused across invocations; always the empty string.
	 */
	private Text keyOut;

	/**
	 * Output value, reused across invocations; overwritten for each exported row.
	 */
	private Text valueOut;

	/**
	 * Initializes the reusable output key (empty) and output value.
	 *
	 * @param context the mapper context
	 * @throws IOException          propagated from the superclass setup
	 * @throws InterruptedException propagated from the superclass setup
	 */
	@Override
	protected void setup(final Context context) throws IOException, InterruptedException {
		super.setup(context);

		keyOut = new Text("");
		valueOut = new Text();
	}

	/**
	 * Exports the identifiers of a single row.
	 *
	 * <p>Rows without a body only increment the {@code result / "missing body"} counter. Any failure is logged together
	 * with the offending record, counted in the {@code error} counter group under the exception class name, and then
	 * rethrown wrapped in a {@link RuntimeException}.</p>
	 *
	 * @param keyIn   the row key (not used)
	 * @param value   the row content
	 * @param context the mapper context used for counters and output
	 * @throws IOException          declared by the overridden method
	 * @throws InterruptedException declared by the overridden method
	 */
	@Override
	protected void map(final ImmutableBytesWritable keyIn, final Result value, final Context context) throws IOException, InterruptedException {
		try {
			final byte[] body = value.getValue(CF_B, DedupUtils.BODY_B);

			if (body == null) {
				context.getCounter(CF, "missing body").increment(1);
				return;
			}

			final OpenaireEntityId id = new OpenaireEntityId();
			final OafDecoder d = OafDecoder.decode(body);

			id.setDeleted(d.getOaf().getDataInfo().getDeletedbyinference());
			id.setId(d.getEntityId());

			// single pass: pid values grouped by the classid of their qualifier
			id.setPids(d.getOaf().getEntity().getPidList().stream()
					.collect(Collectors.groupingBy(
							p -> p.getQualifier().getClassid(),
							Collectors.mapping(StructuredProperty::getValue, Collectors.toList()))));

			final List<OafEntity> childrenList = d.getEntity().getChildrenList();
			if (childrenList != null && !childrenList.isEmpty()) {
				// ids of the children, in natural string order
				final ArrayList<String> mergedIds = childrenList.stream()
						.map(OafEntity::getId)
						.sorted()
						.collect(Collectors.toCollection(ArrayList::new));
				id.setMergedIds(mergedIds);
			}

			valueOut.set(id.toString());
			context.write(keyOut, valueOut);

		} catch (final Throwable e) {
			log.error("error exporting the following record from HBase: " + value.toString(), e);
			context.getCounter("error", e.getClass().getName()).increment(1);
			throw new RuntimeException(e);
		}
	}

}
