package eu.dnetlib.data.mapreduce.hbase.misc;

import java.io.IOException;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import com.google.common.collect.Sets;
import eu.dnetlib.data.mapreduce.util.DedupUtils;
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder;
import eu.dnetlib.data.proto.DatasourceProtos.Datasource;
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
import eu.dnetlib.data.proto.OafProtos.Oaf;
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;

/**
 * Reads publications and datasources,
 *  for publications, if publication is related to an EC project emit(collectedFrom@id, 1);
 *  for datasources emit(id, ds)
 *
 * @author claudio
 */
public class PredatoryJournalsMapper extends TableMapper<Text, ImmutableBytesWritable> {

	/**
	 * logger.
	 */
	private static final Log log = LogFactory.getLog(PredatoryJournalsMapper.class); // NOPMD by marko on 11/24/08 5:02 PM

	private static final String PREDATORY_JOURNALS = "Predatory Journal Counters";

	private static final byte[] isProducedBy = "resultProject_outcome_isProducedBy".getBytes();

	private Text keyOut;

	private ImmutableBytesWritable valueOut;

	@Override
	protected void setup(final Context context) throws IOException, InterruptedException {
		super.setup(context);

		keyOut = new Text("");
		valueOut = new ImmutableBytesWritable();
	}

	@Override
	protected void map(final ImmutableBytesWritable keyIn, final Result value, final Context context) throws IOException, InterruptedException {
		try {
			final OafRowKeyDecoder keyDecoder = OafRowKeyDecoder.decode(keyIn.copyBytes());

			final byte[] body = value.getValue(keyDecoder.getType().toString().getBytes(), DedupUtils.BODY_B);

			if (body != null) {
				final Oaf oaf = Oaf.parseFrom(body);

				if (oaf.getDataInfo().getDeletedbyinference() != false) {

					switch (keyDecoder.getType()) {
					case result:

						final Map<byte[], byte[]> projectRels = value.getFamilyMap(isProducedBy);

						if (projectRels != null) {
							long ecProjects = projectRels.keySet().stream()
									.map(String::new)
									.filter(s -> StringUtils.contains(s, "corda"))
									.count();

							final Set<String> ids = Sets.newHashSet();
							if (ecProjects > 0) {
								ids.addAll(
										Stream.concat(
												oaf.getEntity().getCollectedfromList().stream()
													.map(KeyValue::getKey),
												oaf.getEntity().getResult().getInstanceList().stream()
													.map(Instance::getHostedby)
													.map(KeyValue::getKey)
										).collect(Collectors.toList()));

								ids.forEach(dsId -> {
									emit(context, dsId, Bytes.toBytes(1));
									context.getCounter(PREDATORY_JOURNALS, "publications").increment(1);
								});
							}
						}
						break;
					case datasource:

						final Datasource ds = oaf.getEntity().getDatasource();
						final String dsType = ds.getMetadata().getDatasourcetype().getClassid();

						if (StringUtils.contains(dsType, "journal")) {
							emit(context, oaf.getEntity().getId(), body);
							context.getCounter(PREDATORY_JOURNALS, "journals").increment(1);
						}
						break;

					default:
						break;
					}
				}
			}
		} catch (final Throwable e) {
			log.error("error exporting the following record from HBase: " + value.toString(), e);
			context.getCounter("error", e.getClass().getName()).increment(1);
			throw new RuntimeException(e);
		}
	}

	private void emit(final Context context, final String key, byte[] value) {
		keyOut.set(key);
		valueOut.set(value);
		try {
			context.write(keyOut, valueOut);
		} catch (IOException | InterruptedException e) {
			throw new RuntimeException(e);
		}
	}

}
