package eu.dnetlib.data.mapreduce.hbase.dedup;

import java.io.IOException;
import java.util.Properties;

import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

import eu.dnetlib.data.mapreduce.JobParams;
import eu.dnetlib.data.mapreduce.hbase.AbstractHBaseMapReduceJob;
import eu.dnetlib.data.proto.TypeProtos.Type;

/**
 * Job definition that wires {@link FindDedupCandidatePersonsMapper} and
 * {@link FindDedupCandidatePersonsReducer} onto a single HBase table: the table named by
 * {@link JobParams#HBASE_SOURCE_TABLE} is used both as the mapper input and as the reducer output.
 * The input scan is restricted with a {@link PrefixFilter} built from {@link #PERSON_SCAN_PREFIX}.
 */
public class FindDedupCandidatePersonsJob extends AbstractHBaseMapReduceJob {

	/** Prefix handed to the scan filter: the numeric value of {@code Type.person} followed by {@code "|"}. */
	private static final String PERSON_SCAN_PREFIX = Type.person.getNumber() + "|";

	/** Scanner caching value applied to the input scan. */
	private static final int SCANNER_CACHING = 100;

	/** Number of reduce tasks requested for the job. */
	private static final int REDUCE_TASKS = 100;

	/**
	 * Configures the given job: registers the table mapper and table reducer, then fixes the
	 * number of reduce tasks.
	 *
	 * @param job
	 *            the job to configure
	 * @param p
	 *            job properties; read for the source table name and modified with the
	 *            speculative-execution flag
	 * @return the same {@code job} instance that was passed in
	 * @throws RuntimeException
	 *             wrapping any {@link IOException} raised while initialising the mapper or reducer
	 */
	@Override
	public Job setJobDetails(Job job, Properties p) {
		// NOTE(review): the flag is written to the Properties object, not to the job configuration;
		// whether it reaches Hadoop depends on how the base class consumes `p` -- confirm.
		// NOTE(review): the HBase reference guide generally advises disabling speculative execution
		// for jobs that read from / write to HBase -- confirm that "true" is intentional here.
		p.setProperty("mapred.reduce.tasks.speculative.execution", "true");

		final Scan personScan = buildPersonScan();
		final String tableName = p.getProperty(JobParams.HBASE_SOURCE_TABLE);
		registerMapperAndReducer(job, personScan, tableName);

		job.setNumReduceTasks(REDUCE_TASKS);
		return job;
	}

	/**
	 * Creates the input scan: caching set to {@link #SCANNER_CACHING}, block caching disabled,
	 * and a prefix filter on {@link #PERSON_SCAN_PREFIX}.
	 *
	 * @return a freshly configured scan
	 */
	private Scan buildPersonScan() {
		final Scan personScan = new Scan();
		personScan.setCaching(SCANNER_CACHING);
		personScan.setCacheBlocks(false);

		final byte[] prefixBytes = Bytes.toBytes(PERSON_SCAN_PREFIX);
		personScan.setFilter(new PrefixFilter(prefixBytes));
		return personScan;
	}

	/**
	 * Registers the mapper (emitting {@code Text}/{@code Text} pairs) and the reducer on the job,
	 * both bound to the same table.
	 *
	 * @param job
	 *            the job being configured
	 * @param scan
	 *            the scan the mapper reads its input through
	 * @param tableName
	 *            name of the table used as mapper input and reducer output
	 */
	private void registerMapperAndReducer(final Job job, final Scan scan, final String tableName) {
		try {
			TableMapReduceUtil.initTableMapperJob(
					tableName,
					scan,
					FindDedupCandidatePersonsMapper.class,
					Text.class,
					Text.class,
					job);
			TableMapReduceUtil.initTableReducerJob(tableName, FindDedupCandidatePersonsReducer.class, job);
		} catch (IOException ioe) {
			// Keep the original cause; the overridden method declares no checked exceptions.
			throw new RuntimeException(ioe);
		}
	}

}