package eu.dnetlib.data.mapreduce.hbase.dataexport;

import com.google.gson.Gson;
import eu.dnetlib.data.mapreduce.hbase.bulktag.ProtoMap;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.dom4j.Document;
import org.dom4j.io.SAXReader;

import java.io.IOException;
import java.io.StringReader;
import java.time.Year;

/**
 * Exports the results matching the criteria found in the job configuration.
 *
 * <p>Each input value is parsed as an XML document and checked against two {@link RecordFilter}s:
 * a default filter (criteria read from {@code filter.defaultcriteria}) and a user filter
 * (criteria read from {@code filter.criteria}, year bounds read from {@code filter.fromyear} and
 * {@code filter.toyear}). Both filters share the XPath configured under {@code filter.yearxpath}.
 * A record is written to the output, unchanged, only when both filters match.</p>
 *
 * <p>Counters updated by this mapper:</p>
 * <ul>
 *   <li>group {@code filter}: {@code matched criteria <resulttype classid>},
 *       {@code filtered by criteria}, {@code filtered by default criteria};</li>
 *   <li>group {@code error}: one counter per exception class name raised while processing a record.</li>
 * </ul>
 *
 * @author claudio
 */
public class ExportFilteredResultMapper extends Mapper<Text, Text, Text, Text> {

	/** XPath selecting the result type class id, used only to label the "matched criteria" counter. */
	private static final String RESULT_TYPE_XPATH = "/*[local-name() ='record']/*[local-name() ='result']/*[local-name() ='metadata']/*[local-name() ='entity']/*[local-name() ='result']/*[local-name() ='resulttype']/@classid";

	/** Reusable output key, overwritten for every emitted record. */
	private Text keyOut;

	/** Reusable output value, overwritten for every emitted record. */
	private Text valueOut;

	/** Filter built from the {@code filter.defaultcriteria} configuration entry. */
	private RecordFilter defaultFilter;

	/** Filter built from the {@code filter.criteria}, {@code filter.fromyear} and {@code filter.toyear} entries. */
	private RecordFilter userFilter;

	/** XML parser created once per mapper instance and reused for every record. */
	private SAXReader saxReader;

	/**
	 * Initialises the reusable output holders, the XML parser and the two record filters
	 * from the job configuration.
	 *
	 * @param context the mapper context giving access to the job configuration
	 * @throws IOException declared by the overridden method
	 * @throws InterruptedException declared by the overridden method
	 */
	@Override
	protected void setup(final Context context) throws IOException, InterruptedException {
		keyOut = new Text("");
		valueOut = new Text();

		// NOTE(review): the parser runs with its default settings; if records may carry DOCTYPE
		// declarations from external sources, consider disabling external entity resolution here.
		saxReader = new SAXReader();

		final Gson gson = new Gson();
		final String yearXPath = context.getConfiguration().get("filter.yearxpath");

		// The default filter spans from year 0 up to the current year.
		defaultFilter = new RecordFilter(
				gson.fromJson(context.getConfiguration().get("filter.defaultcriteria", "{}"), ProtoMap.class),
				yearXPath,
				0,
				Year.now().getValue());

		// NOTE(review): "filter.toyear" falls back to 0 when unset; confirm how RecordFilter
		// interprets a [0, 0] year range.
		userFilter = new RecordFilter(
				gson.fromJson(context.getConfiguration().get("filter.criteria", "{}"), ProtoMap.class),
				yearXPath,
				context.getConfiguration().getInt("filter.fromyear", 0),
				context.getConfiguration().getInt("filter.toyear", 0));
	}

	/**
	 * Parses the input value as XML and emits the (key, value) pair unchanged when the record
	 * matches both the default and the user filter; otherwise only the relevant counter is updated.
	 *
	 * @param keyIn the record key, copied to the output key on a match
	 * @param value the serialized XML record, copied to the output value on a match
	 * @param context the mapper context used for output and counters
	 * @throws IOException if writing the output fails
	 * @throws InterruptedException if the task is interrupted while writing the output
	 * @throws RuntimeException wrapping any other exception raised while processing the record
	 */
	@Override
	protected void map(final Text keyIn, final Text value, final Context context) throws IOException, InterruptedException {
		try {
			final String record = value.toString();

			final Document doc = saxReader.read(new StringReader(record));

			if (!defaultFilter.matches(doc, true)) {
				context.getCounter("filter", "filtered by default criteria").increment(1);
				return;
			}

			if (!userFilter.matches(doc, false)) {
				context.getCounter("filter", "filtered by criteria").increment(1);
				return;
			}

			keyOut.set(keyIn.toString());
			// Reuse the string decoded above instead of decoding the value a second time.
			valueOut.set(record);

			context.write(keyOut, valueOut);
			context.getCounter("filter", "matched criteria " + doc.valueOf(RESULT_TYPE_XPATH)).increment(1);
		} catch (final IOException | InterruptedException e) {
			// Declared by the method: propagate unchanged so the framework sees the real failure type.
			context.getCounter("error", e.getClass().getName()).increment(1);
			throw e;
		} catch (final Exception e) {
			// Parsing or filtering failure: count it and fail the task, keeping the cause.
			// JVM Errors are deliberately not caught here and propagate as they are.
			context.getCounter("error", e.getClass().getName()).increment(1);
			throw new RuntimeException("Error processing record " + keyIn, e);
		}
	}

}
