package eu.dnetlib.data.mapreduce.hbase.dataexport;

import java.io.IOException;
import java.nio.charset.StandardCharsets;

import eu.dnetlib.miscutils.functional.xml.ApplyXslt;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Mapper that runs every input value through an XSLT stylesheet and emits the
 * non-blank results as single-line records.
 *
 * <p>The stylesheet is read from the job configuration property {@code xslt}, where it is
 * expected to be stored Base64-encoded. The output key is the part of the input key that
 * follows the first {@code "::"} separator; the output value is the transformation result
 * with all newline and tab characters removed.
 */
public class ExportSimplifiedRecordsMapper extends Mapper<Text, Text, Text, Text> {

	private static final Log log = LogFactory.getLog(ExportSimplifiedRecordsMapper.class); // NOPMD by marko on 11/24/08 5:02 PM

	/** Name of the job configuration property holding the Base64-encoded stylesheet. */
	private static final String XSLT_PROPERTY = "xslt";

	/** Separator after which the emitted part of the input key starts. */
	private static final String KEY_SEPARATOR = "::";

	/** Transformation applied to every input value; built once per task in {@link #setup}. */
	private ApplyXslt recordSummarizer;

	/** Output value holder, allocated once in {@link #setup} and reused for every record. */
	private Text valueOut;

	/** Output key holder, allocated once in {@link #setup} and reused for every record. */
	private Text keyOut;

	/**
	 * Decodes the stylesheet from the job configuration and prepares the reusable output holders.
	 *
	 * @param context the task context giving access to the job configuration
	 * @throws IllegalStateException if the {@code xslt} property is not set
	 */
	@Override
	protected void setup(final Context context) throws IOException, InterruptedException {

		final String encodedXslt = context.getConfiguration().get(XSLT_PROPERTY);
		if (encodedXslt == null) {
			// Fail with a message naming the property instead of a NullPointerException
			// raised while turning the decoded bytes into a String.
			throw new IllegalStateException("missing required job configuration property '" + XSLT_PROPERTY + "'");
		}

		// Decode with an explicit charset so the result does not depend on the default
		// charset of the node running the task.
		// NOTE(review): assumes the submitter encoded the stylesheet bytes as UTF-8 - confirm.
		final String xslt = new String(Base64.decodeBase64(encodedXslt), StandardCharsets.UTF_8);

		log.info("got xslt: \n" + xslt);

		recordSummarizer = new ApplyXslt(xslt);
		valueOut = new Text();
		keyOut = new Text("");
	}

	/**
	 * Transforms one input value and, when the result is not blank, writes it out on one line.
	 *
	 * @param key     the input key; only the part after the first {@code "::"} is emitted
	 *                (an empty key is emitted when the separator is absent)
	 * @param value   the input record handed to the stylesheet
	 * @param context the task context the output pair is written to
	 */
	@Override
	protected void map(final Text key, final Text value, final Context context) throws IOException, InterruptedException {

		final String summary = recordSummarizer.evaluate(value.toString());
		if (StringUtils.isBlank(summary)) {
			// Nothing to export for this record.
			return;
		}

		keyOut.set(StringUtils.substringAfter(key.toString(), KEY_SEPARATOR));
		// Literal replacement: same result as the regex form, without compiling a pattern per record.
		valueOut.set(summary.replace("\n", "").replace("\t", ""));
		context.write(keyOut, valueOut);
	}

}
