package eu.dnetlib.enabling.manager.msro.hadoop;

import java.io.IOException;
import java.io.StringReader;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.bson.BSONObject;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;

/**
 * Mapper that keys HOPE entity records by a persistent identifier.
 * <p>
 * Each input record is expected to carry an XML document in its {@code body} field. The entity type is taken from
 * the local name of the last child element of {@code hopeEntity}:
 * <ul>
 * <li>{@code digitalResource}: the key is the value of {@code //digitalResource/represents/@persistentID};</li>
 * <li>{@code descriptiveUnit}: the key is the value of {@code //hopeEntity/persistentID};</li>
 * <li>any other entity type is ignored without logging.</li>
 * </ul>
 * A record is emitted only when its key starts with {@code http}; the emitted value is always the unmodified body.
 * Presumably this lets a downstream reducer see a descriptive unit together with the digital resources that
 * represent it -- confirm against the reducer, which is not part of this file.
 * <p>
 * Instances keep mutable per-call state ({@link #mapKey}, {@link #mapValue}, {@link #saxReader}) and are therefore
 * not meant to be shared between threads.
 */
public class HopeEDMMapper extends Mapper<Object, BSONObject, Text, Text> {

	private static final Log log = LogFactory.getLog(HopeEDMMapper.class); // NOPMD by marko on 11/24/08 5:02 PM

	/** Name of the record field that holds the XML document. */
	private static final String BODY_FIELD = "body";

	/** Identifiers are treated as persistent identifiers only when they start with this prefix. */
	private static final String PID_PREFIX = "http";

	private static final String ENTITY_TYPE_XPATH = "local-name(//hopeEntity/*[last()])";
	private static final String DIGITAL_RESOURCE_PID_XPATH = "//digitalResource/represents/@persistentID";
	private static final String DESCRIPTIVE_UNIT_PID_XPATH = "//hopeEntity/persistentID";

	/**
	 * Parser reused across records instead of being rebuilt on every call.
	 * NOTE(review): the reader runs with its default settings; if record bodies can come from untrusted sources,
	 * consider disabling DTDs / external entities (XXE). Not changed here because it could reject records that are
	 * accepted today.
	 */
	private final SAXReader saxReader = new SAXReader();

	private final Text mapValue = new Text();
	private final Text mapKey = new Text();

	/**
	 * Parses the record body and, when it describes a digital resource or a descriptive unit with an
	 * {@code http}-prefixed identifier, writes {@code (identifier, body)} to the context.
	 *
	 * @param key
	 *            the input key; only used to identify the record in error messages
	 * @param value
	 *            the input record, whose {@code body} field must contain an XML document
	 * @param context
	 *            the context the output pair is written to
	 * @throws NullPointerException
	 *             if the record has no {@code body} field
	 * @throws RuntimeException
	 *             if the body cannot be parsed as XML; the {@link DocumentException} is kept as the cause
	 */
	@Override
	public void map(Object key, BSONObject value, Context context) throws IOException, InterruptedException {
		final Object body = value.get(BODY_FIELD);
		if (body == null) {
			// Same exception type as before, but now the failing record can be identified.
			throw new NullPointerException("Record " + key + " has no '" + BODY_FIELD + "' field");
		}
		final String bodyContent = body.toString();

		final Document doc;
		try {
			doc = saxReader.read(new StringReader(bodyContent));
		} catch (DocumentException e) {
			throw new RuntimeException("Unable to parse the body of record " + key + " as XML", e);
		}

		final String entityType = doc.valueOf(ENTITY_TYPE_XPATH);
		if ("digitalResource".equals(entityType)) {
			final String dr = doc.valueOf(DIGITAL_RESOURCE_PID_XPATH);
			if (dr.startsWith(PID_PREFIX)) {
				if (log.isDebugEnabled()) {
					log.debug("Found dr linked to: " + dr);
				}
				emit(dr, bodyContent, context);
			} else {
				log.info("Found dr whose du has a non-PID identifier (" + dr + ") ...dr skipped");
			}
		} else if ("descriptiveUnit".equals(entityType)) {
			final String du = doc.valueOf(DESCRIPTIVE_UNIT_PID_XPATH);
			if (du.startsWith(PID_PREFIX)) {
				if (log.isDebugEnabled()) {
					log.debug("Found du with persistentID: " + du);
				}
				emit(du, bodyContent, context);
			} else {
				log.info("Found du with a non-PID identifier...du " + du + " skipped");
			}
		}
	}

	/** Writes {@code (persistentId, bodyContent)} to the context, reusing the shared {@link Text} instances. */
	private void emit(final String persistentId, final String bodyContent, final Context context) throws IOException,
			InterruptedException {
		mapKey.set(persistentId);
		mapValue.set(bodyContent);
		context.write(mapKey, mapValue);
	}

}
