package eu.dnetlib.enabling.manager.msro.hadoop;

import java.io.IOException;
import java.io.StringReader;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;

import com.google.common.collect.Lists;

/**
 * Reducer that merges theme and export tags into the XML record sharing the same key.
 * <p>
 * For each key, every value starting with {@code "theme"} or {@code "export"} is treated as a tag whose payload is the
 * text following the {@code "::-::"} separator; any other value is parsed as the XML record. If several record values
 * arrive for one key, the last one parsed is used. The record is then rewritten so that its
 * {@code <associatedHopeTheme>} elements and its {@code exportTag} misc fields reflect exactly the collected tags, and
 * the resulting XML is emitted under the same key.
 */
public class HopeTagReducer extends Reducer<Text, Text, Text, Text> {

	private static final Log log = LogFactory.getLog(HopeTagReducer.class); // NOPMD by marko on 11/24/08 5:02 PM

	/** Separator between the tag kind prefix ("theme" / "export") and the tag payload. */
	private static final String TAG_SEPARATOR = "::-::";

	/** Reused output value, overwritten on every call to {@link #reduce}. */
	private final Text result = new Text();

	// NOTE(review): the reader is used with its default configuration; if record values can come from untrusted
	// sources, consider disabling DTDs / external entities (XXE) -- confirm with the pipeline owners.
	private final SAXReader saxReader = new SAXReader();

	/**
	 * @return the SAX reader used to parse record values
	 */
	public SAXReader getSaxReader() {
		return saxReader;
	}

	/**
	 * Collects the theme and export tags for {@code key}, injects them into the XML record received for the same key
	 * and writes the updated record to the context.
	 *
	 * @param key
	 *            the key shared by the record and its tags
	 * @param values
	 *            tag values (prefixed with "theme" or "export") and the XML record itself
	 * @param context
	 *            the reducer context the updated record is written to
	 * @throws IllegalArgumentException
	 *             if a tag value has no payload after the separator
	 * @throws IllegalStateException
	 *             if no XML record was received for the key
	 * @throws RuntimeException
	 *             if a record value cannot be parsed as XML (the parser exception is attached as cause)
	 */
	@Override
	public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
		org.dom4j.Document doc = null;
		List<String> themeTags = Lists.newArrayList();
		List<String> exportTags = Lists.newArrayList();

		for (Text value : values) {
			String val = value.toString();
			if (val.startsWith("theme")) {
				themeTags.add(extractTag(val));
			} else if (val.startsWith("export")) {
				exportTags.add(extractTag(val));
			} else {
				try {
					doc = this.saxReader.read(new StringReader(val));
				} catch (DocumentException e) {
					// Keep the parser exception as cause so the actual XML error is not lost.
					throw new RuntimeException("Can't read input value: " + val, e);
				}
			}
		}

		// Without a record there is nothing to tag: fail with the offending key instead of an anonymous NPE below.
		if (doc == null) {
			throw new IllegalStateException("No XML record received for key: " + key);
		}

		fillRecordWithThemes(doc, themeTags);
		fillRecordWithExports(doc, exportTags);

		result.set(doc.asXML());
		context.write(key, result);
	}

	/**
	 * Returns the payload of a tag value, i.e. the token right after the first separator.
	 *
	 * @param val
	 *            the raw tag value, expected to look like {@code <kind>::-::<tag>}
	 * @return the tag payload
	 * @throws IllegalArgumentException
	 *             if the value has no payload after the separator
	 */
	private static String extractTag(String val) {
		String[] parts = val.split(TAG_SEPARATOR);
		if (parts.length < 2) {
			throw new IllegalArgumentException("Malformed tag value, expected <kind>" + TAG_SEPARATOR + "<tag>: " + val);
		}
		return parts[1];
	}

	/**
	 * Replaces all {@code <associatedHopeTheme>} elements of the record with one element per given theme, appended to
	 * the first {@code <descriptiveUnit>} element. The record is left untouched if it has no {@code <descriptiveUnit>}.
	 *
	 * @param doc
	 *            the record to update in place
	 * @param themes
	 *            the themes to write; may be empty, in which case existing themes are only removed
	 */
	protected void fillRecordWithThemes(org.dom4j.Document doc, List<String> themes) {
		org.dom4j.Node duNode = doc.selectSingleNode("//descriptiveUnit");
		if (duNode == null) {
			return;
		}
		@SuppressWarnings("unchecked")
		List<org.dom4j.Node> themeNodes = doc.selectNodes("//associatedHopeTheme");
		for (org.dom4j.Node themeNode : themeNodes) {
			themeNode.detach();
		}

		// Serializing the whole document is costly: only do it when debug output is actually wanted.
		if (log.isDebugEnabled()) {
			log.debug("Now doc should have no <associatedHopeTheme>: ");
			log.debug(doc.asXML());
		}

		org.dom4j.Element duElement = (org.dom4j.Element) duNode;
		for (String theme : themes) {
			duElement.addElement("associatedHopeTheme").addText(theme);
		}
	}

	/**
	 * Replaces the {@code exportTag} misc fields of the record with one {@code <miscfield>} per given export tag. A
	 * {@code <misc>} element is created under the first {@code <digitalResource>} when the record has none. The record
	 * is left untouched if it has no {@code <digitalResource>}.
	 *
	 * @param doc
	 *            the record to update in place
	 * @param exports
	 *            the export tags to write; may be empty, in which case existing export tags are only removed
	 */
	protected void fillRecordWithExports(org.dom4j.Document doc, List<String> exports) {
		org.dom4j.Node drNode = doc.selectSingleNode("//digitalResource");
		if (drNode == null) {
			return;
		}
		// NOTE(review): "//misc" matches the first <misc> anywhere in the document, not only below <digitalResource>
		// -- confirm this is intended for the record schema.
		org.dom4j.Node miscNode = doc.selectSingleNode("//misc");
		if (miscNode == null) {
			miscNode = ((org.dom4j.Element) drNode).addElement("misc");
		} else {
			@SuppressWarnings("unchecked")
			List<org.dom4j.Node> exportTagNodes = doc.selectNodes("//misc/miscfield[./key/text()='exportTag']");
			for (org.dom4j.Node exportTagNode : exportTagNodes) {
				exportTagNode.detach();
			}
		}

		// Serializing the whole document is costly: only do it when debug output is actually wanted.
		if (log.isDebugEnabled()) {
			log.debug("Now doc should have no export tags: ");
			log.debug(doc.asXML());
		}

		org.dom4j.Element miscElement = (org.dom4j.Element) miscNode;
		for (String exp : exports) {
			// Resulting structure: //misc/miscfield[./key='exportTag']/value
			org.dom4j.Element miscField = miscElement.addElement("miscfield");
			miscField.addElement("key").addText("exportTag");
			miscField.addElement("value").addText(exp);
		}
	}

}
