package eu.dnetlib.data.mapreduce.hbase.oai.utils;

import java.io.StringReader;
import java.util.Collection;
import java.util.List;
import java.util.Map.Entry;

import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;

import com.google.common.base.Function;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Collections2;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;

import eu.dnetlib.data.mapreduce.hbase.oai.config.OAIConfigurationReader;

/**
 * An instance of this class can parse an XML record and extract the information needed to store the record in a publisher store.
 * <p>
 * The extractor is configured with the list of store indices (each one carrying the XPaths to evaluate) and, optionally, with an XPath
 * used to flag duplicate records.
 * 
 * @author alessia
 * 
 */
public class RecordFieldsExtractor {

	/** Key under which a record recognised as duplicate is flagged in the extracted properties. */
	private static final String DUPLICATE_FIELD = "duplicate";
	/** Key (and header element local name) of the collection date. */
	private static final String DATE_OF_COLLECTION = "dateOfCollection";
	/** Key (and header element local name) of the transformation date. */
	private static final String DATE_OF_TRANSFORMATION = "dateOfTransformation";
	/** Suffix appended to the name of a set to obtain the name of its "enriched" counterpart. */
	private static final String ENRICHED_SUFFIX = "_enriched";

	/**
	 * List of the indices of the target store.
	 */
	private List<PublisherField> storeIndices;

	/*
	 * XPaths to execute to understand if a record has been enhanced: inferred subjects, deduplicated records, inferred relationships (only
	 * those generated from IIS, not those redirected by the dedup). They used to be hard-coded here and are now passed to extractFields by
	 * the caller; the former values are kept below as an example.
	 */
	// private List<String> enrichmentXPaths = Lists.newArrayList("//subject[./@inferred='true']",
	// "//result/datainfo[./inferenceprovenance='dedup']",
	// "//rel[./@inferred='true' and ./@inferenceprovenance != 'dedup']");

	/**
	 * XPath that selects at least one node when the record is a duplicate.
	 */
	private String duplicateXPath;
	/**
	 * When true, records matching {@link #duplicateXPath} are flagged with the property "duplicate" = "true".
	 */
	private boolean skipDuplicates = false;

	public RecordFieldsExtractor() {
		super();
	}

	public RecordFieldsExtractor(final List<PublisherField> storeIndices) {
		super();
		this.storeIndices = storeIndices;
	}

	/**
	 * Parses the record and returns a map where a key is the name of an index, the value is the value in the record at the xpath specified
	 * in this.storeIndices. The enrichment xpaths are used to generate additional OAI sets that can be used by providers to get the subset
	 * of records enriched by OpenAIRE.
	 * <p>
	 * Besides the configured indices, the result may contain the keys "duplicate" (only when skipDuplicates is enabled and the record
	 * matches the duplicate xpath), "dateOfCollection" and "dateOfTransformation" (only when the corresponding header element is present
	 * and not blank).
	 * 
	 * @param record
	 *            the XML string to parse.
	 * @param enrichmentXPaths
	 *            collection of xpaths that must be satisfied to consider the current record as "enriched by OpenAIRE". A null collection
	 *            is treated as empty, i.e. the record is never considered enriched.
	 * @return a Multimap describing the values to be indexed for this record, or null if the record is null or cannot be parsed as XML.
	 * @throws IllegalStateException
	 *             if skipDuplicates is enabled but no duplicate xpath has been configured.
	 */
	public Multimap<String, String> extractFields(final String record, final Collection<String> enrichmentXPaths) {
		if (record == null) {
			// a missing record is handled like an unparsable one
			return null;
		}
		final Document doc;
		try {
			// NOTE(review): the reader runs with its default settings; if records may come from untrusted sources, consider disabling
			// DTDs / external entities on the parser.
			doc = new SAXReader().read(new StringReader(record));
		} catch (DocumentException e) {
			// by contract an unparsable record is reported to the caller with a null result
			return null;
		}

		final Multimap<String, String> recordProps = ArrayListMultimap.create();
		if (skipDuplicates && isDuplicate(doc)) {
			recordProps.put(DUPLICATE_FIELD, "true");
		}
		// dates: note that this will be used to generate the LAST_COLLECTION_DATE_FIELD and DATESTAMP_FIELD
		putHeaderDate(doc, DATE_OF_COLLECTION, recordProps);
		putHeaderDate(doc, DATE_OF_TRANSFORMATION, recordProps);

		// an extractor created with the no-arg constructor may have no indices configured
		if (this.storeIndices != null) {
			for (PublisherField field : this.storeIndices) {
				for (Entry<String, String> indexEntry : field.getSources().entries()) {
					// each xpath can return a list of nodes or strings, depending on the xpath
					final List<?> xPathResult = doc.selectNodes(indexEntry.getValue());
					if ((xPathResult != null) && !xPathResult.isEmpty()) {
						recordProps.putAll(field.getFieldName(), toFieldValues(xPathResult));
					}
				}
			}
		}
		// getEnrichedSets returns a fresh collection, so the live view returned by recordProps.get() is not modified while it is read
		recordProps.putAll(OAIConfigurationReader.SET_FIELD, getEnrichedSets(doc, enrichmentXPaths, recordProps.get(OAIConfigurationReader.SET_FIELD)));
		return recordProps;
	}

	/**
	 * Computes the names of the additional "enriched" sets for a record.
	 * 
	 * @param docBody
	 *            the parsed record.
	 * @param enrichmentXPaths
	 *            xpaths telling whether the record has been enriched: one non-empty result is enough. Null is treated as empty.
	 * @param originalSets
	 *            the sets the record already belongs to. Null is treated as empty.
	 * @return a new collection with one "&lt;set&gt;_enriched" entry per original set if the record is enriched, an empty collection
	 *         otherwise.
	 */
	public Collection<String> getEnrichedSets(final Document docBody, final Collection<String> enrichmentXPaths, final Collection<String> originalSets) {
		final Collection<String> enhancedSets = Lists.newArrayList();
		if ((originalSets != null) && isEnhanced(docBody, enrichmentXPaths)) {
			for (String originalSet : originalSets) {
				enhancedSets.add(originalSet + ENRICHED_SUFFIX);
			}
		}
		return enhancedSets;
	}

	/**
	 * Tells whether the given document matches the configured duplicate xpath.
	 * 
	 * @param doc
	 *            the parsed record.
	 * @return true if the duplicate xpath selects a node in the document.
	 * @throws IllegalStateException
	 *             if no duplicate xpath has been configured.
	 */
	public boolean isDuplicate(final Document doc) {
		if (StringUtils.isBlank(duplicateXPath)) {
			// fail with an explicit message instead of letting the XPath library choke on a null/blank expression
			throw new IllegalStateException("Cannot check for duplicates: duplicateXPath is not set");
		}
		return doc.selectSingleNode(duplicateXPath) != null;
	}

	/**
	 * Tells whether at least one of the given xpaths selects something in the document.
	 */
	private boolean isEnhanced(final Document docBody, final Collection<String> enrichmentXPaths) {
		if (enrichmentXPaths == null) {
			return false;
		}
		for (String xpath : enrichmentXPaths) {
			final List<?> xPathResult = docBody.selectNodes(xpath);
			if ((xPathResult != null) && !xPathResult.isEmpty()) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Copies the text of the header element with the given local name into the properties, under the same name, if it is present and not
	 * blank.
	 */
	private void putHeaderDate(final Document doc, final String elementName, final Multimap<String, String> recordProps) {
		final Node dateNode = doc.selectSingleNode("//*[local-name()='header']/*[local-name()='" + elementName + "']");
		if ((dateNode != null) && StringUtils.isNotBlank(dateNode.getText())) {
			recordProps.put(elementName, dateNode.getText());
		}
	}

	/**
	 * Converts a non-empty xpath result into the strings to index. The kind of result is decided by looking at its first element: strings
	 * are taken as they are, nodes contribute their text (a null element contributes the empty string), any other kind of result yields
	 * no value.
	 */
	private static List<String> toFieldValues(final List<?> xPathResult) {
		final List<String> values = Lists.newArrayListWithCapacity(xPathResult.size());
		final Object first = xPathResult.get(0);
		if (first instanceof String) {
			for (Object value : xPathResult) {
				values.add((String) value);
			}
		} else if (first instanceof Node) {
			for (Object value : xPathResult) {
				values.add(value == null ? "" : ((Node) value).getText());
			}
		}
		return values;
	}

	public List<PublisherField> getStoreIndices() {
		return storeIndices;
	}

	public void setStoreIndices(final List<PublisherField> storeIndices) {
		this.storeIndices = storeIndices;
	}

	public String getDuplicateXPath() {
		return duplicateXPath;
	}

	public void setDuplicateXPath(final String duplicateXPath) {
		this.duplicateXPath = duplicateXPath;
	}

	public boolean isSkipDuplicates() {
		return skipDuplicates;
	}

	public void setSkipDuplicates(final boolean skipDuplicates) {
		this.skipDuplicates = skipDuplicates;
	}

}
