package eu.dnetlib.data.transform;

import java.io.StringReader;
import java.util.Locale;

import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang.StringUtils;
import org.apache.solr.common.SolrInputDocument;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import com.google.protobuf.GeneratedMessage;

import eu.dnetlib.pace.config.Type;

/**
 * Maps protocol buffer messages to {@link SolrInputDocument}s, driven by an XML field definition.
 * <p>
 * Every {@code FIELD} element of the definition that carries a non-empty {@code path} attribute becomes one Solr field, named after
 * the lower-cased, trimmed {@code name} attribute. A handful of technical fields (record identifier, datasource id and version,
 * the base64-encoded message, the action set) are added on top.
 */
public class SolrProtoMapper extends AbstractProtoMapper {

	/** Separator placed between the object identifier and the action set id in the index record identifier. */
	private static final String ID_SEPARATOR = "::";

	/** Separator between alternative paths in a {@code path} attribute; also the prefix separator stripped by {@link #patchId(String)}. */
	private static final String PIPE = "|";

	/** Solr field holding the object identifier (lower-cased form of the mandatory {@code objIdentifier} field). */
	private static final String OBJ_IDENTIFIER_FIELD = "objidentifier";

	/** The parsed field definition. */
	private final Document fields;

	/**
	 * Instantiates a new mapper from an XML field definition.
	 *
	 * @param fields
	 *            the XML field definition; must declare a {@code FIELD} whose name is {@code objIdentifier}
	 * @throws DocumentException
	 *             if the definition cannot be parsed as XML
	 * @throws IllegalArgumentException
	 *             if the definition does not declare the {@code objIdentifier} field
	 */
	public SolrProtoMapper(final String fields) throws DocumentException {
		this.fields = parse(fields);

		if (StringUtils.isBlank(this.fields.valueOf("//FIELD[@name = 'objIdentifier']/@name"))) {
			throw new IllegalArgumentException("field objIdentifier is mandatory");
		}
	}

	/**
	 * Maps the given message to a Solr input document.
	 *
	 * @param proto
	 *            the message to index
	 * @param version
	 *            the version, stored in {@code __dsversion}
	 * @param dsId
	 *            the datasource id, stored in {@code __dsid}
	 * @param actionSetId
	 *            the action set id, stored in {@code actionset} and used as suffix of the index record identifier
	 * @return the solr input document
	 * @throws DocumentException
	 *             the document exception
	 * @throws NullPointerException
	 *             if the message yields no value for the {@code objidentifier} field
	 */
	public SolrInputDocument map(final GeneratedMessage proto, final String version, final String dsId, final String actionSetId) throws DocumentException {

		final SolrInputDocument doc = new SolrInputDocument();

		for (final Object o : fields.selectNodes("//FIELD[string(@path)]")) {
			final Element e = (Element) o;

			// Locale.ROOT keeps the field names stable regardless of the JVM default locale: with a Turkish locale
			// "objIdentifier" would otherwise become "objıdentifier" and the lookup below would miss it.
			final String name = e.attribute("name").getValue().toLowerCase(Locale.ROOT).trim();
			final String path = e.attribute("path").getValue();

			// the path attribute may list several alternatives separated by '|'
			doc.setField(name, processMultiPath(proto, Lists.newLinkedList(Splitter.on(PIPE).trimResults().split(path)), Type.String));
		}

		final String rawIdentifier = (String) doc.getFieldValue(OBJ_IDENTIFIER_FIELD);
		if (rawIdentifier == null) {
			// fail with an explicit message instead of an anonymous NPE deep inside patchId
			throw new NullPointerException("no value found for mandatory field " + OBJ_IDENTIFIER_FIELD);
		}

		final String objIdentifier = patchId(rawIdentifier);
		doc.setField(OBJ_IDENTIFIER_FIELD, objIdentifier);
		doc.setField("__indexrecordidentifier", getRecordId(objIdentifier, actionSetId));
		doc.setField("__dsid", dsId);
		doc.setField("__dsversion", version);
		doc.setField("__result", Base64.encodeBase64String(proto.toByteArray()));
		doc.setField("actionset", actionSetId);

		return doc;
	}

	/**
	 * Builds the index record identifier by joining the object identifier and the action set id with {@code "::"}.
	 *
	 * @param objIdentifier
	 *            the (already patched) object identifier
	 * @param actionSetId
	 *            the action set id
	 * @return the index record identifier
	 */
	public String getRecordId(final String objIdentifier, final String actionSetId) {
		return objIdentifier + ID_SEPARATOR + actionSetId;
	}

	/**
	 * Patch the objidentifier: when it comes from HBase, i.e. contains the separator '|', returns the string that follows the first
	 * separator; otherwise the identifier is returned unchanged.
	 *
	 * @param objidentifier
	 *            the objidentifier, must not be null
	 * @return the patched identifier
	 */
	private String patchId(final String objidentifier) {
		return objidentifier.contains(PIPE) ? StringUtils.substringAfter(objidentifier, PIPE) : objidentifier;
	}

	/**
	 * Parses the given string as an XML document.
	 * <p>
	 * NOTE(review): the reader is created with its default settings; if field definitions can ever come from an untrusted source,
	 * consider disabling DTDs and external entities here.
	 *
	 * @param s
	 *            the XML text
	 * @return the parsed document
	 * @throws DocumentException
	 *             if the text cannot be parsed
	 */
	private Document parse(final String s) throws DocumentException {
		return new SAXReader().read(new StringReader(s));
	}

}
