package org.gcube.indexmanagement.lucenewrapper;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map.Entry;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;

/**
 * A wrapper for the highlight functionality of Lucene. See <a href=
 * "http://lucene.apache.org/java/2_9_1/api/contrib-highlighter/org/apache/lucene/search/highlight/Highlighter.html"
 * >Highlighter javadoc</a>. Note: we use version 2.4.1, not version 2.9.1
 * to which the above link refers.
 * 
 * @author Alexandros Antoniadis
 * @version 1.1
 * @since 12-23-2011
 * 
 */

public class HighlighterWrapper {
	/** Fragment size handed to the {@link SimpleFragmenter}. */
	private Integer snippetSize = 50;
	/** Maximum number of fragments requested from the highlighter per field. */
	private Integer maximumSnippetsCount = 10;
	/** Reader against which parsed queries are rewritten before highlighting. */
	private IndexReader indexReader;

	/**
	 * Creates a wrapper bound to the given index reader.
	 * 
	 * @param indexReader
	 *            the reader used to rewrite highlight queries
	 */
	public HighlighterWrapper(IndexReader indexReader) {
		this.indexReader = indexReader;
	}

	/**
	 * @return the fragment size passed to the fragmenter
	 */
	public Integer getSnippetSize() {
		return snippetSize;
	}

	/**
	 * @param snippetSize
	 *            the fragment size passed to the fragmenter; must not be
	 *            <code>null</code>, since it is unboxed when snippets are
	 *            extracted
	 */
	public void setSnippetSize(Integer snippetSize) {
		this.snippetSize = snippetSize;
	}

	/**
	 * @return the maximum number of fragments requested per field
	 */
	public Integer getMaximumSnippetsCount() {
		return maximumSnippetsCount;
	}

	/**
	 * @param maximumSnippetsCount
	 *            the maximum number of fragments requested per field; must not
	 *            be <code>null</code>, since it is unboxed when snippets are
	 *            extracted
	 */
	public void setMaximumSnippetsCount(Integer maximumSnippetsCount) {
		this.maximumSnippetsCount = maximumSnippetsCount;
	}

	/**
	 * @return the reader used to rewrite highlight queries
	 */
	public IndexReader getIndexReader() {
		return indexReader;
	}

	/**
	 * @param indexReader
	 *            the reader used to rewrite highlight queries
	 */
	public void setIndexReader(IndexReader indexReader) {
		this.indexReader = indexReader;
	}

	/**
	 * Highlights chosen terms in a text, extracting the most relevant sections
	 * depending on the query
	 * 
	 * @param field
	 *            The field of the doc
	 * @param text
	 *            The text of the field
	 * @param queryString
	 *            The custom highlight query
	 * @return the highlighted text fragments produced by the highlighter,
	 *         wrapped as a fixed-size list
	 * @throws Exception
	 *             if the query cannot be parsed or rewritten, or if
	 *             highlighting fails
	 */
	private List<String> getSnippets(String field, String text, String queryString) throws Exception {
		Analyzer analyzer = new StandardAnalyzer();

		// For lucene version 3.0+ the parser would be created as:
		// QueryParser parser = new QueryParser(VERSION, DOCDATA, analyzer);
		QueryParser parser = new QueryParser(field, analyzer);
		Query query = parser.parse(queryString);
		// The scorer is given the query as rewritten against the index reader.
		Query rewrittenQuery = query.rewrite(getIndexReader());

		Highlighter highlighter = new Highlighter(new QueryScorer(rewrittenQuery));
		// The size of each snippet. Note: the lucene highlighter merges
		// contiguous snippets!
		highlighter.setTextFragmenter(new SimpleFragmenter(snippetSize));

		return Arrays.asList(highlighter.getBestFragments(analyzer, field, text, maximumSnippetsCount));
	}

	/**
	 * Highlights chosen terms of fields in a document, extracting the most
	 * relevant sections
	 * 
	 * Fields that are absent from the document, and fields whose term list is
	 * <code>null</code> or yields no query text, are skipped.
	 * 
	 * @param doc
	 *            the document that will be highlighted
	 * @param queryMap
	 *            a mapping from each field name to the terms to highlight in
	 *            that field
	 * @return the highlighted text fragments of all processed fields; never
	 *         <code>null</code>, empty if nothing was highlighted
	 * @throws Exception
	 *             if extracting the snippets of any field fails; the original
	 *             failure is kept as the cause
	 */
	public List<String> getDocSnippets(Document doc, HashMap<String, ArrayList<String>> queryMap) throws Exception {
		List<String> snippets = new ArrayList<String>();

		for (Entry<String, ArrayList<String>> e : queryMap.entrySet()) {
			String field = e.getKey();
			String text = doc.get(field);

			/* In case the field is empty ignore it */
			if (text == null) {
				continue;
			}

			String query = getQueryFromEntries(e.getValue());

			/*
			 * No terms means there is nothing to highlight for this field; skip
			 * it instead of handing the parser an empty query and failing the
			 * whole document.
			 */
			if (query.trim().length() == 0) {
				continue;
			}

			try {
				snippets.addAll(getSnippets(field, text, query));
			} catch (Exception ex) {
				throw new Exception("Error while getting snippets", ex);
			}
		}
		return snippets;
	}

	/**
	 * Transforms a list of entries into a highlight query. Simple appending
	 * with a space after each term. <code>null</code> entries are ignored so
	 * that they do not end up in the query as the literal text "null".
	 * 
	 * @param entries
	 *            the terms to combine; may be <code>null</code>
	 * @return the highlight query string; empty if <code>entries</code> is
	 *         <code>null</code> or contains no non-null term
	 */
	private String getQueryFromEntries(List<String> entries) {
		if (entries == null) {
			return "";
		}

		// Local, single-threaded use: the unsynchronized builder is sufficient.
		StringBuilder query = new StringBuilder();
		for (String entry : entries) {
			if (entry == null) {
				continue;
			}
			query.append(entry);
			query.append(" ");
		}

		return query.toString();
	}

	/**
	 * Transforms a list of snippets into a formatted string
	 * 
	 * The snippets are separated and surrounded by "...", e.g. two snippets
	 * <code>a</code> and <code>b</code> give <code>...a...b...</code>.
	 * 
	 * @param snippets
	 *            The list of the snippets; may be <code>null</code>
	 * @return a string that contains all the snippets concatenated; empty if
	 *         <code>snippets</code> is <code>null</code> or empty
	 */
	public static String getSnippetString(List<String> snippets) {
		if (snippets == null || snippets.isEmpty()) {
			return "";
		}

		StringBuilder formatted = new StringBuilder("...");
		for (String snippet : snippets) {
			formatted.append(snippet);
			formatted.append("...");
		}

		return formatted.toString();
	}

}
