package org.gcube.indexmanagement.lucenewrapper;

import gr.uoa.di.madgik.grs.buffer.IBuffer.Status;
import gr.uoa.di.madgik.grs.record.GenericRecord;
import gr.uoa.di.madgik.grs.record.field.StringField;
import gr.uoa.di.madgik.grs.writer.RecordWriter;

import java.util.ArrayList;
import java.util.Calendar;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map.Entry;
import java.util.Vector;
import java.util.concurrent.TimeUnit;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
//import org.apache.lucene.search.WildcardTermEnum;
import org.apache.lucene.search.Hits;
import org.gcube.common.searchservice.searchlibrary.rswriter.RSXMLWriter;
import org.gcube.common.searchservice.searchlibrary.resultset.elements.ResultElementGeneric;
import org.gcube.common.core.utils.logging.GCUBELog;
import org.gcube.indexmanagement.common.FullTextIndexType;
import org.gcube.indexmanagement.common.IndexField;
import org.gcube.indexmanagement.common.IndexType;
import org.gcube.indexmanagement.common.XMLProfileParser;
import org.gcube.indexmanagement.common.XMLTokenReplacer;
import org.gcube.indexmanagement.resourceregistry.RRadaptor;

/**
 * A worker thread that feeds the ResultSet after a query.
 * <p>
 * For every hit it builds a record whose fields are, in order: the hit score,
 * a {@code <docStatistics>} XML fragment, the object ID, and then either the
 * full payload (when {@code isComplete} is set) or the requested projections.
 * When no ResultSet writer is supplied, the field contents are only logged.
 */
public class LuceneSearchWorker extends Thread {

    /** Timeout, in seconds, of a single attempt to put a record in the ResultSet */
    private static final long RSTIMEOUT = 10;

    /** logger */
    static GCUBELog logger = new GCUBELog(LuceneSearchWorker.class);

    /** The ResultSet writer to write to; may be null, in which case results are only logged */
    private final RecordWriter<GenericRecord> rsWriter;

    /** The hits from the Lucene query */
    private final Hits queryHits;

    /** The number of hits from the query */
    private final int numberOfHits;

    /** The QueryTerms from the Lucene query */
    private final QueryTerm[] terms;

    /** An IndexReader to be used in order to find Query statistics */
    private final IndexReader reader;

    /** The IndexType of the queried Index */
    private final FullTextIndexType idxType;

    /** Whether each result carries only the full payload instead of the projected fields */
    private final boolean isComplete;

    /** The projections requested by the query; the values are the projected field names */
    private final LinkedHashMap<String, String> projections;

    /** The fields returned for each result when the projections contain the wildcard */
    private final ArrayList<String> presentable;

    private final RRadaptor adaptor;

    /** Carries the snippet terms handed to the highlighter */
    private final QuerySnippetTermsPair querySnippetTermsPair;

    /** Produces the snippets of a document */
    private final HighlighterWrapper highlighter;

    private Integer snippetSize;

    private Integer maximumSnippetsCount;

    public void setSnippetSize(Integer snippetSize) {
        this.snippetSize = snippetSize;
    }

    public void setMaximumSnippetsCount(Integer maximumSnippetsCount) {
        this.maximumSnippetsCount = maximumSnippetsCount;
    }

    /**
     * The constructor
     * 
     * @param rsWriter -
     *            The ResultSet writer to be used by the new LuceneSearchWorker
     *            thread.
     * @param reader -
     *            An IndexReader to be used in order to find Query statistics.
     * @param terms -
     *            QueryTerms from the Lucene query.
     * @param queryHits -
     *            The hits resulting from the Lucene query
     * @param numberOfHits -
     *            The number of hits from the Lucene query
     * @param idxType -
     *            The IndexType of the queried Index.
     * @param isComplete -
     *            If true, each result carries only the full payload field
     *            instead of the projected fields.
     * @param presentable -
     *            The fields returned when the projections contain the wildcard.
     * @param projections -
     *            The requested projections; the values are the field names.
     * @param querySnippetTermsPair -
     *            Provides the snippet terms used when a snippet is requested.
     * @param adaptor -
     *            The RRadaptor; only stored by this class.
     */
    public LuceneSearchWorker(RecordWriter<GenericRecord> rsWriter, IndexReader reader,
            QueryTerm[] terms, Hits queryHits, int numberOfHits,
            org.gcube.indexmanagement.common.FullTextIndexType idxType,
            boolean isComplete, ArrayList<String> presentable, LinkedHashMap<String, String> projections,
            QuerySnippetTermsPair querySnippetTermsPair, RRadaptor adaptor) {
        this.rsWriter = rsWriter;
        this.queryHits = queryHits;
        this.numberOfHits = numberOfHits;
        this.terms = terms;
        this.reader = reader;
        this.idxType = idxType;
        this.isComplete = isComplete;
        this.projections = projections;
        this.presentable = presentable;
        this.adaptor = adaptor;
        this.querySnippetTermsPair = querySnippetTermsPair;
        this.highlighter = new HighlighterWrapper(reader);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void run() {
        try {
            logger.info("Starting lucene worker");

            // for performance statistics
            long before = System.currentTimeMillis();
            long beforeFirst = before;
            long totalTimeForPuttingResults = 0;
            long totalLucene = 0;

            int capacity = (rsWriter != null) ? rsWriter.getCapacity() : 0;

            for (int i = 0; i < numberOfHits; i++) {

                // stop as soon as the reader has stopped reading
                if (rsWriter != null && rsWriter.getStatus() != Status.Open) {
                    break;
                }

                // the fields for the current RS record
                List<gr.uoa.di.madgik.grs.record.field.Field> fields =
                    new ArrayList<gr.uoa.di.madgik.grs.record.field.Field>();

                long beforeLucene = System.currentTimeMillis();

                // field 0 is the score
                String score = "" + queryHits.score(i);
                fields.add(new StringField(score));

                int docID = queryHits.id(i);
                Document doc = queryHits.doc(i);
                String docStatistics = buildDocStatistics(doc, docID, score);

                totalLucene += System.currentTimeMillis() - beforeLucene;

                // field 1 contains the statistics
                fields.add(new StringField(docStatistics));
                // field 2 contains the objectID
                fields.add(new StringField(resolveObjectId(doc)));

                if (isComplete) {
                    // with an isComplete indication only the full payload is added
                    fields.add(new StringField(XMLTokenReplacer.XMLResolve(doc.get(IndexType.PAYLOAD_FIELD))));
                } else {
                    addProjectedFields(doc, fields);
                }

                if (rsWriter == null) {
                    // no ResultSet to feed: just log what would have been written
                    for (gr.uoa.di.madgik.grs.record.field.Field f : fields) {
                        logger.info("Field content: " + ((StringField) f).getPayload());
                    }
                    continue;
                }

                // the reader may have stopped while the record was being built
                if (rsWriter.getStatus() != Status.Open) {
                    break;
                }

                GenericRecord rec = new GenericRecord();
                rec.setFields(fields.toArray(new gr.uoa.di.madgik.grs.record.field.Field[fields.size()]));

                long beforePut = System.currentTimeMillis();
                // retry the put until it succeeds or the reader stops reading
                while (!rsWriter.put(rec, RSTIMEOUT, TimeUnit.SECONDS)) {
                    if (rsWriter.getStatus() != Status.Open) {
                        break;
                    }
                }
                long afterPut = System.currentTimeMillis();

                // store the timestamp of the first retrieval
                if (i == capacity) {
                    beforeFirst = System.currentTimeMillis();
                } else {
                    totalTimeForPuttingResults += (afterPut - beforePut);
                }
            }

            long after = System.currentTimeMillis();
            logger.info("Lucene worker finished after: " + (after - before));
            logger.info("Lucene worker, actual time from first to last: " + (after - beforeFirst));
            logger.info("Lucene worker, total time for lucene: " + totalLucene);
            logger.info("Lucene worker, total time for putting results into the RS: " + totalTimeForPuttingResults);
            logger.info("Percentage of total time for putting results TO total time from first to last: "
                    + ((double) (totalTimeForPuttingResults) / (double) (after - beforeFirst) * 100) + "%");

            closeWriter();
        } catch (Exception e) {
            logger.error("Error during search.", e);
            try {
                closeWriter();
            } catch (Exception ex) {
                logger.error("Error while closing RS writer.", ex);
            }
        }
    }

    /**
     * Closes the ResultSet writer, if there is one and it has not been disposed.
     * 
     * @throws Exception
     *             whatever the writer throws while reporting its status or closing.
     */
    private void closeWriter() throws Exception {
        if (rsWriter != null && rsWriter.getStatus() != Status.Dispose) {
            rsWriter.close();
        }
    }

    /**
     * Builds the {@code <docStatistics>} fragment of a hit: the stored word
     * count, the frequency of every distinct query term and the rank.
     * 
     * @param doc -
     *            The document of the hit.
     * @param docID -
     *            The Lucene id of the document.
     * @param score -
     *            The textual score of the hit, used as the rank.
     * @return The statistics fragment.
     * @throws java.io.IOException
     *             An error reading from the Lucene Index.
     */
    private String buildDocStatistics(Document doc, int docID, String score)
            throws java.io.IOException {
        StringBuilder stats = new StringBuilder();
        stats.append("<docStatistics wc=\"").append(doc.get("_wordcount")).append("\"><terms>\n");

        // each distinct term text is reported once, whatever field it was queried on
        List<String> addedTermStrings = new ArrayList<String>();
        for (QueryTerm term : terms) {
            String termString = term.getTerm();
            if (!addedTermStrings.contains(termString)) {
                int termCount = getFreq(doc.fields(), termString, docID);
                stats.append("<term name=\"").append(termString)
                        .append("\" tf=\"").append(termCount).append("\"/>\n");
                addedTermStrings.add(termString);
            }
        }

        stats.append("</terms><rank>").append(score).append("</rank>");
        stats.append("</docStatistics>\n");
        return stats.toString();
    }

    /**
     * Returns the object ID stored in the document (in the current situation
     * this is the Content Object ID), or "NoMetaId" when the document has none.
     */
    private String resolveObjectId(Document doc) {
        String fieldContentDocID = doc.get(IndexType.DOCID_FIELD.toLowerCase());
        return (fieldContentDocID != null) ? fieldContentDocID : "NoMetaId";
    }

    /**
     * Appends the projected fields of a document to the fields of its record.
     * <p>
     * Nothing is added without projections. With the wildcard among the
     * projections, every presentable field except the ObjectID and the full
     * payload is added (it is assumed to be the updater's responsibility to
     * make those fields returnable and stored). Otherwise exactly one field
     * per projection is added. Missing content is always added as an empty
     * String, so the number of fields per record stays stable.
     * 
     * @param doc -
     *            The document of the hit.
     * @param fields -
     *            The record fields collected so far; appended to.
     * @throws Exception
     *             an error while creating a snippet.
     */
    private void addProjectedFields(Document doc,
            List<gr.uoa.di.madgik.grs.record.field.Field> fields) throws Exception {
        if (projections == null || projections.isEmpty()) {
            // don't add any more fields
            return;
        }

        if (projections.containsValue(IndexType.WILDCARD)) {
            for (String fieldName : this.presentable) {
                if (fieldName.equalsIgnoreCase(IndexType.DOCID_FIELD)
                        || fieldName.equalsIgnoreCase(IndexType.PAYLOAD_FIELD)) {
                    continue;
                }
                String fieldContent = fieldName.equals(IndexType.SNIPPET)
                        ? createSnippet(doc, querySnippetTermsPair.snippetTerms)
                        : doc.get(fieldName);
                fields.add(new StringField(escapeOrEmpty(fieldContent)));
            }
            return;
        }

        // return only the projections
        for (String proj : projections.values()) {
            String fieldContent = null;
            if (proj.equals(IndexType.SNIPPET)) {
                fieldContent = createSnippet(doc, querySnippetTermsPair.snippetTerms);
            } else {
                IndexField idxTypeField = getFieldFromIndexType(proj);
                if (idxTypeField == null) {
                    // no returnable field for this projection: an empty String is returned
                    logger.error("The projection " + proj + ", is not part of the index Type");
                } else {
                    fieldContent = doc.get(idxTypeField.name);
                }
            }
            fields.add(new StringField(escapeOrEmpty(fieldContent)));
        }
    }

    /** Escapes the content for XML, mapping missing (null) content to an empty String. */
    private String escapeOrEmpty(String fieldContent) {
        return (fieldContent != null) ? XMLProfileParser.escapeForXML(fieldContent) : "";
    }

    /**
     * Creates the snippet text of a document for the given snippet terms.
     * 
     * @param doc -
     *            The document to take the snippets from.
     * @param snippetTerms -
     *            The terms handed to the highlighter.
     * @return The snippets of the document combined into a single String.
     * @throws Exception
     *             an error raised by the highlighter.
     */
    private String createSnippet(Document doc,
            HashMap<String, ArrayList<String>> snippetTerms) throws Exception {

        this.highlighter.setMaximumSnippetsCount(maximumSnippetsCount);
        // NOTE(review): this only re-assigns this worker's own snippetSize, so the
        // configured size never reaches the highlighter. Presumably the highlighter
        // was meant to be configured here - confirm against HighlighterWrapper.
        this.setSnippetSize(snippetSize);

        List<String> snippets = this.highlighter.getDocSnippets(doc, snippetTerms);
        return HighlighterWrapper.getSnippetString(snippets);
    }

    /**
     * Looks up, ignoring case, the index type field named by a projection.
     * 
     * @param proj -
     *            The projected field name.
     * @return The field, or null if it is unknown or not both stored and returned.
     */
    private IndexField getFieldFromIndexType(String proj) {
        for (IndexField type : idxType.getFields()) {
            if (!type.name.equalsIgnoreCase(proj)) {
                continue;
            }
            if (type.store && type.returned) {
                return type;
            }
            logger.warn("Projection: " + proj + ", isStored: "
                    + type.store + ", and isReturned: " + type.returned);
            return null;
        }
        logger.warn("Projection: " + proj + " was not found!");
        return null;
    }

    /**
     * A method used to count the occurrence frequency of a Term in a Document.
     * 
     * @param t -
     *            The Term of which to count the occurrences.
     * @param docID -
     *            The Document in which to count the Term frequency.
     * @return The requested Term frequency.
     * @throws java.io.IOException
     *             An error reading from the Lucene Index.
     */
    public int getFreq(Term t, int docID) throws java.io.IOException {
        TermDocs td = reader.termDocs(t);
        try {
            if (td.skipTo(docID) && td.doc() == docID) {
                return td.freq();
            }
            return 0;
        } finally {
            // one enumeration is opened per term and field, so release it right away
            td.close();
        }
    }

    // DocFreq... not termFreq
    // /**
    // * Returns the word count of the term t in the ...........
    // */
    // public int getWildcardFreq(Term t) throws java.io.IOException {
    // WildcardTermEnum wildcard = new WildcardTermEnum(reader, t);
    // return wildcard.docFreq();
    // }

    /**
     * A method used to count the Term frequency of the term termString for
     * multiple fields in the document(docID). Only indexed fields are counted,
     * and each field name is counted once.
     * 
     * @param fields -
     *            The fields to include in term frequency count.
     * @param termString -
     *            The term text of which to count the occurrences.
     * @param docID -
     *            The document in which to count the term frequency.
     * @return The requested Term frequency.
     * @throws java.io.IOException
     *             An error reading from the Lucene Index.
     */
    @SuppressWarnings("rawtypes")
    public int getFreq(Enumeration fields, String termString, int docID)
            throws java.io.IOException {
        int count = 0;
        List<String> countedFields = new ArrayList<String>();
        while (fields.hasMoreElements()) {
            Field field = (Field) fields.nextElement();
            String fieldName = field.name();
            if (!countedFields.contains(fieldName) && field.isIndexed()) {
                count += getFreq(new Term(fieldName, termString), docID);
                countedFields.add(fieldName);
            }
        }
        return count;
    }
}
