package eu.dnetlib.data.mapreduce.hbase.oai;

import java.io.IOException;
import java.net.UnknownHostException;
import java.text.ParseException;
import java.util.Collection;
import java.util.Date;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

import com.google.common.base.Function;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Multimap;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;
import com.mongodb.WriteConcern;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import eu.dnetlib.data.mapreduce.JobParams;
import eu.dnetlib.data.mapreduce.hbase.oai.config.OAIConfiguration;
import eu.dnetlib.data.mapreduce.hbase.oai.config.OAIConfigurationReader;
import eu.dnetlib.data.mapreduce.hbase.oai.config.OAIConfigurationStringReader;
import eu.dnetlib.data.mapreduce.hbase.oai.utils.MongoSetCollection;
import eu.dnetlib.data.mapreduce.hbase.oai.utils.PublisherField;
import eu.dnetlib.data.mapreduce.hbase.oai.utils.RecordFieldsExtractor;
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder;
import eu.dnetlib.data.proto.TypeProtos.Type;
import eu.dnetlib.miscutils.functional.xml.IndentXmlString;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.time.DateUtils;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.bson.types.Binary;

/**
 * Hadoop mapper that feeds records into the MongoDB collection backing the OAI publisher.
 * <p>
 * For every input pair the key is decoded as a row key and the value is taken as the record body. Records of type
 * {@code person} are only counted; for all the others the configured fields are extracted from the body, the body is
 * zipped and a document is inserted in the target collection. Records that cannot be processed (blank body, missing
 * identifier, compression failure) are inserted in a parallel {@code discarded-<collection>} collection.
 * <p>
 * The mapper emits no output pairs: its results are the MongoDB inserts and the Hadoop counters of the "oai" group.
 */
public class OaiFeedMapper extends Mapper<Text, Text, NullWritable, NullWritable> {

	enum RecordStatus {
		NEW, UPDATED, UNCHANGED;
	}

	/** Name of the job property holding the name of the target collection. */
	private static final String COLLECTION_PROPERTY = "services.publisher.oai.collection";

	/**
	 * Pattern of the feed date received in the job configuration. The hour uses {@code HH} (0-23): with {@code hh}
	 * (1-12) an hour value of 12 is read as midnight.
	 */
	private static final String FEED_DATE_PATTERN = "yyyy-MM-dd'T'HH:mm:ss'Z'";

	/** Charset used to serialize the record body before zipping it. */
	private static final String BODY_CHARSET = "UTF-8";

	private MongoCollection<DBObject> collection;
	private MongoCollection<DBObject> discardedCollection;
	private OAIConfigurationStringReader oaiConfigurationReader;
	private OAIConfiguration oaiConfiguration;

	private Date feedDate;

	private MongoSetCollection mongoSetCollection;

	private RecordFieldsExtractor extractor;

	// these are set in the setup
	private String format;
	private String interpretation;
	private String layout;
	private Map<String, PublisherField> fieldsToIndex = Maps.newHashMap();

	private String duplicateXPath;
	private boolean skipDuplicates;

	private MongoClient mongo;

	private Collection<String> enrichmentXPaths;

	/**
	 * Reads the job configuration, opens the MongoDB client and prepares the field extractor.
	 *
	 * @param context
	 *            the mapper context providing the job configuration
	 * @throws UnknownHostException
	 *             declared for backward compatibility with existing callers
	 * @throws IllegalArgumentException
	 *             if the collection name is missing or is not made of at least three '-' separated tokens
	 * @throws RuntimeException
	 *             if the feed date does not match the expected pattern
	 */
	@Override
	protected void setup(final Context context) throws UnknownHostException {

		String host = context.getConfiguration().get("services.publisher.oai.host");
		String port = context.getConfiguration().get("services.publisher.oai.port");
		String db = context.getConfiguration().get("services.publisher.oai.db");
		String collectionName = context.getConfiguration().get(COLLECTION_PROPERTY);

		System.out.println("Mongodb client params");
		System.out.println("host: " + host);
		System.out.println("port: " + port);
		System.out.println("db: " + db);
		System.out.println("collection: " + collectionName);

		// the collection name encodes the metadata prefix as <format>-<layout>-<interpretation>
		if (StringUtils.isBlank(collectionName)) {
			throw new IllegalArgumentException("Missing job property " + COLLECTION_PROPERTY);
		}
		String[] formatLayoutInterp = collectionName.split("-");
		if (formatLayoutInterp.length < 3) {
			throw new IllegalArgumentException("Invalid value for " + COLLECTION_PROPERTY + ": '" + collectionName
					+ "', expected <format>-<layout>-<interpretation>");
		}
		format = formatLayoutInterp[0];
		layout = formatLayoutInterp[1];
		interpretation = formatLayoutInterp[2];

		String oaiConfigurationProfile = context.getConfiguration().get("oaiConfiguration");
		System.out.println("oaiConfiguration:\n" + IndentXmlString.apply(oaiConfigurationProfile));
		oaiConfigurationReader = new OAIConfigurationStringReader(oaiConfigurationProfile);
		oaiConfiguration = oaiConfigurationReader.getOaiConfiguration();

		System.out.println("parsed configuration:" + oaiConfiguration.toString());

		mongo = new MongoClient(host, Integer.parseInt(port));
		MongoDatabase mongoDB = mongo.getDatabase(db);
		// both collections use unacknowledged writes: inserts do not wait for a server reply
		collection = mongoDB.getCollection(collectionName, DBObject.class).withWriteConcern(WriteConcern.UNACKNOWLEDGED);
		discardedCollection = mongoDB.getCollection("discarded-" + collectionName, DBObject.class).withWriteConcern(WriteConcern.UNACKNOWLEDGED);
		mongoSetCollection = new MongoSetCollection(mongo);

		duplicateXPath = context.getConfiguration().get("services.publisher.oai.duplicateXPath");
		skipDuplicates = Boolean.parseBoolean(context.getConfiguration().get("services.publisher.oai.skipDuplicates"));

		enrichmentXPaths = oaiConfiguration.getEnrichmentXPathsFor(format, layout, interpretation);
		Collection<PublisherField> indexFields = oaiConfiguration.getFieldsFor(format, layout, interpretation);
		extractor = new RecordFieldsExtractor(Lists.newArrayList(indexFields));
		extractor.setDuplicateXPath(duplicateXPath);
		extractor.setSkipDuplicates(skipDuplicates);

		for (PublisherField field : indexFields) {
			fieldsToIndex.put(field.getFieldName(), field);
		}

		String feedDateString = context.getConfiguration().get(JobParams.OAI_FEED_DATE);
		try {
			// NOTE(review): the trailing 'Z' is matched as a literal, hence the date is parsed in the JVM default
			// time zone - confirm that this is what the producer of the feed date expects.
			feedDate = DateUtils.parseDate(feedDateString, new String[] { FEED_DATE_PATTERN });
		} catch (ParseException e) {
			e.printStackTrace(System.err);
			throw new RuntimeException(e);
		}

	}

	/**
	 * Processes one record: persons are counted and skipped, blank bodies are discarded, any other record is stored
	 * if {@link #checkRecordFields(Multimap, Context, String, String)} accepts its extracted fields.
	 *
	 * @param key
	 *            the row key of the record
	 * @param value
	 *            the body of the record
	 * @param context
	 *            the mapper context, used here for the counters
	 */
	@Override
	protected void map(final Text key, final Text value, final Context context) throws IOException, InterruptedException {

		final String recordKey = key.toString();
		final Type entityType = OafRowKeyDecoder.decode(recordKey).getType();
		switch (entityType) {
		case person:
			context.getCounter("oai", "discardedPerson").increment(1);
			break;
		default:
			final String recordBody = value.toString();
			if (StringUtils.isBlank(recordBody)) {
				discard(context, recordKey, recordBody, "blank body");
			} else {
				final Multimap<String, String> recordFields = extractor.extractFields(recordBody, enrichmentXPaths);
				if (checkRecordFields(recordFields, context, recordKey, recordBody)) {
					// checkRecordFields guarantees that the identifier field has at least one value
					final String id = recordFields.get(OAIConfigurationReader.ID_FIELD).iterator().next();
					handleRecord(context, getOAIIdentifier(id), recordBody, recordFields);
				}
			}
			break;
		}
	}

	/**
	 * Tells whether a record can be stored, updating counters and the discarded collection when it cannot.
	 * <p>
	 * A record is accepted only if its fields have been extracted, it is not a duplicate to be skipped and it has a
	 * value for the identifier field. The identifier is verified for duplicates too, so that an accepted record
	 * always provides it.
	 *
	 * @param recordFields
	 *            the fields extracted from the record, possibly null
	 * @param context
	 *            the mapper context, used here for the counters
	 * @param recordKey
	 *            the row key of the record
	 * @param recordBody
	 *            the body of the record
	 * @return true if the record can be stored, false otherwise
	 */
	public boolean checkRecordFields(final Multimap<String, String> recordFields, final Context context, final String recordKey, final String recordBody) {
		if (recordFields == null) {
			context.getCounter("oai", "invalid").increment(1);
			return false;
		}
		if (skipDuplicates && recordFields.containsEntry("duplicate", "true")) {
			context.getCounter("oai", "discardedDuplicate").increment(1);
			return false;
		}
		if (!recordFields.containsKey(OAIConfigurationReader.ID_FIELD)) {
			discard(context, recordKey, recordBody, "missing " + OAIConfigurationReader.ID_FIELD);
			return false;
		}
		return true;
	}

	/**
	 * Builds the document for the record and inserts it in the target collection, stamped with the feed date.
	 */
	private void handleRecord(final Context context, final String oaiID, final String record, final Multimap<String, String> recordProperties) {
		final DBObject obj = this.createBasicObject(oaiID, record, recordProperties, context);
		if (obj == null) {
			// the compression did not succeed: the counter has been updated by createCompressRecord
			return;
		}
		obj.put(OAIConfigurationReader.LAST_COLLECTION_DATE_FIELD, feedDate);
		obj.put(OAIConfigurationReader.DATESTAMP_FIELD, feedDate);
		obj.put(OAIConfigurationReader.UPDATED_FIELD, false);
		collection.insertOne(obj);
		context.getCounter("oai", "total").increment(1);
	}

	/**
	 * Increments the counter named after the given reason and stores key and body in the discarded collection.
	 */
	private void discard(final Context context, final String recordKey, final String recordBody, final String reason) {
		context.getCounter("oai", reason).increment(1);
		discardedCollection.insertOne(new BasicDBObject("id", recordKey).append(OAIConfigurationReader.BODY_FIELD, recordBody));
	}

	/**
	 * Builds the OAI identifier as {@code <scheme>:<namespace>:<id>}.
	 */
	private String getOAIIdentifier(final String id) {
		return oaiConfiguration.getIdScheme() + ":" + oaiConfiguration.getIdNamespace() + ":" + id;
	}

	/**
	 * Creates the document to store for a record.
	 * <p>
	 * The identifier field is set to the OAI identifier, set specs are normalized, fields configured as not
	 * repeatable keep only their first value and any other field keeps all of its values. The zipped body and the
	 * deleted flag (false) are added last.
	 *
	 * @param oaiID
	 *            the OAI identifier of the record
	 * @param record
	 *            the body of the record
	 * @param recordProperties
	 *            the fields extracted from the record
	 * @param context
	 *            the mapper context, used here for the counters
	 * @return the document, or null if the body could not be compressed
	 */
	protected DBObject createBasicObject(final String oaiID, final String record, final Multimap<String, String> recordProperties, final Context context) {
		final DBObject obj = new BasicDBObject();
		for (final String key : recordProperties.keySet()) {
			if (key.equals(OAIConfigurationReader.ID_FIELD)) {
				obj.put(key, oaiID);
				continue;
			}
			final Collection<String> values = recordProperties.get(key);
			if (key.equals(OAIConfigurationReader.SET_FIELD)) {
				final Iterable<String> setSpecs = Iterables.transform(values, new Function<String, String>() {

					@Override
					public String apply(final String s) {
						return mongoSetCollection.normalizeSetSpec(s);
					}

				});
				obj.put(key, setSpecs);
				continue;
			}
			final PublisherField keyField = fieldsToIndex.get(key);
			if (keyField == null) {
				context.getCounter("oai", key + " found for record but not in configuration. Assuming it is repeatable.").increment(1);
			}
			// let's check if the key is the name of a repeatable field or not
			final boolean singleValued = (keyField != null) && !keyField.isRepeatable();
			if (!singleValued) {
				obj.put(key, values);
			} else if ((values != null) && !values.isEmpty()) {
				obj.put(key, values.iterator().next());
			}
		}

		final Binary compressedRecordBody = createCompressRecord(context, oaiID, record);
		if (compressedRecordBody == null) {
			return null;
		}
		obj.put(OAIConfigurationReader.BODY_FIELD, compressedRecordBody);
		obj.put(OAIConfigurationReader.DELETED_FIELD, false);
		return obj;
	}

	/**
	 * Zips the record body into a single-entry archive.
	 * <p>
	 * The body is serialized as UTF-8, so that the stored bytes do not depend on the default charset of the node
	 * running the task.
	 *
	 * @param context
	 *            the mapper context, used here for the counters
	 * @param recordKey
	 *            the key to use if the record has to be discarded
	 * @param recordBody
	 *            the body to compress
	 * @return the zipped body, or null if the compression failed (the record is discarded in that case)
	 */
	public Binary createCompressRecord(final Context context, final String recordKey, final String recordBody) {
		try {
			final ByteArrayOutputStream os = new ByteArrayOutputStream();
			final ZipOutputStream zos = new ZipOutputStream(os);
			zos.putNextEntry(new ZipEntry(OAIConfigurationReader.BODY_FIELD));
			zos.write(recordBody.getBytes(BODY_CHARSET));
			zos.closeEntry();
			zos.flush();
			zos.close();
			return new Binary(os.toByteArray());
		} catch (IOException e) {
			discard(context, recordKey, recordBody, "cannot compress");
			return null;
		}
	}

	/**
	 * Closes the MongoDB client opened by {@link #setup(Context)}, if any.
	 */
	@Override
	protected void cleanup(final Context context) throws IOException, InterruptedException {
		try {
			// null when setup was not run, e.g. when the collections have been injected via the setters
			if (mongo != null) {
				mongo.close();
			}
		} finally {
			super.cleanup(context);
		}
	}

	public MongoCollection<DBObject> getCollection() {
		return collection;
	}

	public void setCollection(final MongoCollection<DBObject> collection) {
		this.collection = collection;
	}

	public MongoCollection<DBObject> getDiscardedCollection() {
		return discardedCollection;
	}

	public void setDiscardedCollection(final MongoCollection<DBObject> discardedCollection) {
		this.discardedCollection = discardedCollection;
	}

	public OAIConfigurationStringReader getOaiConfigurationReader() {
		return oaiConfigurationReader;
	}

	public void setOaiConfigurationReader(final OAIConfigurationStringReader oaiConfigurationReader) {
		this.oaiConfigurationReader = oaiConfigurationReader;
	}

	public OAIConfiguration getOaiConfiguration() {
		return oaiConfiguration;
	}

	public void setOaiConfiguration(final OAIConfiguration oaiConfiguration) {
		this.oaiConfiguration = oaiConfiguration;
	}

	public Date getFeedDate() {
		return feedDate;
	}

	public void setFeedDate(final Date feedDate) {
		this.feedDate = feedDate;
	}

	public MongoSetCollection getMongoSetCollection() {
		return mongoSetCollection;
	}

	public void setMongoSetCollection(final MongoSetCollection mongoSetCollection) {
		this.mongoSetCollection = mongoSetCollection;
	}

	public String getDuplicateXPath() {
		return duplicateXPath;
	}

	public void setDuplicateXPath(final String duplicateXPath) {
		this.duplicateXPath = duplicateXPath;
	}

	public boolean isSkipDuplicates() {
		return skipDuplicates;
	}

	public void setSkipDuplicates(final boolean skipDuplicates) {
		this.skipDuplicates = skipDuplicates;
	}

}
