package eu.dnetlib.data.mapreduce.hbase.dataimport;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.zip.Inflater;

import com.google.common.collect.Lists;
import com.google.gson.Gson;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import eu.dnetlib.actionmanager.actions.ActionFactory;
import eu.dnetlib.actionmanager.actions.AtomicAction;
import eu.dnetlib.actionmanager.common.Agent;
import eu.dnetlib.data.mapreduce.hbase.Reporter;
import eu.dnetlib.data.mapreduce.util.StreamUtils;
import eu.dnetlib.data.proto.*;
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions;
import eu.dnetlib.miscutils.collections.Pair;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;

import static eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility.*;
import static eu.dnetlib.data.proto.ResultOrganizationProtos.ResultOrganization;

public class DOIBoostToActions {

	// Provenance names found in DOIBoost records. Their lower-cased form is the key of the
	// datasources map declared in this class.
	public static final String MAG = "MAG";
	public static final String ORCID = "ORCID";
	public static final String CROSSREF = "Crossref";
	public static final String UNPAYWALL = "UnpayWall";

	// Identifier schema names used as keys of affiliationPIDType (looked up with the "schema" field
	// of an affiliation identifier). NOTE(review): "wikpedia" looks like a misspelling of "wikipedia",
	// but the value is matched against incoming data, so confirm with the dump before changing it.
	public static final String GRID_AC = "grid.ac";
	public static final String WIKPEDIA = "wikpedia";

	// Namespace prefixes used when composing entity identifiers ("<prefix>::<md5>").
	public final static String doiBoostNSPREFIX = "doiboost____";
	public static final String OPENAIRE_PREFIX = "openaire____";

	// Separator between the namespace prefix and the local part of an identifier.
	public static final String SEPARATOR = "::";
	// Scheme id/name of the language qualifier set on every result.
	public static final String DNET_LANGUAGES = "dnet:languages";

	// JSON date fields inspected, in this order, by getFirstValidDate to pick the date of acceptance.
	private static final List<String> DATE_TYPES = Lists.newArrayList("issued", "accepted", "published-online", "published-print");



	/**
	 * Known DOIBoost provenances, keyed by lower-cased provenance name.
	 * Each pair holds the datasource display name (key) and its "openaire____::..." identifier (value).
	 */
	private static Map<String, Pair<String, String>> datasources = initKnownDatasources();

	/** Builds the provenance-to-datasource lookup table. */
	private static Map<String, Pair<String, String>> initKnownDatasources() {
		final String idPrefix = OPENAIRE_PREFIX + SEPARATOR;
		final Map<String, Pair<String, String>> known = new HashMap<>();
		known.put(MAG.toLowerCase(), new Pair<>("Microsoft Academic Graph", idPrefix + "microsoft"));
		known.put(ORCID.toLowerCase(), new Pair<>(ORCID, idPrefix + "orcid"));
		known.put(CROSSREF.toLowerCase(), new Pair<>(CROSSREF, idPrefix + "crossref"));
		known.put(UNPAYWALL.toLowerCase(), new Pair<>(UNPAYWALL, idPrefix + "unpaywall"));
		return known;
	}

	/**
	 * Decodes a Base64 string and inflates the resulting bytes into text.
	 *
	 * @param abstractCompressed Base64 encoding of a deflate-compressed abstract
	 * @return the decompressed abstract, decoded as UTF-8
	 * @throws RuntimeException wrapping the underlying failure when the input is null, is not valid
	 *                          compressed data, or is empty/truncated
	 */
	private static String decompressAbstract(final String abstractCompressed) {
		final Inflater decompresser = new Inflater();
		try {
			final byte[] byteArray = Base64.decodeBase64(abstractCompressed.getBytes(java.nio.charset.StandardCharsets.UTF_8));
			decompresser.setInput(byteArray);
			final ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length);
			final byte[] buffer = new byte[8192];
			while (!decompresser.finished()) {
				final int size = decompresser.inflate(buffer);
				// inflate() returning 0 without reaching the end of the stream means the inflater is
				// waiting for more input or for a preset dictionary: with a fixed input this would
				// otherwise spin forever (e.g. on an empty or truncated abstract).
				if (size == 0 && (decompresser.needsInput() || decompresser.needsDictionary())) {
					throw new IllegalStateException("Truncated or unsupported compressed abstract");
				}
				bos.write(buffer, 0, size);
			}
			return new String(bos.toByteArray(), java.nio.charset.StandardCharsets.UTF_8);
		} catch (Exception e) {
			System.out.println("Wrong abstract:" + abstractCompressed);
			throw new RuntimeException(e);
		} finally {
			// Releases the native zlib resources also when decompression fails.
			decompresser.end();
		}
	}

	/** Scheme id/name of the qualifiers describing persistent identifier types. */
	public static final String PID_TYPES = "dnet:pid_types";

	/** Qualifier to attach to an affiliation identifier, keyed by identifier kind (MAG or schema name). */
	private static Map<String, FieldTypeProtos.Qualifier> affiliationPIDType = initAffiliationPidTypes();

	/** Builds the identifier-kind-to-qualifier lookup table. */
	private static Map<String, FieldTypeProtos.Qualifier> initAffiliationPidTypes() {
		final FieldTypeProtos.Qualifier magQualifier = FieldTypeProtos.Qualifier.newBuilder()
				.setClassid("mag_id")
				.setClassname("Microsoft Academic Graph Identifier")
				.setSchemeid(PID_TYPES)
				.setSchemename(PID_TYPES)
				.build();

		final Map<String, FieldTypeProtos.Qualifier> byKind = new HashMap<>();
		byKind.put(MAG, magQualifier);
		byKind.put(GRID_AC, getQualifier("grid", PID_TYPES));
		byKind.put(WIKPEDIA, getQualifier("urn", PID_TYPES));
		return byKind;
	}

	static Map<String, Map<String, String>> typologiesMapping;

	static {
		try {
			final InputStream is = DOIBoostToActions.class.getResourceAsStream("/eu/dnetlib/data/mapreduce/hbase/dataimport/mapping_typologies.json");
			final String tt = IOUtils.toString(is);
			typologiesMapping = new Gson().fromJson(tt, Map.class);
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Decides whether a DOIBoost record can be turned into a publication, incrementing a counter of
	 * the "filtered" group (and, for authors, of the "invalid_author" group) for each rejection.
	 * A record is rejected when it has no DOI, an unmapped type, a known test publisher,
	 * no valid author name, or no usable title.
	 *
	 * @param rootElement the JSON record
	 * @param context     counter sink; must not be null
	 * @return true when the record passes all the checks
	 */
	protected static boolean isValid(final JsonObject rootElement, final Reporter context) {

		final String doi = getStringValue(rootElement, "doi");
		if (doi == null) {
			context.incrementCounter("filtered", "no_doi", 1);
			return false;
		}
		final String type = getStringValue(rootElement, "type");
		if (!typologiesMapping.containsKey(type)) {
			context.incrementCounter("filtered", "unknowntype_" + type, 1);
			return false;
		}
		// fixes #4360 (test publisher)
		final String publisher = getStringValue(rootElement, "publisher");
		if (StringUtils.isNotBlank(publisher) && (publisher.equalsIgnoreCase("Test accounts") || publisher.equalsIgnoreCase("CrossRef Test Account"))) {
			context.incrementCounter("filtered", "test_publisher", 1);
			return false;
		}

		boolean hasAuthors = false;
		for (JsonObject author : getArrayObjects(rootElement, "authors")) {
			final String given = getStringValue(author, "given");
			final String family = getStringValue(author, "family");
			String fullname = getStringValue(author, "fullname");
			if (StringUtils.isBlank(fullname) && StringUtils.isNotBlank(given) && StringUtils.isNotBlank(family)) {
				fullname = String.format("%s %s", given, family);
			}
			// fixes #4368
			// Literal-first comparisons: fullname is null for an author without any name and
			// publisher is null when the record has none; both used to raise a NullPointerException.
			if ("Addie Jackson".equalsIgnoreCase(fullname) && "Elsevier BV".equalsIgnoreCase(publisher)) {
				context.incrementCounter("invalid_author", "addiejackson", 1);
				context.incrementCounter("filtered", "invalid_authors", 1);
				return false;
			}
			if (isValidAuthorName(fullname, context)) hasAuthors = true;
		}

		if (!hasAuthors) {
			context.incrementCounter("filtered", "invalid_authors", 1);
			return false;
		}
		// fixes #4360
		if (getCleanedTitles(rootElement).isEmpty()) {
			context.incrementCounter("filtered", "invalid_title", 1);
			return false;
		}

		return true;
	}

	/**
	 * Returns the values of the "title" array that are not blank and are not the
	 * "[NO TITLE AVAILABLE]" placeholder (compared ignoring case).
	 */
	private static List<String> getCleanedTitles(final JsonObject rootElement) {
		final List<String> usable = new ArrayList<>();
		for (final String candidate : getArrayValues(rootElement, "title")) {
			if (StringUtils.isBlank(candidate) || candidate.equalsIgnoreCase("[NO TITLE AVAILABLE]")) {
				continue;
			}
			usable.add(candidate);
		}
		return usable;
	}

	/**
	 * Tells whether an author full name is usable: not blank and, ignoring case, not one of the
	 * known placeholder values.
	 *
	 * @param fullName the name to check, may be null
	 * @param context  optional counter sink; when not null the rejection is counted in the
	 *                 "invalid_author" group
	 * @return false for blank or placeholder names, true otherwise
	 */
	private static boolean isValidAuthorName(final String fullName, final Reporter context) {
		final boolean report = context != null;
		if (StringUtils.isBlank(fullName)) {
			if (report) context.incrementCounter("invalid_author", "blank", 1);
			return false;
		}
		// fixes #4391 and subtasks related to DOIBoost
		final List<String> placeholders = Arrays.asList(
				",",
				"none none",
				"none, none",
				"none &na;",
				"(:null)",
				"test test test",
				"test test",
				"test",
				"&na; &na;");
		if (placeholders.contains(StringUtils.lowerCase(fullName))) {
			if (report) context.incrementCounter("invalid_author", "value_" + fullName, 1);
			return false;
		}
		return true;
	}

	/**
	 * Converts one DOIBoost JSON record into the atomic actions storing the publication, the
	 * organizations its authors are affiliated with, and the relations among them.
	 *
	 * @param rootElement      the DOIBoost JSON record
	 * @param factory          factory used to create the atomic actions
	 * @param setName          action set the actions belong to
	 * @param agent            agent the actions are attributed to
	 * @param invisible        value of the "invisible" flag of the publication data info
	 * @param onlyOrganization when true only organization actions (and grid similarity relations) are
	 *                         produced: the publication and its affiliation relations are skipped
	 * @param context          counter sink; must not be null
	 * @return the list of actions, or null when the record is filtered out (it fails isValid,
	 *         it has no valid author, or none of its dates is valid)
	 */
	public static List<AtomicAction> generatePublicationActionsFromDump(final JsonObject rootElement,
			final ActionFactory factory,
			final String setName,
			final Agent agent,
			boolean invisible,
			final boolean onlyOrganization,
			final Reporter context) {

		if (!isValid(rootElement, context)) return null;

		// OAF envelope of kind "entity"
		final OafProtos.Oaf.Builder oaf = OafProtos.Oaf.newBuilder();
		oaf.setDataInfo(FieldTypeProtos.DataInfo.newBuilder()
				.setInvisible(invisible)
				.setDeletedbyinference(false)
				.setInferred(false)
				.setTrust("0.9")
				.setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
				.build());
		oaf.setKind(KindProtos.Kind.entity);

		// Result entity
		final OafProtos.OafEntity.Builder entity = OafProtos.OafEntity.newBuilder().setType(TypeProtos.Type.result);

		// NOTE(review): the collection date is hard-coded; confirm it should not come from the dump.
		entity.setDateofcollection("2019-02-15");

		if (hasJSONArrayField(rootElement, "collectedFrom")) {
			StreamUtils.toStream(rootElement.getAsJsonArray("collectedFrom").iterator())
					.map(JsonElement::getAsString)
					.map(provenance -> collectedFromProvenance(provenance, context))
					.filter(Objects::nonNull)
					.forEach(collectedFrom -> entity.addCollectedfrom(collectedFrom));
		}

		// Identifiers: isValid guarantees the DOI is present
		final String doi = getStringValue(rootElement, "doi");
		entity.addOriginalId(doi);

		final String sourceId = String.format("50|%s" + SEPARATOR + "%s", doiBoostNSPREFIX, AbstractDNetXsltFunctions.md5(doi));
		entity.setId(sourceId);

		entity.addPid(FieldTypeProtos.StructuredProperty.newBuilder()
				.setValue(doi)
				.setQualifier(getQualifier("doi", PID_TYPES))
				.build());

		final ResultProtos.Result.Builder result = ResultProtos.Result.newBuilder();

		// isValid guarantees the type is mapped
		final String type = getStringValue(rootElement, "type");
		final String typeValue = typologiesMapping.get(type).get("value");
		final String cobjValue = typologiesMapping.get(type).get("cobj");

		// TODO: workaround for #4362: remove it when UnpayWall is correctly mapped
		final List<JsonObject> unpaywallLicenses = getArrayObjects(rootElement, "license").stream()
				.filter(license -> UNPAYWALL.equalsIgnoreCase(getStringValue(license, "provenance")))
				.collect(Collectors.toList());

		Stream.concat(unpaywallLicenses.stream(), getArrayObjects(rootElement, "instances").stream())
				.map(it -> instanceFromJson(it, cobjValue, typeValue, context))
				.forEach(result::addInstance);

		// The DOI URL is exposed as an additional instance collected from Crossref
		final String doiURL = getStringValue(rootElement, "doi-url");
		// A JSON null "hostedByOpenAire" is treated as absent instead of failing the cast.
		final JsonObject hostedByOpenAire =
				rootElement.has("hostedByOpenAire") && rootElement.get("hostedByOpenAire").isJsonObject()
						? rootElement.getAsJsonObject("hostedByOpenAire")
						: null;
		final String publisher = getStringValue(rootElement, "publisher");
		if (StringUtils.isNotBlank(doiURL)) {
			final ResultProtos.Result.Instance.Builder instance = ResultProtos.Result.Instance.newBuilder();
			instance.addUrl(doiURL);
			instance.setInstancetype(publicationResourceQualifier(cobjValue, typeValue));

			//#4362: if the publisher is Scielo, then the result is OPEN
			final boolean scielo = publisher != null && publisher.equalsIgnoreCase("FapUNIFESP (SciELO)");
			instance.setAccessright(accessModeQualifier(scielo ? "OPEN" : "RESTRICTED", scielo ? "Open Access" : "Restricted"));
			instance.setCollectedfrom(FieldTypeProtos.KeyValue.newBuilder()
					.setValue(CROSSREF)
					.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions.md5("crossref"))
					.build());

			if (hostedByOpenAire == null) {
				instance.setHostedby(unknownRepositoryHost());
			} else {
				instance.setHostedby(FieldTypeProtos.KeyValue.newBuilder()
						.setKey(AbstractDNetXsltFunctions.oafSplitId("datasource", hostedByOpenAire.get("id").getAsString()))
						.setValue(hostedByOpenAire.get("name").getAsString())
						.build());
			}

			result.addInstance(instance);
		}

		final ResultProtos.Result.Metadata.Builder metadata = ResultProtos.Result.Metadata.newBuilder();

		final Pair<List<FieldTypeProtos.Author>, Collection<OafProtos.Oaf>> authorsOrganizations = createAuthorsOrganization(rootElement);

		if (authorsOrganizations.getKey().isEmpty()) {
			// Should never enter here because of the isValid check at the beginning.
			context.incrementCounter("filtered", "unexpected_no_authors", 1);
			return null;
		}
		metadata.addAllAuthor(authorsOrganizations.getKey());

		// The language is not available in the dump
		metadata.setLanguage(FieldTypeProtos.Qualifier.newBuilder()
				.setClassid("und")
				.setClassname("Undetermined")
				.setSchemeid(DNET_LANGUAGES)
				.setSchemename(DNET_LANGUAGES)
				.build());

		getArrayValues(rootElement, "subject").forEach(s -> metadata.addSubject(FieldTypeProtos.StructuredProperty.newBuilder()
				.setValue(s)
				.setQualifier(getQualifier("keyword", "dnet:subject"))
				.build()));

		getCleanedTitles(rootElement).forEach(t -> metadata.addTitle(FieldTypeProtos.StructuredProperty.newBuilder()
				.setValue(t)
				.setQualifier(getQualifier("main title", "dnet:dataCite_title"))
				.build()));

		final String firstValidDate = getFirstValidDate(rootElement);
		if (StringUtils.isBlank(firstValidDate)) {
			context.incrementCounter("filtered", "missing_date", 1);
			return null;
		}
		setDate(metadata, "issued", firstValidDate, true);
		settingRelevantDate(rootElement, metadata, "accepted", "accepted", false);
		settingRelevantDate(rootElement, metadata, "published-online", "published-online", false);
		settingRelevantDate(rootElement, metadata, "published-print", "published-print", false);

		getArrayObjects(rootElement, "abstract").forEach(d -> {
			final String value = getStringValue(d, "value");
			if (value == null) return;
			// Abstracts coming from MAG are Base64-encoded compressed text; a missing provenance
			// is handled as "not MAG" instead of raising a NullPointerException.
			final String description = MAG.equals(getStringValue(d, "provenance")) ? decompressAbstract(value) : value;
			metadata.addDescription(FieldTypeProtos.StringField.newBuilder().setValue(description).build());
		});

		//Adding Journal and publisher
		//TODO: name of the journal is not the publisher: this needs to be fixed on DOIBoost side
		if (StringUtils.isNotBlank(publisher)) {
			metadata.setPublisher(FieldTypeProtos.StringField.newBuilder().setValue(publisher).build());
			final FieldTypeProtos.Journal.Builder journal = FieldTypeProtos.Journal.newBuilder().setName(publisher);

			if (hasJSONArrayField(rootElement, "issn")) {
				StreamUtils.toStream(rootElement.getAsJsonArray("issn").iterator())
						.map(JsonElement::getAsJsonObject)
						.forEach(it -> {
							final String issntype = getStringValue(it, "type");
							final String value = getStringValue(it, "value");
							// protobuf setters reject null: an ISSN without value is skipped
							if (value == null) return;
							if ("electronic".equals(issntype)) {
								journal.setIssnOnline(value);
							}
							if ("print".equals(issntype)) {
								journal.setIssnPrinted(value);
							}
						});
			}
			metadata.setJournal(journal.build());
		}
		metadata.setResulttype(getQualifier(getDefaultResulttype(cobjValue), "dnet:result_typologies"));
		result.setMetadata(metadata.build());
		entity.setResult(result.build());
		oaf.setEntity(entity.build());

		// Built once and reused by every action below
		final OafProtos.Oaf publication = oaf.build();
		final List<AtomicAction> actionList = new ArrayList<>();

		if (!onlyOrganization) {
			actionList.add(factory.createAtomicAction(setName, agent, publication.getEntity().getId(), "result", "body", publication.toByteArray()));
		}

		for (final OafProtos.Oaf o : authorsOrganizations.getValue()) {
			final String organizationId = o.getEntity().getId();
			actionList.add(factory.createAtomicAction(setName, agent, organizationId, "organization", "body", o.toByteArray()));
			if (!onlyOrganization) {
				actionList.addAll(createPublicationOrganizationRelation(publication, o, factory, setName, agent));
			}
			// Organizations carrying a grid identifier are declared similar (both directions) to the grid one
			final String gridOrganization = getSimilarGridOrganization(o.getEntity());
			if (gridOrganization != null) {
				actionList.add(factory
						.createAtomicAction(setName, agent, organizationId, "organizationOrganization_dedupSimilarity_isSimilarTo", gridOrganization,
								new byte[0]));
				actionList.add(factory
						.createAtomicAction(setName, agent, gridOrganization, "organizationOrganization_dedupSimilarity_isSimilarTo", organizationId,
								new byte[0]));
			}
		}
		return actionList;

	}

	/**
	 * Builds the collectedFrom key/value of a provenance name, looked up ignoring case among the
	 * known datasources. Returns null, counting the event in the "datasource" group, when the
	 * provenance is missing or unknown.
	 */
	private static FieldTypeProtos.KeyValue collectedFromProvenance(final String provenance, final Reporter context) {
		final Pair<String, String> datasource = provenance == null ? null : datasources.get(provenance.toLowerCase());
		if (datasource == null) {
			context.incrementCounter("datasource", "unknown", 1);
			return null;
		}
		final String id = datasource.getValue();
		final String name = datasource.getKey();
		if (StringUtils.isBlank(id) || StringUtils.isBlank(name)) {
			return null;
		}
		return FieldTypeProtos.KeyValue.newBuilder()
				.setValue(name)
				.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(id, SEPARATOR)))
				.build();
	}

	/**
	 * Builds a result instance from an element of the "instances" array (or from an UnpayWall license).
	 * A missing access right is mapped to UNKNOWN; a missing url or an unknown provenance leaves the
	 * corresponding field unset.
	 */
	private static ResultProtos.Result.Instance instanceFromJson(final JsonObject it,
			final String cobjValue,
			final String typeValue,
			final Reporter context) {
		final ResultProtos.Result.Instance.Builder instance = ResultProtos.Result.Instance.newBuilder();
		instance.setInstancetype(publicationResourceQualifier(cobjValue, typeValue));
		instance.setHostedby(unknownRepositoryHost());

		final String declaredAccess = getStringValue(it, "access-rights");
		final String accessId;
		final String accessName;
		switch (declaredAccess == null ? "" : declaredAccess) {
		case "OPEN":
			accessId = "OPEN";
			accessName = "Open Access";
			break;
		case "CLOSED":
		case "RESTRICTED":
			//4362#note-3: closed access is reported as restricted
			accessId = "RESTRICTED";
			accessName = "Restricted";
			break;
		case "EMBARGO":
			accessId = "EMBARGO";
			accessName = "Embargo";
			break;
		default:
			accessId = "UNKNOWN";
			accessName = "not available";
		}

		final String url = getStringValue(it, "url");
		if (url != null) {
			instance.addUrl(url);
		}
		instance.setAccessright(accessModeQualifier(accessId, accessName));

		final FieldTypeProtos.KeyValue collectedFrom = collectedFromProvenance(getStringValue(it, "provenance"), context);
		if (collectedFrom != null) {
			instance.setCollectedfrom(collectedFrom);
		}
		return instance.build();
	}

	/** Qualifier of the "dnet:publication_resource" scheme for the given class id and name. */
	private static FieldTypeProtos.Qualifier publicationResourceQualifier(final String classId, final String className) {
		return FieldTypeProtos.Qualifier.newBuilder()
				.setClassid(classId)
				.setClassname(className)
				.setSchemeid("dnet:publication_resource")
				.setSchemename("dnet:publication_resource")
				.build();
	}

	/** Qualifier of the "dnet:access_modes" scheme for the given class id and name. */
	private static FieldTypeProtos.Qualifier accessModeQualifier(final String classId, final String className) {
		return FieldTypeProtos.Qualifier.newBuilder()
				.setClassid(classId)
				.setClassname(className)
				.setSchemeid("dnet:access_modes")
				.setSchemename("dnet:access_modes")
				.build();
	}

	/** hostedBy value used when the hosting datasource is not known. */
	private static FieldTypeProtos.KeyValue unknownRepositoryHost() {
		return FieldTypeProtos.KeyValue.newBuilder()
				.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c")
				.setValue("Unknown Repository")
				.build();
	}

	/**
	 * Computes the identifier of the grid organization matching the given one.
	 *
	 * @param organization the organization entity whose pids are inspected
	 * @return "20|grid________::" followed by the md5 of the first pid whose qualifier class name
	 *         is "grid", or null when there is no such pid
	 */
	private static String getSimilarGridOrganization(final OafProtos.OafEntity organization) {
		final List<FieldTypeProtos.StructuredProperty> pids = organization.getPidList();
		if (pids == null) {
			return null;
		}
		return pids.stream()
				.filter(pid -> pid.getQualifier().getClassname().equals("grid"))
				.findFirst()
				.map(pid -> "20|grid________" + SEPARATOR + AbstractDNetXsltFunctions.md5(pid.getValue()))
				.orElse(null);
	}

	/**
	 * Creates the two affiliation actions linking a publication and an organization:
	 * publication --hasAuthorInstitution--> organization and
	 * organization --isAuthorInstitutionOf--> publication.
	 * Both relations are marked as collected from the MAG datasource.
	 *
	 * @return a list with the two actions, in the order above
	 */
	private static List<AtomicAction> createPublicationOrganizationRelation(final OafProtos.Oaf publication,
			final OafProtos.Oaf organization,
			final ActionFactory factory,
			final String setName,
			final Agent agent) {

		final String publicationId = publication.getEntity().getId();
		final String organizationId = organization.getEntity().getId();
		final Pair<String, String> magDatasource = datasources.get(MAG.toLowerCase());

		final OafProtos.Oaf.Builder relationOaf = OafProtos.Oaf.newBuilder();
		relationOaf.setKind(KindProtos.Kind.relation);
		relationOaf.setDataInfo(FieldTypeProtos.DataInfo.newBuilder()
				.setInvisible(false)
				.setDeletedbyinference(false)
				.setInferred(false)
				.setTrust("0.9")
				.setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
				.build());

		// Fields shared by both directions
		final OafProtos.OafRel.Builder relation = OafProtos.OafRel.newBuilder();
		relation.setRelType(RelTypeProtos.RelType.resultOrganization);
		relation.setSubRelType(RelTypeProtos.SubRelType.affiliation);
		relation.addCollectedfrom(FieldTypeProtos.KeyValue.newBuilder()
				.setValue(magDatasource.getKey())
				.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions
						.md5(StringUtils.substringAfter(magDatasource.getValue(), SEPARATOR)))
				.build());
		relation.setChild(false);

		final List<AtomicAction> actions = new ArrayList<>();

		// Result --> Organization
		relation.setSource(publicationId);
		relation.setTarget(organizationId);
		relation.setRelClass(ResultOrganization.Affiliation.RelName.hasAuthorInstitution.toString());
		relation.setResultOrganization(affiliationWithSemantics("hasAuthorInstitution"));
		relationOaf.setRel(relation.build());
		actions.add(factory.createAtomicAction(setName, agent, publicationId, "resultOrganization_affiliation_hasAuthorInstitution",
				organizationId, relationOaf.build().toByteArray()));

		// Organization --> Result
		relation.setSource(organizationId);
		relation.setTarget(publicationId);
		relation.setRelClass(ResultOrganization.Affiliation.RelName.isAuthorInstitutionOf.toString());
		relation.setResultOrganization(affiliationWithSemantics("isAuthorInstitutionOf"));
		relationOaf.setRel(relation.build());
		actions.add(factory.createAtomicAction(setName, agent, organizationId, "resultOrganization_affiliation_isAuthorInstitutionOf",
				publicationId, relationOaf.build().toByteArray()));

		return actions;

	}

	/** Builds the ResultOrganization payload whose affiliation carries the given relation semantics. */
	private static ResultOrganization affiliationWithSemantics(final String relationName) {
		final ResultOrganization.Affiliation affiliation = ResultOrganization.Affiliation.newBuilder()
				.setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
						.setSemantics(getQualifier(relationName, "dnet:result_organization_relations"))
						.build())
				.build();
		return ResultOrganization.newBuilder().setAffiliation(affiliation).build();
	}

	/** Tells whether {@code root} has a member named {@code key} whose value is a JSON array. */
	private static boolean hasJSONArrayField(final JsonObject root, final String key) {
		if (!root.has(key)) {
			return false;
		}
		return root.get(key).isJsonArray();
	}

	/**
	 * Scans the date fields listed in DATE_TYPES, in order, and returns the first value that is
	 * present and accepted by isValidDate.
	 *
	 * @return the first valid date, or the empty string when none is found
	 */
	private static String getFirstValidDate(final JsonObject root) {
		for (final String dateField : DATE_TYPES) {
			final String candidate = getStringValue(root, dateField);
			if (candidate != null && isValidDate(candidate)) {
				return candidate;
			}
		}
		return "";
	}

	/**
	 * Stores a date in the result metadata, either as date of acceptance or as a relevant date
	 * qualified with {@code dictionaryKey}. Does nothing when {@code date} is null.
	 *
	 * @param metadata              the metadata builder to update
	 * @param dictionaryKey         class of the "dnet:dataCite_date" qualifier, used only for relevant dates
	 * @param date                  the date value, may be null
	 * @param addToDateOfAcceptance true to set the date of acceptance, false to add a relevant date
	 */
	private static void setDate(ResultProtos.Result.Metadata.Builder metadata,
											final String dictionaryKey,
											final String date,
											final boolean addToDateOfAcceptance) {
		if (date != null) {
			if (!addToDateOfAcceptance) {
				final FieldTypeProtos.StructuredProperty relevantDate = FieldTypeProtos.StructuredProperty.newBuilder()
						.setValue(date)
						.setQualifier(getQualifier(dictionaryKey, "dnet:dataCite_date"))
						.build();
				metadata.addRelevantdate(relevantDate);
			} else {
				final FieldTypeProtos.StringField acceptance = FieldTypeProtos.StringField.newBuilder().setValue(date).build();
				metadata.setDateofacceptance(acceptance);
			}
		}
	}

	/**
	 * Reads the date stored under {@code jsonKey} and, when valid, adds it to the relevant dates
	 * of the metadata (and optionally sets it as date of acceptance). A 4-character value is
	 * completed with "-01-01" before validation. Missing or invalid dates are ignored.
	 *
	 * @param rootElement           the JSON record
	 * @param metadata              the metadata builder to update
	 * @param jsonKey               name of the JSON field holding the date
	 * @param dictionaryKey         class of the "dnet:dataCite_date" qualifier of the relevant date
	 * @param addToDateOfAcceptance true to also set the date of acceptance
	 */
	private static void settingRelevantDate(JsonObject rootElement,
			ResultProtos.Result.Metadata.Builder metadata,
			final String jsonKey,
			final String dictionaryKey,
			final boolean addToDateOfAcceptance) {
		final String raw = getStringValue(rootElement, jsonKey);
		if (raw == null) {
			return;
		}
		final String normalized = raw.length() == 4 ? raw + "-01-01" : raw;
		if (!isValidDate(normalized)) {
			return;
		}
		if (addToDateOfAcceptance) {
			metadata.setDateofacceptance(FieldTypeProtos.StringField.newBuilder().setValue(normalized).build());
		}
		final FieldTypeProtos.StructuredProperty relevantDate = FieldTypeProtos.StructuredProperty.newBuilder()
				.setValue(normalized)
				.setQualifier(getQualifier(dictionaryKey, "dnet:dataCite_date"))
				.build();
		metadata.addRelevantdate(relevantDate);
	}

	/**
	 * Classifies an author identifier URL and strips the well-known prefix from it.
	 * <ul>
	 * <li>values containing "orcid.org": key {@code ORCID}, value without the http(s)://orcid.org/ prefix;</li>
	 * <li>values containing "academic.microsoft.com/#/detail": key "MAG Identifier", value without the
	 * http(s)://academic.microsoft.com/#/detail/ prefix;</li>
	 * <li>anything else: key "URL", value unchanged.</li>
	 * </ul>
	 *
	 * @param value the identifier as found in the dump; must not be null
	 * @return the key/value pair describing the identifier
	 */
	public static FieldTypeProtos.KeyValue extractIdentifier(final String value) {
		final FieldTypeProtos.KeyValue.Builder pid = FieldTypeProtos.KeyValue.newBuilder();
		// String.replace is used on purpose: the prefixes are literals, while replaceAll would
		// interpret them as regular expressions (where '.' matches any character).
		if (StringUtils.contains(value, "orcid.org")) {
			return pid.setValue(value.replace("https://orcid.org/", "").replace("http://orcid.org/", ""))
					.setKey(ORCID).build();
		}
		if (StringUtils.contains(value, "academic.microsoft.com/#/detail")) {
			// Both schemes are stripped, consistently with the ORCID case above.
			return pid.setValue(value.replace("https://academic.microsoft.com/#/detail/", "")
							.replace("http://academic.microsoft.com/#/detail/", ""))
					.setKey("MAG Identifier").build();
		}
		return pid.setValue(value)
				.setKey("URL").build();
	}

	/**
	 * Builds an organization entity from an author affiliation of the dump.
	 * The organization is created only when the affiliation has at least one identifier containing
	 * "academic.microsoft.com"; the first of them determines the organization id.
	 * Identifiers without a value, or whose schema has no known pid type, are skipped.
	 *
	 * @param affiliation the affiliation JSON object
	 * @return the organization, or null when there is no Microsoft Academic identifier, the
	 *         provenance is missing or unknown, or the affiliation has no legal name ("value")
	 */
	public static OafProtos.Oaf createOrganizationFromJSON(final JsonObject affiliation) {
		final Map<String, FieldTypeProtos.Qualifier> affiliationIdentifiers = new HashMap<>();
		final List<String> magId = new ArrayList<>();
		getArrayObjects(affiliation, "identifiers").forEach(it -> {
			final String value = getStringValue(it, "value");
			if (value == null) {
				return;
			}
			if (StringUtils.contains(value, "academic.microsoft.com")) {
				affiliationIdentifiers.put(value, affiliationPIDType.get(MAG));
				magId.add(value);
			} else {
				// A null qualifier would be rejected by the protobuf setter when the pids are added below.
				final FieldTypeProtos.Qualifier pidType = affiliationPIDType.get(getStringValue(it, "schema"));
				if (pidType != null) {
					affiliationIdentifiers.put(value, pidType);
				}
			}
		});
		if (magId.isEmpty()) {
			return null;
		}

		final String provenance = getStringValue(affiliation, "provenance");
		final Pair<String, String> datasource = provenance == null ? null : datasources.get(provenance.toLowerCase());
		if (datasource == null || StringUtils.isBlank(datasource.getValue()) || StringUtils.isBlank(datasource.getKey())) {
			return null;
		}
		final String legalName = getStringValue(affiliation, "value");
		if (legalName == null) {
			return null;
		}

		final String microsoftID = magId.get(0);
		final OafProtos.OafEntity.Builder entity = OafProtos.OafEntity.newBuilder();
		entity.setType(TypeProtos.Type.organization);
		entity.setId("20|microsoft___" + SEPARATOR + AbstractDNetXsltFunctions.md5(microsoftID));
		entity.addCollectedfrom(FieldTypeProtos.KeyValue.newBuilder()
				.setValue(datasource.getKey())
				.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(datasource.getValue(), SEPARATOR)))
				.build());
		entity.addOriginalId(microsoftID);

		affiliationIdentifiers.forEach((key, value) -> entity.addPid(
				FieldTypeProtos.StructuredProperty.newBuilder()
						.setQualifier(value)
						.setValue(key)
						.build()));

		final OrganizationProtos.Organization.Metadata.Builder orgMetadata = OrganizationProtos.Organization.Metadata.newBuilder()
				.setLegalname(FieldTypeProtos.StringField.newBuilder().setValue(legalName).build());
		// The web site is set only when the dump provides it
		final String officialPage = getStringValue(affiliation, "official-page");
		if (officialPage != null) {
			orgMetadata.setWebsiteurl(FieldTypeProtos.StringField.newBuilder().setValue(officialPage).build());
		}
		final OrganizationProtos.Organization.Builder organization = OrganizationProtos.Organization.newBuilder();
		organization.setMetadata(orgMetadata.build());
		entity.setOrganization(organization);

		final OafProtos.Oaf.Builder oaf = OafProtos.Oaf.newBuilder();
		oaf.setKind(KindProtos.Kind.entity);
		oaf.setEntity(entity);
		oaf.setDataInfo(FieldTypeProtos.DataInfo.newBuilder()
				.setInvisible(false)
				.setDeletedbyinference(false)
				.setInferred(false)
				.setTrust("0.9")
				.setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
				.build());
		return oaf.build();
	}

	/**
	 * Builds the authors of a record together with the organizations they are affiliated with.
	 * Authors whose name is rejected by isValidAuthorName are dropped; the remaining ones are ranked
	 * from 1 in the order they appear. When "fullname" is blank it is composed from "given" and "family".
	 *
	 * @param root the JSON record
	 * @return a pair whose key is the list of authors and whose value is the collection of distinct
	 *         organizations (by entity id) found among their affiliations
	 */
	public static Pair<List<FieldTypeProtos.Author>, Collection<OafProtos.Oaf>> createAuthorsOrganization(final JsonObject root) {

		final Map<String, OafProtos.Oaf> organizationsById = new HashMap<>();
		final List<FieldTypeProtos.Author> validAuthors = new ArrayList<>();
		int nextRank = 1;

		for (final JsonObject author : getArrayObjects(root, "authors")) {
			final String given = getStringValue(author, "given");
			final String family = getStringValue(author, "family");
			String fullname = getStringValue(author, "fullname");

			final boolean canCompose = StringUtils.isNotBlank(given) && StringUtils.isNotBlank(family);
			if (StringUtils.isBlank(fullname) && canCompose) {
				fullname = String.format("%s %s", given, family);
			}

			if (!isValidAuthorName(fullname, null)) {
				continue;
			}

			final FieldTypeProtos.Author.Builder authorBuilder = FieldTypeProtos.Author.newBuilder();
			if (StringUtils.isNotBlank(given)) {
				authorBuilder.setName(given);
			}
			if (StringUtils.isNotBlank(family)) {
				authorBuilder.setSurname(family);
			}
			if (StringUtils.isNotBlank(fullname)) {
				authorBuilder.setFullname(fullname);
			}

			final List<JsonObject> identifiers = getArrayObjects(author, "identifiers");

			// Affiliations that can be turned into an organization are recorded once per organization id
			for (final JsonObject affiliation : getArrayObjects(author, "affiliations")) {
				final OafProtos.Oaf org = createOrganizationFromJSON(affiliation);
				if (org == null) {
					continue;
				}
				organizationsById.put(org.getEntity().getId(), org);
				authorBuilder.addAffiliation(org.getEntity().getOrganization().getMetadata().getLegalname());
			}

			// At most one pid per identifier kind: the first one found wins
			final Map<String, FieldTypeProtos.KeyValue> pidsByKind = identifiers.stream()
					.map(id -> extractIdentifier(id.get("value").getAsString()))
					.collect(Collectors.toMap(FieldTypeProtos.KeyValue::getKey, Function.identity(), (first, second) -> first));
			pidsByKind.values().forEach(authorBuilder::addPid);

			authorBuilder.setRank(nextRank++);
			validAuthors.add(authorBuilder.build());
		}

		return new Pair<>(validAuthors, organizationsById.values());
	}

}