package eu.dnetlib.data.transform.xml;

import java.nio.charset.Charset;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.apache.commons.codec.binary.Base64;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.lang.StringUtils;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import com.google.common.base.Predicate;
import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.protobuf.ByteString;
import com.google.protobuf.Descriptors.Descriptor;
import com.google.protobuf.Descriptors.FieldDescriptor;
import com.google.protobuf.Message;
import com.google.protobuf.Message.Builder;

import eu.dnetlib.data.proto.DataInfoProtos.DataInfo;
import eu.dnetlib.data.proto.KeyValueProtos.KeyValue;
import eu.dnetlib.data.proto.KindProtos.Kind;
import eu.dnetlib.data.proto.OafProtos.Oaf;
import eu.dnetlib.data.proto.OafProtos.OafEntity;
import eu.dnetlib.data.proto.OafProtos.OafRel;
import eu.dnetlib.data.proto.QualifierProtos.Qualifier;
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
import eu.dnetlib.data.proto.StructuredPropertyProtos.StructuredProperty;
import eu.dnetlib.data.proto.TypeProtos.Type;
import eu.dnetlib.miscutils.collections.Pair;
import eu.dnetlib.miscutils.iterators.IterablePair;

public abstract class AbstractDNetOafXsltFunctions {

	/** Width used by {@link #generateNsPrefix(String, String)}: the external id is left-padded to this length minus the prefix length. */
	private static final int MAX_NSPREFIX_LEN = 12;
	/** Regular expression matching strings that start with {@code http://}, {@code https://} or {@code ftp://}. */
	public static final String URL_REGEX = "^(http|https|ftp)\\://.*";
	/** Lookup table from a code to its display name; filled by the static initializer of this class. */
	protected static Map<String, String> code2name = Maps.newHashMap();

	/**
	 * Predicate accepting strings that, once trimmed, match {@link #URL_REGEX}.
	 * A {@code null} input is rejected ({@code false}) instead of raising a NullPointerException.
	 */
	public static Predicate<String> urlFilter = new Predicate<String>() {

		@Override
		public boolean apply(final String s) {
			// A filtered collection may contain null elements; they are simply "not a URL".
			return (s != null) && s.trim().matches(URL_REGEX);
		}
	};

	// Fills code2name with hard-coded code -> display name pairs. The keys visible here include
	// two-letter upper-case codes, three-letter lower-case codes (some written as "xxx/yyy") and
	// assorted vocabulary terms, many of which map to themselves. setQualifier() consults this
	// table and falls back to the code itself when no entry is found.
	static {
		code2name.put("AF", "AFGHANISTAN");
		code2name.put("CX", "CHRISTMAS ISLAND");
		code2name.put("CC", "COCOS (KEELING) ISLANDS");
		code2name.put("aka", "Akan");
		code2name.put("CO", "Colombia");
		code2name.put("result", "result");
		code2name.put("AX", "ÅLAND ISLANDS");
		code2name.put("AS", "AMERICAN SAMOA");
		code2name.put("AD", "ANDORRA");
		code2name.put("AO", "ANGOLA");
		code2name.put("AI", "ANGUILLA");
		code2name.put("AQ", "ANTARCTICA");
		code2name.put("AG", "ANTIGUA AND BARBUDA");
		code2name.put("AW", "ARUBA");
		code2name.put("BS", "BAHAMAS");
		code2name.put("BB", "BARBADOS");
		code2name.put("BZ", "BELIZE");
		code2name.put("BM", "BERMUDA");
		code2name.put("BT", "BHUTAN");
		code2name.put("BQ", "BONAIRE, SINT EUSTATIUS AND SABA");
		code2name.put("BV", "BOUVET ISLAND");
		code2name.put("IO", "BRITISH INDIAN OCEAN TERRITORY");
		code2name.put("KY", "CAYMAN ISLANDS");
		code2name.put("TD", "CHAD");
		code2name.put("BG", "Bulgaria");
		code2name.put("AT", "Austria");
		code2name.put("BE", "Belgium");
		code2name.put("CA", "Canada");
		code2name.put("BJ", "Benin");
		code2name.put("CN", "China (People's Republic of)");
		code2name.put("AU", "Australia");
		code2name.put("BR", "Brazil");
		code2name.put("AR", "Argentina");
		code2name.put("BF", "Burkina Faso");
		code2name.put("AL", "Albania");
		code2name.put("CV", "Cape Verde");
		code2name.put("AZ", "Azerbaijan");
		code2name.put("BA", "Bosnia and Herzegovina");
		code2name.put("AM", "Armenia");
		code2name.put("DZ", "Algeria");
		code2name.put("CM", "Cameroon");
		code2name.put("BD", "Bangladesh");
		code2name.put("KH", "Cambodia");
		code2name.put("CL", "Chile");
		code2name.put("BW", "Botswana");
		code2name.put("BY", "Belarus");
		code2name.put("BO", "Bolivia");
		code2name.put("CF", "Central African Republic");
		code2name.put("BH", "Bahrain");
		code2name.put("BN", "Brunei Darussalam");
		code2name.put("BI", "Burundi");
		code2name.put("KM", "COMOROS");
		code2name.put("CK", "COOK ISLANDS");
		code2name.put("CW", "CURAÇAO");
		code2name.put("DJ", "DJIBOUTI");
		code2name.put("DM", "DOMINICA");
		code2name.put("GQ", "EQUATORIAL GUINEA");
		code2name.put("ER", "ERITREA");
		code2name.put("FK", "FALKLAND ISLANDS (MALVINAS)");
		code2name.put("PF", "FRENCH POLYNESIA");
		code2name.put("TF", "FRENCH SOUTHERN TERRITORIES");
		code2name.put("GI", "GIBRALTAR");
		code2name.put("GR", "GREECE");
		code2name.put("GD", "GRENADA");
		code2name.put("GP", "GUADELOUPE");
		code2name.put("GU", "GUAM");
		code2name.put("GG", "GUERNSEY");
		code2name.put("HM", "HEARD ISLAND AND MCDONALD ISLANDS");
		code2name.put("VA", "HOLY SEE (VATICAN CITY STATE)");
		code2name.put("IQ", "IRAQ");
		code2name.put("IM", "ISLE OF MAN");
		code2name.put("JE", "JERSEY");
		code2name.put("KI", "KIRIBATI");
		code2name.put("KP", "KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF");
		code2name.put("LR", "LIBERIA");
		code2name.put("MQ", "MARTINIQUE");
		code2name.put("MR", "MAURITANIA");
		code2name.put("YT", "MAYOTTE");
		code2name.put("FM", "MICRONESIA, FEDERATED STATES OF");
		code2name.put("MN", "MONGOLIA");
		code2name.put("MS", "MONTSERRAT");
		code2name.put("NR", "NAURU");
		code2name.put("NU", "NIUE");
		code2name.put("NF", "NORFOLK ISLAND");
		code2name.put("MP", "NORTHERN MARIANA ISLANDS");
		code2name.put("PW", "PALAU");
		code2name.put("PY", "PARAGUAY");
		code2name.put("PN", "PITCAIRN");
		code2name.put("PR", "PUERTO RICO");
		code2name.put("RE", "RÉUNION");
		code2name.put("BL", "SAINT BARTHÉLEMY");
		code2name.put("SH", "SAINT HELENA, ASCENSION AND TRISTAN DA CUNHA");
		code2name.put("KN", "SAINT KITTS AND NEVIS");
		code2name.put("LC", "SAINT LUCIA");
		code2name.put("MF", "SAINT MARTIN (FRENCH PART)");
		code2name.put("PM", "SAINT PIERRE AND MIQUELON");
		code2name.put("VC", "SAINT VINCENT AND THE GRENADINES");
		code2name.put("FI", "Finland");
		code2name.put("NO", "Norway");
		code2name.put("CZ", "Czech Republic");
		code2name.put("IL", "Israel");
		code2name.put("EE", "Estonia");
		code2name.put("IT", "Italy");
		code2name.put("RO", "Romania");
		code2name.put("HU", "Hungary");
		code2name.put("NL", "Netherlands");
		code2name.put("FR", "France");
		code2name.put("IS", "Iceland");
		code2name.put("LV", "Latvia");
		code2name.put("PT", "Portugal");
		code2name.put("MT", "Malta");
		code2name.put("DK", "Denmark");
		code2name.put("IE", "Ireland");
		code2name.put("MA", "Morocco");
		code2name.put("IN", "India");
		code2name.put("KR", "Korea (Republic of)");
		code2name.put("MX", "Mexico");
		code2name.put("HT", "Haiti");
		code2name.put("EG", "Egypt");
		code2name.put("LT", "Lithuania");
		code2name.put("HR", "Croatia");
		code2name.put("LU", "Luxembourg");
		code2name.put("PG", "Papua New Guinea");
		code2name.put("GT", "Guatemala");
		code2name.put("ID", "Indonesia");
		code2name.put("NG", "Nigeria");
		code2name.put("NZ", "New Zealand");
		code2name.put("MK", "Former Yugoslav Republic of Macedonia");
		code2name.put("JP", "Japan");
		code2name.put("KZ", "Kazakhstan");
		code2name.put("NE", "Niger");
		code2name.put("ME", "Montenegro");
		code2name.put("GE", "Georgia");
		code2name.put("JO", "Jordan");
		code2name.put("LB", "Lebanon");
		code2name.put("PS", "Palestinian-administered areas");
		code2name.put("CR", "Costa Rica");
		code2name.put("PH", "Philippines");
		code2name.put("KE", "Kenya");
		code2name.put("CI", "Cote d'Ivoire");
		code2name.put("IR", "Iran (Islamic Republic of)");
		code2name.put("NI", "Nicaragua");
		code2name.put("KG", "Kyrgyzstan");
		code2name.put("EC", "Ecuador");
		code2name.put("MY", "Malaysia");
		code2name.put("FO", "Faroe Islands");
		code2name.put("ET", "Ethiopia");
		code2name.put("GH", "Ghana");
		code2name.put("GN", "Guinea");
		code2name.put("RW", "Rwanda");
		code2name.put("MG", "Madagascar");
		code2name.put("PE", "Peru");
		code2name.put("MW", "Malawi");
		code2name.put("JM", "Jamaica");
		code2name.put("HK", "Hong Kong");
		code2name.put("PK", "Pakistan");
		code2name.put("MZ", "Mozambique");
		code2name.put("LS", "Lesotho");
		code2name.put("NA", "Namibia");
		code2name.put("DO", "Dominican Republic");
		code2name.put("HN", "Honduras");
		code2name.put("CD", "Congo (Democratic Republic of)");
		code2name.put("ML", "Mali");
		code2name.put("NP", "Nepal");
		code2name.put("MU", "Mauritius");
		code2name.put("CU", "Cuba");
		code2name.put("PA", "Panama");
		code2name.put("LI", "Liechtenstein");
		code2name.put("GL", "Greenland");
		code2name.put("GA", "Gabon");
		code2name.put("CG", "Congo");
		code2name.put("OM", "Oman");
		code2name.put("KW", "Kuwait");
		code2name.put("QA", "Qatar");
		code2name.put("GY", "Guyana");
		code2name.put("GF", "French Guiana");
		code2name.put("FJ", "Fiji");
		code2name.put("NC", "New Caledonia");
		code2name.put("MM", "Myanmar");
		code2name.put("GW", "Guinea-Bissau");
		code2name.put("WS", "SAMOA");
		code2name.put("ST", "SAO TOME AND PRINCIPE");
		code2name.put("SL", "SIERRA LEONE");
		code2name.put("SX", "SINT MAARTEN (DUTCH PART)");
		code2name.put("SB", "SOLOMON ISLANDS");
		code2name.put("GS", "SOUTH GEORGIA AND THE SOUTH SANDWICH ISLANDS");
		code2name.put("SS", "SOUTH SUDAN");
		code2name.put("SJ", "SVALBARD AND JAN MAYEN");
		code2name.put("TL", "TIMOR-LESTE");
		code2name.put("TK", "TOKELAU");
		code2name.put("TO", "TONGA");
		code2name.put("TC", "TURKS AND CAICOS ISLANDS");
		code2name.put("TV", "TUVALU");
		code2name.put("GB", "UNITED KINGDOM");
		code2name.put("UM", "UNITED STATES MINOR OUTLYING ISLANDS");
		code2name.put("VU", "VANUATU");
		code2name.put("VI", "VIRGIN ISLANDS, U.S.");
		code2name.put("WF", "WALLIS AND FUTUNA");
		code2name.put("EH", "WESTERN SAHARA");
		code2name.put("EU", "European Union");
		code2name.put("abk", "Abkhazian");
		code2name.put("aar", "Afar");
		code2name.put("afr", "Afrikaans");
		code2name.put("alb/sqi", "Albanian");
		code2name.put("amh", "Amharic");
		code2name.put("ara", "Arabic");
		code2name.put("arg", "Aragonese");
		code2name.put("arm/hye", "Armenian");
		code2name.put("asm", "Assamese");
		code2name.put("ava", "Avaric");
		code2name.put("ave", "Avestan");
		code2name.put("aym", "Aymara");
		code2name.put("aze", "Azerbaijani");
		code2name.put("bam", "Bambara");
		code2name.put("bak", "Bashkir");
		code2name.put("baq/eus", "Basque");
		code2name.put("bel", "Belarusian");
		code2name.put("ben", "Bengali");
		code2name.put("bih", "Bihari");
		code2name.put("bis", "Bislama");
		code2name.put("nob", "Bokmål, Norwegian; Norwegian Bokmål");
		code2name.put("bos", "Bosnian");
		code2name.put("bre", "Breton");
		code2name.put("bul", "Bulgarian");
		code2name.put("bur/mya", "Burmese");
		code2name.put("cat", "Catalan; Valencian");
		code2name.put("cha", "Chamorro");
		code2name.put("che", "Chechen");
		code2name.put("nya", "Chewa; Chichewa; Nyanja");
		code2name.put("chi/zho", "Chinese");
		code2name.put("chu", "Church Slavic; Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic");
		code2name.put("chv", "Chuvash");
		code2name.put("cor", "Cornish");
		code2name.put("cos", "Corsican");
		code2name.put("cre", "Cree");
		code2name.put("scr/hrv", "Croatian");
		code2name.put("cze/ces", "Czech");
		code2name.put("dan", "Danish");
		code2name.put("div", "Divehi");
		code2name.put("dut/nld", "Dutch; Flemish");
		code2name.put("dzo", "Dzongkha");
		code2name.put("eng", "English");
		code2name.put("epo", "Esperanto");
		code2name.put("est", "Estonian");
		code2name.put("ewe", "Ewe");
		code2name.put("fao", "Faroese");
		code2name.put("fij", "Fijian");
		code2name.put("fin", "Finnish");
		code2name.put("fre/fra", "French");
		code2name.put("fry", "Frisian");
		code2name.put("ful", "Fulah");
		code2name.put("gla", "Gaelic; Scottish Gaelic");
		code2name.put("glg", "Galician");
		code2name.put("lug", "Ganda");
		code2name.put("geo/kat", "Georgian");
		code2name.put("ger/deu", "German");
		code2name.put("kik", "Gikuyu; Kikuyu");
		code2name.put("gre/ell", "Greek, Modern (1453-)");
		code2name.put("kal", "Greenlandic; Kalaallisut");
		code2name.put("grn", "Guarani");
		code2name.put("guj", "Gujarati");
		code2name.put("hat", "Haitian; Haitian Creole");
		code2name.put("hau", "Hausa");
		code2name.put("heb", "Hebrew");
		code2name.put("her", "Herero");
		code2name.put("hin", "Hindi");
		code2name.put("hmo", "Hiri Motu");
		code2name.put("hun", "Hungarian");
		code2name.put("ice/isl", "Icelandic");
		code2name.put("ido", "Ido");
		code2name.put("ibo", "Igbo");
		code2name.put("ind", "Indonesian");
		// NOTE(review): this label has an unmatched closing parenthesis and looks truncated — confirm the intended name.
		code2name.put("ina", "Auxiliary Language Association)");
		code2name.put("ile", "Interlingue");
		code2name.put("ES", "Spain");
		code2name.put("SK", "Slovakia");
		code2name.put("TR", "Turkey");
		code2name.put("EL", "Greece");
		code2name.put("SE", "Sweden");
		code2name.put("UK", "United Kingdom");
		code2name.put("SI", "Slovenia");
		code2name.put("US", "United States");
		code2name.put("ZA", "South Africa");
		code2name.put("VN", "Viet Nam");
		code2name.put("TH", "Thailand");
		code2name.put("UG", "Uganda");
		code2name.put("TW", "Taiwan");
		code2name.put("RS", "Serbia");
		code2name.put("SC", "Seychelles");
		code2name.put("TN", "Tunisia");
		code2name.put("UA", "Ukraine");
		code2name.put("SN", "Senegal");
		code2name.put("KO", "Kosovo * UN resolution");
		code2name.put("UY", "Uruguay");
		code2name.put("UZ", "Uzbekistan");
		code2name.put("LK", "Sri Lanka");
		code2name.put("SG", "Singapore");
		code2name.put("SY", "Syrian Arab Republic");
		code2name.put("ZM", "Zambia");
		code2name.put("SO", "Somalia");
		code2name.put("VE", "Venezuela");
		code2name.put("VG", "Virgin Islands (British)");
		code2name.put("AE", "United Arab Emirates");
		code2name.put("SM", "San Marino");
		code2name.put("TG", "Togo");
		code2name.put("AN", "Netherlands Antilles");
		code2name.put("TJ", "Tajikistan");
		code2name.put("TM", "Turkmenistan");
		code2name.put("SA", "Saudi Arabia");
		code2name.put("YE", "Yemen");
		code2name.put("SZ", "Swaziland");
		code2name.put("SR", "Suriname");
		code2name.put("iku", "Inuktitut");
		code2name.put("ipk", "Inupiaq");
		code2name.put("gle", "Irish");
		code2name.put("ita", "Italian");
		code2name.put("jpn", "Japanese");
		code2name.put("jav", "Javanese");
		code2name.put("kan", "Kannada");
		code2name.put("kau", "Kanuri");
		code2name.put("kas", "Kashmiri");
		code2name.put("kaz", "Kazakh");
		code2name.put("khm", "Khmer");
		code2name.put("kin", "Kinyarwanda");
		code2name.put("kir", "Kirghiz");
		code2name.put("kom", "Komi");
		code2name.put("kon", "Kongo");
		code2name.put("kor", "Korean");
		code2name.put("kua", "Kuanyama; Kwanyama");
		code2name.put("kur", "Kurdish");
		code2name.put("lao", "Lao");
		code2name.put("lat", "Latin");
		code2name.put("lav", "Latvian");
		code2name.put("ltz", "Letzeburgesch; Luxembourgish");
		code2name.put("lim", "Limburgan; Limburger; Limburgish");
		code2name.put("lin", "Lingala");
		code2name.put("lit", "Lithuanian");
		code2name.put("lub", "Luba-Katanga");
		code2name.put("mac/mkd", "Macedonian");
		code2name.put("mlg", "Malagasy");
		code2name.put("may/msa", "Malay");
		code2name.put("mal", "Malayalam");
		code2name.put("mlt", "Maltese");
		code2name.put("glv", "Manx");
		code2name.put("mao/mri", "Maori");
		code2name.put("mar", "Marathi");
		code2name.put("mah", "Marshallese");
		code2name.put("mol", "Moldavian");
		code2name.put("mon", "Mongolian");
		code2name.put("nau", "Nauru");
		code2name.put("nav", "Navajo; Navaho");
		code2name.put("nde", "Ndebele, North");
		code2name.put("nbl", "Ndebele, South");
		code2name.put("ndo", "Ndonga");
		code2name.put("nep", "Nepali");
		code2name.put("sme", "Northern Sami");
		code2name.put("nor", "Norwegian");
		code2name.put("nno", "Norwegian Nynorsk; Nynorsk, Norwegian");
		code2name.put("oci", "Occitan (post 1500); Provençal");
		code2name.put("oji", "Ojibwa");
		code2name.put("ori", "Oriya");
		code2name.put("orm", "Oromo");
		code2name.put("oss", "Ossetian; Ossetic");
		code2name.put("pli", "Pali");
		code2name.put("pan", "Panjabi; Punjabi");
		code2name.put("per/fas", "Persian");
		code2name.put("pol", "Polish");
		code2name.put("por", "Portuguese");
		code2name.put("pus", "Pushto");
		code2name.put("que", "Quechua");
		code2name.put("roh", "Raeto-Romance");
		code2name.put("rum/ron", "Romanian");
		code2name.put("run", "Rundi");
		code2name.put("rus", "Russian");
		code2name.put("smo", "Samoan");
		code2name.put("sag", "Sango");
		code2name.put("san", "Sanskrit");
		code2name.put("srd", "Sardinian");
		code2name.put("scc/srp", "Serbian");
		code2name.put("sna", "Shona");
		code2name.put("iii", "Sichuan Yi");
		code2name.put("snd", "Sindhi");
		code2name.put("sin", "Sinhala; Sinhalese");
		code2name.put("slo/slk", "Slovak");
		code2name.put("slv", "Slovenian");
		code2name.put("som", "Somali");
		code2name.put("sot", "Sotho, Southern");
		code2name.put("spa", "Spanish; Castilian");
		code2name.put("sun", "Sundanese");
		code2name.put("swa", "Swahili");
		code2name.put("ssw", "Swati");
		code2name.put("swe", "Swedish");
		code2name.put("tgl", "Tagalog");
		code2name.put("tah", "Tahitian");
		code2name.put("tgk", "Tajik");
		code2name.put("tam", "Tamil");
		code2name.put("tat", "Tatar");
		code2name.put("tel", "Telugu");
		code2name.put("tha", "Thai");
		code2name.put("tib/bod", "Tibetan");
		code2name.put("tir", "Tigrinya");
		code2name.put("ton", "Tonga (Tonga Islands)");
		code2name.put("tso", "Tsonga");
		code2name.put("tsn", "Tswana");
		code2name.put("tur", "Turkish");
		code2name.put("tuk", "Turkmen");
		code2name.put("twi", "Twi");
		code2name.put("uig", "Uighur; Uyghur");
		code2name.put("ukr", "Ukrainian");
		code2name.put("urd", "Urdu");
		code2name.put("uzb", "Uzbek");
		code2name.put("ven", "Venda");
		code2name.put("vie", "Vietnamese");
		code2name.put("vol", "Volapük");
		code2name.put("wln", "Walloon");
		code2name.put("wel/cym", "Welsh");
		code2name.put("wol", "Wolof");
		code2name.put("xho", "Xhosa");
		code2name.put("yid", "Yiddish");
		code2name.put("yor", "Yoruba");
		code2name.put("zha", "Zhuang; Chuang");
		code2name.put("zul", "Zulu");
		code2name.put("deu/ger", "German");
		code2name.put("fra/fre", "French");
		code2name.put("srr", "Serbian");
		code2name.put("esl/spa", "Spanish");
		code2name.put("und", "Undetermined");
		code2name.put("UNKNOWN", "UNKNOWN");
		code2name.put("entityregistry", "entityregistry");
		code2name.put("aggregator", "aggregator");
		code2name.put("dataarchive", "dataarchive");
		code2name.put("cris", "cris");
		code2name.put("repository", "repository");
		code2name.put("CIP-EIP-TN", "CIP-Eco-Innovation - CIP-Thematic Network");
		code2name.put("ec:specificprogram", "specificprogram");
		code2name.put("ec:program", "program");
		code2name.put("ec:hasframeworkprogram", "hasframeworkprogram");
		code2name.put("ec:hasprogram", "hasprogram");
		code2name.put("171", "Article 171 of the Treaty");
		code2name.put("BSG", "Research for the benefit of specific groups");
		code2name.put("CP", "Collaborative project");
		code2name.put("providedBy", "provided by");
		code2name.put("dataset", "dataset");
		code2name.put("publication", "publication");
		code2name.put("dataset_dataset", "dataset_dataset");
		code2name.put("publication_dataset", "publication_dataset");
		code2name.put("publication_publication", "publication_publication");
		code2name.put("coordinator", "coordinator");
		code2name.put("participant", "participant");
		code2name.put("subcontractor", "subcontractor");
		code2name.put("principal investigating", "principal investigating");
		code2name.put("exploitation", "exploitation");
		code2name.put("collection", "collection");
		code2name.put("event", "event");
		code2name.put("film", "film");
		code2name.put("image", "image");
		code2name.put("interactiveResource", "interactiveResource");
		code2name.put("model", "model");
		code2name.put("physicalObject", "physicalObject");
		code2name.put("service", "service");
		code2name.put("software", "software");
		code2name.put("sound", "sound");
		code2name.put("text", "text");
		code2name.put("0000", "Unknown");
		code2name.put("0001", "Article");
		code2name.put("0002", "Book");
		code2name.put("0004", "Conference object");
		code2name.put("0005", "Contribution for newspaper or weekly magazine");
		code2name.put("0006", "Doctoral thesis");
		code2name.put("0007", "Master thesis");
		code2name.put("0008", "Bachelor thesis");
		code2name.put("0009", "External research report");
		code2name.put("0010", "Lecture");
		code2name.put("0011", "Internal report");
		code2name.put("0012", "Newsletter");
		code2name.put("0013", "Part of book or chapter of book");
		code2name.put("0014", "Research");
		code2name.put("0015", "Review");
		code2name.put("0016", "Preprint");
		code2name.put("0017", "Report");
		code2name.put("0018", "Annotation");
		code2name.put("0019", "Patent");
		code2name.put("0020", "Other");
		code2name.put("0021", "Dataset");
		code2name.put("main title", "main title");
		code2name.put("subtitle", "subtitle");
		code2name.put("alternative title", "alternative title");
		code2name.put("translated title", "translated title");
		code2name.put("OPEN", "Open Access");
		code2name.put("12MONTHS", "12 Months Embargo");
		code2name.put("OTHER", "Other");
		code2name.put("6MONTHS", "6 Months Embargo");
		code2name.put("RESTRICTED", "Restricted");
		code2name.put("EMBARGO", "Embargo");
		code2name.put("CLOSED", "Closed Access");
		code2name.put("wt:fundingStream", "Wellcome Trust: Funding Stream");
		code2name.put("wt:hasParentFunding", "wt:hasParentFunding");
		code2name.put("author", "author");
		code2name.put("isResultOf", "isResultOf");
		code2name.put("driver", "driver");
		code2name.put("openaire", "openaire");
		code2name.put("notCompatible", "notCompatible");
		code2name.put("available", "available");
		code2name.put("copyrighted", "copyrighted");
		code2name.put("created", "created");
		code2name.put("endDate", "endDate");
		code2name.put("issued", "issued");
		code2name.put("startDate", "startDate");
		code2name.put("submitted", "submitted");
		code2name.put("updated", "updated");
		code2name.put("valid", "valid");
		code2name.put("sysimport:crosswalk:repository", "sysimport:crosswalk:repository");
		code2name.put("sysimport:crosswalk:aggregator", "sysimport:crosswalk:aggregator");
		code2name.put("sysimport:crosswalk:entityregistry", "sysimport:crosswalk:entityregistry");
		code2name.put("sysimport:crosswalk:datasetarchive", "sysimport:crosswalk:datasetarchive");
		code2name.put("sysimport:crosswalk:cris", "sysimport:crosswalk:cris");
		code2name.put("sysimport:mining:repository", "sysimport:mining:repository");
		code2name.put("sysimport:mining:aggregator", "sysimport:mining:aggregator");
		code2name.put("sysimport:mining:entityregistry", "sysimport:mining:entityregistry");
		code2name.put("sysimport:mining:datasetarchive", "sysimport:mining:datasetarchive");
		code2name.put("sysimport:mining:cris", "sysimport:mining:cris");
		code2name.put("userclaim:doi", "userclaim:doi");
		code2name.put("userclaim:driver", "userclaim:driver");
		code2name.put("userclaim:orcid", "userclaim:orcid");
		code2name.put("ec:frameworkprogram", "frameworkprogram");
		code2name.put("ec:hasspecificprogram", "hasspecificprogram");
		code2name.put("CY", "Cyprus");
		code2name.put("CH", "Switzerland");
		code2name.put("PL", "Poland");
		code2name.put("DE", "Germany");
		code2name.put("RU", "Russian Federation");
		code2name.put("TZ", "Tanzania (United Republic of)");
		code2name.put("MD", "Moldova (Republic of)");
		code2name.put("LA", "Lao (People's Democratic Republic)");
		code2name.put("SD", "Sudan");
		code2name.put("ZW", "Zimbabwe");
		code2name.put("GM", "Gambia");
		code2name.put("SV", "El Salvador");
		code2name.put("MV", "Maldives");
		code2name.put("TT", "Trinidad and Tobago");
		code2name.put("MH", "Marshall Islands");
		code2name.put("MO", "Macao");
		code2name.put("XK", "Kosovo * UN resolution");
		code2name.put("LY", "Libyan Arab Jamahiriya");
		code2name.put("CP-CSA", "Combination of CP & CSA");
		code2name.put("CSA", "Coordination and support action");
		code2name.put("ERC", "Support for frontier research (ERC)");
		code2name.put("NoE", "Network of Excellence");
		code2name.put("MC", "Support for training and career development of researchers (Marie Curie)");
	}

	/**
	 * Builds an {@link Oaf} message of kind {@code entity}.
	 *
	 * @param entity
	 *            the entity builder to embed
	 * @param info
	 *            the data info builder; replaced by a default one when it is not initialized
	 * @return the built Oaf message
	 */
	protected static Oaf getOaf(final OafEntity.Builder entity, final DataInfo.Builder info) {
		final Oaf.Builder oaf = _getOaf(Oaf.newBuilder(), info);
		oaf.setKind(Kind.entity);
		oaf.setEntity(entity);
		return oaf.build();
	}

	/**
	 * Builds an {@link Oaf} message of kind {@code relation}.
	 *
	 * @param rel
	 *            the relation builder to embed
	 * @param info
	 *            the data info builder; replaced by a default one when it is not initialized
	 * @return the built Oaf message
	 */
	protected static Oaf getOaf(final OafRel.Builder rel, final DataInfo.Builder info) {
		final Oaf.Builder oaf = _getOaf(Oaf.newBuilder(), info);
		oaf.setKind(Kind.relation);
		oaf.setRel(rel);
		return oaf.build();
	}

	/**
	 * Sets the data info (after passing it through {@link #ensureDataInfo(DataInfo.Builder)}) and the
	 * current system time as timestamp on the given Oaf builder.
	 *
	 * @param oaf
	 *            the builder to populate
	 * @param info
	 *            the candidate data info
	 * @return the same Oaf builder, for chaining
	 */
	private static Oaf.Builder _getOaf(final Oaf.Builder oaf, final DataInfo.Builder info) {
		final DataInfo.Builder dataInfo = ensureDataInfo(info);
		oaf.setDataInfo(dataInfo);
		oaf.setTimestamp(System.currentTimeMillis());
		return oaf;
	}

	/**
	 * Returns the given data info builder when it is usable, otherwise a default one.
	 *
	 * @param info
	 *            the candidate builder; may be {@code null}
	 * @return {@code info} when it is non-null and {@code isInitialized()} reports true; otherwise a builder
	 *         created with provenance action "UNKNOWN", trust "0.9" and both flags false
	 */
	protected static DataInfo.Builder ensureDataInfo(final DataInfo.Builder info) {
		// A missing builder is treated like an uninitialized one rather than failing with a NullPointerException.
		if ((info != null) && info.isInitialized()) { return info; }
		return getDataInfo("UNKNOWN", "0.9", false, false);
	}

	/**
	 * Creates a {@link KeyValue} message.
	 *
	 * @param id
	 *            the value stored as key
	 * @param name
	 *            the value stored as value
	 * @return the built KeyValue
	 */
	protected static KeyValue getKV(final String id, final String name) {
		final KeyValue.Builder kv = KeyValue.newBuilder();
		kv.setKey(id);
		kv.setValue(name);
		return kv.build();
	}

	/**
	 * Creates a relation builder with source, target, relation type and child flag set.
	 *
	 * @param sourceId
	 *            identifier of the source
	 * @param targetId
	 *            identifier of the target
	 * @param relType
	 *            the relation type
	 * @param isChild
	 *            value of the child flag
	 * @return the populated (not yet built) relation builder
	 */
	protected static OafRel.Builder getRel(final String sourceId, final String targetId, final RelType relType, final boolean isChild) {
		final OafRel.Builder rel = OafRel.newBuilder();
		rel.setSource(sourceId);
		rel.setTarget(targetId);
		rel.setRelType(relType);
		rel.setChild(isChild);
		return rel;
	}

	/**
	 * Creates an entity builder populated with the given metadata.
	 *
	 * @param type
	 *            the entity type
	 * @param id
	 *            the entity identifier
	 * @param collectedFrom
	 *            added as a collectedfrom entry
	 * @param originalId
	 *            added as an original id
	 * @param dateOfCollection
	 *            the date of collection
	 * @param pids
	 *            persistent identifiers to add; {@code null} is treated as an empty list
	 * @return the populated (not yet built) entity builder
	 */
	protected static OafEntity.Builder getEntity(final Type type,
			final String id,
			final KeyValue collectedFrom,
			final String originalId,
			final String dateOfCollection,
			final List<StructuredProperty> pids) {
		final List<StructuredProperty> safePids = (pids == null) ? new ArrayList<StructuredProperty>() : pids;

		final OafEntity.Builder entity = OafEntity.newBuilder();
		entity.setType(type);
		entity.setId(id);
		entity.addCollectedfrom(collectedFrom);
		entity.addOriginalId(originalId);
		entity.setDateofcollection(dateOfCollection);
		entity.addAllPid(safePids);
		return entity;
	}

	/**
	 * Creates a data info builder.
	 *
	 * @param provenanceaction
	 *            the provenance action; "UNKNOWN" is used when null or empty
	 * @param trust
	 *            the trust value; "0.1" is used when null or empty
	 * @param deletedbyinference
	 *            value of the deletedbyinference flag
	 * @param inferred
	 *            value of the inferred flag
	 * @return the populated builder, whose provenance action qualifier uses the "dnet:provenanceActions" scheme
	 */
	public static DataInfo.Builder getDataInfo(String provenanceaction, String trust, final boolean deletedbyinference, final boolean inferred) {
		final String action = StringUtils.isEmpty(provenanceaction) ? "UNKNOWN" : provenanceaction;
		final String trustValue = StringUtils.isEmpty(trust) ? "0.1" : trust;

		final DataInfo.Builder info = DataInfo.newBuilder();
		info.setDeletedbyinference(deletedbyinference);
		info.setInferred(inferred);
		info.setTrust(trustValue);
		info.setProvenanceaction(getSimpleQualifier(action, "dnet:provenanceActions"));
		return info;
	}

	/**
	 * Creates a qualifier builder whose class id and class name are both {@code classname} and whose
	 * scheme id and scheme name are both {@code schemename}.
	 *
	 * @param classname
	 *            used as class id and class name
	 * @param schemename
	 *            used as scheme id and scheme name
	 * @return the populated qualifier builder
	 */
	protected static Qualifier.Builder getSimpleQualifier(final String classname, final String schemename) {
		final Qualifier.Builder qualifier = Qualifier.newBuilder();
		qualifier.setClassid(classname);
		qualifier.setClassname(classname);
		qualifier.setSchemeid(schemename);
		qualifier.setSchemename(schemename);
		return qualifier;
	}

	/**
	 * Creates a qualifier builder from its four components.
	 *
	 * @param classid
	 *            the class id
	 * @param classname
	 *            the class name
	 * @param schemeid
	 *            the scheme id
	 * @param schemename
	 *            the scheme name
	 * @return the populated qualifier builder
	 */
	protected static Qualifier.Builder getQualifier(final String classid, final String classname, final String schemeid, final String schemename) {
		final Qualifier.Builder qualifier = Qualifier.newBuilder();
		qualifier.setClassid(classid);
		qualifier.setClassname(classname);
		qualifier.setSchemeid(schemeid);
		qualifier.setSchemename(schemename);
		return qualifier;
	}

	/**
	 * Sets class id and class name on a qualifier builder from the first element of {@code fields}.
	 * The class name is looked up in {@code code2name}, falling back to the code itself; when no usable
	 * first element exists the supplied defaults are applied.
	 *
	 * @param qualifier
	 *            the builder to update
	 * @param fields
	 *            candidate values; only the first element is considered
	 * @param defaultClassId
	 *            class id used when {@code fields} is null, empty or starts with null
	 * @param defaultClassName
	 *            class name used in the same situation
	 * @return the same qualifier builder
	 */
	protected static Qualifier.Builder setQualifier(final Qualifier.Builder qualifier,
			final List<String> fields,
			final String defaultClassId,
			final String defaultClassName) {
		final String code = ((fields == null) || fields.isEmpty()) ? null : fields.get(0);

		if (code == null) {
			qualifier.setClassid(defaultClassId);
			qualifier.setClassname(defaultClassName);
			return qualifier;
		}

		final String label = code2name.get(code);
		qualifier.setClassid(code);
		qualifier.setClassname(label == null ? code : label);
		return qualifier;
	}

	/**
	 * Adds one structured property per value to the given field of a message builder.
	 * Each property uses {@code classid} as class id and name, and {@code schemeid} as scheme id and name.
	 *
	 * @param builder
	 *            the message builder to update
	 * @param fd
	 *            the field to add to
	 * @param values
	 *            the property values; {@code null} means nothing is added
	 * @param classid
	 *            qualifier class id/name
	 * @param schemeid
	 *            qualifier scheme id/name
	 */
	protected static void addStructuredProps(final Message.Builder builder,
			final FieldDescriptor fd,
			final List<String> values,
			final String classid,
			final String schemeid) {
		if (values == null) { return; }

		for (final String value : values) {
			final StructuredProperty property = getStructuredProperty(value, classid, classid, schemeid, schemeid);
			addField(builder, fd, property);
		}
	}

	/**
	 * Extracts persistent identifiers from a list of DOM nodes.
	 * <p>
	 * Only element nodes whose local name is "identifier" (case-insensitive) are considered. The
	 * {@code identifierType} attribute, lower-cased, becomes the qualifier class; elements without that
	 * attribute, with an empty one, or of type "url" are skipped. The content of the first text child is
	 * used as the pid value; empty values produce no entry.
	 *
	 * @param nodelist
	 *            the nodes to inspect; {@code null} yields an empty list
	 * @return the parsed pids, qualified with the "dnet:pid_types" scheme; never contains null elements
	 */
	protected static List<StructuredProperty> parsePids(final NodeList nodelist) {

		final List<StructuredProperty> pids = Lists.newArrayList();
		if (nodelist == null) { return pids; }

		for (int i = 0; i < nodelist.getLength(); i++) {
			final Node node = nodelist.item(i);
			if (node.getNodeType() != Node.ELEMENT_NODE) {
				continue;
			}

			// getLocalName() may be null (nodes built without namespace support); Locale.ROOT keeps the
			// comparison independent of the JVM default locale.
			final String localName = node.getLocalName();
			if ((localName == null) || !"identifier".equals(localName.toLowerCase(Locale.ROOT))) {
				continue;
			}

			// The type attribute does not depend on the child being visited, so it is validated once per element.
			final Node pidType = node.getAttributes().getNamedItem("identifierType");
			final String rawType = pidType != null ? pidType.getNodeValue() : null;
			if ((rawType == null) || rawType.isEmpty() || rawType.equalsIgnoreCase("url")) {
				continue;
			}
			final String type = rawType.toLowerCase(Locale.ROOT);

			final NodeList children = node.getChildNodes();
			for (int j = 0; j < children.getLength(); j++) {
				final Node child = children.item(j);
				if (child.getNodeType() != Node.TEXT_NODE) {
					continue;
				}

				// getStructuredProperty returns null for an empty value: do not store that null in the result.
				final StructuredProperty pid = getStructuredProperty(child.getTextContent(), type, type, "dnet:pid_types", "dnet:pid_types");
				if (pid != null) {
					pids.add(pid);
				}
				// Only the first text child is taken into account.
				break;
			}
		}
		return pids;
	}

	/**
	 * Adds a value to a field of a message builder, converting it to the Java type the field requires.
	 * <p>
	 * Lists are expanded and each element is added individually. Scalar values are converted from their
	 * string form: BOOL, DOUBLE and FLOAT to the matching wrapper, 32-bit signed integers to
	 * {@link Integer}, 64-bit signed integers to {@link Long}, BYTES to a UTF-8 {@link ByteString}.
	 * MESSAGE fields accept either a ready-made Qualifier/StructuredProperty/KeyValue or a delimited string.
	 * Other field types receive the value unchanged.
	 *
	 * @param builder
	 *            the message builder to update
	 * @param descriptor
	 *            the field to populate
	 * @param value
	 *            the value (or list of values); {@code null} is ignored
	 * @throws NumberFormatException
	 *             when a numeric field receives a value that cannot be parsed
	 */
	protected static void addField(final Builder builder, final FieldDescriptor descriptor, final Object value) {

		if (value == null) { return; }

		if (value instanceof List<?>) {
			for (final Object o : (List<?>) value) {
				addField(builder, descriptor, o);
			}
			return;
		}

		Object fieldValue = value;
		switch (descriptor.getType()) {
		case BOOL:
			fieldValue = Boolean.valueOf(value.toString());
			break;
		case BYTES:
			// The protobuf reflection API represents bytes fields as ByteString, not byte[].
			fieldValue = (value instanceof ByteString) ? value : ByteString.copyFromUtf8(value.toString());
			break;
		case DOUBLE:
			fieldValue = Double.valueOf(value.toString());
			break;
		case FLOAT:
			fieldValue = Float.valueOf(value.toString());
			break;
		case INT32:
		case SINT32:
			fieldValue = Integer.valueOf(value.toString());
			break;
		case INT64:
		case SINT64:
			// 64-bit fields are parsed as Long so that values outside the int range are accepted.
			fieldValue = Long.valueOf(value.toString());
			break;
		case MESSAGE:
			fieldValue = buildMessageValue(builder, descriptor, value);
			break;
		default:
			break;
		}

		doAddField(builder, descriptor, fieldValue);
	}

	/**
	 * Builds the sub-message for a MESSAGE field: merges an instance of a known message type, or parses
	 * the string form of the value using the separator associated with that type.
	 */
	private static Message buildMessageValue(final Builder builder, final FieldDescriptor descriptor, final Object value) {
		final Builder q = builder.newBuilderForField(descriptor);
		final String typeName = q.getDescriptorForType().getName();

		if ("Qualifier".equals(typeName)) {
			if (value instanceof Qualifier) {
				q.mergeFrom((Qualifier) value);
			} else {
				parseMessage(q, Qualifier.getDescriptor(), value.toString(), "@@@");
			}
		} else if ("StructuredProperty".equals(typeName)) {
			if (value instanceof StructuredProperty) {
				q.mergeFrom((StructuredProperty) value);
			} else {
				parseMessage(q, StructuredProperty.getDescriptor(), value.toString(), "###");
			}
		} else if ("KeyValue".equals(typeName)) {
			if (value instanceof KeyValue) {
				q.mergeFrom((KeyValue) value);
			} else {
				parseMessage(q, KeyValue.getDescriptor(), value.toString(), "&&&");
			}
		}
		// Any other message type is left as an empty partial message.
		return q.buildPartial();
	}

	/**
	 * Stores an already converted value in a message builder: appended for repeated fields, set for
	 * optional or required ones.
	 *
	 * @param builder
	 *            the message builder to update
	 * @param fd
	 *            the target field
	 * @param value
	 *            the value to store; {@code null} is ignored
	 */
	protected static void doAddField(final Builder builder, final FieldDescriptor fd, final Object value) {
		if (value == null) { return; }

		if (fd.isRepeated()) {
			builder.addRepeatedField(fd, value);
			return;
		}
		if (fd.isOptional() || fd.isRequired()) {
			builder.setField(fd, value);
		}
	}

	/**
	 * Populates a message builder from a delimited string: the value is split on {@code split}, the
	 * tokens are trimmed and paired with the fields of {@code descriptor}, and each pair is added through
	 * {@link #addField(Builder, FieldDescriptor, Object)}.
	 *
	 * @param builder
	 *            the message builder to populate
	 * @param descriptor
	 *            descriptor providing the list of fields
	 * @param value
	 *            the delimited string
	 * @param split
	 *            the separator between tokens
	 */
	protected static void parseMessage(final Builder builder, final Descriptor descriptor, final String value, final String split) {
		final List<FieldDescriptor> fields = descriptor.getFields();
		final List<String> tokens = Lists.newArrayList(Splitter.on(split).trimResults().split(value));

		final IterablePair<FieldDescriptor, String> pairs = new IterablePair<FieldDescriptor, String>(fields, tokens);
		for (final Pair<FieldDescriptor, String> pair : pairs) {
			addField(builder, pair.getKey(), pair.getValue());
		}
	}

	/**
	 * Collects every node of the list into a fresh {@code ValueMap} via
	 * {@link #getNodeValue(Node, Map)}.
	 *
	 * @param nodeList
	 *            the nodes to process
	 * @return the populated value map
	 */
	protected static ValueMap parseNodeList(final NodeList nodeList) {
		final ValueMap result = new ValueMap();

		int index = 0;
		while (index < nodeList.getLength()) {
			getNodeValue(nodeList.item(index), result);
			index++;
		}
		return result;
	}

	/**
	 * Parses the node list and returns the string form of the result wrapped in a CDATA section.
	 *
	 * @param nodelist
	 *            the nodes to parse
	 * @return the parse result between {@code <![CDATA[} and {@code ]]>} markers, each on its own line
	 */
	public static String testParse(final NodeList nodelist) {
		final ValueMap parsed = parseNodeList(nodelist);

		final StringBuilder out = new StringBuilder();
		out.append("<![CDATA[\n");
		out.append(parsed);
		out.append("\n]]>");
		return out.toString();
	}

	/**
	 * Records an element node in the given map: the key is the lower-cased element name and the value
	 * list receives an {@code Element} holding the node value of the first child (if any) together with
	 * the element's attributes.
	 * <p>
	 * Nodes that are not elements are ignored, since they carry neither a usable name nor attributes.
	 *
	 * @param node
	 *            the node to record; {@code null} and non-element nodes are skipped
	 * @param values
	 *            the map to update
	 */
	protected static void getNodeValue(final Node node, final Map<String, ElementList> values) {

		if ((node == null) || (node.getNodeType() != Node.ELEMENT_NODE)) { return; }

		// The local name is null for nodes created without namespace support: fall back to the node name.
		final String rawName = node.getLocalName() != null ? node.getLocalName() : node.getNodeName();
		// Locale.ROOT keeps the key independent of the JVM default locale.
		final String nodeName = rawName.toLowerCase(Locale.ROOT);

		ElementList elements = values.get(nodeName);
		if (elements == null) {
			elements = new ElementList();
			values.put(nodeName, elements);
		}

		final Node nodeText = node.getFirstChild();
		final Element element = nodeText != null ? new Element(nodeText.getNodeValue()) : new Element();
		final Map<String, String> attrs = Maps.newHashMap();

		final NamedNodeMap attributeList = node.getAttributes();
		if (attributeList != null) {
			for (int j = 0; j < attributeList.getLength(); j++) {
				final Node attr = attributeList.item(j);
				final String attrName = attr.getLocalName() != null ? attr.getLocalName() : attr.getNodeName();
				attrs.put(attrName, attr.getNodeValue());
			}
		}
		element.setAttributes(attrs);
		elements.add(element);
	}

	/**
	 * Encodes binary data as a Base64 string.
	 *
	 * @param data
	 *            the bytes to encode
	 * @return the Base64 representation
	 */
	protected static String base64(final byte[] data) {
		// Base64 output is pure ASCII; decode it explicitly instead of relying on the platform default charset.
		return new String(Base64.encodeBase64(data), Charset.forName("US-ASCII"));
	}

	/**
	 * Replaces every match of a regular expression in a string.
	 *
	 * @param s
	 *            the input string
	 * @param regex
	 *            the regular expression to look for
	 * @param replacement
	 *            the replacement, with the semantics of {@link String#replaceAll(String, String)}
	 * @return the string with all matches replaced
	 */
	public static String replace(final String s, final String regex, final String replacement) {
		final String replaced = s.replaceAll(regex, replacement);
		return replaced;
	}

	/**
	 * Removes leading and trailing whitespace, as defined by {@link String#trim()}.
	 *
	 * @param s
	 *            the input string
	 * @return the trimmed string
	 */
	public static String trim(final String s) {
		final String trimmed = s.trim();
		return trimmed;
	}

	/**
	 * Strips the leading "&lt;type&gt;|" marker from a string, using the string form of the given type.
	 *
	 * @param type
	 *            the type whose {@code toString()} is the prefix
	 * @param s
	 *            the string to strip
	 * @return the string without the prefix
	 */
	protected static String removePrefix(final Type type, final String s) {
		final String prefix = type.toString();
		return removePrefix(prefix, s);
	}

	/**
	 * Removes {@code prefix + "|"} from the start of {@code s} when present.
	 * A {@code null} input string is handled as the literal text "null".
	 *
	 * @param prefix
	 *            the prefix preceding the "|" separator
	 * @param s
	 *            the string to strip
	 * @return the string without the prefix
	 */
	private static String removePrefix(final String prefix, final String s) {
		final String text = String.valueOf(s);
		final String marker = prefix + "|";
		return StringUtils.removeStart(text, marker);
	}

	/**
	 * Creates a qualifier builder with only the scheme set.
	 *
	 * @param scheme
	 *            used as both scheme id and scheme name
	 * @return the qualifier builder, with class id and class name left unset
	 */
	protected static Qualifier.Builder getDefaultQualifier(final String scheme) {
		final Qualifier.Builder qualifier = Qualifier.newBuilder();
		qualifier.setSchemeid(scheme);
		qualifier.setSchemename(scheme);
		return qualifier;
	}

	/**
	 * Builds a structured property from a value and the components of its qualifier.
	 *
	 * @param value
	 *            the property value
	 * @param classid
	 *            qualifier class id
	 * @param classname
	 *            qualifier class name
	 * @param schemeid
	 *            qualifier scheme id
	 * @param schemename
	 *            qualifier scheme name
	 * @return the built property, or {@code null} when {@code value} is null or empty
	 */
	protected static StructuredProperty getStructuredProperty(final String value,
			final String classid,
			final String classname,
			final String schemeid,
			final String schemename) {
		if (StringUtils.isEmpty(value)) { return null; }

		final Qualifier.Builder qualifier = getQualifier(classid, classname, schemeid, schemename);
		final StructuredProperty.Builder property = StructuredProperty.newBuilder();
		property.setValue(value);
		property.setQualifier(qualifier);
		return property.build();
	}

	/**
	 * Concatenates a prefix and an external id, left-padding the id with underscores up to
	 * {@code MAX_NSPREFIX_LEN} minus the prefix length.
	 *
	 * @param prefix
	 *            the leading part
	 * @param externalId
	 *            the id to pad and append
	 * @return the prefix followed by the padded id
	 */
	public static String generateNsPrefix(final String prefix, final String externalId) {
		final int padWidth = MAX_NSPREFIX_LEN - prefix.length();
		final String paddedId = StringUtils.leftPad(externalId, padWidth, "_");
		return prefix + paddedId;
	}

	/**
	 * Computes the MD5 digest of the UTF-8 bytes of a string, as lower-case hexadecimal text.
	 * The digest is used to derive identifiers, not for security purposes.
	 *
	 * @param s
	 *            the string to hash
	 * @return the hexadecimal digest, or {@code null} (after reporting on standard error) when the input
	 *         is null or the MD5 algorithm is unavailable
	 */
	public static String md5(final String s) {
		if (s == null) {
			System.err.println("Error creating id: input string is null");
			return null;
		}
		try {
			final MessageDigest md = MessageDigest.getInstance("MD5");
			md.update(s.getBytes(Charset.forName("UTF-8")));
			return new String(Hex.encodeHex(md.digest()));
		} catch (NoSuchAlgorithmException e) {
			// Keep the "null on failure" contract, but report the cause instead of hiding it.
			System.err.println("Error creating id: " + e);
			return null;
		}
	}

	/**
	 * Builds an identifier of the form {@code <type number>|<prefix>::<md5 of id>}.
	 *
	 * @param entityType
	 *            name of the entity type, resolved by {@link #oafSimpleId(String, String)}
	 * @param prefix
	 *            the namespace prefix
	 * @param id
	 *            the original identifier, which gets hashed
	 * @return the identifier, or an empty string when {@code id} or {@code prefix} is null or empty
	 */
	public static String oafId(final String entityType, final String prefix, final String id) {
		// Missing parts are handled like empty ones instead of failing with a NullPointerException.
		if ((id == null) || id.isEmpty() || (prefix == null) || prefix.isEmpty()) { return ""; }
		return oafSimpleId(entityType, prefix + "::" + md5(id));
	}

	/**
	 * Prefixes an id with the number of the named entity type and a "|" separator, then removes all
	 * whitespace from the result.
	 *
	 * @param entityType
	 *            name of a {@code Type} constant
	 * @param id
	 *            the identifier to prefix
	 * @return the whitespace-free identifier
	 */
	public static String oafSimpleId(final String entityType, final String id) {
		final int typeNumber = Type.valueOf(entityType).getNumber();
		final String rawId = typeNumber + "|" + id;
		return rawId.replaceAll("\\s|\\n", "");
	}

	/**
	 * Splits a full id on the first "::" and delegates to {@link #oafId(String, String, String)} with
	 * the part before the separator as prefix and the part after it as id.
	 *
	 * @param entityType
	 *            name of the entity type
	 * @param fullId
	 *            identifier of the form {@code prefix::id}
	 * @return the identifier produced by {@code oafId}
	 */
	public static String oafSplitId(final String entityType, final String fullId) {
		final String prefix = StringUtils.substringBefore(fullId, "::");
		final String localId = StringUtils.substringAfter(fullId, "::");
		return oafId(entityType, prefix, localId);
	}

}
