/*
 * Decompiled with CFR 0.152.
 */
package eu.dnetlib.pace.common;

import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
import com.ibm.icu.text.Transliterator;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.text.Normalizer;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;

/**
 * Static helpers for normalizing text: alias folding, transliteration, Unicode
 * decomposition, decoding of literal hex escapes, and simple tokenization.
 *
 * <p>Members are {@code protected}/{@code public} static so that subclasses can call
 * them directly.
 */
public class PaceCommonUtils {
    /** ICU transliterator obtained for the id {@code "Any-Eng"}; used by {@link #transliterate(String)}. */
    protected static Transliterator transliterator = Transliterator.getInstance("Any-Eng");

    /**
     * Characters that {@link #fixAliases(String)} replaces. The character at index
     * {@code i} maps to {@code aliases_to.charAt(i)}, so both strings must keep the
     * same length and ordering.
     */
    protected static final String aliases_from = "\u2070\u00b9\u00b2\u00b3\u2074\u2075\u2076\u2077\u2078\u2079\u207a\u207b\u207c\u207d\u207e\u207f\u2080\u2081\u2082\u2083\u2084\u2085\u2086\u2087\u2088\u2089\u208a\u208b\u208c\u208d\u208e\u00e0\u00e1\u00e2\u00e4\u00e6\u00e3\u00e5\u0101\u00e8\u00e9\u00ea\u00eb\u0113\u0117\u0119\u0259\u00ee\u00ef\u00ed\u012b\u012f\u00ec\u00f4\u00f6\u00f2\u00f3\u0153\u00f8\u014d\u00f5\u00fb\u00fc\u00f9\u00fa\u016b\u00df\u015b\u0161\u0142\u017e\u017a\u017c\u00e7\u0107\u010d\u00f1\u0144";

    /** Replacement characters, positionally aligned with {@link #aliases_from}. */
    protected static final String aliases_to = "0123456789+-=()n0123456789+-=()aaaaaaaaeeeeeeeeiiiiiioooooooouuuuussslzzzcccnn";

    /**
     * Matches a literal escape sequence written out in the text: a backslash, the
     * letter {@code u}, and exactly four hexadecimal digits (captured as group 1).
     */
    protected static Pattern hexUnicodePattern = Pattern.compile("\\\\u(\\p{XDigit}{4})");

    /**
     * Replaces every character found in {@link #aliases_from} with the character at
     * the same index of {@link #aliases_to}; all other characters are copied unchanged.
     *
     * @param s the text to process; must not be {@code null}
     * @return a new string of the same length with aliases replaced
     */
    protected static String fixAliases(String s) {
        StringBuilder out = new StringBuilder(s.length());
        for (int pos = 0; pos < s.length(); pos++) {
            char ch = s.charAt(pos);
            int aliasIndex = aliases_from.indexOf(ch);
            out.append(aliasIndex >= 0 ? aliases_to.charAt(aliasIndex) : ch);
        }
        return out.toString();
    }

    /**
     * Runs the shared {@link #transliterator} over {@code s}.
     *
     * @param s the text to transliterate
     * @return the transliterated text, or {@code s} itself if the transliterator
     *         throws any exception (deliberate best-effort fallback)
     */
    protected static String transliterate(String s) {
        try {
            return transliterator.transliterate(s);
        }
        catch (Exception e) {
            // Best effort: an input the transliterator cannot handle is passed through as-is.
            return s;
        }
    }

    /**
     * Normalizes free text into a lower-case string made of ASCII letters and spaces.
     *
     * <p>The pipeline is: decode literal hex escapes, decompose to NFD, transliterate,
     * fold aliases, lower-case, then apply the regex clean-up steps below in order.
     *
     * @param s the text to normalize; must not be {@code null}
     * @return the normalized, trimmed text
     */
    public static String normalize(String s) {
        String folded = fixAliases(transliterate(nfd(unicodeNormalization(s))));
        // Locale.ROOT keeps the result independent of the JVM default locale; with a
        // Turkish/Azeri default, 'I' would lower-case to dotless 'ı', which the next
        // step would then delete.
        return folded.toLowerCase(Locale.ROOT)
                // Drops everything that is neither a space nor an ASCII word character.
                .replaceAll("[^ \\w]+", "")
                // NOTE(review): combining marks are not matched by \w, so they are already
                // gone after the previous step; this step looks like a no-op. Kept as-is.
                .replaceAll("(\\p{InCombiningDiacriticalMarks})+", "")
                // Of the ASCII punctuation only '_' can survive the first step.
                .replaceAll("(\\p{Punct})+", " ")
                // Runs of digits become a single space.
                .replaceAll("(\\d)+", " ")
                // NOTE(review): newlines are also removed by the first step; kept as-is.
                .replaceAll("(\\n)+", " ")
                .trim();
    }

    /**
     * Applies Unicode canonical decomposition (NFD) to {@code s}.
     *
     * @param s the text to decompose; must not be {@code null}
     * @return the NFD form of {@code s}
     */
    public static String nfd(String s) {
        return Normalizer.normalize(s, Normalizer.Form.NFD);
    }

    /**
     * Replaces each literal escape matched by {@link #hexUnicodePattern} with the
     * single UTF-16 char whose code is given by its four hex digits.
     *
     * @param s the text to decode; must not be {@code null}
     * @return the text with every matched escape replaced by the decoded character
     */
    public static String unicodeNormalization(String s) {
        Matcher escapes = hexUnicodePattern.matcher(s);
        // StringBuffer because Matcher.appendReplacement only accepts StringBuilder from Java 9 on.
        StringBuffer decoded = new StringBuffer(s.length());
        while (escapes.find()) {
            char decodedChar = (char) Integer.parseInt(escapes.group(1), 16);
            // quoteReplacement stops a decoded '$' or '\' from being read as a group reference.
            escapes.appendReplacement(decoded, Matcher.quoteReplacement(String.valueOf(decodedChar)));
        }
        escapes.appendTail(decoded);
        return decoded.toString();
    }

    /**
     * Reads a UTF-8 text resource line by line, transliterates each line with a
     * freshly created {@code "Any-Eng"} transliterator, folds aliases, and collects
     * the results.
     *
     * <p>Loading is best-effort: if the resource does not exist, or reading or
     * transliterating fails with an exception, an empty set is returned and any
     * partially read content is discarded.
     *
     * @param classpath the resource name, resolved via
     *                  {@code PaceCommonUtils.class.getResourceAsStream}
     * @return a new mutable set of processed lines, or a new empty set on failure
     */
    public static Set<String> loadFromClasspath(String classpath) {
        // A dedicated instance, as in the original code, rather than the shared static field.
        Transliterator lineTransliterator = Transliterator.getInstance("Any-Eng");
        Set<String> lines = Sets.newHashSet();
        // try-with-resources closes the stream on every path; a null resource is permitted.
        try (InputStream in = PaceCommonUtils.class.getResourceAsStream(classpath)) {
            if (in == null) {
                // Resource not found: same empty result as before, without relying on an NPE.
                return Sets.newHashSet();
            }
            for (String line : IOUtils.readLines(in, StandardCharsets.UTF_8)) {
                lines.add(fixAliases(lineTransliterator.transliterate(line)));
            }
        }
        catch (Exception e) {
            // Deliberate best-effort: callers get an empty set instead of a failure.
            // JVM Errors (e.g. OutOfMemoryError) are no longer swallowed here.
            return Sets.newHashSet();
        }
        return lines;
    }

    /**
     * Splits {@code s} on single spaces, trimming each piece and skipping empty ones,
     * and exposes at most {@code maxTokens} of the resulting tokens.
     *
     * @param s         the text to split; must not be {@code null}
     * @param maxTokens the maximum number of tokens to return; must not be negative
     * @return a lazily evaluated view over the first {@code maxTokens} tokens
     */
    protected static Iterable<String> tokens(String s, int maxTokens) {
        Iterable<String> allTokens = Splitter.on(" ").omitEmptyStrings().trimResults().split(s);
        return Iterables.limit(allTokens, maxTokens);
    }
}

