package eu.dnetlib.pace.common;

import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
import com.ibm.icu.text.Transliterator;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.text.Normalizer;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;

/* loaded from: input_file:WEB-INF/lib/dhp-common-1.2.5-VALIDATION.jar:eu/dnetlib/pace/common/PaceCommonUtils.class */
/**
 * Static string helpers: alias replacement, transliteration, Unicode
 * normalisation, tokenisation and loading of word lists from the classpath.
 *
 * <p>None of the methods accept {@code null} input unless stated otherwise.
 */
public class PaceCommonUtils {
    /**
     * Characters that {@link #fixAliases(String)} replaces. The character at index
     * {@code i} is replaced by the character at index {@code i} of {@link #aliases_to},
     * so both strings must always have the same length.
     */
    protected static final String aliases_from = "⁰¹²³⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎àáâäæãåāèéêëēėęəîïíīįìôöòóœøōõûüùúūßśšłžźżçćčñń";
    /** Replacement characters, index-aligned with {@link #aliases_from}. */
    protected static final String aliases_to = "0123456789+-=()n0123456789+-=()aaaaaaaaeeeeeeeeiiiiiioooooooouuuuussslzzzcccnn";
    /** Shared ICU "Any-Eng" transliterator used by {@link #transliterate(String)}. */
    protected static Transliterator transliterator = Transliterator.getInstance("Any-Eng");
    /** Matches a literal backslash, the letter {@code u} and exactly four hexadecimal digits. */
    protected static Pattern hexUnicodePattern = Pattern.compile("\\\\u(\\p{XDigit}{4})");

    // Regexes used by normalize(), compiled once instead of on every call.
    // They are intentionally kept as separate expressions and applied one after another.
    private static final Pattern NOT_SPACE_OR_WORD = Pattern.compile("[^ \\w]+");
    private static final Pattern DIACRITICAL_MARKS = Pattern.compile("(\\p{InCombiningDiacriticalMarks})+");
    private static final Pattern PUNCTUATION = Pattern.compile("(\\p{Punct})+");
    private static final Pattern DIGITS = Pattern.compile("(\\d)+");
    private static final Pattern NEWLINES = Pattern.compile("(\\n)+");

    /**
     * Replaces every character found in {@link #aliases_from} with the character at
     * the same position in {@link #aliases_to}; all other characters are copied as-is.
     *
     * @param str the text to process
     * @return a new string of the same length with the aliases replaced
     */
    protected static String fixAliases(String str) {
        StringBuilder sb = new StringBuilder(str.length());
        for (int pos = 0; pos < str.length(); pos++) {
            char current = str.charAt(pos);
            int aliasIndex = aliases_from.indexOf(current);
            sb.append(aliasIndex >= 0 ? aliases_to.charAt(aliasIndex) : current);
        }
        return sb.toString();
    }

    /**
     * Runs the shared {@link #transliterator} over the input.
     *
     * @param str the text to transliterate
     * @return the transliterated text, or {@code str} itself if the transliterator throws
     */
    protected static String transliterate(String str) {
        try {
            return transliterator.transliterate(str);
        } catch (Exception e) {
            // Best effort: fall back to the untouched input rather than failing the caller.
            return str;
        }
    }

    /**
     * Produces a simplified, lower-case form of the input by applying, in order:
     * {@link #unicodeNormalization(String)}, {@link #nfd(String)},
     * {@link #transliterate(String)}, {@link #fixAliases(String)}, lower-casing,
     * and then a fixed sequence of regex replacements followed by {@code trim()}.
     *
     * @param str the text to normalise
     * @return the normalised text
     */
    public static String normalize(String str) {
        // Locale.ROOT keeps the result independent of the JVM default locale; with a
        // Turkish default locale, toLowerCase() maps 'I' to dotless 'ı', which the
        // first regex below would then delete.
        String text = fixAliases(transliterate(nfd(unicodeNormalization(str)))).toLowerCase(Locale.ROOT);

        // NOTE(review): \w is ASCII-only by default, so after the first replacement only
        // spaces, ASCII letters, digits and '_' should remain; some of the later steps
        // therefore look redundant. They are kept in the original order to preserve behaviour.
        text = NOT_SPACE_OR_WORD.matcher(text).replaceAll("");
        text = DIACRITICAL_MARKS.matcher(text).replaceAll("");
        text = PUNCTUATION.matcher(text).replaceAll(" ");
        text = DIGITS.matcher(text).replaceAll(" ");
        text = NEWLINES.matcher(text).replaceAll(" ");
        return text.trim();
    }

    /**
     * Applies Unicode canonical decomposition (NFD) to the input.
     *
     * @param str the text to decompose
     * @return the NFD form of {@code str}
     */
    public static String nfd(String str) {
        return Normalizer.normalize(str, Normalizer.Form.NFD);
    }

    /**
     * Replaces every textual escape of the form backslash + {@code u} + four hex
     * digits with the character whose UTF-16 code unit those digits denote.
     *
     * @param str the text possibly containing such escapes
     * @return the text with all matching escapes replaced
     */
    public static String unicodeNormalization(String str) {
        Matcher matcher = hexUnicodePattern.matcher(str);
        // StringBuffer (not StringBuilder) because Matcher.appendReplacement only accepts
        // a StringBuilder from Java 9 onwards.
        StringBuffer decoded = new StringBuffer(str.length());
        while (matcher.find()) {
            char codeUnit = (char) Integer.parseInt(matcher.group(1), 16);
            // quoteReplacement: the decoded character may itself be '\' or '$'.
            matcher.appendReplacement(decoded, Matcher.quoteReplacement(String.valueOf(codeUnit)));
        }
        matcher.appendTail(decoded);
        return decoded.toString();
    }

    /**
     * Reads a UTF-8 text resource line by line, relative to this class, and returns
     * the set of its lines after transliteration ("Any-Eng") and
     * {@link #fixAliases(String)}.
     *
     * <p>This is best effort: if the resource is missing or any exception occurs
     * while reading or converting it, an empty set is returned and the failure is
     * not reported. JVM {@link Error}s are not caught.
     *
     * @param str the resource name, resolved via {@code PaceCommonUtils.class.getResourceAsStream}
     * @return a mutable set with the converted lines, or an empty set on failure
     */
    public static Set<String> loadFromClasspath(String str) {
        Transliterator lineTransliterator = Transliterator.getInstance("Any-Eng");
        Set<String> entries = Sets.newHashSet();
        // try-with-resources guarantees the classpath stream is closed on every path.
        try (InputStream resource = PaceCommonUtils.class.getResourceAsStream(str)) {
            if (resource == null) {
                // Resource not found on the classpath.
                return Sets.newHashSet();
            }
            for (String line : IOUtils.readLines(resource, StandardCharsets.UTF_8)) {
                entries.add(fixAliases(lineTransliterator.transliterate(line)));
            }
            return entries;
        } catch (Exception e) {
            // Deliberately swallowed: partial results are discarded and callers get an empty set.
            return Sets.newHashSet();
        }
    }

    /**
     * Splits the input on single spaces, trimming each piece and skipping empty
     * ones, and exposes at most the first {@code i} resulting tokens.
     *
     * @param str the text to split
     * @param i   the maximum number of tokens to return
     * @return a lazily evaluated view over the first {@code i} tokens
     */
    protected static Iterable<String> tokens(String str, int i) {
        return Iterables.limit(Splitter.on(" ").omitEmptyStrings().trimResults().split(str), i);
    }
}
