package org.carrot2.text.linguistic;

import com.carrotsearch.hppc.ObjectContainer;
import com.carrotsearch.hppc.ObjectOpenHashSet;
import com.google.common.base.Function;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.apache.lucene.analysis.fa.PersianAnalyzer;
import org.carrot2.core.LanguageCode;
import org.carrot2.core.attribute.Init;
import org.carrot2.core.attribute.Internal;
import org.carrot2.core.attribute.Processing;
import org.carrot2.text.linguistic.DefaultLexicalDataFactoryDescriptor;
import org.carrot2.text.util.MutableCharArray;
import org.carrot2.util.CharArrayUtils;
import org.carrot2.util.annotations.AspectModified;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.AttributeLevel;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.DefaultGroups;
import org.carrot2.util.attribute.Group;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Label;
import org.carrot2.util.attribute.Level;
import org.carrot2.util.attribute.constraint.ImplementingClasses;
import org.carrot2.util.resource.IResource;
import org.carrot2.util.resource.ResourceCache;
import org.carrot2.util.resource.ResourceLookup;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link ILexicalDataFactory} that builds {@link ILexicalData} from two plain-text
 * resources per language: {@code stopwords.<iso>} and {@code stoplabels.<iso>}, where
 * {@code <iso>} is {@link LanguageCode#getIsoCode()}.
 *
 * <p>Loaded data is kept in a static {@link ResourceCache} that is queried with the
 * configured {@link #resourceLookup}. Besides one entry per language, the loaded map
 * contains an entry under the {@code null} key holding the union of the stop words and
 * stop labels of all languages; that entry is served when {@link #mergeResources} is
 * {@code true}.
 */
@Bindable(inherit = {LexicalDataLoader.class})
/* loaded from: input_file:WEB-INF/lib/carrot2-mini-3.9.3.jar:org/carrot2/text/linguistic/DefaultLexicalDataFactory.class */
public class DefaultLexicalDataFactory implements ILexicalDataFactory {
    static final Logger logger = LoggerFactory.getLogger(DefaultLexicalDataFactory.class);

    /**
     * Loader handed to {@link #cache}; delegates to {@link #reloadResources(ResourceLookup)}.
     * Its {@code equals}/{@code hashCode} deliberately throw, so any attempt to compare or
     * hash the loader itself fails fast.
     */
    private static final Function<ResourceLookup, HashMap<LanguageCode, ILexicalData>> resourceLoader = new Function<ResourceLookup, HashMap<LanguageCode, ILexicalData>>() {
        @Override
        public HashMap<LanguageCode, ILexicalData> apply(ResourceLookup resourceLookup) {
            return DefaultLexicalDataFactory.reloadResources(resourceLookup);
        }

        @Override
        public boolean equals(Object obj) {
            throw new UnsupportedOperationException();
        }

        @Override
        public int hashCode() {
            throw new UnsupportedOperationException();
        }
    };

    /** Static cache of per-language lexical data, queried with a {@link ResourceLookup}. */
    private static final ResourceCache<HashMap<LanguageCode, ILexicalData>> cache = new ResourceCache<>(resourceLoader);

    /**
     * Flag passed to the cache on the next {@link #getLexicalData(LanguageCode)} call and
     * reset to {@code false} right after that call returns. Presumably a {@code true}
     * value makes the cache reload the resources; confirm against {@link ResourceCache}.
     */
    @Input
    @Attribute(key = "reload-resources", inherit = true)
    @Processing
    public boolean reloadResources = false;

    /**
     * When {@code true}, {@link #getLexicalData(LanguageCode)} ignores the requested
     * language and returns the data set merged from all languages.
     */
    @Level(AttributeLevel.MEDIUM)
    @Input
    @Attribute(key = DefaultLexicalDataFactoryDescriptor.Keys.MERGE_RESOURCES)
    @Init
    @Group(DefaultGroups.PREPROCESSING)
    @Processing
    @Label("Merge lexical resources")
    public boolean mergeResources = true;

    /**
     * Lookup used both to query the cache and to locate the stop word and stop label
     * resources. Defaults to the context class loader location.
     */
    @Input
    @Attribute(key = "resource-lookup", inherit = true)
    @ImplementingClasses(classes = {}, strict = false)
    @Init
    @Processing
    @AspectModified("Substituted with an assembly lookup in .NET release")
    @Internal
    public ResourceLookup resourceLookup = new ResourceLookup(ResourceLookup.Location.CONTEXT_CLASS_LOADER);

    /**
     * Returns lexical data for the given language, or the merged data of all languages
     * when {@link #mergeResources} is set.
     *
     * <p>Side effect: {@link #reloadResources} is reset to {@code false} once the cache
     * lookup has completed.
     *
     * @param languageCode language to fetch data for; ignored when merging is enabled
     * @return the map entry for the effective key; {@code null} if the loaded map has no
     *         such entry
     */
    @Override
    public ILexicalData getLexicalData(LanguageCode languageCode) {
        // The merged data set is stored under the null key by reloadResources(ResourceLookup).
        final LanguageCode key = this.mergeResources ? null : languageCode;
        final ILexicalData lexicalData = cache.get(this.resourceLookup, this.reloadResources).get(key);
        this.reloadResources = false;
        return lexicalData;
    }

    /**
     * Loads stop words and stop labels for every {@link LanguageCode} value.
     *
     * @param resourceLookup lookup used to locate {@code stopwords.<iso>} and
     *        {@code stoplabels.<iso>}
     * @return a map with one entry per language plus an entry under the {@code null} key
     *         that holds the union of all languages' stop words and stop labels
     * @throws RuntimeException if any resource is missing or cannot be read
     */
    public static HashMap<LanguageCode, ILexicalData> reloadResources(ResourceLookup resourceLookup) {
        final ObjectOpenHashSet<MutableCharArray> mergedStopwords = ObjectOpenHashSet.newInstance();
        final ArrayList<Pattern> mergedStoplabels = Lists.newArrayList();
        final HashMap<LanguageCode, ILexicalData> byLanguage = Maps.newHashMap();

        for (LanguageCode languageCode : LanguageCode.values()) {
            final String isoCode = languageCode.getIsoCode();
            final ObjectOpenHashSet<MutableCharArray> stopwords = toLower(load(resourceLookup, "stopwords." + isoCode));
            final ArrayList<Pattern> stoplabels = compile(load(resourceLookup, "stoplabels." + isoCode));

            // Typed upcast keeps the call bound to the ObjectContainer overload of addAll.
            mergedStopwords.addAll((ObjectContainer<MutableCharArray>) stopwords);
            mergedStoplabels.addAll(stoplabels);
            byLanguage.put(languageCode, new DefaultLexicalData(stopwords, stoplabels));
        }

        byLanguage.put(null, new DefaultLexicalData(mergedStopwords, mergedStoplabels));
        return byLanguage;
    }

    /**
     * Converts each string to a lower-cased {@link MutableCharArray} and collects the
     * results into a hash set.
     */
    private static ObjectOpenHashSet<MutableCharArray> toLower(Set<String> set) {
        final ObjectOpenHashSet<MutableCharArray> lowerCased = new ObjectOpenHashSet<>(set.size());
        for (String word : set) {
            final char[] chars = word.toCharArray();
            CharArrayUtils.toLowerCaseInPlace(chars);
            lowerCased.add(new MutableCharArray(chars));
        }
        return lowerCased;
    }

    /**
     * Compiles each string as a regular expression. Expressions that fail to compile are
     * logged at warn level and skipped, so one bad entry does not discard the whole list.
     */
    private static ArrayList<Pattern> compile(HashSet<String> hashSet) {
        final ArrayList<Pattern> patterns = new ArrayList<>(hashSet.size());
        for (String expression : hashSet) {
            try {
                patterns.add(Pattern.compile(expression));
            } catch (PatternSyntaxException e) {
                logger.warn("Ignoring invalid regular expression: " + expression);
            }
        }
        return patterns;
    }

    /**
     * Resolves the named resource through the lookup and reads its entries.
     *
     * @throws RuntimeException if the lookup returns no resource for the name, or if
     *         reading fails (the underlying {@link IOException} is attached as the cause)
     */
    private static HashSet<String> load(ResourceLookup resourceLookup, String str) {
        final IResource resource = resourceLookup.getFirst(str);
        if (resource == null) {
            throw new RuntimeException("No resource named " + str + " in resource lookup locations: " + Arrays.toString(resourceLookup.getLocators()));
        }
        try {
            return load(resource);
        } catch (IOException e) {
            throw new RuntimeException("Resource named " + str + " failed to load from: " + resource.toString(), e);
        }
    }

    /**
     * Reads a UTF-8 text resource line by line and returns the set of its entries.
     * Every line is trimmed; empty lines and lines starting with the comment marker are
     * skipped. The stream is closed once all lines have been read or reading fails.
     *
     * @param iResource resource to read
     * @return the distinct, trimmed, non-comment lines of the resource
     * @throws IOException if the resource yields a {@code null} stream or reading fails
     */
    public static HashSet<String> load(IResource iResource) throws IOException {
        final HashSet<String> entries = Sets.newHashSet();
        final InputStream stream = iResource.open();
        if (stream == null) {
            throw new IOException("Resource returned null stream: " + iResource);
        }
        // The reader must stay open for the whole loop and be closed exactly once at the end.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream, "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                final String trimmed = line.trim();
                // NOTE(review): the comment marker comes from Lucene's PersianAnalyzer constant,
                // which looks like a decompiler constant-resolution artifact (presumably "#") - confirm.
                if (trimmed.isEmpty() || trimmed.startsWith(PersianAnalyzer.STOPWORDS_COMMENT)) {
                    continue;
                }
                entries.add(trimmed);
            }
        }
        return entries;
    }
}
