package org.carrot2.text.linguistic;

import com.google.common.base.Predicate;
import com.google.common.collect.Maps;
import java.io.IOException;
import java.io.StringReader;
import java.util.EnumMap;
import org.carrot2.core.LanguageCode;
import org.carrot2.text.analysis.ExtendedWhitespaceTokenizer;
import org.carrot2.text.analysis.ITokenizer;
import org.carrot2.text.linguistic.lucene.ChineseTokenizerAdapter;
import org.carrot2.text.linguistic.lucene.ThaiTokenizerAdapter;
import org.carrot2.util.annotations.ThreadSafe;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.factory.FallbackFactory;
import org.carrot2.util.factory.IFactory;
import org.carrot2.util.factory.NewClassInstanceFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Default {@link ITokenizerFactory} that hands out an {@link ITokenizer} for a given
 * {@link LanguageCode}.
 *
 * <p>The language-to-factory table is built once, during static initialisation, and is only
 * read afterwards. Every language is mapped to a factory of
 * {@link ExtendedWhitespaceTokenizer}; Simplified Chinese, Thai and Japanese get dedicated
 * factories, each wrapped in a {@link FallbackFactory} that is also given the whitespace
 * tokenizer factory, a verification predicate and a warning message.
 */
@Bindable
@ThreadSafe
public class DefaultTokenizerFactory implements ITokenizerFactory {
    private static final Logger logger = LoggerFactory.getLogger(DefaultTokenizerFactory.class);

    /**
     * Smoke test handed to {@link FallbackFactory}: resets the tokenizer on a short input and
     * pulls one token. Returns {@code true} when both calls complete; an {@link IOException}
     * is rethrown wrapped in a {@link RuntimeException} (cause preserved).
     *
     * <p>NOTE: must be declared before {@link #tokenizerFactories}, because the static
     * initialiser of that field reads this one.
     */
    private static final Predicate<ITokenizer> tokenizerVerifier = new Predicate<ITokenizer>() {
        @Override
        public boolean apply(ITokenizer tokenizer) {
            try {
                tokenizer.reset(new StringReader("verify"));
                tokenizer.nextToken();
                return true;
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    };

    /** Tokenizer factory for every {@link LanguageCode}; populated once and never modified. */
    private static final EnumMap<LanguageCode, IFactory<ITokenizer>> tokenizerFactories = createDefaultTokenizers();

    /**
     * Creates a tokenizer for the given language.
     *
     * @param languageCode the language to tokenize; must not be {@code null}
     * @return the instance produced by the factory registered for {@code languageCode}
     * @throws NullPointerException if {@code languageCode} is {@code null}
     */
    @Override
    public ITokenizer getTokenizer(LanguageCode languageCode) {
        // EnumMap.get(null) returns null, so without this guard the failure would be an
        // anonymous NullPointerException on createInstance(). Same exception type, clearer cause.
        if (languageCode == null) {
            throw new NullPointerException("languageCode must not be null");
        }
        return tokenizerFactories.get(languageCode).createInstance();
    }

    /**
     * Builds the language-to-factory table.
     *
     * @return a map holding a non-null factory for every {@link LanguageCode} constant
     */
    private static EnumMap<LanguageCode, IFactory<ITokenizer>> createDefaultTokenizers() {
        EnumMap<LanguageCode, IFactory<ITokenizer>> factories = Maps.newEnumMap(LanguageCode.class);

        // One shared whitespace-tokenizer factory is the default for all languages. Its identity
        // is used below to tell default entries apart from language-specific ones.
        final IFactory<ITokenizer> whitespaceFactory =
            new NewClassInstanceFactory<ITokenizer>(ExtendedWhitespaceTokenizer.class);
        for (LanguageCode language : LanguageCode.values()) {
            factories.put(language, whitespaceFactory);
        }

        // Language-specific overrides.
        factories.put(LanguageCode.CHINESE_SIMPLIFIED,
            new NewClassInstanceFactory<ITokenizer>(ChineseTokenizerAdapter.class));
        factories.put(LanguageCode.THAI,
            new NewClassInstanceFactory<ITokenizer>(ThaiTokenizerAdapter.class));
        factories.put(LanguageCode.JAPANESE, new JapaneseUnsupportedStub());

        // Wrap every override in a FallbackFactory that also receives the whitespace factory.
        // Presumably it switches to the whitespace tokenizer and logs the message when the
        // primary tokenizer cannot be created or fails verification -- confirm against
        // FallbackFactory. Every key already has a value at this point, so no "missing entry"
        // branch is needed.
        for (LanguageCode language : LanguageCode.values()) {
            IFactory<ITokenizer> primary = factories.get(language);
            if (primary != whitespaceFactory) {
                String warning = "Tokenizer for " + language.toString()
                    + " (" + language.getIsoCode() + ") is not available."
                    + " This may degrade clustering quality of " + language.toString()
                    + " content. Cause: {}";
                factories.put(language,
                    new FallbackFactory<ITokenizer>(primary, whitespaceFactory, tokenizerVerifier, logger, warning));
            }
        }
        return factories;
    }
}
