package org.carrot2.clustering.kmeans;

import com.carrotsearch.hppc.IntArrayList;
import com.carrotsearch.hppc.IntIntOpenHashMap;
import com.carrotsearch.hppc.cursors.IntCursor;
import com.carrotsearch.hppc.cursors.IntIntCursor;
import com.carrotsearch.hppc.sorting.IndirectComparator;
import com.carrotsearch.hppc.sorting.IndirectSort;
import com.google.common.collect.Lists;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.lang.ObjectUtils;
import org.apache.mahout.math.function.Functions;
import org.apache.mahout.math.matrix.DoubleMatrix2D;
import org.apache.mahout.math.matrix.impl.DenseDoubleMatrix1D;
import org.apache.mahout.math.matrix.impl.DenseDoubleMatrix2D;
import org.carrot2.core.Cluster;
import org.carrot2.core.Document;
import org.carrot2.core.IClusteringAlgorithm;
import org.carrot2.core.LanguageCode;
import org.carrot2.core.ProcessingComponentBase;
import org.carrot2.core.ProcessingException;
import org.carrot2.core.attribute.CommonAttributes;
import org.carrot2.core.attribute.Init;
import org.carrot2.core.attribute.Internal;
import org.carrot2.core.attribute.Processing;
import org.carrot2.text.clustering.IMonolingualClusteringAlgorithm;
import org.carrot2.text.clustering.MultilingualClustering;
import org.carrot2.text.preprocessing.LabelFormatter;
import org.carrot2.text.preprocessing.PreprocessingContext;
import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipeline;
import org.carrot2.text.preprocessing.pipeline.IPreprocessingPipeline;
import org.carrot2.text.vsm.ReducedVectorSpaceModelContext;
import org.carrot2.text.vsm.TermDocumentMatrixBuilder;
import org.carrot2.text.vsm.TermDocumentMatrixReducer;
import org.carrot2.text.vsm.VectorSpaceModelContext;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.AttributeLevel;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.DefaultGroups;
import org.carrot2.util.attribute.Group;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Label;
import org.carrot2.util.attribute.Level;
import org.carrot2.util.attribute.Output;
import org.carrot2.util.attribute.Required;
import org.carrot2.util.attribute.constraint.ImplementingClasses;
import org.carrot2.util.attribute.constraint.IntRange;

/**
 * Document clustering component based on bisecting k-means.
 *
 * <p>Documents are preprocessed into a term-document matrix (optionally reduced in
 * dimensionality), partitioned with k-means and then the largest partitions are repeatedly
 * re-split until {@link #clusterCount} partitions exist or no partition can be split further.
 * Each resulting partition with more than one document becomes a {@link Cluster} labelled
 * with its highest-weighted terms.
 *
 * <p>Note that {@link #process()} temporarily replaces {@link #documents} while clustering
 * runs and writes {@link #clusters} as a side effect of {@link #cluster(LanguageCode)}.
 */
@Bindable(prefix = "BisectingKMeansClusteringAlgorithm", inherit = {CommonAttributes.class})
/* loaded from: input_file:WEB-INF/lib/carrot2-mini-3.9.2.jar:org/carrot2/clustering/kmeans/BisectingKMeansClusteringAlgorithm.class */
public class BisectingKMeansClusteringAlgorithm extends ProcessingComponentBase implements IClusteringAlgorithm {
    /** Attribute group name shared by the k-means specific attributes below. */
    private static final String GROUP_KMEANS = "K-means";

    /**
     * Bit mask tested against {@code allWords.type}: a word with any of these bits set is not
     * used as a clustering feature.
     * NOTE(review): 12290 == 0x3002; presumably a combination of tokenizer flag constants
     * (common word / query word / numeric token) -- confirm against the tokenizer constants.
     */
    private static final int EXCLUDED_WORD_TYPE_MASK = 12290;

    /** Documents to cluster (input). */
    @Processing
    @Required
    @Input
    @Internal
    @Attribute(key = "documents", inherit = true)
    public List<Document> documents;

    /** Clusters produced by the last run (output). */
    @Processing
    @Output
    @Internal
    @Attribute(key = "clusters", inherit = true)
    public List<Cluster> clusters = null;

    /**
     * Bisecting stops once at least this many partitions exist. Twice this value is also the
     * number of dimensions requested from {@link #matrixReducer}.
     */
    @Level(AttributeLevel.BASIC)
    @Input
    @Attribute
    @Group(DefaultGroups.CLUSTERS)
    @Processing
    @IntRange(min = 2)
    @Label("Cluster count")
    public int clusterCount = 25;

    /** Upper bound on the number of k-means iterations performed by a single split. */
    @Level(AttributeLevel.BASIC)
    @Input
    @Attribute
    @Group(GROUP_KMEANS)
    @Processing
    @IntRange(min = 1)
    @Label("Maximum iterations")
    public int maxIterations = 15;

    /**
     * When {@code true} (and there are more than {@code 2 * clusterCount} documents), k-means
     * runs on the reduced coefficient matrix instead of the full term-document matrix.
     */
    @Level(AttributeLevel.BASIC)
    @Group(GROUP_KMEANS)
    @Processing
    @Input
    @Attribute
    @Label("Use dimensionality reduction")
    public boolean useDimensionalityReduction = true;

    /** Number of partitions each k-means split attempts to produce. */
    @Level(AttributeLevel.BASIC)
    @Input
    @Attribute
    @Group(GROUP_KMEANS)
    @Processing
    @IntRange(min = 2, max = 10)
    @Label("Partition count")
    public int partitionCount = 2;

    /**
     * Number of top-weighted terms used to label a cluster. Terms tied with the lowest selected
     * weight are all included, so a label may contain more terms than this value.
     */
    @Level(AttributeLevel.BASIC)
    @Input
    @Attribute
    @Group(DefaultGroups.CLUSTERS)
    @Processing
    @IntRange(min = 1, max = 10)
    @Label("Label count")
    public int labelCount = 3;

    /** Pipeline used to preprocess the documents before the term-document matrix is built. */
    @ImplementingClasses(classes = {}, strict = false)
    @Level(AttributeLevel.ADVANCED)
    @Init
    @Input
    @Attribute
    @Internal
    public IPreprocessingPipeline preprocessingPipeline = new BasicPreprocessingPipeline();

    /** Builds the term-document and term-phrase matrices. */
    public final TermDocumentMatrixBuilder matrixBuilder = new TermDocumentMatrixBuilder();

    /** Reduces the term-document matrix when dimensionality reduction is enabled. */
    public final TermDocumentMatrixReducer matrixReducer = new TermDocumentMatrixReducer();

    /** Label formatter component (not referenced directly by the code in this class). */
    public final LabelFormatter labelFormatter = new LabelFormatter();

    /** Dispatches clustering of the input documents to {@link #cluster(LanguageCode)}. */
    public final MultilingualClustering multilingualClustering = new MultilingualClustering();

    /** Orders partitions from the largest to the smallest. */
    private static final Comparator<IntArrayList> BY_SIZE_DESCENDING = new Comparator<IntArrayList>() {
        @Override
        public int compare(IntArrayList left, IntArrayList right) {
            // Sizes are never negative, so the subtraction cannot overflow.
            return right.size() - left.size();
        }
    };

    /**
     * Clusters {@link #documents} and stores the result in {@link #clusters}.
     *
     * <p>While the delegate runs, {@link #documents} is temporarily replaced with the document
     * list handed to the callback; the original list is restored afterwards, also when
     * clustering fails with an exception.
     *
     * @throws ProcessingException declared by the overridden method
     */
    @Override
    public void process() throws ProcessingException {
        final List<Document> originalDocuments = this.documents;
        try {
            this.clusters = this.multilingualClustering.process(originalDocuments, new IMonolingualClusteringAlgorithm() {
                @Override
                public List<Cluster> process(List<Document> languageDocuments, LanguageCode languageCode) {
                    BisectingKMeansClusteringAlgorithm.this.documents = languageDocuments;
                    BisectingKMeansClusteringAlgorithm.this.cluster(languageCode);
                    return BisectingKMeansClusteringAlgorithm.this.clusters;
                }
            });
        } finally {
            // Restore the caller-visible input even if clustering threw.
            this.documents = originalDocuments;
        }
    }

    /**
     * Clusters the current content of {@link #documents} and writes the result to
     * {@link #clusters}.
     *
     * @param languageCode language passed on to the preprocessing pipeline
     */
    protected void cluster(LanguageCode languageCode) {
        final PreprocessingContext context = this.preprocessingPipeline.preprocess(this.documents, null, languageCode);

        // Install a trivial label set: one single-word feature per stem whose most frequent
        // original word passes the type mask; no phrases (firstPhraseIndex = -1).
        final int[] stemsMostFrequentWord = context.allStems.mostFrequentOriginalWordIndex;
        final short[] wordTypes = context.allWords.type;
        final IntArrayList featureIndices = new IntArrayList(stemsMostFrequentWord.length);
        for (int stem = 0; stem < stemsMostFrequentWord.length; stem++) {
            final int wordIndex = stemsMostFrequentWord[stem];
            if ((wordTypes[wordIndex] & EXCLUDED_WORD_TYPE_MASK) == 0) {
                featureIndices.add(wordIndex);
            }
        }
        context.allLabels.featureIndex = featureIndices.toArray();
        context.allLabels.firstPhraseIndex = -1;

        this.clusters = Lists.newArrayList();
        if (context.hasLabels()) {
            final VectorSpaceModelContext vsmContext = new VectorSpaceModelContext(context);
            final ReducedVectorSpaceModelContext reducedVsmContext = new ReducedVectorSpaceModelContext(vsmContext);
            this.matrixBuilder.buildTermDocumentMatrix(vsmContext);
            this.matrixBuilder.buildTermPhraseMatrix(vsmContext);

            final IntIntOpenHashMap rowToStemIndex = invertStemToRowIndex(vsmContext);

            // Pick the matrix k-means operates on; documents are its columns in both cases.
            final DoubleMatrix2D clusteringMatrix;
            final int reducedDimensions = this.clusterCount * 2;
            if (this.useDimensionalityReduction && reducedDimensions < context.documents.size()) {
                this.matrixReducer.reduce(reducedVsmContext, reducedDimensions);
                clusteringMatrix = reducedVsmContext.coefficientMatrix.viewDice();
            } else {
                clusteringMatrix = vsmContext.termDocumentMatrix;
            }

            final List<IntArrayList> rawClusters = bisect(clusteringMatrix);

            for (int c = 0; c < rawClusters.size(); c++) {
                final IntArrayList members = rawClusters.get(c);
                // Partitions with at most one document are not turned into clusters.
                if (members.size() > 1) {
                    final Cluster cluster = new Cluster();
                    // Labels always come from the full term-document matrix, even when
                    // clustering ran on the reduced one.
                    cluster.addPhrases(getLabels(members, vsmContext.termDocumentMatrix, rowToStemIndex,
                        context.allStems.mostFrequentOriginalWordIndex, context.allWords.image));
                    for (int d = 0; d < members.size(); d++) {
                        cluster.addDocuments(this.documents.get(members.get(d)));
                    }
                    this.clusters.add(cluster);
                }
            }
        }

        Collections.sort(this.clusters, Cluster.BY_REVERSED_SIZE_AND_LABEL_COMPARATOR);
        Cluster.appendOtherTopics(this.documents, this.clusters);
    }

    /** Builds the inverse of {@code stemToRowIndex}: matrix row index to stem index. */
    private IntIntOpenHashMap invertStemToRowIndex(VectorSpaceModelContext vsmContext) {
        final IntIntOpenHashMap rowToStemIndex = new IntIntOpenHashMap();
        final Iterator<IntIntCursor> entries = vsmContext.stemToRowIndex.iterator();
        while (entries.hasNext()) {
            final IntIntCursor entry = entries.next();
            rowToStemIndex.put(entry.value, entry.key);
        }
        return rowToStemIndex;
    }

    /**
     * Partitions all columns of {@code matrix}, then keeps re-splitting the largest splittable
     * partition until {@link #clusterCount} partitions exist or nothing more can be split.
     *
     * @param matrix matrix whose columns are the documents to partition
     * @return partitions (lists of column indices of {@code matrix}), largest first
     */
    private List<IntArrayList> bisect(DoubleMatrix2D matrix) {
        final IntArrayList allColumns = new IntArrayList(matrix.columns());
        for (int column = 0; column < matrix.columns(); column++) {
            allColumns.add(column);
        }

        final List<IntArrayList> partitions = Lists.newArrayList();
        partitions.addAll(split(this.partitionCount, matrix, allColumns, this.maxIterations));
        Collections.sort(partitions, BY_SIZE_DESCENDING);

        int candidate = 0;
        while (partitions.size() < this.clusterCount && candidate < partitions.size()) {
            final IntArrayList toSplit = partitions.get(candidate);
            // The list is sorted by size, so every later candidate is too small as well.
            if (toSplit.size() <= this.partitionCount * 2) {
                break;
            }
            final List<IntArrayList> parts = split(this.partitionCount, matrix, toSplit, this.maxIterations);
            if (parts.size() > 1) {
                partitions.remove(candidate);
                partitions.addAll(parts);
                Collections.sort(partitions, BY_SIZE_DESCENDING);
                candidate = 0;
            } else {
                // This partition could not be divided; try the next largest one.
                candidate++;
            }
        }
        return partitions;
    }

    /**
     * Picks label terms for a group of documents: the term-document columns of the group are
     * summed and every term whose summed weight reaches the {@link #labelCount}-th largest
     * weight is returned, in matrix row order.
     *
     * @param documentIndices column indices of the documents in the group
     * @param termDocumentMatrix term-document matrix (terms in rows, documents in columns)
     * @param rowToStemIndex maps a matrix row to a stem index
     * @param mostFrequentOriginalWordIndex maps a stem index to a word index
     * @param wordImages word images indexed by word index
     * @return formatted labels; empty when the matrix has no rows
     */
    private List<String> getLabels(IntArrayList documentIndices, DoubleMatrix2D termDocumentMatrix,
        IntIntOpenHashMap rowToStemIndex, int[] mostFrequentOriginalWordIndex, char[][] wordImages) {
        final DenseDoubleMatrix1D termWeights = new DenseDoubleMatrix1D(termDocumentMatrix.rows());
        final Iterator<IntCursor> members = documentIndices.iterator();
        while (members.hasNext()) {
            termWeights.assign(termDocumentMatrix.viewColumn(members.next().value), Functions.PLUS);
        }

        final List<String> labels = Lists.newArrayListWithCapacity(this.labelCount);

        // Row indices ordered by ascending summed weight.
        final int[] ascendingOrder = IndirectSort.mergesort(0, termWeights.size(), new IndirectComparator() {
            @Override
            public int compare(int rowA, int rowB) {
                final double weightA = termWeights.get(rowA);
                final double weightB = termWeights.get(rowB);
                if (weightA < weightB) {
                    return -1;
                }
                return weightA > weightB ? 1 : 0;
            }
        });

        final int wanted = Math.min(this.labelCount, ascendingOrder.length);
        if (wanted <= 0) {
            // No terms (or no labels requested): nothing to index into below.
            return labels;
        }

        final double threshold = termWeights.get(ascendingOrder[ascendingOrder.length - wanted]);
        for (int row = 0; row < termWeights.size(); row++) {
            if (termWeights.getQuick(row) >= threshold) {
                final int wordIndex = mostFrequentOriginalWordIndex[rowToStemIndex.get(row)];
                // A one-word "phrase": the formatter takes an array of word images.
                labels.add(LabelFormatter.format(new char[][] {wordImages[wordIndex]}, new boolean[] {false}, false));
            }
        }
        return labels;
    }

    /**
     * Splits the given columns of {@code input} into at most {@code partitions} groups using
     * k-means with dot-product similarity to the group centroids.
     *
     * <p>Documents start out distributed round-robin. Each iteration recomputes the centroids
     * and reassigns every document to the most similar centroid (the lowest partition index
     * wins ties); iteration stops early when an assignment repeats.
     *
     * @param partitions requested number of groups; clamped to the number of columns
     * @param input matrix with documents in columns
     * @param columns indices of the columns of {@code input} to split
     * @param iterations maximum number of k-means iterations
     * @return non-empty groups, each holding indices from {@code columns}; every index occurs
     *     in exactly one group
     */
    private List<IntArrayList> split(int partitions, DoubleMatrix2D input, IntArrayList columns, int iterations) {
        // Never ask for more partitions than there are documents: the centroid seeding below
        // would otherwise address columns that do not exist.
        final int k = Math.min(partitions, columns.size());
        if (k <= 0) {
            final List<IntArrayList> nothing = new ArrayList<IntArrayList>();
            return nothing;
        }

        // Work on a compact copy; column i of the copy is column columns.get(i) of the input.
        final DoubleMatrix2D selected = input.viewSelection(null, columns.toArray()).copy();
        final int documentCount = selected.columns();
        final int termCount = selected.rows();

        List<IntArrayList> current = emptyPartitions(k, documentCount);
        for (int doc = 0; doc < documentCount; doc++) {
            current.get(doc % k).add(doc);
        }

        final DoubleMatrix2D centroids =
            new DenseDoubleMatrix2D(termCount, k).assign(selected.viewPart(0, 0, termCount, k));
        final DoubleMatrix2D similarities = new DenseDoubleMatrix2D(k, documentCount);

        for (int iteration = 0; iteration < iterations; iteration++) {
            // Recompute each centroid as the mean of its members.
            for (int p = 0; p < current.size(); p++) {
                final IntArrayList members = current.get(p);
                if (members.isEmpty()) {
                    // Keep the previous centroid: dividing by zero would make it NaN, and a
                    // NaN similarity in row 0 would swallow every document below.
                    continue;
                }
                for (int term = 0; term < termCount; term++) {
                    double sum = 0.0d;
                    for (int m = 0; m < members.size(); m++) {
                        sum += selected.get(term, members.get(m));
                    }
                    centroids.setQuick(term, p, sum / members.size());
                }
            }

            // Always assign into fresh lists, including on the last iteration; appending to
            // the previous assignment would list every document twice.
            final List<IntArrayList> previous = current;
            current = emptyPartitions(k, documentCount);

            // similarities = centroids^T * selected
            centroids.zMult(selected, similarities, 1.0d, 0.0d, true, false);

            for (int doc = 0; doc < similarities.columns(); doc++) {
                int best = 0;
                double bestSimilarity = similarities.get(0, doc);
                for (int p = 1; p < similarities.rows(); p++) {
                    final double similarity = similarities.get(p, doc);
                    if (bestSimilarity < similarity) {
                        bestSimilarity = similarity;
                        best = p;
                    }
                }
                current.get(best).add(doc);
            }

            if (ObjectUtils.equals(previous, current)) {
                break;
            }
        }

        // Drop empty partitions and translate local column indices back to input indices.
        final List<IntArrayList> result = new ArrayList<IntArrayList>(current.size());
        for (IntArrayList members : current) {
            if (members.isEmpty()) {
                continue;
            }
            for (int m = 0; m < members.size(); m++) {
                members.set(m, columns.get(members.get(m)));
            }
            result.add(members);
        }
        return result;
    }

    /** Creates {@code count} empty index lists, each with the given initial capacity. */
    private static List<IntArrayList> emptyPartitions(int count, int capacity) {
        final List<IntArrayList> lists = new ArrayList<IntArrayList>(count);
        for (int i = 0; i < count; i++) {
            lists.add(new IntArrayList(capacity));
        }
        return lists;
    }
}
