package org.gcube.dataanalysis.ecoengine.clustering;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.PrintWriter;
import java.io.Reader;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;
import weka.clusterers.RandomizableClusterer;
import weka.core.AlgVector;
import weka.core.Capabilities;
import weka.core.DistanceFunction;
import weka.core.EuclideanDistance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.neighboursearch.KDTree;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;

/* loaded from: input_file:org/gcube/dataanalysis/ecoengine/clustering/XMeans.class */
public class XMeans extends RandomizableClusterer implements TechnicalInformationHandler {
    /** Serialization version identifier. */
    private static final long serialVersionUID = -7941793078404132616L;
    /** Missing-value filter; created in buildClusterer and applied again to each instance in clusterInstance. */
    protected ReplaceMissingValues m_ReplaceMissingFilter;
    /** Current cluster centers, one instance per center. */
    protected Instances m_ClusterCenters;
    /** Cluster index per training instance; -1 means "not assigned yet" (see initAssignments). */
    protected int[] m_ClusterAssignments;
    // R_* constants: not used in the visible part of this file.
    // NOTE(review): presumably indices into a per-attribute ranges array -- confirm.
    public static int R_LOW = 0;
    public static int R_HIGH = 1;
    public static int R_WIDTH = 2;
    // D_* constants: debug categories passed as first argument to PFD / PrCentersFD.
    // They are not declared final, so they can be reassigned at runtime.
    public static int D_PRINTCENTERS = 1;
    public static int D_FOLLOWSPLIT = 2;
    public static int D_CONVCHCLOSER = 3;
    public static int D_RANDOMVECTOR = 4;
    public static int D_KDTREE = 5;
    public static int D_ITERCOUNT = 6;
    public static int D_METH_MISUSE = 80;
    public static int D_CURR = 88;
    public static int D_GENERAL = 99;
    /** Training data after missing values were replaced (set in buildClusterer). */
    protected Instances m_Instances = null;
    /** Empty data set carrying only the header of m_Instances (set in buildClusterer). */
    protected Instances m_Model = null;
    /** Value exposed through getBinValue / setBinValue. */
    protected double m_BinValue = 1.0d;
    /**
     * BIC value of the current model; overwritten by buildClusterer.
     * NOTE(review): Double.MIN_VALUE is the smallest positive double, not the most negative one.
     */
    protected double m_Bic = Double.MIN_VALUE;
    /** Per-center distortion as returned by distortion(); set in buildClusterer. */
    protected double[] m_Mle = null;
    /** Maximum number of outer (improve-params / improve-structure) iterations. */
    protected int m_MaxIterations = 1;
    /** Maximum number of K-means iterations in the improve-params part. */
    protected int m_MaxKMeans = 1000;
    /** Maximum number of K-means iterations when converging the two children of a split center. */
    protected int m_MaxKMeansForChildren = 1000;
    /** Current number of clusters. */
    protected int m_NumClusters = 2;
    /** Lower bound for the number of clusters; also the starting number. */
    protected int m_MinNumClusters = 2;
    /** Upper bound for the number of clusters. */
    protected int m_MaxNumClusters = 4;
    /** Distance function used to compare instances and centers. */
    protected DistanceFunction m_DistanceF = new EuclideanDistance();
    /** File to read starting centers from; defaults to the working directory (i.e. not a regular file). */
    protected File m_InputCenterFile = new File(System.getProperty("user.dir"));
    /** Reader over the debug vectors file (set in initDebugVectorsInput). */
    protected Reader m_DebugVectorsInput = null;
    /** Index of the next debug vector handed out by getNextDebugVectorsInstance. */
    protected int m_DebugVectorsIndex = 0;
    /** Debug vectors loaded from m_DebugVectorsFile. */
    protected Instances m_DebugVectors = null;
    /** File with prefabricated "random" vectors; only used when it exists and is a regular file. */
    protected File m_DebugVectorsFile = new File(System.getProperty("user.dir"));
    /** Reader for the starting centers; when non-null buildClusterer loads the centers from it. */
    protected Reader m_CenterInput = null;
    /** File to write centers to; defaults to the working directory. */
    protected File m_OutputCenterFile = new File(System.getProperty("user.dir"));
    /** Writer for the centers output; not used in the visible part of this file. */
    protected PrintWriter m_CenterOutput = null;
    /** Fraction of centers that are split anyway when no child structure wins on BIC. */
    protected double m_CutOffFactor = 0.5d;
    /** KDTree used for assigning instances when m_UseKDTree is set. */
    protected KDTree m_KDTree = new KDTree();
    /** Whether the KDTree is used for the assignment step. */
    protected boolean m_UseKDTree = false;
    /** Number of outer iterations performed so far. */
    protected int m_IterationCount = 0;
    /** Counts how often a K-means loop was stopped by its iteration limit. */
    protected int m_KMeansStopped = 0;
    /** Number of split attempts (incremented in splitCenter). */
    protected int m_NumSplits = 0;
    /** Number of splits actually carried out (incremented in newCentersAfterSplit). */
    protected int m_NumSplitsDone = 0;
    /** Number of splits forced through the cut-off factor although no child won. */
    protected int m_NumSplitsStillDone = 0;
    /** Debug level; not read in the visible part of this file. */
    protected int m_DebugLevel = 0;
    /** Debug flag; not read in the visible part of this file. */
    public boolean m_CurrDebugFlag = true;

    /**
     * Creates the clusterer with a default random seed of 10.
     */
    public XMeans() {
        m_SeedDefault = 10;
        // Apply the default right away so the seed is in effect without an explicit setSeed call.
        setSeed(m_SeedDefault);
    }

    /**
     * Returns a description of this clusterer, followed by the technical reference.
     *
     * @return the description text
     */
    public String globalInfo() {
        String description = "Cluster data using the X-means algorithm.\n\n"
                + "X-Means is K-Means extended by an Improve-Structure part In this part of the algorithm the centers are attempted to be split in its region. "
                + "The decision between the children of each center and itself is done comparing the BIC-values of the two structures.\n\n"
                + "For more information see:\n\n";
        return description + getTechnicalInformation().toString();
    }

    /**
     * Returns the bibliographic reference of the X-means paper.
     *
     * @return an INPROCEEDINGS entry describing the publication
     */
    public TechnicalInformation getTechnicalInformation() {
        final TechnicalInformation paper = new TechnicalInformation(TechnicalInformation.Type.INPROCEEDINGS);
        paper.setValue(TechnicalInformation.Field.AUTHOR, "Dan Pelleg and Andrew W. Moore");
        paper.setValue(TechnicalInformation.Field.TITLE, "X-means: Extending K-means with Efficient Estimation of the Number of Clusters");
        paper.setValue(TechnicalInformation.Field.BOOKTITLE, "Seventeenth International Conference on Machine Learning");
        paper.setValue(TechnicalInformation.Field.YEAR, "2000");
        paper.setValue(TechnicalInformation.Field.PAGES, "727-734");
        paper.setValue(TechnicalInformation.Field.PUBLISHER, "Morgan Kaufmann");
        return paper;
    }

    /**
     * Returns the capabilities of this clusterer: data without a class attribute,
     * numeric and date attributes, and missing values.
     *
     * @return the capabilities
     */
    public Capabilities getCapabilities() {
        final Capabilities supported = super.getCapabilities();
        // Start from a clean slate and switch on only what is handled.
        supported.disableAll();
        supported.enable(Capabilities.Capability.NO_CLASS);
        supported.enable(Capabilities.Capability.NUMERIC_ATTRIBUTES);
        supported.enable(Capabilities.Capability.DATE_ATTRIBUTES);
        supported.enable(Capabilities.Capability.MISSING_VALUES);
        return supported;
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v123, types: [int[], int[][]] */
    /* JADX WARN: Type inference failed for: r0v52, types: [int[], int[][]] */
    /* JADX WARN: Type inference failed for: r0v89, types: [int[], int[][]] */
    public void buildClusterer(Instances instances) throws Exception {
        getCapabilities().testWithFail(instances);
        if (this.m_MinNumClusters > this.m_MaxNumClusters) {
            throw new Exception("XMeans: min number of clusters can't be greater than max number of clusters!");
        }
        this.m_NumSplits = 0;
        this.m_NumSplitsDone = 0;
        this.m_NumSplitsStillDone = 0;
        this.m_ReplaceMissingFilter = new ReplaceMissingValues();
        this.m_ReplaceMissingFilter.setInputFormat(instances);
        this.m_Instances = Filter.useFilter(instances, this.m_ReplaceMissingFilter);
        Random random = new Random(this.m_Seed);
        this.m_NumClusters = this.m_MinNumClusters;
        if (this.m_DistanceF == null) {
            this.m_DistanceF = new EuclideanDistance();
        }
        this.m_DistanceF.setInstances(this.m_Instances);
        checkInstances();
        if (this.m_DebugVectorsFile.exists() && this.m_DebugVectorsFile.isFile()) {
            initDebugVectorsInput();
        }
        int[] iArr = new int[this.m_Instances.numInstances()];
        for (int i = 0; i < this.m_Instances.numInstances(); i++) {
            iArr[i] = i;
        }
        this.m_Model = new Instances(this.m_Instances, 0);
        if (this.m_CenterInput != null) {
            this.m_ClusterCenters = new Instances(this.m_CenterInput);
            this.m_NumClusters = this.m_ClusterCenters.numInstances();
        } else {
            this.m_ClusterCenters = makeCentersRandomly(random, this.m_Instances, this.m_NumClusters);
        }
        PFD(D_FOLLOWSPLIT, "\n*** Starting centers ");
        for (int i2 = 0; i2 < this.m_ClusterCenters.numInstances(); i2++) {
            PFD(D_FOLLOWSPLIT, "Center " + i2 + ": " + this.m_ClusterCenters.instance(i2));
        }
        PrCentersFD(D_PRINTCENTERS);
        boolean z = false;
        if (this.m_UseKDTree) {
            this.m_KDTree.setInstances(this.m_Instances);
        }
        this.m_IterationCount = 0;
        while (!z && !stopIteration(this.m_IterationCount, this.m_MaxIterations)) {
            PFD(D_FOLLOWSPLIT, "\nBeginning of main loop - centers:");
            PrCentersFD(D_FOLLOWSPLIT);
            PFD(D_ITERCOUNT, "\n*** 1. Improve-Params " + this.m_IterationCount + ". time");
            this.m_IterationCount++;
            boolean z2 = false;
            this.m_ClusterAssignments = initAssignments(this.m_Instances.numInstances());
            ?? r0 = new int[this.m_ClusterCenters.numInstances()];
            int i3 = 0;
            PFD(D_FOLLOWSPLIT, "\nConverge in K-Means:");
            while (!z2 && !stopKMeansIteration(i3, this.m_MaxKMeans)) {
                i3++;
                assignToCenters(this.m_UseKDTree ? this.m_KDTree : null, this.m_ClusterCenters, r0, iArr, this.m_ClusterAssignments, i3);
                PFD(D_FOLLOWSPLIT, "\nMain loop - Assign - centers:");
                PrCentersFD(D_FOLLOWSPLIT);
                z2 = recomputeCenters(this.m_ClusterCenters, r0, this.m_Model);
                PFD(D_FOLLOWSPLIT, "\nMain loop - Recompute - centers:");
                PrCentersFD(D_FOLLOWSPLIT);
            }
            PFD(D_FOLLOWSPLIT, "");
            PFD(D_FOLLOWSPLIT, "End of Part: 1. Improve-Params - conventional K-means");
            this.m_Mle = distortion(r0, this.m_ClusterCenters);
            this.m_Bic = calculateBIC(r0, this.m_ClusterCenters, this.m_Mle);
            PFD(D_FOLLOWSPLIT, "m_Bic " + this.m_Bic);
            int numInstances = this.m_ClusterCenters.numInstances();
            Instances instances2 = new Instances(this.m_ClusterCenters, numInstances * 2);
            double[] dArr = new double[numInstances];
            double[] dArr2 = new double[numInstances];
            for (int i4 = 0; i4 < numInstances; i4++) {
                PFD(D_FOLLOWSPLIT, "\nsplit center " + i4 + " " + this.m_ClusterCenters.instance(i4));
                Instance instance = this.m_ClusterCenters.instance(i4);
                int[] iArr2 = r0[i4];
                int length = r0[i4].length;
                if (length <= 2) {
                    dArr[i4] = Double.MAX_VALUE;
                    dArr2[i4] = 0.0d;
                    instances2.add(instance);
                    instances2.add(instance);
                } else {
                    Instances splitCenter = splitCenter(random, instance, this.m_Mle[i4] / length, this.m_Model);
                    int[] initAssignments = initAssignments(length);
                    ?? r02 = new int[2];
                    boolean z3 = false;
                    int i5 = 0;
                    PFD(D_FOLLOWSPLIT, "\nConverge, K-Means for children: " + i4);
                    while (!z3 && !stopKMeansIteration(i5, this.m_MaxKMeansForChildren)) {
                        i5++;
                        z3 = assignToCenters(splitCenter, r02, iArr2, initAssignments);
                        if (!z3) {
                            recomputeCentersFast(splitCenter, r02, this.m_Model);
                        }
                    }
                    instances2.add(splitCenter.instance(0));
                    instances2.add(splitCenter.instance(1));
                    PFD(D_FOLLOWSPLIT, "\nconverged cildren ");
                    PFD(D_FOLLOWSPLIT, " " + splitCenter.instance(0));
                    PFD(D_FOLLOWSPLIT, " " + splitCenter.instance(1));
                    dArr[i4] = calculateBIC(iArr2, instance, this.m_Mle[i4], this.m_Model);
                    dArr2[i4] = calculateBIC(r02, splitCenter, distortion(r02, splitCenter));
                }
            }
            Instances newCentersAfterSplit = newCentersAfterSplit(dArr, dArr2, this.m_CutOffFactor, instances2);
            if (newCentersAfterSplit.numInstances() != this.m_NumClusters) {
                PFD(D_FOLLOWSPLIT, "Compare with non-split");
                int[] initAssignments2 = initAssignments(this.m_Instances.numInstances());
                ?? r03 = new int[newCentersAfterSplit.numInstances()];
                assignToCenters(this.m_UseKDTree ? this.m_KDTree : null, newCentersAfterSplit, r03, iArr, initAssignments2, this.m_IterationCount);
                double calculateBIC = calculateBIC(r03, newCentersAfterSplit, distortion(r03, newCentersAfterSplit));
                PFD(D_FOLLOWSPLIT, "newBic " + calculateBIC);
                if (calculateBIC > this.m_Bic) {
                    PFD(D_FOLLOWSPLIT, "*** decide for new clusters");
                    this.m_Bic = calculateBIC;
                    this.m_ClusterCenters = newCentersAfterSplit;
                    this.m_ClusterAssignments = initAssignments2;
                } else {
                    PFD(D_FOLLOWSPLIT, "*** keep old clusters");
                }
            }
            int numInstances2 = this.m_ClusterCenters.numInstances();
            if (numInstances2 >= this.m_MaxNumClusters || numInstances2 == this.m_NumClusters) {
                z = true;
            }
            this.m_NumClusters = numInstances2;
        }
    }

    /**
     * Checks whether the data contains a nominal attribute other than the class attribute.
     *
     * <p>The attribute index is advanced on every iteration. The previous version only
     * advanced it for non-class attributes and therefore looped forever as soon as it
     * reached the class index.
     *
     * @param instances the data to inspect
     * @return true if at least one non-class attribute is nominal
     */
    public boolean checkForNominalAttributes(Instances instances) {
        int classIndex = instances.classIndex();
        for (int att = 0; att < instances.numAttributes(); att++) {
            if (att != classIndex && instances.attribute(att).isNominal()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Resets every entry of the given assignment array to -1 ("not assigned").
     *
     * @param iArr the array to reset in place
     * @return the same array instance
     */
    protected int[] initAssignments(int[] iArr) {
        java.util.Arrays.fill(iArr, -1);
        return iArr;
    }

    /**
     * Creates an assignment array of the given length with every entry set to -1
     * ("not assigned").
     *
     * @param i the number of entries
     * @return the new array
     */
    protected int[] initAssignments(int i) {
        final int[] unassigned = new int[i];
        int pos = 0;
        while (pos < i) {
            unassigned[pos++] = -1;
        }
        return unassigned;
    }

    /**
     * Creates a boolean array of the given length with every entry false.
     *
     * @param i the number of entries
     * @return the new array
     */
    boolean[] initBoolArray(int i) {
        // Java zero-initialises arrays, so every element is already false.
        return new boolean[i];
    }

    /**
     * Decides, per center, whether the parent or its two children are kept and builds the
     * resulting list of centers.
     *
     * @param dArr      BIC value of each parent center
     * @param dArr2     BIC value of each pair of children
     * @param d         cut-off factor; only tested for being positive here. The number of
     *                  forced splits is computed from the field m_CutOffFactor instead.
     *                  NOTE(review): confirm that using the field rather than this parameter is intended.
     * @param instances the child centers, two per parent (children of parent c at 2c and 2c+1)
     * @return the new centers, or the current m_ClusterCenters when no split may be done
     */
    protected Instances newCentersAfterSplit(double[] dArr, double[] dArr2, double d, Instances instances) {
        // z: no child won, splits are forced through the cut-off factor.
        boolean z = false;
        // z2: more splits were requested than the cluster limit still allows.
        boolean z2 = false;
        // initBoolArray[c] == true means center c is replaced by its children.
        boolean[] initBoolArray = initBoolArray(this.m_ClusterCenters.numInstances());
        // i counts the centers whose children win on BIC.
        int i = 0;
        for (int i2 = 0; i2 < dArr2.length; i2++) {
            if (dArr2[i2] > dArr[i2]) {
                initBoolArray[i2] = true;
                i++;
                PFD(D_FOLLOWSPLIT, "Center " + i2 + " decide for children");
            } else {
                PFD(D_FOLLOWSPLIT, "Center " + i2 + " decide for parent");
            }
        }
        if (i == 0 && d > 0.0d) {
            z = true;
            i = (int) (this.m_ClusterCenters.numInstances() * this.m_CutOffFactor);
        }
        // Parent BIC minus child BIC: the smaller the value, the more the children are preferred.
        double[] dArr3 = new double[this.m_NumClusters];
        for (int i3 = 0; i3 < dArr3.length; i3++) {
            dArr3[i3] = dArr[i3] - dArr2[i3];
        }
        // Index order of dArr3 (presumably ascending, per weka.core.Utils.sort -- confirm).
        int[] sort = Utils.sort(dArr3);
        // i4: number of splits that may actually be carried out without exceeding the maximum.
        int i4 = this.m_MaxNumClusters - this.m_NumClusters;
        if (i4 > i) {
            i4 = i;
        } else {
            z2 = true;
        }
        if (z) {
            // Forced splits: mark the first i4 centers in sort order, stopping at a child BIC <= 0.
            for (int i5 = 0; i5 < i4 && dArr2[sort[i5]] > 0.0d; i5++) {
                initBoolArray[sort[i5]] = true;
            }
            this.m_NumSplitsStillDone += i4;
        } else if (z2) {
            // Walk in sort order until i4 marked centers were seen, then unmark all remaining ones.
            int i6 = 0;
            int i7 = 0;
            while (i7 < initBoolArray.length && i6 < i4) {
                if (initBoolArray[sort[i7]]) {
                    i6++;
                }
                i7++;
            }
            while (i7 < initBoolArray.length) {
                initBoolArray[sort[i7]] = false;
                i7++;
            }
        }
        return i4 > 0 ? newCentersAfterSplit(initBoolArray, instances) : this.m_ClusterCenters;
    }

    /**
     * Builds the new list of centers: a parent marked as split is replaced by its two
     * children, every other parent is kept as is.
     *
     * @param zArr      per parent, true if the children replace the parent
     * @param instances the child centers; children of parent p are at positions 2p and 2p+1
     * @return the new centers
     */
    protected Instances newCentersAfterSplit(boolean[] zArr, Instances instances) {
        final Instances result = new Instances(instances, 0);
        for (int parent = 0; parent < zArr.length; parent++) {
            if (!zArr[parent]) {
                result.add(this.m_ClusterCenters.instance(parent));
                continue;
            }
            this.m_NumSplitsDone++;
            final int firstChild = 2 * parent;
            result.add(instances.instance(firstChild));
            result.add(instances.instance(firstChild + 1));
        }
        return result;
    }

    /**
     * Tells whether a K-means loop has to stop because its iteration limit is reached,
     * and counts every such stop in m_KMeansStopped.
     *
     * @param i  the number of iterations done so far
     * @param i2 the iteration limit; a negative value means "no limit"
     * @return true if the limit is reached
     */
    protected boolean stopKMeansIteration(int i, int i2) {
        final boolean limitReached = i2 >= 0 && i >= i2;
        if (limitReached) {
            this.m_KMeansStopped++;
        }
        return limitReached;
    }

    /**
     * Tells whether the outer loop has to stop because its iteration limit is reached.
     *
     * @param i  the number of iterations done so far
     * @param i2 the iteration limit; a negative value means "no limit"
     * @return true if the limit is reached
     */
    protected boolean stopIteration(int i, int i2) {
        return i2 >= 0 && i >= i2;
    }

    /**
     * Recomputes the cluster centers as the mean (or mode) of their assigned instances.
     *
     * <p>Values are compared against and written to the field m_ClusterCenters. Once the
     * first changed value is found, every later value is overwritten. A center without any
     * instances never triggers the "changed" state by itself.
     *
     * @param instances  the centers; only used for their number
     * @param iArr       per center, the indices of the instances assigned to it
     * @param instances2 the model; only used for the number of attributes
     * @return true if no center value changed
     */
    protected boolean recomputeCenters(Instances instances, int[][] iArr, Instances instances2) {
        boolean converged = true;
        for (int center = 0; center < instances.numInstances(); center++) {
            for (int att = 0; att < instances2.numAttributes(); att++) {
                final double newValue = meanOrMode(this.m_Instances, iArr[center], att);
                if (converged
                        && iArr[center].length > 0
                        && this.m_ClusterCenters.instance(center).value(att) != newValue) {
                    converged = false;
                }
                if (!converged) {
                    this.m_ClusterCenters.instance(center).setValue(att, newValue);
                }
            }
        }
        return converged;
    }

    /**
     * Recomputes the given centers as the mean (or mode) of their assigned instances,
     * without checking for convergence.
     *
     * @param instances  the centers to update in place
     * @param iArr       per center, the indices of the instances assigned to it
     * @param instances2 the model; only used for the number of attributes
     */
    protected void recomputeCentersFast(Instances instances, int[][] iArr, Instances instances2) {
        int center = 0;
        while (center < instances.numInstances()) {
            int att = 0;
            while (att < instances2.numAttributes()) {
                final double newValue = meanOrMode(this.m_Instances, iArr[center], att);
                instances.instance(center).setValue(att, newValue);
                att++;
            }
            center++;
        }
    }

    /**
     * Returns the weighted mean (numeric attribute) or the mode (nominal attribute) of one
     * attribute over a subset of instances. Missing values are skipped.
     *
     * <p>Fixes a decompiler artifact: the nominal branch indexed the count array with an
     * undefined register ({@code r1}); the value index is now held in a local variable.
     *
     * @param instances the data set the indices refer to
     * @param iArr      the indices of the instances to include
     * @param i         the attribute index
     * @return the mean, the index of the most frequent nominal value, or 0 if the attribute is
     *         neither numeric nor nominal or if the weights sum to zero
     */
    protected double meanOrMode(Instances instances, int[] iArr, int i) {
        if (instances.attribute(i).isNumeric()) {
            double weightSum = 0.0d;
            double weightedValueSum = 0.0d;
            for (int index : iArr) {
                Instance current = instances.instance(index);
                if (!current.isMissing(i)) {
                    weightSum += current.weight();
                    weightedValueSum += current.weight() * current.value(i);
                }
            }
            if (Utils.eq(weightSum, 0.0d)) {
                return 0.0d;
            }
            return weightedValueSum / weightSum;
        }
        if (instances.attribute(i).isNominal()) {
            int[] counts = new int[instances.attribute(i).numValues()];
            for (int index : iArr) {
                Instance current = instances.instance(index);
                if (!current.isMissing(i)) {
                    int valueIndex = (int) current.value(i);
                    // The sum is narrowed back to int on every step, as in the compiled original.
                    counts[valueIndex] = (int) (counts[valueIndex] + current.weight());
                }
            }
            return Utils.maxIndex(counts);
        }
        return 0.0d;
    }

    /**
     * Assigns instances to their closest center, using the KDTree when one is given and the
     * plain distance-based assignment otherwise.
     *
     * @param kDTree    the KDTree to use, or null for the plain assignment
     * @param instances the centers
     * @param iArr      receives, per center, the indices of the instances assigned to it
     * @param iArr2     the indices of the instances to assign (ignored by the KDTree variant)
     * @param iArr3     the current assignments, updated in place
     * @param i         the iteration number (only used by the KDTree variant)
     * @return true if the assignments did not change
     * @throws Exception if the delegated assignment fails
     */
    protected boolean assignToCenters(KDTree kDTree, Instances instances, int[][] iArr, int[] iArr2, int[] iArr3, int i) throws Exception {
        if (kDTree == null) {
            return assignToCenters(instances, iArr, iArr2, iArr3);
        }
        return assignToCenters(kDTree, instances, iArr, iArr3, i);
    }

    /**
     * Assigns all training instances to their closest center with the help of the KDTree.
     *
     * <p>Fixes over the decompiled version:
     * <ul>
     *   <li>a missing assignment array is now fully initialised to -1 (only element 0 was set);</li>
     *   <li>a missing per-center list is allocated as a jagged {@code int[][]};</li>
     *   <li>every instance is checked for a missing assignment, not only those before the
     *       first changed one.</li>
     * </ul>
     *
     * <p>When {@code iArr} or {@code iArr2} is null the replacement array is local to this
     * method and its content is not visible to the caller.
     *
     * @param kDTree    the KDTree over the training data
     * @param instances the centers
     * @param iArr      receives, per center, the indices of the instances assigned to it;
     *                  only rewritten when the assignments changed
     * @param iArr2     the current assignments, updated in place
     * @param i         the iteration number; 0.8^i is passed to the KDTree
     * @return true if the assignments did not change
     * @throws Exception if an instance was not assigned to any center
     */
    protected boolean assignToCenters(KDTree kDTree, Instances instances, int[][] iArr, int[] iArr2, int i) throws Exception {
        int numCenters = instances.numInstances();
        int numData = this.m_Instances.numInstances();
        if (iArr2 == null) {
            iArr2 = new int[numData];
            for (int k = 0; k < numData; k++) {
                iArr2[k] = -1;
            }
        }
        if (iArr == null) {
            iArr = new int[numCenters][];
        }

        // Remember the previous assignments to detect convergence.
        int[] previous = new int[numData];
        System.arraycopy(iArr2, 0, previous, 0, iArr2.length);

        kDTree.centerInstances(instances, iArr2, Math.pow(0.8d, i));

        boolean converged = true;
        for (int k = 0; k < iArr2.length; k++) {
            if (iArr2[k] == -1) {
                throw new Exception("Instance " + k + " has not been assigned to cluster.");
            }
            if (previous[k] != iArr2[k]) {
                converged = false;
            }
        }

        if (!converged) {
            // Count the members of each center, then fill the per-center index lists
            // in ascending instance order.
            int[] counts = new int[numCenters];
            for (int k = 0; k < numData; k++) {
                counts[iArr2[k]]++;
            }
            for (int c = 0; c < numCenters; c++) {
                iArr[c] = new int[counts[c]];
            }
            int[] filled = new int[numCenters];
            for (int k = 0; k < numData; k++) {
                int c = iArr2[k];
                iArr[c][filled[c]++] = k;
            }
        }
        return converged;
    }

    /**
     * Assigns the listed instances to their closest center by comparing distances.
     *
     * <p>Fixes a decompiler artifact: a missing per-center list is now allocated as a jagged
     * {@code int[][]} (the previous {@code new int[n]} did not compile).
     *
     * <p>When {@code iArr} or {@code iArr3} is null the replacement array is local to this
     * method and its content is not visible to the caller.
     *
     * @param instances the centers
     * @param iArr      receives, per center, the training-data indices of the instances
     *                  assigned to it; only rewritten when the assignments changed
     * @param iArr2     the training-data indices of the instances to assign
     * @param iArr3     the current assignment per position of {@code iArr2}, updated in place
     * @return true if the assignments did not change
     * @throws Exception declared for compatibility with the KDTree variant
     */
    protected boolean assignToCenters(Instances instances, int[][] iArr, int[] iArr2, int[] iArr3) throws Exception {
        int numInst = iArr2.length;
        int numCenters = instances.numInstances();
        int[] counts = new int[numCenters];
        if (iArr3 == null) {
            iArr3 = new int[numInst];
            for (int pos = 0; pos < numInst; pos++) {
                iArr3[pos] = -1;
            }
        }
        if (iArr == null) {
            iArr = new int[numCenters][];
        }

        boolean converged = true;
        for (int pos = 0; pos < numInst; pos++) {
            int nearest = clusterProcessedInstance(this.m_Instances.instance(iArr2[pos]), instances);
            if (nearest != iArr3[pos]) {
                converged = false;
                iArr3[pos] = nearest;
            }
            counts[nearest]++;
        }

        if (converged) {
            PFD(D_FOLLOWSPLIT, "assignToCenters -> it has converged");
        } else {
            PFD(D_FOLLOWSPLIT, "assignToCenters -> it has NOT converged");
            for (int c = 0; c < numCenters; c++) {
                iArr[c] = new int[counts[c]];
            }
            // Fill the per-center lists in the order of iArr2.
            int[] filled = new int[numCenters];
            for (int pos = 0; pos < numInst; pos++) {
                int c = iArr3[pos];
                iArr[c][filled[c]++] = iArr2[pos];
            }
        }
        return converged;
    }

    /**
     * Finds the next position after {@code i2} whose assignment equals {@code i}.
     *
     * @param i    the cluster to look for
     * @param i2   the position to start after (-1 to start at the beginning)
     * @param iArr the assignments
     * @return the position found, or -1 if there is none
     */
    protected int nextAssignedOne(int i, int i2, int[] iArr) {
        int pos = i2 + 1;
        while (pos < iArr.length) {
            if (iArr[pos] == i) {
                return pos;
            }
            pos++;
        }
        return -1;
    }

    /**
     * Splits a center into two children placed on opposite sides of it: a random direction
     * vector is scaled to length sqrt(d) and once added to, once subtracted from the center.
     *
     * <p>If the debug vectors file exists and is a regular file, the direction is taken from
     * that file instead of the random generator.
     *
     * @param random    the random number generator
     * @param instance  the center to split
     * @param d         the variance; its square root becomes the length of the offset vector
     * @param instances the model (header) for the new centers
     * @return a data set holding the two child centers
     * @throws Exception if no further debug vector is available or a vector operation fails
     */
    protected Instances splitCenter(Random random, Instance instance, double d, Instances instances) throws Exception {
        this.m_NumSplits++;
        final Instances children = new Instances(instances, 2);

        final boolean useDebugVectors = this.m_DebugVectorsFile.exists() && this.m_DebugVectorsFile.isFile();
        AlgVector offset;
        if (useDebugVectors) {
            Instance prefabricated = getNextDebugVectorsInstance(instances);
            PFD(D_RANDOMVECTOR, "Random Vector from File " + prefabricated);
            offset = new AlgVector(prefabricated);
        } else {
            offset = new AlgVector(instances, random);
        }
        offset.changeLength(Math.pow(d, 0.5d));
        PFD(D_RANDOMVECTOR, "random vector *variance " + offset);

        final AlgVector centerVector = new AlgVector(instance);
        // Copy taken before add(), in case add() works in place.
        final AlgVector centerCopy = (AlgVector) centerVector.clone();

        final Instance firstChild = centerVector.add(offset).getAsInstance(instances, random);
        children.add(firstChild);
        PFD(D_FOLLOWSPLIT, "first child " + firstChild);

        final Instance secondChild = centerCopy.substract(offset).getAsInstance(instances, random);
        children.add(secondChild);
        PFD(D_FOLLOWSPLIT, "second child " + secondChild);
        return children;
    }

    /**
     * Picks two instances at random as new centers. Up to ten draws are made to find a
     * second instance different from the first; after that a duplicate is accepted.
     *
     * <p>The index is computed as {@code Math.abs(r % n)} instead of {@code Math.abs(r) % n}.
     * Both give the same index for every draw except {@code Integer.MIN_VALUE}, for which
     * {@code Math.abs} stays negative and the old form produced a negative index.
     *
     * @param random     the random number generator
     * @param instances  the instances to pick from
     * @param instances2 the model (header) for the new centers
     * @return a data set holding the two picked instances
     */
    protected Instances splitCenters(Random random, Instances instances, Instances instances2) {
        Instances children = new Instances(instances2, 2);
        int first = Math.abs(random.nextInt() % instances.numInstances());
        children.add(instances.instance(first));
        int second = first;
        int attempts = 0;
        while (second == first && attempts < 10) {
            attempts++;
            second = Math.abs(random.nextInt() % instances.numInstances());
        }
        children.add(instances.instance(second));
        return children;
    }

    /**
     * Creates the starting centers by picking instances at random (with replacement) from
     * the training data held in m_Instances, and sets m_NumClusters to the requested number.
     *
     * <p>The index is computed as {@code Math.abs(r % n)} instead of {@code Math.abs(r) % n}.
     * Both give the same index for every draw except {@code Integer.MIN_VALUE}, for which
     * {@code Math.abs} stays negative and the old form produced a negative index.
     *
     * @param random    the random number generator
     * @param instances the data set whose header is used for the centers
     * @param i         the number of centers to create
     * @return the new centers
     */
    protected Instances makeCentersRandomly(Random random, Instances instances, int i) {
        Instances centers = new Instances(instances, i);
        this.m_NumClusters = i;
        for (int c = 0; c < i; c++) {
            int picked = Math.abs(random.nextInt() % this.m_Instances.numInstances());
            centers.add(this.m_Instances.instance(picked));
        }
        return centers;
    }

    /**
     * Computes the BIC of a single center by wrapping its data into one-element structures
     * and delegating to the multi-center variant.
     *
     * @param iArr      the indices of the instances belonging to the center
     * @param instance  the center
     * @param d         the distortion of the center
     * @param instances the model (header) used to wrap the center
     * @return the BIC value
     */
    protected double calculateBIC(int[] iArr, Instance instance, double d, Instances instances) {
        final int[][] singleList = new int[1][iArr.length];
        System.arraycopy(iArr, 0, singleList[0], 0, iArr.length);

        final double[] singleDistortion = new double[1];
        singleDistortion[0] = d;

        final Instances singleCenter = new Instances(instances, 1);
        singleCenter.add(instance);
        return calculateBIC(singleList, singleCenter, singleDistortion);
    }

    /**
     * Computes the BIC of a set of centers: the summed log-likelihood estimates of the
     * centers, minus N*log(N), minus (number of parameters / 2) * log(N), where N is the
     * total number of assigned instances.
     *
     * @param iArr      per center, the indices of the instances assigned to it
     * @param instances the centers
     * @param dArr      per center, its distortion
     * @return the BIC value
     */
    protected double calculateBIC(int[][] iArr, Instances instances, double[] dArr) {
        final int numCenters = instances.numInstances();
        // (K - 1) + K * M + K free parameters for K centers and M attributes.
        final int numParameters = (numCenters - 1) + (numCenters * instances.numAttributes()) + numCenters;

        double logLikelihood = 0.0d;
        int totalInstances = 0;
        for (int c = 0; c < instances.numInstances(); c++) {
            final int size = iArr[c].length;
            logLikelihood += logLikelihoodEstimate(size, instances.instance(c), dArr[c], instances.numInstances() * 2);
            totalInstances += size;
        }
        final double logTotal = Math.log(totalInstances);
        return (logLikelihood - (totalInstances * logTotal)) - ((numParameters / 2.0d) * logTotal);
    }

    /**
     * Estimates the log-likelihood of one center.
     *
     * <p>The factor {@code R * M / 2} of the variance term is now computed in floating point.
     * The previous integer division truncated it whenever {@code R * M} was odd.
     *
     * @param i        the number of instances R assigned to the center
     * @param instance the center; only its number of attributes M is used
     * @param d        the distortion of the center
     * @param i2       not used
     * @return the estimate, or 0 if the center has at most one instance
     */
    protected double logLikelihoodEstimate(int i, Instance instance, double d, int i2) {
        if (i <= 1) {
            return 0.0d;
        }
        double variance = d / (i - 1.0d);
        // -R/2 * log(2*pi)
        double p1 = (-(i / 2.0d)) * Math.log(6.283185307179586d);
        // -(R*M)/2 * log(variance)
        double p2 = (-(i * (double) instance.numAttributes()) / 2.0d) * Math.log(variance);
        // -(R-1)/2
        double p3 = (-(i - 1.0d)) / 2.0d;
        // R * log(R)
        double p4 = i * Math.log(i);
        return p1 + p2 + p3 + p4;
    }

    /**
     * Computes the distortion of each center: the sum of the distances between the center
     * and the instances assigned to it.
     *
     * @param iArr      per center, the indices of the instances assigned to it
     * @param instances the centers
     * @return the distortion per center
     */
    protected double[] distortion(int[][] iArr, Instances instances) {
        final double[] result = new double[instances.numInstances()];
        for (int center = 0; center < instances.numInstances(); center++) {
            result[center] = 0.0d;
            for (int member = 0; member < iArr[center].length; member++) {
                final double dist = this.m_DistanceF.distance(
                        this.m_Instances.instance(iArr[center][member]), instances.instance(center));
                result[center] = result[center] + dist;
            }
        }
        return result;
    }

    /**
     * Returns the index of the center closest to the given instance.
     *
     * <p>Only distances below Integer.MAX_VALUE are considered; if no center is that close,
     * index 0 is returned.
     *
     * @param instance  the instance to assign (missing values already replaced)
     * @param instances the centers to choose from
     * @return the index of the closest center
     */
    protected int clusterProcessedInstance(Instance instance, Instances instances) {
        double smallest = Integer.MAX_VALUE;
        int closest = 0;
        int candidate = 0;
        while (candidate < instances.numInstances()) {
            final double dist = this.m_DistanceF.distance(instance, instances.instance(candidate));
            if (dist < smallest) {
                closest = candidate;
                smallest = dist;
            }
            candidate++;
        }
        return closest;
    }

    /**
     * Returns the index of the current cluster center closest to the given instance.
     *
     * <p>Only distances below Integer.MAX_VALUE are considered; if no center is that close,
     * index 0 is returned.
     *
     * @param instance the instance to assign (missing values already replaced)
     * @return the index of the closest center
     */
    protected int clusterProcessedInstance(Instance instance) {
        double smallest = Integer.MAX_VALUE;
        int closest = 0;
        int candidate = 0;
        while (candidate < this.m_NumClusters) {
            final double dist = this.m_DistanceF.distance(instance, this.m_ClusterCenters.instance(candidate));
            if (dist < smallest) {
                closest = candidate;
                smallest = dist;
            }
            candidate++;
        }
        return closest;
    }

    /**
     * Assigns an instance to a cluster after passing it through the missing-value filter.
     *
     * @param instance the instance to cluster
     * @return the index of the assigned cluster
     * @throws Exception if the filter cannot process the instance
     */
    public int clusterInstance(Instance instance) throws Exception {
        m_ReplaceMissingFilter.input(instance);
        final Instance filtered = m_ReplaceMissingFilter.output();
        return clusterProcessedInstance(filtered);
    }

    /**
     * Returns the current number of clusters.
     *
     * @return the number of clusters
     */
    public int numberOfClusters() {
        return m_NumClusters;
    }

    /**
     * Returns an enumeration describing the available options, followed by the options of
     * the superclass.
     *
     * <p>The help text of {@code -C} now states the default of 0.5, matching the initial
     * value of m_CutOffFactor (it used to say 0.0).
     *
     * @return an enumeration of {@link Option} objects
     */
    public Enumeration listOptions() {
        Vector options = new Vector();
        options.addElement(new Option("\tmaximum number of overall iterations\n\t(default 1).", "I", 1, "-I <num>"));
        options.addElement(new Option("\tmaximum number of iterations in the kMeans loop in\n\tthe Improve-Parameter part \n\t(default 1000).", "M", 1, "-M <num>"));
        options.addElement(new Option("\tmaximum number of iterations in the kMeans loop\n\tfor the splitted centroids in the Improve-Structure part \n\t(default 1000).", "J", 1, "-J <num>"));
        options.addElement(new Option("\tminimum number of clusters\n\t(default 2).", "L", 1, "-L <num>"));
        options.addElement(new Option("\tmaximum number of clusters\n\t(default 4).", "H", 1, "-H <num>"));
        options.addElement(new Option("\tdistance value for binary attributes\n\t(default 1.0).", "B", 1, "-B <value>"));
        options.addElement(new Option("\tUses the KDTree internally\n\t(default no).", "use-kdtree", 0, "-use-kdtree"));
        options.addElement(new Option("\tFull class name of KDTree class to use, followed\n\tby scheme options.\n\teg: \"weka.core.neighboursearch.kdtrees.KDTree -P\"\n\t(default no KDTree class used).", "K", 1, "-K <KDTree class specification>"));
        options.addElement(new Option("\tcutoff factor, takes the given percentage of the splitted \n\tcentroids if none of the children win\n\t(default 0.5).", "C", 1, "-C <value>"));
        options.addElement(new Option("\tFull class name of Distance function class to use, followed\n\tby scheme options.\n\t(default weka.core.EuclideanDistance).", "D", 1, "-D <distance function class specification>"));
        options.addElement(new Option("\tfile to read starting centers from (ARFF format).", "N", 1, "-N <file name>"));
        options.addElement(new Option("\tfile to write centers to (ARFF format).", "O", 1, "-O <file name>"));
        options.addElement(new Option("\tThe debug level.\n\t(default 0)", "U", 1, "-U <int>"));
        options.addElement(new Option("\tThe debug vectors file.", "Y", 1, "-Y <file name>"));
        // Append the options inherited from the superclass.
        Enumeration inherited = super.listOptions();
        while (inherited.hasMoreElements()) {
            options.addElement(inherited.nextElement());
        }
        return options.elements();
    }

    /**
     * Returns the tip text for the minimum-number-of-clusters property.
     *
     * @return the tip text
     */
    public String minNumClustersTipText() {
        final String tip = "set minimum number of clusters";
        return tip;
    }

    /**
     * Sets the minimum number of clusters. The value is not validated here.
     *
     * @param i the minimum number of clusters
     */
    public void setMinNumClusters(int i) {
        m_MinNumClusters = i;
    }

    /**
     * Returns the minimum number of clusters.
     *
     * @return the minimum number of clusters
     */
    public int getMinNumClusters() {
        return m_MinNumClusters;
    }

    /**
     * Returns the tip text for the maximum-number-of-clusters property.
     *
     * @return the tip text
     */
    public String maxNumClustersTipText() {
        final String tip = "set maximum number of clusters";
        return tip;
    }

    /**
     * Sets the maximum number of clusters. A value below the current minimum is ignored
     * silently.
     *
     * @param i the maximum number of clusters
     */
    public void setMaxNumClusters(int i) {
        if (i < m_MinNumClusters) {
            return;
        }
        m_MaxNumClusters = i;
    }

    /**
     * Returns the maximum number of clusters.
     *
     * @return the maximum number of clusters
     */
    public int getMaxNumClusters() {
        return m_MaxNumClusters;
    }

    /**
     * Returns the tip text for the maximum-iterations property.
     *
     * @return the tip text
     */
    public String maxIterationsTipText() {
        final String tip = "the maximum number of iterations to perform";
        return tip;
    }

    /**
     * Sets the maximum number of outer iterations. Zero is accepted.
     *
     * @param i the maximum number of iterations
     * @throws Exception if the value is negative
     */
    public void setMaxIterations(int i) throws Exception {
        if (i >= 0) {
            m_MaxIterations = i;
            return;
        }
        throw new Exception("Only positive values for iteration number allowed (Option I).");
    }

    /**
     * Returns the maximum number of outer iterations.
     *
     * @return the maximum number of iterations
     */
    public int getMaxIterations() {
        return m_MaxIterations;
    }

    /**
     * Returns the tip text for the maximum-K-means-iterations property.
     *
     * @return the tip text
     */
    public String maxKMeansTipText() {
        final String tip = "the maximum number of iterations to perform in KMeans";
        return tip;
    }

    /**
     * Sets the maximum number of K-means iterations. The limit for the K-means runs on
     * child centers is set to the same value.
     *
     * @param i the maximum number of K-means iterations
     */
    public void setMaxKMeans(int i) {
        m_MaxKMeans = i;
        m_MaxKMeansForChildren = i;
    }

    /**
     * Returns the maximum number of K-means iterations.
     *
     * @return the maximum number of K-means iterations
     */
    public int getMaxKMeans() {
        return m_MaxKMeans;
    }

    /**
     * Returns the tip text for the maximum-K-means-iterations-for-children property.
     *
     * @return the tip text
     */
    public String maxKMeansForChildrenTipText() {
        final String tip = "the maximum number of iterations KMeans that is performed on the child centers";
        return tip;
    }

    /**
     * Sets the maximum number of K-means iterations performed on the child centers.
     *
     * @param i the maximum number of iterations
     */
    public void setMaxKMeansForChildren(int i) {
        m_MaxKMeansForChildren = i;
    }

    /**
     * Returns the maximum number of K-means iterations performed on the child centers.
     *
     * @return the maximum number of iterations
     */
    public int getMaxKMeansForChildren() {
        return m_MaxKMeansForChildren;
    }

    /**
     * Returns the tip text for the cut-off factor property.
     *
     * @return the tip text
     */
    public String cutOffFactorTipText() {
        final String tip = "the cut-off factor to use";
        return tip;
    }

    /**
     * Sets the cut-off factor.
     *
     * @param d the cut-off factor
     */
    public void setCutOffFactor(double d) {
        m_CutOffFactor = d;
    }

    /**
     * Returns the cut-off factor.
     *
     * @return the cut-off factor
     */
    public double getCutOffFactor() {
        return m_CutOffFactor;
    }

    /**
     * Returns the tip text for the bin value property.
     *
     * @return the tip text
     */
    public String binValueTipText() {
        final String tip = "Set the value that represents true in the new attributes.";
        return tip;
    }

    /**
     * Returns the currently stored bin value.
     *
     * @return the bin value
     */
    public double getBinValue() {
        return m_BinValue;
    }

    /**
     * Stores the bin value. The value is not validated.
     *
     * @param d the new bin value
     */
    public void setBinValue(double d) {
        m_BinValue = d;
    }

    /**
     * Returns the tip text for the distance function property.
     *
     * @return the fixed description string
     */
    public String distanceFTipText() {
        final String tip = "The distance function to use.";
        return tip;
    }

    /**
     * Stores the distance function. No null check is performed.
     *
     * @param distanceFunction the distance function to keep
     */
    public void setDistanceF(DistanceFunction distanceFunction) {
        m_DistanceF = distanceFunction;
    }

    /**
     * Returns the currently stored distance function.
     *
     * @return the distance function
     */
    public DistanceFunction getDistanceF() {
        return m_DistanceF;
    }

    /**
     * Builds a specification string for the current distance function.
     *
     * <p>The result is the class name; when the function is an
     * {@link OptionHandler}, a space and its joined options are appended.
     * A {@code NullPointerException} is raised if no distance function is set.
     *
     * @return the class name, optionally followed by the joined options
     */
    protected String getDistanceFSpec() {
        DistanceFunction fn = getDistanceF();
        String className = fn.getClass().getName();
        if (!(fn instanceof OptionHandler)) {
            return className;
        }
        return className + " " + Utils.joinOptions(fn.getOptions());
    }

    /**
     * Returns the tip text for the debug vectors file property.
     *
     * @return the fixed description string
     */
    public String debugVectorsFileTipText() {
        final String tip = "The file containing the debug vectors (only for debugging!).";
        return tip;
    }

    /**
     * Stores the file holding the debug vectors. The file is not opened here.
     *
     * @param file the debug vectors file
     */
    public void setDebugVectorsFile(File file) {
        m_DebugVectorsFile = file;
    }

    /**
     * Returns the currently stored debug vectors file.
     *
     * @return the debug vectors file
     */
    public File getDebugVectorsFile() {
        return m_DebugVectorsFile;
    }

    /**
     * Opens the debug vectors file, loads its contents as {@link Instances}
     * and resets the read position to the first vector.
     *
     * <p>The reader is kept in a field and is not closed by this method.
     *
     * @throws Exception if the file cannot be opened or parsed
     */
    public void initDebugVectorsInput() throws Exception {
        BufferedReader vectorSource = new BufferedReader(new FileReader(m_DebugVectorsFile));
        // Publish the reader before parsing, so the field is set even if parsing fails.
        m_DebugVectorsInput = vectorSource;
        m_DebugVectors = new Instances(vectorSource);
        m_DebugVectorsIndex = 0;
    }

    /**
     * Hands out the next prefabricated debug vector and advances the read position.
     *
     * <p>The returned instance is the one held in the debug vector set (not a
     * copy); its dataset reference is switched to {@code instances}.
     *
     * @param instances the dataset the returned instance is attached to
     * @return the next debug vector
     * @throws Exception if all debug vectors have already been handed out
     */
    public Instance getNextDebugVectorsInstance(Instances instances) throws Exception {
        int position = m_DebugVectorsIndex;
        if (position >= m_DebugVectors.numInstances()) {
            throw new Exception("no more prefabricated Vectors");
        }
        Instance next = m_DebugVectors.instance(position);
        next.setDataset(instances);
        // Advance only after the instance was attached successfully.
        m_DebugVectorsIndex = position + 1;
        return next;
    }

    /**
     * Returns the tip text for the input center file property.
     *
     * @return the fixed description string
     */
    public String inputCenterFileTipText() {
        final String tip = "The file to read the list of centers from.";
        return tip;
    }

    /**
     * Stores the file the centers are read from. The file is not opened here.
     *
     * @param file the input center file
     */
    public void setInputCenterFile(File file) {
        m_InputCenterFile = file;
    }

    /**
     * Returns the currently stored input center file.
     *
     * @return the input center file
     */
    public File getInputCenterFile() {
        return m_InputCenterFile;
    }

    /**
     * Returns the tip text for the output center file property.
     *
     * @return the fixed description string
     */
    public String outputCenterFileTipText() {
        final String tip = "The file to write the list of centers to.";
        return tip;
    }

    /**
     * Stores the file the centers are written to. The file is not opened here.
     *
     * @param file the output center file
     */
    public void setOutputCenterFile(File file) {
        m_OutputCenterFile = file;
    }

    /**
     * Returns the currently stored output center file.
     *
     * @return the output center file
     */
    public File getOutputCenterFile() {
        return m_OutputCenterFile;
    }

    /**
     * Returns the tip text for the KDTree property.
     *
     * @return the fixed description string
     */
    public String KDTreeTipText() {
        final String tip = "The KDTree to use.";
        return tip;
    }

    /**
     * Stores the KDTree. No null check is performed.
     *
     * @param kDTree the KDTree to keep
     */
    public void setKDTree(KDTree kDTree) {
        m_KDTree = kDTree;
    }

    /**
     * Returns the currently stored KDTree.
     *
     * @return the KDTree
     */
    public KDTree getKDTree() {
        return m_KDTree;
    }

    /**
     * Returns the tip text for the use-KDTree property.
     *
     * @return the fixed description string
     */
    public String useKDTreeTipText() {
        final String tip = "Whether to use the KDTree.";
        return tip;
    }

    /**
     * Stores the flag that says whether the KDTree is used.
     *
     * @param z {@code true} to use the KDTree
     */
    public void setUseKDTree(boolean z) {
        m_UseKDTree = z;
    }

    /**
     * Returns the currently stored use-KDTree flag.
     *
     * @return {@code true} if the KDTree is to be used
     */
    public boolean getUseKDTree() {
        return m_UseKDTree;
    }

    /**
     * Builds a specification string for the current KDTree.
     *
     * <p>The result is the class name; when the tree is an
     * {@link OptionHandler}, a space and its joined options are appended.
     * A {@code NullPointerException} is raised if no KDTree is set.
     *
     * @return the class name, optionally followed by the joined options
     */
    protected String getKDTreeSpec() {
        KDTree tree = getKDTree();
        String className = tree.getClass().getName();
        if (!(tree instanceof OptionHandler)) {
            return className;
        }
        return className + " " + Utils.joinOptions(tree.getOptions());
    }

    /**
     * Returns the tip text for the debug level property.
     *
     * @return the fixed description string
     */
    public String debugLevelTipText() {
        final String tip = "The debug level to use.";
        return tip;
    }

    /**
     * Stores the debug level. The value is not validated.
     *
     * @param i the new debug level
     */
    public void setDebugLevel(int i) {
        m_DebugLevel = i;
    }

    /**
     * Returns the currently stored debug level.
     *
     * @return the debug level
     */
    public int getDebugLevel() {
        return m_DebugLevel;
    }

    /**
     * Does nothing in this implementation: the body is empty.
     *
     * <p>NOTE(review): presumably a placeholder for validating the training
     * instances; confirm against callers before relying on it.
     */
    protected void checkInstances() {
    }

    /**
     * Parses an option array and applies the values to this clusterer.
     *
     * <p>Options read here, with the default applied when the option is absent:
     * <ul>
     *   <li>{@code -I} maximum iterations (1)</li>
     *   <li>{@code -M} maximum KMeans iterations (1000); this also resets the
     *       child limit, which {@code -J} then overrides</li>
     *   <li>{@code -J} maximum KMeans iterations for children (1000)</li>
     *   <li>{@code -L} minimum number of clusters (2)</li>
     *   <li>{@code -H} maximum number of clusters (4); the setter ignores values
     *       below the minimum just set, so the previous maximum may be kept</li>
     *   <li>{@code -B} bin value (1.0)</li>
     *   <li>{@code -use-kdtree} flag; {@code -K} (KDTree class plus options) is
     *       only consulted when the flag is present, otherwise a default
     *       {@code KDTree} is installed</li>
     *   <li>{@code -C} cut-off factor (0.0)</li>
     *   <li>{@code -D} distance function class plus options
     *       ({@code EuclideanDistance})</li>
     *   <li>{@code -N} input center file; the reader is opened immediately
     *       (default: the {@code user.dir} directory and no reader)</li>
     *   <li>{@code -O} output center file; the writer is opened immediately
     *       (default: the {@code user.dir} directory and no writer)</li>
     *   <li>{@code -U} debug level (0)</li>
     *   <li>{@code -Y} debug vectors file (default: the {@code user.dir}
     *       directory, with debug vector state cleared)</li>
     * </ul>
     * Finally the array is handed to {@code super.setOptions}.
     *
     * @param strArr the options to parse
     * @throws Exception if an option value is invalid, a class specification is
     *         empty, or a center file cannot be opened
     */
    public void setOptions(String[] strArr) throws Exception {
        String maxIterationsText = Utils.getOption('I', strArr);
        if (maxIterationsText.length() != 0) {
            setMaxIterations(Integer.parseInt(maxIterationsText));
        } else {
            setMaxIterations(1);
        }

        String maxKMeansText = Utils.getOption('M', strArr);
        if (maxKMeansText.length() != 0) {
            setMaxKMeans(Integer.parseInt(maxKMeansText));
        } else {
            setMaxKMeans(1000);
        }

        // Must come after -M, because setMaxKMeans also overwrites the child limit.
        String maxKMeansChildrenText = Utils.getOption('J', strArr);
        if (maxKMeansChildrenText.length() != 0) {
            setMaxKMeansForChildren(Integer.parseInt(maxKMeansChildrenText));
        } else {
            setMaxKMeansForChildren(1000);
        }

        String minClustersText = Utils.getOption('L', strArr);
        if (minClustersText.length() != 0) {
            setMinNumClusters(Integer.parseInt(minClustersText));
        } else {
            setMinNumClusters(2);
        }

        String maxClustersText = Utils.getOption('H', strArr);
        if (maxClustersText.length() != 0) {
            setMaxNumClusters(Integer.parseInt(maxClustersText));
        } else {
            setMaxNumClusters(4);
        }

        String binValueText = Utils.getOption('B', strArr);
        if (binValueText.length() != 0) {
            setBinValue(Double.parseDouble(binValueText));
        } else {
            setBinValue(1.0d);
        }

        setUseKDTree(Utils.getFlag("use-kdtree", strArr));
        if (getUseKDTree()) {
            String kdTreeSpec = Utils.getOption('K', strArr);
            if (kdTreeSpec.length() != 0) {
                String[] kdTreeOptions = Utils.splitOptions(kdTreeSpec);
                if (kdTreeOptions.length == 0) {
                    throw new Exception("Invalid function specification string");
                }
                // First token is the class name; blank it so only real options remain.
                String kdTreeClassName = kdTreeOptions[0];
                kdTreeOptions[0] = "";
                setKDTree((KDTree) Utils.forName(KDTree.class, kdTreeClassName, kdTreeOptions));
            } else {
                setKDTree(new KDTree());
            }
        } else {
            setKDTree(new KDTree());
        }

        String cutOffText = Utils.getOption('C', strArr);
        if (cutOffText.length() != 0) {
            setCutOffFactor(Double.parseDouble(cutOffText));
        } else {
            setCutOffFactor(0.0d);
        }

        String distanceSpec = Utils.getOption('D', strArr);
        if (distanceSpec.length() != 0) {
            String[] distanceOptions = Utils.splitOptions(distanceSpec);
            if (distanceOptions.length == 0) {
                throw new Exception("Invalid function specification string");
            }
            // First token is the class name; blank it so only real options remain.
            String distanceClassName = distanceOptions[0];
            distanceOptions[0] = "";
            setDistanceF((DistanceFunction) Utils.forName(DistanceFunction.class, distanceClassName, distanceOptions));
        } else {
            setDistanceF(new EuclideanDistance());
        }

        String centerInputPath = Utils.getOption('N', strArr);
        if (centerInputPath.length() != 0) {
            setInputCenterFile(new File(centerInputPath));
            this.m_CenterInput = new BufferedReader(new FileReader(centerInputPath));
        } else {
            setInputCenterFile(new File(System.getProperty("user.dir")));
            this.m_CenterInput = null;
        }

        String centerOutputPath = Utils.getOption('O', strArr);
        if (centerOutputPath.length() != 0) {
            setOutputCenterFile(new File(centerOutputPath));
            this.m_CenterOutput = new PrintWriter(new FileOutputStream(centerOutputPath));
        } else {
            setOutputCenterFile(new File(System.getProperty("user.dir")));
            this.m_CenterOutput = null;
        }

        String debugLevelText = Utils.getOption('U', strArr);
        int debugLevel = 0;
        if (debugLevelText.length() != 0) {
            try {
                debugLevel = Integer.parseInt(debugLevelText);
            } catch (NumberFormatException e) {
                // Separate the offending value from the text and keep the cause.
                throw new Exception(debugLevelText + " is an illegal value for option -U", e);
            }
        }
        setDebugLevel(debugLevel);

        String debugVectorsPath = Utils.getOption('Y', strArr);
        if (debugVectorsPath.length() != 0) {
            // Only the file is recorded here; the reader and vectors are not loaded.
            setDebugVectorsFile(new File(debugVectorsPath));
        } else {
            setDebugVectorsFile(new File(System.getProperty("user.dir")));
            this.m_DebugVectorsInput = null;
            this.m_DebugVectors = null;
        }

        super.setOptions(strArr);
    }

    /**
     * Returns the current settings as an option array.
     *
     * <p>{@code -I}, {@code -M}, {@code -J}, {@code -L}, {@code -H}, {@code -B}
     * and {@code -C} are always emitted. {@code -use-kdtree} with {@code -K} is
     * emitted only when the KDTree is in use, {@code -D} only when a distance
     * function is set, {@code -N}, {@code -O} and {@code -Y} only when the
     * corresponding file exists and is a regular file, and {@code -U} only when
     * the debug level is greater than zero. The options of the superclass are
     * appended last.
     *
     * @return the option array
     */
    public String[] getOptions() {
        Vector<String> options = new Vector<String>();

        options.add("-I");
        options.add(String.valueOf(getMaxIterations()));
        options.add("-M");
        options.add(String.valueOf(getMaxKMeans()));
        options.add("-J");
        options.add(String.valueOf(getMaxKMeansForChildren()));
        options.add("-L");
        options.add(String.valueOf(getMinNumClusters()));
        options.add("-H");
        options.add(String.valueOf(getMaxNumClusters()));
        options.add("-B");
        options.add(String.valueOf(getBinValue()));

        if (getUseKDTree()) {
            options.add("-use-kdtree");
            options.add("-K");
            options.add(getKDTreeSpec());
        }

        options.add("-C");
        options.add(String.valueOf(getCutOffFactor()));

        if (getDistanceF() != null) {
            options.add("-D");
            options.add(getDistanceFSpec());
        }

        // File options are only reported for existing regular files.
        if (getInputCenterFile().exists() && getInputCenterFile().isFile()) {
            options.add("-N");
            options.add(String.valueOf(getInputCenterFile()));
        }
        if (getOutputCenterFile().exists() && getOutputCenterFile().isFile()) {
            options.add("-O");
            options.add(String.valueOf(getOutputCenterFile()));
        }

        if (getDebugLevel() > 0) {
            options.add("-U");
            options.add(String.valueOf(getDebugLevel()));
        }

        if (getDebugVectorsFile().exists() && getDebugVectorsFile().isFile()) {
            options.add("-Y");
            options.add(String.valueOf(getDebugVectorsFile()));
        }

        String[] inherited = super.getOptions();
        for (int idx = 0; idx < inherited.length; idx++) {
            options.add(inherited[idx]);
        }

        return options.toArray(new String[options.size()]);
    }

    /**
     * Builds a textual report of the clusterer state.
     *
     * <p>The report lists the requested and performed iterations, how often
     * KMeans was stopped by the iteration limit (only if that happened), the
     * prepared and performed splits, the cut-off factor, the percentage of
     * splits accepted by the cut-off factor, every cluster center (nominal
     * attributes are printed by label, all others by numeric value), the
     * distortion when available, and the BIC value.
     *
     * <p>The "Cutoff factor" line is printed twice; this duplication is kept so
     * the report format stays unchanged.
     *
     * @return the report text
     */
    public String toString() {
        StringBuilder report = new StringBuilder();
        report.append("\nXMeans\n======\n");
        report.append("Requested iterations            : " + this.m_MaxIterations + "\n");
        report.append("Iterations performed            : " + this.m_IterationCount + "\n");
        if (this.m_KMeansStopped > 0) {
            report.append("kMeans did not converge\n");
            report.append("  but was stopped by max-loops " + this.m_KMeansStopped + " times (max kMeans-iter)\n");
        }
        report.append("Splits prepared                 : " + this.m_NumSplits + "\n");
        report.append("Splits performed                : " + this.m_NumSplitsDone + "\n");
        report.append("Cutoff factor                   : " + this.m_CutOffFactor + "\n");

        // Cast explicitly so the ratio is computed in floating point whatever the
        // counter types are; an integer division would truncate the quotient
        // before it is scaled to a percentage.
        double acceptedPercent = 0.0d;
        if (this.m_NumSplitsDone > 0) {
            acceptedPercent = ((double) this.m_NumSplitsStillDone / (double) this.m_NumSplitsDone) * 100.0d;
        }
        report.append("Percentage of splits accepted \nby cutoff factor                : " + Utils.doubleToString(acceptedPercent, 2) + " %\n");

        report.append("------\n");
        report.append("Cutoff factor                   : " + this.m_CutOffFactor + "\n");
        report.append("------\n");
        report.append("\nCluster centers                 : " + this.m_NumClusters + " centers\n");
        for (int cluster = 0; cluster < this.m_NumClusters; cluster++) {
            report.append("\nCluster " + cluster + "\n           ");
            for (int attr = 0; attr < this.m_ClusterCenters.numAttributes(); attr++) {
                if (this.m_ClusterCenters.attribute(attr).isNominal()) {
                    // Nominal values are stored as indices; print the label instead.
                    report.append(" " + this.m_ClusterCenters.attribute(attr).value((int) this.m_ClusterCenters.instance(cluster).value(attr)));
                } else {
                    report.append(" " + this.m_ClusterCenters.instance(cluster).value(attr));
                }
            }
        }
        if (this.m_Mle != null) {
            report.append("\n\nDistortion: " + Utils.doubleToString(Utils.sum(this.m_Mle), 6) + "\n");
        }
        report.append("BIC-Value : " + Utils.doubleToString(this.m_Bic, 6) + "\n");
        return report.toString();
    }

    /**
     * Returns the stored cluster centers. The internal object is returned, not a copy.
     *
     * @return the cluster centers
     */
    public Instances getClusterCenters() {
        return m_ClusterCenters;
    }

    /**
     * Prints every cluster center to standard output, but only when the given
     * level equals the current debug level.
     *
     * @param i the debug level this output belongs to
     */
    protected void PrCentersFD(int i) {
        if (i != m_DebugLevel) {
            return;
        }
        int idx = 0;
        while (idx < m_ClusterCenters.numInstances()) {
            System.out.println(m_ClusterCenters.instance(idx));
            idx++;
        }
    }

    /**
     * Tells whether the given level equals the current debug level.
     *
     * @param i the debug level to test
     * @return {@code true} if {@code i} matches the current debug level exactly
     */
    protected boolean TFD(int i) {
        return m_DebugLevel == i;
    }

    /**
     * Prints a message to standard output, but only when the given level
     * equals the current debug level.
     *
     * @param i the debug level this message belongs to
     * @param str the message to print
     */
    protected void PFD(int i, String str) {
        if (i != m_DebugLevel) {
            return;
        }
        System.out.println(str);
    }

    /**
     * Prints a message to standard output when the current-debug flag is set.
     *
     * @param str the message to print
     */
    protected void PFD_CURR(String str) {
        if (!m_CurrDebugFlag) {
            return;
        }
        System.out.println(str);
    }

    /**
     * Returns the revision string obtained by passing the embedded revision
     * tag to {@code RevisionUtils.extract}.
     *
     * @return the extracted revision
     */
    public String getRevision() {
        final String revisionTag = "$Revision: 8109 $";
        return RevisionUtils.extract(revisionTag);
    }

    /**
     * Command-line entry point: creates an {@code XMeans} instance and hands it,
     * together with the arguments, to {@code runClusterer}.
     *
     * @param strArr the command-line arguments
     */
    public static void main(String[] strArr) {
        XMeans clusterer = new XMeans();
        runClusterer(clusterer, strArr);
    }
}
