/*
 * Decompiled with CFR 0.152.
 */
package elki.clustering.kmeans;

import elki.clustering.kmeans.KMeans;
import elki.clustering.kmeans.XMeans;
import elki.clustering.kmeans.initialization.KMeansInitialization;
import elki.data.Cluster;
import elki.data.NumberVector;
import elki.data.VectorUtil;
import elki.data.model.MeanModel;
import elki.database.ids.DBIDIter;
import elki.database.ids.DBIDRef;
import elki.database.ids.DBIDs;
import elki.database.relation.ProxyView;
import elki.database.relation.Relation;
import elki.database.relation.RelationUtil;
import elki.distance.NumberVectorDistance;
import elki.logging.Logging;
import elki.math.MathUtil;
import elki.math.linearalgebra.CovarianceMatrix;
import elki.math.linearalgebra.VMath;
import elki.math.statistics.tests.AndersonDarlingTest;
import elki.utilities.documentation.Reference;
import elki.utilities.documentation.Title;
import elki.utilities.optionhandling.OptionID;
import elki.utilities.optionhandling.constraints.CommonConstraints;
import elki.utilities.optionhandling.constraints.ParameterConstraint;
import elki.utilities.optionhandling.parameterization.Parameterization;
import elki.utilities.optionhandling.parameters.DoubleParameter;
import elki.utilities.random.RandomFactory;
import java.util.Arrays;
import java.util.List;
import java.util.Random;

@Title(value="G-means")
@Reference(authors="G. Hamerly and C. Elkan", title="Learning the k in k-means", booktitle="Neural Information Processing Systems", url="https://www.researchgate.net/publication/2869155_Learning_the_K_in_K-Means", bibkey="DBLP:conf/nips/HamerlyE03")
public class GMeans<V extends NumberVector, M extends MeanModel>
extends XMeans<V, M> {
    private static final Logging LOG = Logging.getLogger(GMeans.class);
    protected double critical;

    public GMeans(NumberVectorDistance<? super V> distance, double critical, int k_min, int k_max, int maxiter, KMeans<V, M> innerKMeans, KMeansInitialization initializer, RandomFactory random) {
        super(distance, k_min, k_max, maxiter, innerKMeans, initializer, null, random);
        this.critical = critical;
    }

    @Override
    protected List<Cluster<M>> splitCluster(Cluster<M> parentCluster, Relation<V> relation) {
        if (parentCluster.size() <= 1) {
            return Arrays.asList(parentCluster);
        }
        this.splitInitializer.setInitialMeans(this.splitCentroid(parentCluster, relation));
        this.innerKMeans.setK(2);
        List childClustering = this.innerKMeans.run(new ProxyView(parentCluster.getIDs(), relation)).getAllClusters();
        assert (childClustering.size() == 2);
        double[] v = VMath.minus((double[])((MeanModel)childClustering.get(0).getModel()).getMean(), (double[])((MeanModel)childClustering.get(1).getModel()).getMean());
        double[] projectedValues = new double[parentCluster.size()];
        int i = 0;
        DBIDIter it = parentCluster.getIDs().iter();
        while (it.valid()) {
            projectedValues[i++] = VectorUtil.dot((NumberVector)((NumberVector)relation.get((DBIDRef)it)), (double[])v);
            it.advance();
        }
        Arrays.sort(projectedValues);
        double A2 = AndersonDarlingTest.A2Noncentral((double[])projectedValues);
        A2 = AndersonDarlingTest.removeBiasNormalDistribution((double)A2, (int)projectedValues.length);
        return A2 > this.critical ? childClustering : Arrays.asList(parentCluster);
    }

    @Override
    protected double[][] splitCentroid(Cluster<? extends MeanModel> parentCluster, Relation<V> relation) {
        CovarianceMatrix cov = CovarianceMatrix.make(relation, (DBIDs)parentCluster.getIDs());
        double[][] mat = cov.destroyToSampleMatrix();
        int dim = RelationUtil.dimensionality(relation);
        double[] s = VMath.normalizeEquals((double[])MathUtil.randomDoubleArray((int)dim, (Random)this.rnd.getSingleThreadedRandom()));
        for (int i = 0; i < 30; ++i) {
            s = VMath.normalizeEquals((double[])VMath.times((double[][])mat, (double[])s));
        }
        VMath.timesEquals((double[])s, (double)Math.sqrt(2.0 * VMath.transposeTimesTimes((double[])s, (double[][])mat, (double[])s) / Math.PI));
        return new double[][]{VMath.plus((double[])cov.getMeanVector(), (double[])s), VMath.minus((double[])cov.getMeanVector(), (double[])s)};
    }

    @Override
    protected Logging getLogger() {
        return LOG;
    }

    public static class Par<V extends NumberVector, M extends MeanModel>
    extends XMeans.Par<V, M> {
        public static final OptionID CRITICAL_ID = new OptionID("gmeans.critical", "Critical value for the Anderson Darling test. \u03b1=0.0001 is 1.8692, \u03b1=0.005 is 1.159 \u03b1=0.01 is 1.0348");
        protected double critical;

        @Override
        protected void configureInformationCriterion(Parameterization config) {
            ((DoubleParameter)new DoubleParameter(CRITICAL_ID, 1.8692).addConstraint((ParameterConstraint)CommonConstraints.GREATER_THAN_ZERO_DOUBLE)).grab(config, x -> {
                this.critical = x;
            });
        }

        @Override
        public GMeans<V, M> make() {
            return new GMeans(this.distance, this.critical, this.k_min, this.k_max, this.maxiter, this.innerKMeans, this.initializer, this.random);
        }
    }
}

