/*
 * Decompiled with CFR 0.152.
 */
package elki.clustering.kmeans;

import elki.clustering.kmeans.AbstractKMeans;
import elki.clustering.kmeans.HamerlyKMeans;
import elki.clustering.kmeans.KMeans;
import elki.clustering.kmeans.initialization.KMeansInitialization;
import elki.clustering.kmeans.initialization.Predefined;
import elki.clustering.kmeans.quality.BayesianInformationCriterionXMeans;
import elki.clustering.kmeans.quality.KMeansQualityMeasure;
import elki.data.Cluster;
import elki.data.Clustering;
import elki.data.DoubleVector;
import elki.data.NumberVector;
import elki.data.model.MeanModel;
import elki.data.type.TypeInformation;
import elki.database.ids.DBIDIter;
import elki.database.ids.DBIDRef;
import elki.database.relation.ProxyView;
import elki.database.relation.Relation;
import elki.database.relation.RelationUtil;
import elki.distance.NumberVectorDistance;
import elki.distance.minkowski.SquaredEuclideanDistance;
import elki.logging.Logging;
import elki.logging.progress.MutableProgress;
import elki.logging.statistics.LongStatistic;
import elki.logging.statistics.Statistic;
import elki.logging.statistics.StringStatistic;
import elki.math.MathUtil;
import elki.math.linearalgebra.VMath;
import elki.result.Metadata;
import elki.utilities.documentation.Reference;
import elki.utilities.documentation.Title;
import elki.utilities.optionhandling.OptionID;
import elki.utilities.optionhandling.ParameterException;
import elki.utilities.optionhandling.WrongParameterValueException;
import elki.utilities.optionhandling.constraints.CommonConstraints;
import elki.utilities.optionhandling.constraints.ParameterConstraint;
import elki.utilities.optionhandling.parameterization.ChainedParameterization;
import elki.utilities.optionhandling.parameterization.ListParameterization;
import elki.utilities.optionhandling.parameterization.Parameterization;
import elki.utilities.optionhandling.parameters.IntParameter;
import elki.utilities.optionhandling.parameters.ObjectParameter;
import elki.utilities.optionhandling.parameters.Parameter;
import elki.utilities.optionhandling.parameters.RandomParameter;
import elki.utilities.random.RandomFactory;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;

@Title(value="X-means")
@Reference(authors="D. Pelleg, A. Moore", title="X-means: Extending K-means with Efficient Estimation on the Number of Clusters", booktitle="Proc. 17th Int. Conf. on Machine Learning (ICML 2000)", url="http://www.pelleg.org/shared/hp/download/xmeans.ps", bibkey="DBLP:conf/icml/PellegM00")
public class XMeans<V extends NumberVector, M extends MeanModel>
extends AbstractKMeans<V, M> {
    private static final Logging LOG = Logging.getLogger(XMeans.class);
    protected KMeans<V, M> innerKMeans;
    private int k_min;
    private int k_max;
    Predefined splitInitializer;
    KMeansQualityMeasure<V> informationCriterion;
    RandomFactory rnd;

    public XMeans(NumberVectorDistance<? super V> distance, int k_min, int k_max, int maxiter, KMeans<V, M> innerKMeans, KMeansInitialization initializer, KMeansQualityMeasure<V> informationCriterion, RandomFactory random) {
        super(distance, k_min, maxiter, initializer);
        this.k_min = k_min;
        this.k_max = k_max;
        this.innerKMeans = innerKMeans;
        this.splitInitializer = new Predefined((double[][])null);
        this.innerKMeans.setInitializer(this.splitInitializer);
        this.innerKMeans.setDistance(distance);
        this.informationCriterion = informationCriterion;
        this.rnd = random;
    }

    @Override
    public Clustering<M> run(Relation<V> relation) {
        String key = this.getClass().getName();
        MutableProgress prog = LOG.isVerbose() ? new MutableProgress("Number of clusters", this.k_max, LOG) : null;
        this.innerKMeans.setK(this.k_min);
        LOG.statistics((Statistic)new StringStatistic(key + ".initialization", this.initializer.toString()));
        this.splitInitializer.setInitialMeans(this.initializer.chooseInitialMeans(relation, this.k_min, this.distance));
        Clustering<M> clustering = this.innerKMeans.run(relation);
        if (prog != null) {
            prog.setProcessed(this.k_min, LOG);
        }
        ArrayList<Cluster<M>> clusters = new ArrayList<Cluster<M>>(clustering.getAllClusters());
        while (clusters.size() <= this.k_max) {
            ArrayList<Cluster<M>> nextClusters = new ArrayList<Cluster<M>>();
            for (Cluster<M> cluster : clusters) {
                List<Cluster<M>> childClusterList = this.splitCluster(cluster, relation);
                nextClusters.addAll(childClusterList);
                if (childClusterList.size() <= 1) continue;
                this.k += childClusterList.size() - 1;
                if (prog == null) continue;
                if (this.k >= this.k_max) {
                    prog.setTotal(this.k + 1);
                }
                prog.setProcessed(this.k, LOG);
            }
            if (clusters.size() == nextClusters.size()) break;
            this.innerKMeans.setInitializer(this.splitInitializer);
            this.splitInitializer.setInitialClusters(nextClusters);
            this.innerKMeans.setK(nextClusters.size());
            clustering = this.innerKMeans.run(relation);
            clusters.clear();
            clusters.addAll(clustering.getAllClusters());
        }
        if (prog != null) {
            prog.setTotal(this.k);
            prog.setProcessed(this.k, LOG);
        }
        LOG.statistics((Statistic)new LongStatistic(key + ".num-clusters", (long)clusters.size()));
        Clustering<M> result = new Clustering<M>(clusters);
        Metadata.of(result).setLongName("X-Means Clustering");
        return result;
    }

    protected List<Cluster<M>> splitCluster(Cluster<M> parentCluster, Relation<V> relation) {
        if (parentCluster.size() <= 1) {
            return Arrays.asList(parentCluster);
        }
        Clustering parentClustering = new Clustering(Arrays.asList(parentCluster));
        this.splitInitializer.setInitialMeans(this.splitCentroid(parentCluster, relation));
        this.innerKMeans.setK(2);
        Clustering<M> childClustering = this.innerKMeans.run((Relation<V>)new ProxyView(parentCluster.getIDs(), relation));
        double parentEvaluation = this.informationCriterion.quality(parentClustering, this.distance, relation);
        double childrenEvaluation = this.informationCriterion.quality(childClustering, this.distance, relation);
        if (LOG.isDebugging()) {
            LOG.debug((CharSequence)("parentEvaluation: " + parentEvaluation));
            LOG.debug((CharSequence)("childrenEvaluation: " + childrenEvaluation));
        }
        return this.informationCriterion.isBetter(parentEvaluation, childrenEvaluation) ? Arrays.asList(parentCluster) : childClustering.getAllClusters();
    }

    protected double[][] splitCentroid(Cluster<? extends MeanModel> parentCluster, Relation<V> relation) {
        double[] parentCentroid = (double[])parentCluster.getModel().getMean().clone();
        double radius = 0.0;
        NumberVectorDistance distance = this.distance;
        DBIDIter it = parentCluster.getIDs().iter();
        while (it.valid()) {
            double d = distance.distance((NumberVector)relation.get((DBIDRef)it), (NumberVector)DoubleVector.wrap((double[])parentCentroid));
            radius = d > radius ? d : radius;
            it.advance();
        }
        Random random = this.rnd.getSingleThreadedRandom();
        int dim = RelationUtil.dimensionality(relation);
        double[] randomVector = VMath.normalize((double[])MathUtil.randomDoubleArray((int)dim, (Random)random));
        VMath.timesEquals((double[])randomVector, (double)((0.4 + random.nextDouble() * 0.5) * radius));
        for (int d = 0; d < dim; ++d) {
            double a = parentCentroid[d];
            double b = randomVector[d];
            parentCentroid[d] = a - b;
            randomVector[d] = a + b;
        }
        return new double[][]{parentCentroid, randomVector};
    }

    @Override
    public TypeInformation[] getInputTypeRestriction() {
        return this.innerKMeans.getInputTypeRestriction();
    }

    @Override
    protected Logging getLogger() {
        return LOG;
    }

    public static class Par<V extends NumberVector, M extends MeanModel>
    extends AbstractKMeans.Par<V> {
        public static final OptionID INNER_KMEANS_ID = new OptionID("xmeans.kmeans", "kMeans algorithm to use.");
        public static final OptionID K_MIN_ID = new OptionID("xmeans.k_min", "The minimum number of clusters to find.");
        public static final OptionID SEED_ID = new OptionID("xmeans.seed", "Random seed for splitting clusters.");
        public static final OptionID INFORMATION_CRITERION_ID = new OptionID("xmeans.quality", "The quality measure to evaluate splits (e.g., AIC, BIC)");
        protected KMeans<V, M> innerKMeans;
        protected KMeansQualityMeasure<V> informationCriterion;
        protected int k_min;
        protected int k_max;
        protected RandomFactory random;

        @Override
        public void configure(Parameterization config) {
            IntParameter kMinP = (IntParameter)new IntParameter(K_MIN_ID, 2).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT);
            kMinP.grab(config, x -> {
                this.k_min = x;
            });
            IntParameter kMaxP = (IntParameter)new IntParameter(KMeans.K_ID).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT);
            kMaxP.grab(config, x -> {
                this.k_max = x;
            });
            if (this.k_min > this.k_max) {
                config.reportError((ParameterException)new WrongParameterValueException((Parameter)kMinP, "must be at most", (Parameter)kMaxP, ""));
            }
            this.getParameterInitialization(config);
            this.getParameterMaxIter(config);
            this.getParameterDistance(config);
            new RandomParameter(SEED_ID).grab(config, x -> {
                this.random = x;
            });
            ObjectParameter innerKMeansP = new ObjectParameter(INNER_KMEANS_ID, KMeans.class, HamerlyKMeans.class);
            if (config.grab((Parameter)innerKMeansP)) {
                ChainedParameterization combinedConfig = new ChainedParameterization(new Parameterization[]{new ListParameterization().addParameter(KMeans.K_ID, (Object)this.k_min).addParameter(KMeans.INIT_ID, (Object)new Predefined((double[][])null)).addParameter(KMeans.MAXITER_ID, (Object)this.maxiter).addParameter(KMeans.DISTANCE_FUNCTION_ID, this.distance != null ? this.distance : SquaredEuclideanDistance.STATIC), config});
                combinedConfig.errorsTo(config);
                this.innerKMeans = (KMeans)innerKMeansP.instantiateClass((Parameterization)combinedConfig);
                this.configureInformationCriterion(config);
            }
        }

        protected void configureInformationCriterion(Parameterization config) {
            new ObjectParameter(INFORMATION_CRITERION_ID, KMeansQualityMeasure.class, BayesianInformationCriterionXMeans.class).grab(config, x -> {
                this.informationCriterion = x;
            });
        }

        @Override
        public XMeans<V, M> make() {
            return new XMeans<V, M>(this.distance, this.k_min, this.k_max, this.maxiter, this.innerKMeans, this.initializer, this.informationCriterion, this.random);
        }
    }
}

