/*
 * Decompiled with CFR 0.152.
 */
package elki.clustering.kmeans.initialization;

import elki.clustering.kmeans.KMeans;
import elki.clustering.kmeans.initialization.AbstractKMeansInitialization;
import elki.data.Cluster;
import elki.data.Clustering;
import elki.data.NumberVector;
import elki.data.model.ModelUtil;
import elki.data.type.TypeInformation;
import elki.data.type.TypeUtil;
import elki.database.ids.DBIDUtil;
import elki.database.ids.DBIDs;
import elki.database.relation.ProxyView;
import elki.database.relation.Relation;
import elki.distance.NumberVectorDistance;
import elki.distance.minkowski.SquaredEuclideanDistance;
import elki.logging.LoggingUtil;
import elki.utilities.documentation.Reference;
import elki.utilities.optionhandling.OptionID;
import elki.utilities.optionhandling.parameterization.ChainedParameterization;
import elki.utilities.optionhandling.parameterization.ListParameterization;
import elki.utilities.optionhandling.parameterization.Parameterization;
import elki.utilities.optionhandling.parameters.DoubleParameter;
import elki.utilities.optionhandling.parameters.ObjectParameter;
import elki.utilities.optionhandling.parameters.Parameter;
import elki.utilities.random.RandomFactory;

/**
 * K-means initialization that runs an inner k-means algorithm on a random
 * sample of the data set and uses the prototypes of the resulting clusters as
 * the initial means.
 * <p>
 * Note: the inner k-means instance is reconfigured (number of clusters and
 * distance function) on every call to
 * {@link #chooseInitialMeans(Relation, int, NumberVectorDistance)}.
 *
 * @param <V> Vector type processed by the inner k-means algorithm
 */
@Reference(authors="P. S. Bradley, U. M. Fayyad", title="Refining Initial Points for K-Means Clustering", booktitle="Proc. 15th Int. Conf. on Machine Learning (ICML 1998)", bibkey="DBLP:conf/icml/BradleyF98")
public class SampleKMeans<V extends NumberVector>
extends AbstractKMeansInitialization {
    /**
     * K-means variant that is run on the sample.
     */
    private final KMeans<V, ?> innerkMeans;

    /**
     * Sampling parameter, passed unchanged to
     * {@code DBIDUtil.randomSample(DBIDs, double, RandomFactory)}.
     */
    private final double rate;

    /**
     * Constructor.
     *
     * @param rnd Random generator factory
     * @param innerkMeans K-means variant to run on the sample
     * @param rate Sampling parameter (see {@link Par#SAMPLE_ID})
     */
    public SampleKMeans(RandomFactory rnd, KMeans<V, ?> innerkMeans, double rate) {
        super(rnd);
        this.innerkMeans = innerkMeans;
        this.rate = rate;
    }

    /**
     * Choose initial means by clustering a random sample with the inner
     * k-means algorithm.
     *
     * @param relation Data relation to choose the means from
     * @param k Number of means to choose
     * @param distance Distance function handed on to the inner k-means
     * @return One row per cluster returned by the inner k-means, holding the
     *         coordinates of that cluster's prototype
     * @throws IllegalArgumentException if the relation, or the sample drawn
     *         from it, contains fewer than {@code k} objects
     */
    @Override
    public double[][] chooseInitialMeans(Relation<? extends NumberVector> relation, int k, NumberVectorDistance<?> distance) {
        if (relation.size() < k) {
            throw new IllegalArgumentException("Cannot choose k=" + k + " means from N=" + relation.size() + " < k objects.");
        }
        final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), rate, rnd);
        if (sample.size() < k) {
            throw new IllegalArgumentException("Sampling rate=" + rate + " from N=" + relation.size() + " yields only " + sample.size() + " < k objects.");
        }
        // The caller only promises "some NumberVector"; the inner k-means is
        // typed on V. This cast cannot be verified at compile time.
        @SuppressWarnings("unchecked")
        final Relation<V> rel = (Relation<V>) relation;
        // Only warn (rather than fail) if the distance does not accept
        // arbitrary number vector fields.
        if (!distance.getInputTypeRestriction().isAssignableFromType(TypeUtil.NUMBER_VECTOR_FIELD)) {
            LoggingUtil.warning("Initializing k-means with k-means using specialized distance functions MAY fail, if the initialization method does require a distance defined on arbitrary number vectors.");
        }
        // Same caveat as above: the wildcard distance is assumed to accept V.
        @SuppressWarnings("unchecked")
        final NumberVectorDistance<? super V> pdf = (NumberVectorDistance<? super V>) distance;
        // Restrict the view of the relation to the sampled objects.
        final Relation<V> sampleView = new ProxyView<V>(sample, rel);

        innerkMeans.setK(k);
        innerkMeans.setDistance(pdf);
        final Clustering<?> clusters = innerkMeans.run(sampleView);

        final double[][] means = new double[clusters.getAllClusters().size()][];
        int i = 0;
        for (Cluster<?> cluster : clusters.getAllClusters()) {
            means[i++] = ModelUtil.getPrototype(cluster.getModel(), relation).toArray();
        }
        return means;
    }

    /**
     * Parameterization class.
     *
     * @param <V> Vector type processed by the inner k-means algorithm
     */
    public static class Par<V extends NumberVector>
    extends AbstractKMeansInitialization.Par {
        /**
         * Option for the inner k-means algorithm.
         */
        public static final OptionID KMEANS_ID = new OptionID("kmeans.algorithm", "KMeans variant to run multiple times.");

        /**
         * Option for the sample size or sampling rate.
         */
        public static final OptionID SAMPLE_ID = new OptionID("kmeans.samplesize", "Sample set size (if > 1) or sampling rate (if < 1).");

        /**
         * Inner k-means algorithm; only set if the option could be grabbed.
         */
        protected KMeans<V, ?> innerkMeans;

        /**
         * Sampling parameter read from {@link #SAMPLE_ID}.
         */
        protected double rate;

        @Override
        public void configure(Parameterization config) {
            super.configure(config);
            ObjectParameter<KMeans<V, ?>> kMeansVariantP = new ObjectParameter<>(KMEANS_ID, KMeans.class);
            if (config.grab(kMeansVariantP)) {
                // Predefine k and the distance for the inner algorithm, so they
                // need not be given again; both are overwritten via setK() and
                // setDistance() when the initialization is actually run.
                ListParameterization kMeansVariantParameters = new ListParameterization();
                kMeansVariantParameters.addParameter(KMeans.K_ID, 13);
                kMeansVariantParameters.addParameter(KMeans.DISTANCE_FUNCTION_ID, SquaredEuclideanDistance.class);
                ChainedParameterization combinedConfig = new ChainedParameterization(kMeansVariantParameters, config);
                combinedConfig.errorsTo(config);
                innerkMeans = kMeansVariantP.instantiateClass(combinedConfig);
            }
            new DoubleParameter(SAMPLE_ID).grab(config, x -> rate = x);
        }

        /**
         * Build the configured initialization method.
         *
         * @return New {@link SampleKMeans} instance
         */
        public SampleKMeans<V> make() {
            return new SampleKMeans<>(this.rnd, this.innerkMeans, this.rate);
        }
    }
}

