/*
 * Decompiled with CFR 0.152.
 */
package elki.clustering.hierarchical.birch;

import elki.clustering.ClusteringAlgorithm;
import elki.clustering.hierarchical.birch.BIRCHKMeansPlusPlus;
import elki.clustering.hierarchical.birch.CFTree;
import elki.clustering.hierarchical.birch.ClusteringFeature;
import elki.clustering.kmeans.KMeans;
import elki.data.Cluster;
import elki.data.Clustering;
import elki.data.NumberVector;
import elki.data.model.KMeansModel;
import elki.data.model.MeanModel;
import elki.data.type.TypeInformation;
import elki.data.type.TypeUtil;
import elki.database.ids.DBIDIter;
import elki.database.ids.DBIDRef;
import elki.database.ids.DBIDUtil;
import elki.database.ids.DBIDs;
import elki.database.ids.ModifiableDBIDs;
import elki.database.relation.Relation;
import elki.logging.Logging;
import elki.logging.statistics.DoubleStatistic;
import elki.logging.statistics.LongStatistic;
import elki.logging.statistics.Statistic;
import elki.math.linearalgebra.VMath;
import elki.result.Metadata;
import elki.utilities.documentation.Reference;
import elki.utilities.documentation.References;
import elki.utilities.optionhandling.Parameterizer;
import elki.utilities.optionhandling.constraints.CommonConstraints;
import elki.utilities.optionhandling.constraints.ParameterConstraint;
import elki.utilities.optionhandling.parameterization.Parameterization;
import elki.utilities.optionhandling.parameters.IntParameter;
import java.util.Arrays;

@References(value={@Reference(authors="T. Zhang, R. Ramakrishnan, M. Livny", title="BIRCH: An Efficient Data Clustering Method for Very Large Databases", booktitle="Proc. 1996 ACM SIGMOD International Conference on Management of Data", url="https://doi.org/10.1145/233269.233324", bibkey="DBLP:conf/sigmod/ZhangRL96"), @Reference(authors="T. Zhang, R. Ramakrishnan, M. Livny", title="BIRCH: A New Data Clustering Algorithm and Its Applications", booktitle="Data Min. Knowl. Discovery", url="https://doi.org/10.1023/A:1009783824328", bibkey="DBLP:journals/datamine/ZhangRL97")})
public class BIRCHLloydKMeans
implements ClusteringAlgorithm<Clustering<MeanModel>> {
    private static final Logging LOG = Logging.getLogger(BIRCHLloydKMeans.class);
    CFTree.Factory cffactory;
    int k;
    int maxiter;
    BIRCHKMeansPlusPlus initialization;

    public BIRCHLloydKMeans(CFTree.Factory cffactory, int k, int maxiter, BIRCHKMeansPlusPlus initialization) {
        this.cffactory = cffactory;
        this.k = k;
        this.maxiter = maxiter;
        this.initialization = initialization;
    }

    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array((TypeInformation[])new TypeInformation[]{TypeUtil.NUMBER_VECTOR_FIELD});
    }

    public Clustering<KMeansModel> run(Relation<NumberVector> relation) {
        CFTree tree = this.cffactory.newTree(relation.getDBIDs(), relation);
        ClusteringFeature[] cfs = new ClusteringFeature[tree.leaves];
        double[][] cfmeans = new double[tree.leaves][];
        int z = 0;
        CFTree.LeafIterator iter = tree.leafIterator();
        while (iter.valid()) {
            ClusteringFeature f = cfs[z] = iter.get();
            cfmeans[z] = VMath.times((double[])f.ls, (double)(1.0 / (double)f.n));
            ++z;
            iter.advance();
        }
        int[] assignment = new int[tree.leaves];
        int[] weights = new int[this.k];
        Arrays.fill(assignment, -1);
        double[][] means = this.kmeans(cfmeans, cfs, assignment, weights);
        double[] varsum = new double[this.k];
        ModifiableDBIDs[] ids = new ModifiableDBIDs[this.k];
        for (int i = 0; i < this.k; ++i) {
            ids[i] = DBIDUtil.newArray((int)weights[i]);
        }
        DBIDIter iter2 = relation.iterDBIDs();
        while (iter2.valid()) {
            NumberVector fv = (NumberVector)relation.get((DBIDRef)iter2);
            double mindist = this.distance(fv, means[0]);
            int minIndex = 0;
            for (int i = 1; i < this.k; ++i) {
                double dist = this.distance(fv, means[i]);
                if (!(dist < mindist)) continue;
                minIndex = i;
                mindist = dist;
            }
            int n = minIndex;
            varsum[n] = varsum[n] + mindist;
            ids[minIndex].add((DBIDRef)iter2);
            iter2.advance();
        }
        Clustering<KMeansModel> result = new Clustering<KMeansModel>();
        for (int i = 0; i < ids.length; ++i) {
            KMeansModel model = new KMeansModel(means[i], varsum[i]);
            result.addToplevelCluster(new Cluster<KMeansModel>((DBIDs)ids[i], model));
        }
        Metadata.of(result).setLongName("BIRCH k-Means Clustering");
        return result;
    }

    private double[][] kmeans(double[][] cfmeans, ClusteringFeature[] cfs, int[] assignment, int[] weights) {
        double[][] means = this.initialization.run(cfmeans, this.k);
        for (int i = 1; i <= this.maxiter || this.maxiter <= 0; ++i) {
            double[][] dArray = means = i == 1 ? means : this.means(assignment, means, cfs, weights);
            if (i > 1 && LOG.isStatistics()) {
                double varsum = VMath.sum((double[])this.calculateVariances(assignment, means, cfs, weights));
                LOG.statistics((Statistic)new DoubleStatistic(this.getClass().getName() + "." + (i - 1) + ".varsum", varsum));
            }
            int changed = this.assignToNearestCluster(assignment, means, cfmeans, cfs, weights);
            if (LOG.isStatistics()) {
                LOG.statistics((Statistic)new LongStatistic(this.getClass().getName() + "." + i + ".reassigned", (long)changed));
            }
            if (changed == 0) break;
        }
        return means;
    }

    private double[][] means(int[] assignment, double[][] means, ClusteringFeature[] cfs, int[] weights) {
        int i;
        Arrays.fill(weights, 0);
        double[][] newMeans = new double[this.k][];
        for (i = 0; i < assignment.length; ++i) {
            int c = assignment[i];
            ClusteringFeature cf = cfs[i];
            if (newMeans[c] == null) {
                newMeans[c] = (double[])cf.ls.clone();
            } else {
                VMath.plusEquals((double[])newMeans[c], (double[])cf.ls);
            }
            int n = c;
            weights[n] = weights[n] + cf.n;
        }
        for (i = 0; i < this.k; ++i) {
            if (weights[i] == 0) {
                newMeans[i] = means[i];
                continue;
            }
            VMath.timesEquals((double[])newMeans[i], (double)(1.0 / (double)weights[i]));
        }
        return newMeans;
    }

    private int assignToNearestCluster(int[] assignment, double[][] means, double[][] cfmeans, ClusteringFeature[] cfs, int[] weights) {
        Arrays.fill(weights, 0);
        int changed = 0;
        for (int i = 0; i < cfmeans.length; ++i) {
            double mindist = this.distance(cfmeans[i], means[0]);
            int minIndex = 0;
            for (int j = 1; j < this.k; ++j) {
                double dist = this.distance(cfmeans[i], means[j]);
                if (!(dist < mindist)) continue;
                minIndex = j;
                mindist = dist;
            }
            if (assignment[i] != minIndex) {
                ++changed;
                assignment[i] = minIndex;
            }
            int n = minIndex;
            weights[n] = weights[n] + cfs[i].n;
        }
        return changed;
    }

    protected double distance(NumberVector x, double[] y) {
        double v = 0.0;
        for (int i = 0; i < y.length; ++i) {
            double d = x.doubleValue(i) - y[i];
            v += d * d;
        }
        return v;
    }

    protected double distance(double[] x, double[] y) {
        double v = 0.0;
        for (int i = 0; i < x.length; ++i) {
            double d = x[i] - y[i];
            v += d * d;
        }
        return v;
    }

    private double[] calculateVariances(int[] assignment, double[][] means, ClusteringFeature[] cfs, int[] weights) {
        int i;
        double[] ss = new double[this.k];
        for (i = 0; i < assignment.length; ++i) {
            int n = assignment[i];
            ss[n] = ss[n] + cfs[i].ss;
        }
        for (i = 0; i < this.k; ++i) {
            int n = i;
            ss[n] = ss[n] - VMath.squareSum((double[])means[i]) * (double)weights[i];
        }
        return ss;
    }

    public static class Par
    implements Parameterizer {
        CFTree.Factory cffactory;
        protected int k;
        protected int maxiter;
        protected BIRCHKMeansPlusPlus initialization;

        public void configure(Parameterization config) {
            this.cffactory = (CFTree.Factory)config.tryInstantiate(CFTree.Factory.class);
            ((IntParameter)new IntParameter(KMeans.K_ID).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT)).grab(config, x -> {
                this.k = x;
            });
            ((IntParameter)new IntParameter(KMeans.MAXITER_ID, 0).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ZERO_INT)).grab(config, x -> {
                this.maxiter = x;
            });
            this.initialization = (BIRCHKMeansPlusPlus)config.tryInstantiate(BIRCHKMeansPlusPlus.class);
        }

        public BIRCHLloydKMeans make() {
            return new BIRCHLloydKMeans(this.cffactory, this.k, this.maxiter, this.initialization);
        }
    }
}

