/*
 * Decompiled with CFR 0.152.
 */
package hex.kmeans;

import hex.ClusteringModel;
import hex.ClusteringModelBuilder;
import hex.DataInfo;
import hex.Model;
import hex.ModelBuilder;
import hex.ModelCategory;
import hex.ModelMetrics;
import hex.ModelMetricsClustering;
import hex.ToEigenVec;
import hex.genmodel.GenModel;
import hex.kmeans.KMeansModel;
import hex.util.LinearAlgebraUtils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Random;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import water.DKV;
import water.H2O;
import water.Iced;
import water.IcedUtils;
import water.Job;
import water.Key;
import water.MRTask;
import water.exceptions.H2OModelBuilderIllegalArgumentException;
import water.fvec.Chunk;
import water.fvec.Frame;
import water.fvec.Vec;
import water.util.ArrayUtils;
import water.util.Log;
import water.util.PrettyPrint;
import water.util.RandomUtils;
import water.util.TwoDimTable;

public class KMeans
extends ClusteringModelBuilder<KMeansModel, KMeansModel.KMeansParameters, KMeansModel.KMeansOutput> {
    /** Numeric tolerance constant (1e-4); its consumers are not visible in this part of the file. */
    private static final double TOLERANCE = 1.0E-4;

    /**
     * Returns the shared {@code LinearAlgebraUtils.toEigen} instance.
     *
     * @return the {@link ToEigenVec} used by this builder
     */
    public ToEigenVec getToEigenVec() {
        return LinearAlgebraUtils.toEigen;
    }

    /**
     * Lists the model categories this builder can produce.
     *
     * @return a single-element array containing {@code ModelCategory.Clustering}
     */
    public ModelCategory[] can_build() {
        return new ModelCategory[]{ModelCategory.Clustering};
    }

    /**
     * Capability flag; always {@code true} for this builder.
     * Presumably signals that trained models can be exported as a POJO - confirm against the base class.
     */
    public boolean havePojo() {
        return true;
    }

    /**
     * Capability flag; always {@code true} for this builder.
     * Presumably signals that trained models can be exported as a MOJO - confirm against the base class.
     */
    public boolean haveMojo() {
        return true;
    }

    /**
     * Creates the driver object that carries out training for this builder.
     *
     * @return a new {@code KMeansDriver} bound to this builder instance
     */
    protected KMeansDriver trainModelImpl() {
        return new KMeansDriver();
    }

    /**
     * Builds a KMeans builder from the given parameters and immediately runs the
     * cheap (non-expensive) validation pass.
     *
     * @param parms the K-Means parameters to validate and train with
     */
    public KMeans(KMeansModel.KMeansParameters parms) {
        super((ClusteringModel.ClusteringParameters)parms);
        // Cheap validation only; the expensive pass is run by the driver's computeImpl().
        this.init(false);
    }

    /**
     * Builds a KMeans builder from the given parameters, attached to an existing job,
     * and immediately runs the cheap (non-expensive) validation pass.
     *
     * @param parms the K-Means parameters to validate and train with
     * @param job   the job handed to the superclass constructor
     */
    public KMeans(KMeansModel.KMeansParameters parms, Job job) {
        super((ClusteringModel.ClusteringParameters)parms, job);
        // Cheap validation only; the expensive pass is run by the driver's computeImpl().
        this.init(false);
    }

    /**
     * Builds a KMeans builder with default parameters; no validation is run here.
     *
     * @param startup_once forwarded unchanged to the superclass constructor
     *                     (presumably used for one-time algorithm registration - confirm)
     */
    public KMeans(boolean startup_once) {
        super((ClusteringModel.ClusteringParameters)new KMeansModel.KMeansParameters(), startup_once);
    }

    /**
     * Registers an error on {@code _train} when the estimated centroid storage exceeds
     * the free memory reported by this node's heartbeat.
     *
     * <p>The estimate is 8 bytes per value, times {@code k}, times the number of training
     * columns, doubled when standardization is enabled.
     */
    protected void checkMemoryFootPrint() {
        // Start the product with a long literal: with an int literal the whole expression is
        // evaluated in 32-bit arithmetic and can overflow before being widened to long.
        long mem_usage = 8L * ((KMeansModel.KMeansParameters)this._parms)._k * this._train.numCols() * (((KMeansModel.KMeansParameters)this._parms)._standardize ? 2 : 1);
        long max_mem = H2O.SELF._heartbeat.get_free_mem();
        if (mem_usage > max_mem) {
            String msg = "Centroids won't fit in the driver node's memory (" + PrettyPrint.bytes((long)mem_usage) + " > " + PrettyPrint.bytes((long)max_mem) + ") - try reducing the number of columns and/or the number of categorical factors.";
            this.error("_train", msg);
        }
    }

    /**
     * Validates the K-Means parameters and records any problems through
     * {@code error(...)} / {@code info(...)}.
     *
     * <p>Checks performed, in order: the fold column is dropped from the training frame
     * (expensive pass only), {@code max_iterations} must lie in [1, 1e6], user-supplied
     * centers must exist and match the training frame's shape and {@code k},
     * {@code estimate_k} is incompatible with user points and needs at least one numeric
     * column, and finally the centroid memory footprint is checked when no error has
     * been recorded so far.
     *
     * @param expensive when {@code true}, also runs the checks that touch the training data
     */
    public void init(boolean expensive) {
        super.init(expensive);
        KMeansModel.KMeansParameters parms = (KMeansModel.KMeansParameters)this._parms;
        // Only touch the frame when it exists: a missing training frame must surface as a
        // validation error, not as a NullPointerException from this method.
        if (expensive && parms._fold_column != null && this._train != null) {
            this._train.remove(parms._fold_column);
        }
        if (parms._max_iterations <= 0 || (double)parms._max_iterations > 1000000.0) {
            this.error("_max_iterations", " max_iterations must be between 1 and 1e6");
        }
        if (this._train == null) {
            return;
        }
        // NOTE(review): the errors below are attached to "_user_y" although the parameter
        // read here is _user_points - confirm whether clients rely on that field name.
        if (parms._init == Initialization.User && parms._user_points == null) {
            this.error("_user_y", "Must specify initial cluster centers");
        }
        if (parms._user_points != null) {
            Frame user_points = (Frame)parms._user_points.get();
            if (user_points == null) {
                this.error("_user_y", "User-specified points do not refer to a valid frame");
            } else if (user_points.numCols() != this._train.numCols() - this.numSpecialCols()) {
                this.error("_user_y", "The user-specified points must have the same number of columns (" + (this._train.numCols() - this.numSpecialCols()) + ") as the training observations");
            } else if (user_points.numRows() != (long)parms._k) {
                this.error("_user_y", "The number of rows in the user-specified points is not equal to k = " + parms._k);
            }
        }
        if (parms._estimate_k) {
            if (parms._user_points != null) {
                this.error("_estimate_k", "Cannot estimate k if user_points are provided.");
            }
            this.info("_seed", "seed is ignored when estimate_k is enabled.");
            this.info("_init", "Initialization scheme is ignored when estimate_k is enabled - algorithm is deterministic.");
            if (expensive) {
                // estimate_k needs at least one numeric column.
                boolean numeric = false;
                for (Vec v : this._train.vecs()) {
                    if (v.isNumeric()) {
                        numeric = true;
                        break;
                    }
                }
                if (!numeric) {
                    this.error("_estimate_k", "Cannot estimate k if data has no numeric columns.");
                }
            }
        }
        if (expensive && this.error_count() == 0) {
            this.checkMemoryFootPrint();
        }
    }

    /**
     * Distance from {@code point} to its nearest center, searching every row of {@code centers}.
     *
     * @param centers candidate centers
     * @param point   the point to measure
     * @param isCats  per-column marker passed through to the distance function
     * @param cd      scratch holder that is overwritten with the search result
     * @return the {@code _dist} of the nearest center as reported by {@code closest}
     */
    private static double minSqr(double[][] centers, double[] point, String[][] isCats, ClusterDist cd) {
        ClusterDist nearest = KMeans.closest(centers, point, isCats, cd, centers.length);
        return nearest._dist;
    }

    /**
     * Distance from {@code point} to its nearest center among the first {@code count}
     * rows of {@code centers}.
     *
     * @param centers candidate centers; only indices {@code [0, count)} are examined
     * @param point   the point to measure
     * @param isCats  per-column marker passed through to the distance function
     * @param cd      scratch holder that is overwritten with the search result
     * @param count   number of leading centers to consider
     * @return the {@code _dist} of the nearest center as reported by {@code closest}
     */
    private static double minSqr(double[][] centers, double[] point, String[][] isCats, ClusterDist cd, int count) {
        ClusterDist nearest = KMeans.closest(centers, point, isCats, cd, count);
        return nearest._dist;
    }

    /**
     * Finds the nearest center to {@code point}, searching every row of {@code centers}.
     * Delegates to the five-argument overload with {@code count = centers.length}.
     *
     * @param centers candidate centers
     * @param point   the point to classify
     * @param isCats  per-column marker passed through to the distance function
     * @param cd      holder that is overwritten and returned
     * @return {@code cd}, filled with the winning cluster index and its distance
     */
    private static ClusterDist closest(double[][] centers, double[] point, String[][] isCats, ClusterDist cd) {
        return KMeans.closest(centers, point, isCats, cd, centers.length);
    }

    /**
     * Finds the nearest of the first {@code count} centers to {@code point} using
     * {@code GenModel.KMeans_distance}.
     *
     * <p>Ties keep the lowest index because only a strictly smaller distance replaces the
     * current best. If no distance is strictly below {@code Double.MAX_VALUE} (for example
     * when {@code count} is 0), the result is cluster {@code -1} with distance
     * {@code Double.MAX_VALUE}.
     *
     * @param centers candidate centers; only indices {@code [0, count)} are examined
     * @param point   the point to classify
     * @param isCats  per-column marker passed through to the distance function
     * @param cd      holder that is overwritten and returned
     * @param count   number of leading centers to consider
     * @return {@code cd}, filled with the winning cluster index and its distance
     */
    private static ClusterDist closest(double[][] centers, double[] point, String[][] isCats, ClusterDist cd, int count) {
        int bestIdx = -1;
        double bestDist = Double.MAX_VALUE;
        int idx = 0;
        while (idx < count) {
            double dist = GenModel.KMeans_distance(centers[idx], point, isCats);
            if (dist < bestDist) {
                bestIdx = idx;
                bestDist = dist;
            }
            idx++;
        }
        cd._cluster = bestIdx;
        cd._dist = bestDist;
        return cd;
    }

    /**
     * Picks {@code N} centers out of {@code points}, always starting with {@code points[0]}.
     *
     * <ul>
     *   <li>{@code Random}: nothing further is chosen; slots 1..N-1 of the result stay {@code null}.</li>
     *   <li>{@code PlusPlus}: repeatedly sums each point's distance to the centers chosen so
     *       far, then takes the first point whose distance is at least a fresh random
     *       fraction of that sum; if no point qualifies the round is retried.</li>
     *   <li>{@code Furthest}: repeatedly takes the point with the largest distance to the
     *       centers chosen so far (index 0 when no distance is positive).</li>
     * </ul>
     *
     * @param points candidate points; must contain at least one row
     * @param rand   random source, consumed only by the {@code PlusPlus} scheme
     * @param N      number of centers to return
     * @param init   selection scheme
     * @param isCats per-column marker passed through to the distance function
     * @return an array of length {@code N} whose rows are references into {@code points}
     */
    private static double[][] recluster(double[][] points, Random rand, int N, Initialization init, String[][] isCats) {
        final double[][] chosen = new double[N][];
        chosen[0] = points[0];
        int filled = 1;
        final ClusterDist scratch = new ClusterDist();
        switch (init) {
            case Random: {
                break;
            }
            case PlusPlus: {
                while (filled < chosen.length) {
                    // Total distance of every candidate to its nearest already-chosen center.
                    double total = 0.0;
                    for (double[] candidate : points) {
                        total += KMeans.minSqr(chosen, candidate, isCats, scratch, filled);
                    }
                    // Take the first candidate that clears its own random threshold.
                    for (double[] candidate : points) {
                        if (KMeans.minSqr(chosen, candidate, isCats, scratch, filled) >= rand.nextDouble() * total) {
                            chosen[filled++] = candidate;
                            break;
                        }
                    }
                }
                break;
            }
            case Furthest: {
                while (filled < chosen.length) {
                    double farthest = 0.0;
                    int farthestIdx = 0;
                    for (int i = 0; i < points.length; ++i) {
                        double dist = KMeans.minSqr(chosen, points[i], isCats, scratch, filled);
                        if (dist > farthest) {
                            farthest = dist;
                            farthestIdx = i;
                        }
                    }
                    chosen[filled++] = points[farthestIdx];
                }
                break;
            }
            default: {
                throw H2O.fail();
            }
        }
        return chosen;
    }

    /**
     * Fills {@code center} with the preprocessed values of a randomly selected row.
     *
     * <p>The row index is {@code floor(u * length) - 1}, clamped to be non-negative,
     * where {@code u} is the next double from {@code rand} and {@code length} is the
     * length of the first vec.
     *
     * @param vecs   column vectors to read from
     * @param rand   random source; exactly one double is drawn
     * @param center output array, overwritten in place
     * @param means  forwarded to the row preprocessing
     * @param mults  forwarded to the row preprocessing
     * @param modes  forwarded to the row preprocessing
     */
    private void randomRow(Vec[] vecs, Random rand, double[] center, double[] means, double[] mults, int[] modes) {
        long scaled = (long)(rand.nextDouble() * (double)vecs[0].length());
        long row = Math.max(0L, scaled - 1L);
        KMeans.data(center, vecs, row, means, mults, modes);
    }

    /**
     * For every categorical column, overwrites each center's value with the index that
     * {@code ArrayUtils.maxIndex} returns for that cluster's level counts.
     *
     * @param centers centers to update in place; column count is taken from row 0
     * @param cats    per-cluster, per-column level counts (read only for categorical columns)
     * @param isCats  per-column marker; non-null means the column is treated as categorical
     * @return the same {@code centers} array
     */
    private static double[][] max_cats(double[][] centers, long[][][] cats, String[][] isCats) {
        int clu = 0;
        for (double[] center : centers) {
            int width = centers[0].length;
            for (int col = 0; col < width; col++) {
                if (isCats[col] != null) {
                    center[col] = ArrayUtils.maxIndex(cats[clu][col]);
                }
            }
            clu++;
        }
        return centers;
    }

    /**
     * Returns a copy of {@code centers} with standardization undone on non-categorical columns.
     *
     * <p>Each such value becomes {@code value / mults[col] + means[col]}. When {@code mults}
     * is {@code null} the result is a plain copy. The input array is never modified.
     *
     * @param centers centers to convert; must have at least one row, whose length fixes the column count
     * @param isCats  per-column marker; non-null columns are copied unchanged
     * @param means   per-column offsets added back
     * @param mults   per-column multipliers divided out, or {@code null} for no conversion
     * @return a newly allocated array of the same shape
     */
    private static double[][] destandardize(double[][] centers, String[][] isCats, double[] means, double[] mults) {
        final int nClusters = centers.length;
        final int nCols = centers[0].length;
        final double[][] raw = new double[nClusters][nCols];
        for (int clu = 0; clu < nClusters; clu++) {
            double[] row = raw[clu];
            System.arraycopy(centers[clu], 0, row, 0, nCols);
            if (mults != null) {
                for (int col = 0; col < nCols; col++) {
                    if (isCats[col] == null) {
                        row[col] = row[col] / mults[col] + means[col];
                    }
                }
            }
        }
        return raw;
    }

    /**
     * Reads one row from {@code vecs} and stores the result of
     * {@code GenModel.Kmeans_preprocessData} for each column into {@code values}.
     *
     * @param values output array; its length determines how many leading columns are read
     * @param vecs   column vectors to read from
     * @param row    absolute row index
     * @param means  forwarded to the preprocessing call
     * @param mults  forwarded to the preprocessing call
     * @param modes  forwarded to the preprocessing call
     */
    private static void data(double[] values, Vec[] vecs, long row, double[] means, double[] mults, int[] modes) {
        int col = 0;
        while (col < values.length) {
            double rawValue = vecs[col].at(row);
            values[col] = GenModel.Kmeans_preprocessData(rawValue, col, means, mults, modes);
            col++;
        }
    }

    /**
     * Reads one row from {@code chks} and stores the result of
     * {@code GenModel.Kmeans_preprocessData} for each column into {@code values}.
     *
     * @param values output array; its length determines how many leading chunks are read
     * @param chks   chunks to read from
     * @param row    chunk-relative row index
     * @param means  forwarded to the preprocessing call
     * @param mults  forwarded to the preprocessing call
     * @param modes  forwarded to the preprocessing call
     */
    private static void data(double[] values, Chunk[] chks, int row, double[] means, double[] mults, int[] modes) {
        int col = 0;
        while (col < values.length) {
            double rawValue = chks[col].atd(row);
            values[col] = GenModel.Kmeans_preprocessData(rawValue, col, means, mults, modes);
            col++;
        }
    }

    /**
     * Creates clustering metrics for the training frame from the statistics already stored
     * in the model's output, and registers them on the model.
     *
     * @param model model whose output supplies sizes and sum-of-squares values
     * @return the newly created metrics object, after it was added to {@code model}
     */
    private ModelMetricsClustering makeTrainingMetrics(KMeansModel model) {
        ModelMetricsClustering metrics = new ModelMetricsClustering((Model)model, this.train());
        KMeansModel.KMeansOutput out = (KMeansModel.KMeansOutput)model._output;
        // Per-cluster statistics.
        metrics._size = out._size;
        metrics._withinss = out._withinss;
        // Aggregate statistics.
        metrics._betweenss = out._betweenss;
        metrics._totss = out._totss;
        metrics._tot_withinss = out._tot_withinss;
        model.addMetrics((ModelMetrics)metrics);
        return metrics;
    }

    /**
     * Distributed pass that splits one cluster in two along a single numeric dimension.
     *
     * <p>Rows currently assigned to {@code _clusterToSplit} whose value in
     * {@code _dimToSplit} is greater than that cluster's center value are reassigned to
     * the last center index; the others stay. Per-cluster means and sizes of the affected
     * rows are accumulated in {@code _cMeans} and {@code _size}.
     */
    private static class SplitTask
    extends MRTask<SplitTask> {
        // Inputs; cleared at the end of map().
        double[][] _centers;
        double[] _means;
        double[] _mults;
        int[] _modes;
        final int _k;
        final String[][] _isCats;
        final boolean _hasWeight;
        final int _clusterToSplit;
        final int _dimToSplit;
        // NOTE(review): stored but never read inside this class.
        final double _splitPoint;
        // Outputs: per-cluster column means and row counts.
        double[][] _cMeans;
        long[] _size;

        SplitTask(double[][] centers, double[] means, double[] mults, int[] modes, String[][] isCats, int k, boolean hasWeight, int clusterToSplit, int dimToSplit, double splitPoint) {
            this._centers = centers;
            this._means = means;
            this._mults = mults;
            this._modes = modes;
            this._isCats = isCats;
            this._k = k;
            this._hasWeight = hasWeight;
            this._clusterToSplit = clusterToSplit;
            this._dimToSplit = dimToSplit;
            this._splitPoint = splitPoint;
        }

        /**
         * Processes one set of chunks. The last chunk holds the cluster assignment; when
         * weights are present the weight chunk sits at index {@code N}, directly before it.
         */
        public void map(Chunk[] cs) {
            final int N = cs.length - (this._hasWeight ? 1 : 0) - 1;
            assert (this._centers[0].length == N);
            this._cMeans = new double[this._k][N];
            this._size = new long[this._k];
            final Chunk assignment = cs[cs.length - 1];
            final double[] values = new double[N];
            for (int row = 0; row < cs[0]._len; ++row) {
                // Only rows of the cluster being split take part.
                if (assignment.at8(row) != (long)this._clusterToSplit) {
                    continue;
                }
                double weight = this._hasWeight ? cs[N].atd(row) : 1.0;
                if (weight == 0.0) {
                    continue;
                }
                assert (weight == 1.0);
                KMeans.data(values, cs, row, this._means, this._mults, this._modes);
                assert (this._isCats[this._dimToSplit] == null);
                boolean moveToNew = values[this._dimToSplit] > this._centers[this._clusterToSplit][this._dimToSplit];
                int clu = moveToNew ? this._centers.length - 1 : this._clusterToSplit;
                if (moveToNew) {
                    assignment.set(row, (long)clu);
                }
                assert (clu != -1);
                double[] sums = this._cMeans[clu];
                for (int col = 0; col < N; ++col) {
                    sums[col] += values[col];
                }
                this._size[clu]++;
            }
            // Convert accumulated sums into means for every non-empty cluster.
            for (int clu = 0; clu < this._k; ++clu) {
                if (this._size[clu] != 0L) {
                    ArrayUtils.div(this._cMeans[clu], (double)this._size[clu]);
                }
            }
            this._centers = null;
            this._mults = null;
            this._means = null;
            this._modes = null;
        }

        /** Merges another partial result: size-weighted average of the means, then summed sizes. */
        public void reduce(SplitTask mr) {
            for (int clu = 0; clu < this._k; ++clu) {
                long ra = this._size[clu];
                long rb = mr._size[clu];
                double[] ma = this._cMeans[clu];
                double[] mb = mr._cMeans[clu];
                int width = ma.length;
                long total = ra + rb;
                if (total > 0L) {
                    for (int c = 0; c < width; ++c) {
                        ma[c] = (ma[c] * (double)ra + mb[c] * (double)rb) / (double)total;
                    }
                }
            }
            ArrayUtils.add(this._size, mr._size);
        }
    }

    /**
     * Mutable result holder for a nearest-center search; callers reuse one instance
     * across many lookups, as {@code closest} overwrites both fields on every call.
     */
    private static final class ClusterDist {
        /** Index of the nearest center, or -1 when {@code closest} found none. */
        int _cluster;
        /** Distance to that center as returned by the distance function. */
        double _dist;

        private ClusterDist() {
        }
    }

    /**
     * One distributed assignment pass: every row with non-zero weight is assigned to its
     * nearest center, and per-cluster statistics are accumulated.
     *
     * <p>Outputs after {@code doAll}: per-cluster column means ({@code _cMeans}),
     * categorical level counts ({@code _cats}), summed distances ({@code _cSqr}), row counts
     * ({@code _size}), per-column min/max ({@code _lo}/{@code _hi}), the number of rows whose
     * assignment changed ({@code _reassigned_count}), and the row with the largest distance
     * to its center ({@code _worst_row}/{@code _worst_err}).
     */
    private static class LloydsIterationTask
    extends MRTask<LloydsIterationTask> {
        // Inputs; cleared at the end of map().
        double[][] _centers;
        double[] _means;
        double[] _mults;
        int[] _modes;
        final int _k;
        final String[][] _isCats;
        boolean _hasWeight;
        // Outputs.
        double[][] _lo;
        double[][] _hi;
        double _reassigned_count;
        double[][] _cMeans;
        long[][][] _cats;
        double[] _cSqr;
        long[] _size;
        long _worst_row;
        double _worst_err;

        LloydsIterationTask(double[][] centers, double[] means, double[] mults, int[] modes, String[][] isCats, int k, boolean hasWeight) {
            this._centers = centers;
            this._means = means;
            this._mults = mults;
            this._modes = modes;
            this._isCats = isCats;
            this._k = k;
            this._hasWeight = hasWeight;
        }

        /**
         * Processes one set of chunks. The last chunk holds the cluster assignment and is
         * updated in place; when weights are present the weight chunk sits at index
         * {@code N}, directly before it.
         */
        public void map(Chunk[] cs) {
            int N = cs.length - (this._hasWeight ? 1 : 0) - 1;
            assert (this._centers[0].length == N);
            this._lo = new double[this._k][N];
            this._hi = new double[this._k][N];
            for (int clu = 0; clu < this._k; ++clu) {
                Arrays.fill(this._lo[clu], Double.MAX_VALUE);
                Arrays.fill(this._hi[clu], -Double.MAX_VALUE);
            }
            this._cMeans = new double[this._k][N];
            this._cSqr = new double[this._k];
            this._size = new long[this._k];
            this._cats = new long[this._k][N][];
            for (int clu = 0; clu < this._k; ++clu) {
                for (int col = 0; col < N; ++col) {
                    // Level counters exist only for categorical columns.
                    this._cats[clu][col] = this._isCats[col] == null ? null : new long[cs[col].vec().cardinality()];
                }
            }
            this._worst_err = 0.0;
            Chunk assignment = cs[cs.length - 1];
            double[] values = new double[N];
            ClusterDist cd = new ClusterDist();
            for (int row = 0; row < cs[0]._len; ++row) {
                double weight = this._hasWeight ? cs[N].atd(row) : 1.0;
                if (weight == 0.0) continue;
                assert (weight == 1.0);
                KMeans.data(values, cs, row, this._means, this._mults, this._modes);
                KMeans.closest(this._centers, values, this._isCats, cd);
                if ((long)cd._cluster != assignment.at8(row)) {
                    this._reassigned_count += weight;
                    assignment.set(row, (long)cd._cluster);
                }
                int clu = cd._cluster;
                assert (clu != -1);
                this._cSqr[clu] += cd._dist;
                // Update only the assigned cluster's statistics: indexing the cluster directly
                // avoids scanning all k clusters for every row.
                double[] lo = this._lo[clu];
                double[] hi = this._hi[clu];
                double[] sums = this._cMeans[clu];
                for (int col = 0; col < N; ++col) {
                    lo[col] = Math.min(values[col], lo[col]);
                    hi[col] = Math.max(values[col], hi[col]);
                    if (this._isCats[col] != null) {
                        // The preprocessed value is used as the level index.
                        this._cats[clu][col][(int)values[col]]++;
                    } else {
                        sums[col] += values[col];
                    }
                }
                this._size[clu]++;
                if (cd._dist > this._worst_err) {
                    this._worst_err = cd._dist;
                    this._worst_row = cs[0].start() + (long)row;
                }
            }
            // Convert accumulated sums into means for every non-empty cluster.
            for (int clu = 0; clu < this._k; ++clu) {
                if (this._size[clu] == 0L) continue;
                ArrayUtils.div((double[])this._cMeans[clu], (double)this._size[clu]);
            }
            this._centers = null;
            this._mults = null;
            this._means = null;
            this._modes = null;
        }

        /**
         * Merges another partial result: means are combined as a size-weighted average
         * (using the sizes from before they are summed), counters are added, min/max are
         * merged, and the larger worst-row error wins.
         */
        public void reduce(LloydsIterationTask mr) {
            this._reassigned_count += mr._reassigned_count;
            for (int clu = 0; clu < this._k; ++clu) {
                long ra = this._size[clu];
                long rb = mr._size[clu];
                double[] ma = this._cMeans[clu];
                double[] mb = mr._cMeans[clu];
                for (int c = 0; c < ma.length; ++c) {
                    if (ra + rb <= 0L) continue;
                    ma[c] = (ma[c] * (double)ra + mb[c] * (double)rb) / (double)(ra + rb);
                }
            }
            ArrayUtils.add((long[][][])this._cats, (long[][][])mr._cats);
            ArrayUtils.add((double[])this._cSqr, (double[])mr._cSqr);
            ArrayUtils.add((long[])this._size, (long[])mr._size);
            for (int clu = 0; clu < this._k; ++clu) {
                for (int col = 0; col < this._lo[clu].length; ++col) {
                    this._lo[clu][col] = Math.min(mr._lo[clu][col], this._lo[clu][col]);
                    this._hi[clu][col] = Math.max(mr._hi[clu][col], this._hi[clu][col]);
                }
            }
            if (this._worst_err < mr._worst_err) {
                this._worst_err = mr._worst_err;
                this._worst_row = mr._worst_row;
            }
        }
    }

    /**
     * Distributed pass that samples rows with probability proportional to their distance
     * from the nearest current center.
     *
     * <p>A row is kept when {@code _probability * dist > u * _sqr}, where {@code u} is a
     * random double drawn from a generator re-seeded per row with
     * {@code _seed + absolute row index}. Kept rows (preprocessed values) are collected in
     * {@code _sampled}.
     */
    private static class Sampler
    extends MRTask<Sampler> {
        // Inputs; cleared at the end of map().
        double[][] _centers;
        double[] _means;
        double[] _mults;
        int[] _modes;
        final String[][] _isCats;
        final double _sqr;
        final double _probability;
        final long _seed;
        boolean _hasWeight;
        // Output: the sampled rows.
        double[][] _sampled;

        Sampler(double[][] centers, double[] means, double[] mults, int[] modes, String[][] isCats, double sqr, double prob, long seed, boolean hasWeight) {
            this._centers = centers;
            this._means = means;
            this._mults = mults;
            this._modes = modes;
            this._isCats = isCats;
            this._sqr = sqr;
            this._probability = prob;
            this._seed = seed;
            this._hasWeight = hasWeight;
        }

        /** Samples rows of one set of chunks; a trailing weight chunk, if any, is not read as data. */
        public void map(Chunk[] cs) {
            int N = cs.length - (this._hasWeight ? 1 : 0);
            double[] values = new double[N];
            // Typed element list so the result can be copied into double[][] without an
            // unchecked cast.
            ArrayList<double[]> list = new ArrayList<double[]>();
            Random rand = RandomUtils.getRNG((long[])new long[]{0L});
            ClusterDist cd = new ClusterDist();
            for (int row = 0; row < cs[0]._len; ++row) {
                // Re-seed per row so the decision depends only on the seed and the row index.
                rand.setSeed(this._seed + cs[0].start() + (long)row);
                KMeans.data(values, cs, row, this._means, this._mults, this._modes);
                double sqr = KMeans.minSqr(this._centers, values, this._isCats, cd);
                if (!(this._probability * sqr > rand.nextDouble() * this._sqr)) continue;
                // values is reused for the next row, so store a copy.
                list.add(values.clone());
            }
            this._sampled = list.toArray(new double[list.size()][]);
            this._centers = null;
            this._mults = null;
            this._means = null;
            this._modes = null;
        }

        /** Concatenates the rows sampled by {@code other} onto this task's rows. */
        public void reduce(Sampler other) {
            this._sampled = ArrayUtils.append((double[][])this._sampled, (double[][])other._sampled);
        }
    }

    /**
     * Distributed pass that sums, over all rows, the distance from each row to its
     * nearest center. The total is available in {@code _sqr}.
     */
    private static class SumSqr
    extends MRTask<SumSqr> {
        // Inputs; cleared at the end of map().
        double[][] _centers;
        double[] _means;
        double[] _mults;
        int[] _modes;
        final String[][] _isCats;
        // Output: accumulated distance.
        double _sqr;

        SumSqr(double[][] centers, double[] means, double[] mults, int[] modes, String[][] isCats) {
            this._centers = centers;
            this._means = means;
            this._mults = mults;
            this._modes = modes;
            this._isCats = isCats;
        }

        /** Adds the nearest-center distance of every row in these chunks to {@code _sqr}. */
        public void map(Chunk[] cs) {
            final ClusterDist scratch = new ClusterDist();
            final double[] rowValues = new double[cs.length];
            int row = 0;
            while (row < cs[0]._len) {
                KMeans.data(rowValues, cs, row, this._means, this._mults, this._modes);
                this._sqr += KMeans.minSqr(this._centers, rowValues, this._isCats, scratch);
                row++;
            }
            this._mults = null;
            this._means = null;
            this._modes = null;
            this._centers = null;
        }

        /** Adds the partial sum of {@code other} to this task's sum. */
        public void reduce(SumSqr other) {
            this._sqr = this._sqr + other._sqr;
        }
    }

    /**
     * Distributed pass that sums the distance of every row to a single grand center.
     *
     * <p>The grand center {@code _gc} is all zeros when multipliers are supplied and a copy
     * of {@code means} otherwise; in both cases categorical columns are set to the column's
     * entry in {@code modes}. The total is available in {@code _tss}.
     */
    private static class TotSS
    extends MRTask<TotSS> {
        final double[] _means;
        final double[] _mults;
        final int[] _modes;
        final String[][] _isCats;
        // NOTE(review): stored but never read inside this class.
        final int[] _card;
        // Output: accumulated distance to the grand center.
        double _tss;
        double[] _gc;

        TotSS(double[] means, double[] mults, int[] modes, String[][] isCats, int[] card) {
            this._means = means;
            this._mults = mults;
            this._modes = modes;
            this._tss = 0.0;
            this._isCats = isCats;
            this._card = card;
            this._gc = mults != null ? new double[means.length] : Arrays.copyOf(means, means.length);
            for (int i = 0; i < means.length; ++i) {
                if (isCats[i] == null) continue;
                this._gc[i] = this._modes[i];
            }
        }

        /** Adds the distance of every row in these chunks to the grand center into {@code _tss}. */
        public void map(Chunk[] cs) {
            // One scratch buffer for all rows: data() overwrites every element on each call,
            // so allocating a new array per row is unnecessary.
            double[] values = new double[cs.length];
            for (int row = 0; row < cs[0]._len; ++row) {
                KMeans.data(values, cs, row, this._means, this._mults, this._modes);
                this._tss += GenModel.KMeans_distance((double[])this._gc, (double[])values, (String[][])this._isCats);
            }
        }

        /** Adds the partial sum of {@code other} to this task's sum. */
        public void reduce(TotSS other) {
            this._tss += other._tss;
        }
    }

    private final class KMeansDriver
    extends ModelBuilder.Driver {
        /**
         * Per-column categorical marker built in {@code initial_centers}: an empty array for
         * categorical columns, {@code null} otherwise.
         */
        private String[][] _isCats;
        /** Counter used by {@code cleanupBadClusters} to bound repeated re-initialization attempts. */
        private transient int _reinit_attempts;

        /** Creates a driver bound to the enclosing {@code KMeans} builder. */
        private KMeansDriver() {
            super((ModelBuilder)KMeans.this);
        }

        /**
         * Chooses the starting cluster centers (in preprocessed space) and records which
         * columns are categorical.
         *
         * <p>Three paths: user-supplied points are copied and preprocessed; {@code Random}
         * initialization picks {@code k} random rows; every other scheme starts from one
         * random row, oversamples candidates in five SumSqr/Sampler rounds, and then reduces
         * them to {@code k} centers with {@code recluster}.
         *
         * @param model model whose output is updated with intermediate state
         * @param vecs  training columns
         * @param means forwarded to row preprocessing
         * @param mults forwarded to row preprocessing; {@code null} means no destandardization
         * @param modes forwarded to row preprocessing
         * @param k     number of centers expected
         * @return the initial centers, or {@code null} when a stop was requested during sampling
         */
        double[][] initial_centers(KMeansModel model, Vec[] vecs, double[] means, double[] mults, int[] modes, int k) {
            double[][] centers;
            // Rebuild the categorical markers and count categorical columns.
            ((KMeansModel.KMeansOutput)model._output)._categorical_column_count = 0;
            this._isCats = new String[vecs.length][];
            for (int v = 0; v < vecs.length; ++v) {
                String[] stringArray = this._isCats[v] = vecs[v].isCategorical() ? new String[]{} : null;
                if (this._isCats[v] == null) continue;
                ++((KMeansModel.KMeansOutput)model._output)._categorical_column_count;
            }
            // The generator is seeded with the user seed minus one.
            Random rand = RandomUtils.getRNG((long[])new long[]{((KMeansModel.KMeansParameters)KMeans.this._parms)._seed - 1L});
            if (null != ((KMeansModel.KMeansParameters)KMeans.this._parms)._user_points) {
                // User-specified centers: copy each value, then preprocess it like training data.
                Frame user_points = (Frame)((KMeansModel.KMeansParameters)KMeans.this._parms)._user_points.get();
                int numCenters = (int)user_points.numRows();
                int numCols = ((KMeansModel.KMeansOutput)model._output).nfeatures();
                centers = new double[numCenters][numCols];
                Vec[] centersVecs = user_points.vecs();
                for (int r = 0; r < numCenters; ++r) {
                    for (int c = 0; c < numCols; ++c) {
                        centers[r][c] = centersVecs[c].at((long)r);
                        centers[r][c] = GenModel.Kmeans_preprocessData((double)centers[r][c], (int)c, (double[])means, (double[])mults, (int[])modes);
                    }
                }
            } else if (((KMeansModel.KMeansParameters)KMeans.this._parms)._init == Initialization.Random) {
                // Random initialization: k independently drawn rows.
                for (double[] center : centers = new double[k][((KMeansModel.KMeansOutput)model._output).nfeatures()]) {
                    KMeans.this.randomRow(vecs, rand, center, means, mults, modes);
                }
            } else {
                // Start from a single random row and grow a candidate set by sampling.
                // NOTE(review): resembles a k-means|| style oversampling - confirm.
                centers = new double[1][((KMeansModel.KMeansOutput)model._output).nfeatures()];
                KMeans.this.randomRow(vecs, rand, centers[0], means, mults, modes);
                // _iterations is used as the round counter here and reset to 0 afterwards.
                ((KMeansModel.KMeansOutput)model._output)._iterations = 0;
                while (((KMeansModel.KMeansOutput)model._output)._iterations < 5) {
                    // Total distance to the current candidates, then sample rows with a
                    // probability factor of k * 3 relative to that total.
                    SumSqr sqr = (SumSqr)new SumSqr(centers, means, mults, modes, this._isCats).doAll(vecs);
                    Sampler sampler = (Sampler)new Sampler(centers, means, mults, modes, this._isCats, sqr._sqr, k * 3, ((KMeansModel.KMeansParameters)KMeans.this._parms).getOrMakeRealSeed(), KMeans.this.hasWeightCol()).doAll(vecs);
                    centers = ArrayUtils.append((double[][])centers, (double[][])sampler._sampled);
                    if (KMeans.this.stop_requested()) {
                        return null;
                    }
                    // Publish the intermediate candidates on the model.
                    ((KMeansModel.KMeansOutput)model._output)._centers_raw = KMeans.destandardize(centers, this._isCats, means, mults);
                    ((KMeansModel.KMeansOutput)model._output)._tot_withinss = sqr._sqr / (double)KMeans.this._train.numRows();
                    ++((KMeansModel.KMeansOutput)model._output)._iterations;
                    model.update(KMeans.this._job);
                }
                // Reduce the candidate set to k centers using the configured scheme.
                centers = KMeans.recluster(centers, rand, k, ((KMeansModel.KMeansParameters)KMeans.this._parms)._init, this._isCats);
                ((KMeansModel.KMeansOutput)model._output)._iterations = 0;
            }
            assert (centers.length == k);
            return centers;
        }

        /**
         * Repairs the first empty cluster found in {@code task} by re-seeding it from the
         * task's worst row, and reports whether another Lloyd's pass is needed to repair more.
         *
         * <p>The empty cluster's center is replaced by that cluster's {@code _cMeans} array,
         * which is then overwritten with the preprocessed worst row, and its size is set to 1.
         * If further empty clusters remain, {@code true} is returned until
         * {@code _reinit_attempts} reaches the number of centers, at which point the counter
         * is reset and {@code false} is returned.
         *
         * @param task    result of the last assignment pass; its {@code _size} and
         *                {@code _cMeans} are modified
         * @param vecs    training columns used to read the worst row
         * @param centers current centers, updated in place
         * @param means   forwarded to row preprocessing
         * @param mults   forwarded to row preprocessing
         * @param modes   forwarded to row preprocessing
         * @return {@code true} when the caller should re-run the assignment pass
         */
        boolean cleanupBadClusters(LloydsIterationTask task, Vec[] vecs, double[][] centers, double[] means, double[] mults, int[] modes) {
            final int nCenters = centers.length;
            // Locate the first cluster with no rows.
            int empty = 0;
            while (empty < nCenters && task._size[empty] != 0L) {
                empty++;
            }
            if (empty == nCenters) {
                return false;
            }
            long worstRow = task._worst_row;
            Log.warn(new Object[]{"KMeans: Re-initializing cluster " + empty + " to row " + worstRow});
            centers[empty] = task._cMeans[empty];
            KMeans.data(centers[empty], vecs, worstRow, means, mults, modes);
            task._size[empty] = 1L;
            // Check whether any other cluster is still empty.
            empty = 0;
            while (empty < nCenters && task._size[empty] != 0L) {
                empty++;
            }
            if (empty == nCenters) {
                return false;
            }
            Log.warn(new Object[]{"KMeans: Re-running Lloyds to re-init another cluster"});
            boolean retry = this._reinit_attempts++ < nCenters;
            if (!retry) {
                this._reinit_attempts = 0;
            }
            return retry;
        }

        /**
         * Copies the results of an assignment pass into the model output, derives the
         * sum-of-squares statistics, appends one entry to each history array, and rebuilds
         * the summary table, scoring history and training metrics.
         *
         * <p>For {@code k == 1} the total sum of squares equals the within-cluster total;
         * otherwise it is computed by a {@code TotSS} pass over {@code vecs}.
         *
         * @param task  completed assignment pass
         * @param model model whose output is filled in
         * @param vecs  columns handed to the {@code TotSS} pass
         * @param means forwarded to destandardization and {@code TotSS}
         * @param mults forwarded to destandardization and {@code TotSS}
         * @param modes forwarded to {@code TotSS}
         * @param k     number of clusters in this pass
         * @return {@code task._cMeans}, the new centers in preprocessed space
         */
        double[][] computeStatsFillModel(LloydsIterationTask task, KMeansModel model, Vec[] vecs, double[] means, double[] mults, int[] modes, int k) {
            final KMeansModel.KMeansOutput out = (KMeansModel.KMeansOutput)model._output;
            final double[][] newCenters = task._cMeans;
            if (((KMeansModel.KMeansParameters)model._parms)._standardize) {
                out._centers_std_raw = newCenters;
            }
            out._centers_raw = KMeans.destandardize(newCenters, this._isCats, means, mults);
            out._size = task._size;
            out._withinss = task._cSqr;
            // Total within-cluster sum over the first k clusters.
            double withinTotal = 0.0;
            for (int clu = 0; clu < k; ++clu) {
                withinTotal += out._withinss[clu];
            }
            out._tot_withinss = withinTotal;
            if (k != 1) {
                TotSS totss = (TotSS)new TotSS(means, mults, modes, KMeans.this.train().domains(), KMeans.this.train().cardinality()).doAll(vecs);
                out._totss = totss._tss;
            } else {
                out._totss = out._tot_withinss;
            }
            out._betweenss = out._totss - out._tot_withinss;
            ++out._iterations;
            // Append this pass to each history array.
            out._history_withinss = ArrayUtils.copyAndFillOf(out._history_withinss, out._history_withinss.length + 1, out._tot_withinss);
            out._k = ArrayUtils.copyAndFillOf(out._k, out._k.length + 1, k);
            out._training_time_ms = ArrayUtils.copyAndFillOf(out._training_time_ms, out._training_time_ms.length + 1, System.currentTimeMillis());
            out._reassigned_count = ArrayUtils.copyAndFillOf(out._reassigned_count, out._reassigned_count.length + 1, task._reassigned_count);
            out._model_summary = this.createModelSummaryTable(out);
            out._scoring_history = this.createScoringHistoryTable(out);
            out._training_metrics = KMeans.this.makeTrainingMetrics(model);
            return newCenters;
        }

        /**
         * Trains the KMeans model end to end: validates the builder, creates and locks the
         * model, picks initial centers, runs Lloyds iterations for each candidate number of
         * clusters, then scores the result on the training (and, if present, validation) frame.
         *
         * <p>With {@code _estimate_k} off, a single pass is made with exactly {@code _k}
         * clusters and job progress is advanced once per Lloyds iteration. With it on, k grows
         * from 1 up to {@code _k}; after each k the relative drop in total within-cluster sum of
         * squares is compared against a cutoff, and when the drop is too small the previously
         * stashed output is restored and the search stops.
         *
         * <p>The {@code finally} clause always unlocks the model (if one was created) and removes
         * the temporary DKV entry used for the stashed output.
         *
         * <p>Decompiler note (CFR): "Removed try catching itself - possible behaviour change."
         *
         * @throws H2OModelBuilderIllegalArgumentException if {@code init(true)} reports any errors
         */
        public void computeImpl() {
            KMeansModel model = null;
            // Scratch DKV key: holds a deep copy of the last accepted output while estimating k.
            Key bestOutputKey = Key.make();
            try {
                KMeans.this.init(true);
                if (KMeans.this.error_count() > 0) {
                    throw H2OModelBuilderIllegalArgumentException.makeFromBuilder((ModelBuilder)KMeans.this);
                }
                // The fold column is blanked while the model is built and written back at the very end.
                // NOTE(review): the early return below and any exception skip that restore — confirm this is intended.
                String fold_column = ((KMeansModel.KMeansParameters)KMeans.this._parms)._fold_column;
                ((KMeansModel.KMeansParameters)KMeans.this._parms)._fold_column = null;
                model = new KMeansModel(KMeans.this.dest(), (KMeansModel.KMeansParameters)KMeans.this._parms, new KMeansModel.KMeansOutput(KMeans.this));
                model.delete_and_lock(KMeans.this._job);
                // When estimating k the search starts from a single cluster; otherwise k is fixed.
                int startK = ((KMeansModel.KMeansParameters)KMeans.this._parms)._estimate_k ? 1 : ((KMeansModel.KMeansParameters)KMeans.this._parms)._k;
                Vec[] vecs = KMeans.this._train.vecs();
                double[] means = KMeans.this._train.means();
                // No multipliers (null) unless standardization was requested.
                double[] mults = ((KMeansModel.KMeansParameters)KMeans.this._parms)._standardize ? KMeans.this._train.mults() : null;
                // Per column: -1 for numeric columns, otherwise the value returned by DataInfo.imputeCat.
                int[] impute_cat = new int[vecs.length];
                for (int i = 0; i < vecs.length; ++i) {
                    impute_cat[i] = vecs[i].isNumeric() ? -1 : DataInfo.imputeCat(vecs[i], true);
                }
                ((KMeansModel.KMeansOutput)model._output)._normSub = means;
                ((KMeansModel.KMeansOutput)model._output)._normMul = mults;
                ((KMeansModel.KMeansOutput)model._output)._mode = impute_cat;
                double[][] centers = this.initial_centers(model, vecs, means, mults, impute_cat, startK);
                // No initial centers were produced (reason not visible here — presumably the job was stopped); give up quietly.
                if (centers == null) {
                    return;
                }
                // true: progress is reported per Lloyds iteration; false: once per candidate k.
                boolean work_unit_iter = !((KMeansModel.KMeansParameters)KMeans.this._parms)._estimate_k;
                // Total within-cluster sum of squares from the previous k (0.0 before the first one).
                double sum_squares = 0.0;
                // cutoff = min(0.02 + 10/rows + 2.5/nfeatures^2, 0.8); only consulted when estimating k.
                double rel_improvement_cutoff = Math.min(0.02 + 10.0 / (double)KMeans.this._train.numRows() + 2.5 / Math.pow(((KMeansModel.KMeansOutput)model._output).nfeatures(), 2.0), 0.8);
                if (((KMeansModel.KMeansParameters)KMeans.this._parms)._estimate_k) {
                    Log.info((Object[])new Object[]{"Cutoff for relative improvement in within_cluster_sum_of_squares: " + rel_improvement_cutoff});
                }
                // Training columns plus one extra constant column initialised to -1.0.
                // Presumably the tasks use it for per-row cluster assignments — confirm in LloydsIterationTask.
                // NOTE(review): this temporary Vec is only removed on the normal path after the loop; an exception in between skips the removal.
                Vec[] vecs2 = Arrays.copyOf(vecs, vecs.length + 1);
                vecs2[vecs2.length - 1] = vecs2[0].makeCon(-1.0);
                for (int k = startK; k <= ((KMeansModel.KMeansParameters)KMeans.this._parms)._k; ++k) {
                    Log.info((Object[])new Object[]{"Running Lloyds iteration for " + k + " centroids."});
                    // The iteration counter restarts for every candidate k.
                    ((KMeansModel.KMeansOutput)model._output)._iterations = 0;
                    double[][] lo = null;
                    double[][] hi = null;
                    boolean stop = false;
                    // Note: inside this do/while every `continue` jumps to the `while (!stop)` test below.
                    do {
                        assert (centers.length == k);
                        LloydsIterationTask task = (LloydsIterationTask)new LloydsIterationTask(centers, means, mults, impute_cat, this._isCats, k, KMeans.this.hasWeightCol()).doAll(vecs2);
                        KMeans.max_cats(task._cMeans, task._cats, this._isCats);
                        // Fixed-k mode only: if cleanupBadClusters reports a change, rerun Lloyds right away
                        // (stop is still false at this point, so the loop test passes).
                        if (!((KMeansModel.KMeansParameters)KMeans.this._parms)._estimate_k && this.cleanupBadClusters(task, vecs, centers, means, mults, impute_cat)) continue;
                        // Record this iteration's statistics on the model and take the centers for the next pass.
                        centers = this.computeStatsFillModel(task, model, vecs, means, mults, impute_cat, k);
                        if (((KMeansModel.KMeansParameters)model._parms)._score_each_iteration) {
                            Log.info((Object[])new Object[]{((KMeansModel.KMeansOutput)model._output)._model_summary});
                        }
                        // Kept for the cluster split that may follow this k.
                        lo = task._lo;
                        hi = task._hi;
                        if (work_unit_iter) {
                            model.update(KMeans.this._job);
                            KMeans.this._job.update(1L);
                        }
                        // Stop when fewer than max(1, rows * 1e-4) rows were reassigned, or the iteration cap is reached.
                        boolean bl = stop = task._reassigned_count < Math.max(1.0, (double)KMeans.this.train().numRows() * 1.0E-4) || ((KMeansModel.KMeansOutput)model._output)._iterations >= ((KMeansModel.KMeansParameters)KMeans.this._parms)._max_iterations;
                        if (!stop) continue;
                        // Stopping below the cap means the reassignment threshold was met.
                        if (((KMeansModel.KMeansOutput)model._output)._iterations < ((KMeansModel.KMeansParameters)KMeans.this._parms)._max_iterations) {
                            Log.info((Object[])new Object[]{"Lloyds converged after " + ((KMeansModel.KMeansOutput)model._output)._iterations + " iterations."});
                            continue;
                        }
                        Log.info((Object[])new Object[]{"Lloyds stopped after " + ((KMeansModel.KMeansOutput)model._output)._iterations + " iterations."});
                    } while (!stop);
                    double sum_squares_now = ((KMeansModel.KMeansOutput)model._output)._tot_withinss;
                    // Treated as full improvement (1.0) when the previous total is exactly 0.0, e.g. for the first k.
                    double rel_improvement = sum_squares == 0.0 ? 1.0 : (sum_squares - sum_squares_now) / sum_squares;
                    Log.info((Object[])new Object[]{"Relative improvement in total withinss: " + rel_improvement});
                    sum_squares = sum_squares_now;
                    if (((KMeansModel.KMeansParameters)KMeans.this._parms)._estimate_k && k > 1) {
                        boolean outerConverged;
                        boolean bl = outerConverged = rel_improvement < rel_improvement_cutoff;
                        if (outerConverged) {
                            // Adding this cluster did not help enough: fall back to the output stashed for the previous k.
                            KMeansModel.KMeansOutput best = (KMeansModel.KMeansOutput)DKV.getGet((Key)bestOutputKey);
                            model._output = best;
                            Log.info((Object[])new Object[]{"Converged. Retrieving the best model with k=" + ((KMeansModel.KMeansOutput)model._output)._k[((KMeansModel.KMeansOutput)model._output)._k.length - 1]});
                            break;
                        }
                    }
                    if (!work_unit_iter) {
                        // Estimating k: stash a deep copy of the accepted output and report one unit of progress per k.
                        DKV.put((Key)bestOutputKey, (Iced)IcedUtils.deepCopy((Iced)model._output));
                        model.update(KMeans.this._job);
                        KMeans.this._job.update(1L);
                    }
                    // Only grow the center set when estimating k and bounds from the last task are available.
                    if (lo == null || hi == null || !((KMeansModel.KMeansParameters)KMeans.this._parms)._estimate_k) continue;
                    centers = this.splitLargestCluster(centers, lo, hi, means, mults, impute_cat, vecs2, k);
                }
                // Drop the temporary extra column.
                vecs2[vecs2.length - 1].remove();
                // Score on the training frame, discard the returned frame, and pick up the metrics from DKV.
                model.score(KMeans.this._train).delete();
                ((KMeansModel.KMeansOutput)model._output)._training_metrics = ModelMetrics.getFromDKV((Model)model, (Frame)KMeans.this._train);
                Log.info((Object[])new Object[]{((KMeansModel.KMeansOutput)model._output)._model_summary});
                Log.info((Object[])new Object[]{((KMeansModel.KMeansOutput)model._output)._scoring_history});
                Log.info((Object[])new Object[]{((ModelMetricsClustering)((KMeansModel.KMeansOutput)model._output)._training_metrics).createCentroidStatsTable().toString()});
                if (KMeans.this._valid != null) {
                    model.score(((KMeansModel.KMeansParameters)KMeans.this._parms).valid()).delete();
                    ((KMeansModel.KMeansOutput)model._output)._validation_metrics = ModelMetrics.getFromDKV((Model)model, (Frame)((KMeansModel.KMeansParameters)KMeans.this._parms).valid());
                }
                // Put back the fold column that was blanked at the start.
                ((KMeansModel.KMeansParameters)model._parms)._fold_column = fold_column;
                model.update(KMeans.this._job);
            }
            finally {
                // Runs on every exit path, including the early return and exceptions.
                if (model != null) {
                    model.unlock(KMeans.this._job);
                }
                DKV.remove((Key)bestOutputKey);
            }
        }

        /**
         * Returns a copy of {@code centers} extended by one center, obtained by splitting one
         * existing cluster in two.
         *
         * <p>The cluster and dimension to split are the pair with the largest {@code hi - lo}
         * extent among columns whose {@code _isCats} entry is {@code null}; extents are compared
         * at {@code float} precision. The current center coordinate along that dimension is the
         * split point handed to {@code SplitTask}, and the task's resulting means replace the
         * split cluster's center and fill the newly appended slot.
         *
         * @param centers    current cluster centers (not modified; rows are cloned)
         * @param lo         per-cluster, per-column lower bounds
         * @param hi         per-cluster, per-column upper bounds
         * @param means      passed through to {@code SplitTask}
         * @param mults      passed through to {@code SplitTask}
         * @param impute_cat passed through to {@code SplitTask}
         * @param vecs2      columns the {@code SplitTask} is run over
         * @param k          current number of clusters; the task is created for {@code k + 1}
         * @return a new array holding {@code centers.length + 1} centers
         */
        double[][] splitLargestCluster(double[][] centers, double[][] lo, double[][] hi, double[] means, double[] mults, int[] impute_cat, Vec[] vecs2, int k) {
            final int oldCount = centers.length;

            // Deep-copy the existing centers into an array with one spare slot at the end.
            double[][] grown = new double[oldCount + 1][];
            for (int c = 0; c < oldCount; ++c) {
                grown[c] = centers[c].clone();
            }

            // Find the (cluster, column) pair with the widest extent, skipping columns flagged in _isCats.
            double widest = 0.0;
            int splitCluster = 0;
            int splitDim = 0;
            for (int c = 0; c < oldCount; ++c) {
                final int ncols = hi[c].length;
                for (int d = 0; d < ncols; ++d) {
                    if (this._isCats[d] == null) {
                        double span = hi[c][d] - lo[c][d];
                        // Deliberately compared as floats, exactly like the extent tracking it replaces.
                        if ((float)span > (float)widest) {
                            splitCluster = c;
                            splitDim = d;
                            widest = span;
                        }
                    }
                }
            }
            assert (this._isCats[splitDim] == null);

            // Split at the current center coordinate along the chosen dimension.
            double pivot = grown[splitCluster][splitDim];
            SplitTask splitter = new SplitTask(grown, means, mults, impute_cat, this._isCats, k + 1, KMeans.this.hasWeightCol(), splitCluster, splitDim, pivot);
            SplitTask task = (SplitTask)splitter.doAll(vecs2);

            // The task yields means for both halves: one replaces the split cluster, one fills the new slot.
            grown[splitCluster] = (double[])task._cMeans[splitCluster].clone();
            grown[oldCount] = (double[])task._cMeans[oldCount].clone();
            return grown;
        }

        /**
         * Assembles the single-row "Model Summary" table for the given output.
         *
         * <p>Columns, in order: number of rows, number of clusters, number of categorical
         * columns, number of iterations, within-cluster sum of squares, total sum of squares and
         * between-cluster sum of squares.
         *
         * @param output model output whose fields populate the row
         * @return the populated summary table
         */
        private TwoDimTable createModelSummaryTable(KMeansModel.KMeansOutput output) {
            // Column metadata as three parallel arrays: header, cell type, printf-style format.
            final String[] headers = {
                "Number of Rows",
                "Number of Clusters",
                "Number of Categorical Columns",
                "Number of Iterations",
                "Within Cluster Sum of Squares",
                "Total Sum of Squares",
                "Between Cluster Sum of Squares"
            };
            final String[] types = {"long", "long", "long", "long", "double", "double", "double"};
            final String[] formats = {"%d", "%d", "%d", "%d", "%.5f", "%.5f", "%.5f"};

            final int rowCount = 1;
            TwoDimTable summary = new TwoDimTable("Model Summary", null, new String[rowCount], headers, types, formats, "");

            // Row count is scaled by the mean of the training frame's last column when a weights
            // column is in use (presumably that last column is the weights column — confirm).
            final double rawRows = (double)KMeans.this._train.numRows();
            final double weightScale = KMeans.this.hasWeightCol() ? KMeans.this._train.lastVec().mean() : 1.0;

            summary.set(0, 0, (Object)Math.round(rawRows * weightScale));
            summary.set(0, 1, (Object)output._centers_raw.length);
            summary.set(0, 2, (Object)output._categorical_column_count);
            // The iteration count is reported as one less than the number of entries recorded in _k.
            summary.set(0, 3, (Object)(output._k.length - 1));
            summary.set(0, 4, (Object)output._tot_withinss);
            summary.set(0, 5, (Object)output._totss);
            summary.set(0, 6, (Object)output._betweenss);
            return summary;
        }

        /**
         * Builds the "Scoring History" table with one row per entry of
         * {@code output._history_withinss}.
         *
         * <p>Columns, in order: timestamp, duration since the job's start time, iteration index,
         * (only when {@code _estimate_k} is set) number of clusters, number of reassigned
         * observations, and within-cluster sum of squares.
         *
         * @param output model output holding the per-iteration history arrays
         * @return the populated scoring-history table
         */
        private TwoDimTable createScoringHistoryTable(KMeansModel.KMeansOutput output) {
            // Read the flag once so the header layout and the per-row cells cannot disagree.
            final boolean estimateK = ((KMeansModel.KMeansParameters)KMeans.this._parms)._estimate_k;

            ArrayList<String> colHeaders = new ArrayList<String>();
            ArrayList<String> colTypes = new ArrayList<String>();
            ArrayList<String> colFormat = new ArrayList<String>();
            colHeaders.add("Timestamp");
            colTypes.add("string");
            colFormat.add("%s");
            colHeaders.add("Duration");
            colTypes.add("string");
            colFormat.add("%s");
            colHeaders.add("Iteration");
            colTypes.add("long");
            colFormat.add("%d");
            if (estimateK) {
                colHeaders.add("Number of Clusters");
                colTypes.add("long");
                colFormat.add("%d");
            }
            colHeaders.add("Number of Reassigned Observations");
            colTypes.add("long");
            colFormat.add("%d");
            colHeaders.add("Within Cluster Sum Of Squares");
            colTypes.add("double");
            colFormat.add("%.5f");

            int rows = output._history_withinss.length;
            TwoDimTable table = new TwoDimTable("Scoring History", null, new String[rows], colHeaders.toArray(new String[0]), colTypes.toArray(new String[0]), colFormat.toArray(new String[0]), "");

            // The formatter does not depend on the row, so it is created once instead of per iteration.
            DateTimeFormatter fmt = DateTimeFormat.forPattern((String)"yyyy-MM-dd HH:mm:ss");
            for (int i = 0; i < rows; ++i) {
                int col = 0;
                assert (i < table.getRowDim());
                assert (col < table.getColDim());
                table.set(i, col++, (Object)fmt.print(output._training_time_ms[i]));
                table.set(i, col++, (Object)PrettyPrint.msecs((long)(output._training_time_ms[i] - KMeans.this._job.start_time()), (boolean)true));
                table.set(i, col++, (Object)i);
                if (estimateK) {
                    table.set(i, col++, (Object)output._k[i]);
                }
                table.set(i, col++, (Object)output._reassigned_count[i]);
                table.set(i, col++, (Object)output._history_withinss[i]);
            }
            return table;
        }
    }

    /**
     * Names of the initialization modes this builder accepts.
     *
     * <p>The code that interprets each constant lies outside this section; presumably they
     * select how the initial cluster centers are chosen — confirm against the center
     * initialization code. Constant order is kept as declared so ordinals do not change.
     */
    public enum Initialization {
        Random,
        PlusPlus,
        Furthest,
        User
    }
}

