/*
 * Decompiled with CFR 0.152.
 */
package elki.clustering.correlation;

import elki.clustering.ClusteringAlgorithm;
import elki.data.Cluster;
import elki.data.Clustering;
import elki.data.NumberVector;
import elki.data.model.Model;
import elki.data.type.TypeInformation;
import elki.data.type.TypeUtil;
import elki.database.ids.ArrayModifiableDBIDs;
import elki.database.ids.DBIDIter;
import elki.database.ids.DBIDRef;
import elki.database.ids.DBIDUtil;
import elki.database.ids.DBIDs;
import elki.database.ids.HashSetModifiableDBIDs;
import elki.database.ids.ModifiableDBIDs;
import elki.database.relation.Relation;
import elki.database.relation.RelationUtil;
import elki.logging.Logging;
import elki.logging.progress.FiniteProgress;
import elki.logging.progress.IndefiniteProgress;
import elki.math.MeanVariance;
import elki.math.linearalgebra.VMath;
import elki.result.Metadata;
import elki.utilities.datastructures.histogram.DoubleDynamicHistogram;
import elki.utilities.datastructures.histogram.DoubleHistogram;
import elki.utilities.documentation.Reference;
import elki.utilities.exceptions.TooManyRetriesException;
import elki.utilities.io.FormatUtil;
import elki.utilities.optionhandling.OptionID;
import elki.utilities.optionhandling.Parameterizer;
import elki.utilities.optionhandling.constraints.CommonConstraints;
import elki.utilities.optionhandling.constraints.ParameterConstraint;
import elki.utilities.optionhandling.parameterization.Parameterization;
import elki.utilities.optionhandling.parameters.DoubleParameter;
import elki.utilities.optionhandling.parameters.IntParameter;
import elki.utilities.optionhandling.parameters.RandomParameter;
import elki.utilities.random.RandomFactory;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import net.jafama.FastMath;

@Reference(authors="R. Haralick, R. Harpaz", title="Linear manifold clustering in high dimensional spaces by stochastic search", booktitle="Pattern Recognition volume 40, Issue 10", url="https://doi.org/10.1016/j.patcog.2007.01.020", bibkey="DBLP:journals/pr/HaralickH07")
public class LMCLUS
implements ClusteringAlgorithm<Clustering<Model>> {
    private static final Logging LOG = Logging.getLogger(LMCLUS.class);
    private static final double LOG_NOT_FROM_ONE_CLUSTER_PROBABILITY = Math.log(0.2);
    private static final int BINS = 50;
    private final double sensitivityThreshold;
    private final int maxLMDim;
    private final int minsize;
    private final int samplingLevel;
    private final RandomFactory rnd;

    public LMCLUS(int maxdim, int minsize, int samplingLevel, double sensitivityThreshold, RandomFactory rnd) {
        this.maxLMDim = maxdim;
        this.minsize = minsize;
        this.samplingLevel = samplingLevel;
        this.sensitivityThreshold = sensitivityThreshold;
        this.rnd = rnd;
    }

    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array((TypeInformation[])new TypeInformation[]{TypeUtil.NUMBER_VECTOR_FIELD});
    }

    public Clustering<Model> run(Relation<? extends NumberVector> relation) {
        Clustering<Model> ret = new Clustering<Model>();
        Metadata.of(ret).setLongName("LMCLUS Clustering");
        FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Clustered objects", relation.size(), LOG) : null;
        IndefiniteProgress cprogress = LOG.isVerbose() ? new IndefiniteProgress("Clusters found", LOG) : null;
        HashSetModifiableDBIDs unclustered = DBIDUtil.newHashSet((DBIDs)relation.getDBIDs());
        Random r = this.rnd.getSingleThreadedRandom();
        int maxdim = Math.min(this.maxLMDim, RelationUtil.dimensionality(relation));
        int cnum = 0;
        while (unclustered.size() > this.minsize) {
            HashSetModifiableDBIDs current = unclustered;
            int lmDim = 1;
            block1: for (int k = 1; k <= maxdim; ++k) {
                while (true) {
                    Separation separation = this.findSeparation(relation, (DBIDs)current, k, r);
                    if (separation.goodness <= this.sensitivityThreshold) continue block1;
                    ArrayModifiableDBIDs subset = DBIDUtil.newArray((int)current.size());
                    DBIDIter iter = current.iter();
                    while (iter.valid()) {
                        if (this.deviation(VMath.minusEquals((double[])((NumberVector)relation.get((DBIDRef)iter)).toArray(), (double[])separation.originV), separation.basis) < separation.threshold) {
                            subset.add((DBIDRef)iter);
                        }
                        iter.advance();
                    }
                    if (subset.size() < this.minsize) continue block1;
                    current = subset;
                    lmDim = k;
                }
            }
            if (current.size() < this.minsize || current == unclustered) break;
            Cluster cluster = new Cluster((DBIDs)current);
            cluster.setName("Cluster_" + lmDim + "d_" + cnum);
            ++cnum;
            ret.addToplevelCluster(cluster);
            unclustered.removeDBIDs((DBIDs)current);
            if (progress != null) {
                progress.setProcessed(relation.size() - unclustered.size(), LOG);
            }
            if (cprogress == null) continue;
            cprogress.setProcessed(cnum, LOG);
        }
        if (unclustered.size() > 0) {
            ret.addToplevelCluster(new Cluster((DBIDs)unclustered, true));
        }
        if (progress != null) {
            progress.setProcessed(relation.size(), LOG);
            progress.ensureCompleted(LOG);
        }
        LOG.setCompleted(cprogress);
        return ret;
    }

    private double deviation(double[] delta, double[][] beta) {
        double b;
        double a = VMath.squareSum((double[])delta);
        return a > (b = VMath.squareSum((double[])VMath.transposeTimes((double[][])beta, (double[])delta))) ? Math.sqrt(a - b) : 0.0;
    }

    private Separation findSeparation(Relation<? extends NumberVector> relation, DBIDs currentids, int dimension, Random r) {
        Separation separation = new Separation();
        int samples = (int)Math.min(LOG_NOT_FROM_ONE_CLUSTER_PROBABILITY / FastMath.log1p((double)(-FastMath.powFast((double)this.samplingLevel, (int)(-dimension)))), (double)currentids.size());
        int remaining_retries = 100;
        for (int i = 1; i <= samples; ++i) {
            ModifiableDBIDs sample = DBIDUtil.randomSample((DBIDs)currentids, (int)(dimension + 1), (Random)r);
            DBIDIter iter = sample.iter();
            double[] originV = ((NumberVector)relation.get((DBIDRef)iter)).toArray();
            iter.advance();
            ArrayList<double[]> vectors = new ArrayList<double[]>(sample.size() - 1);
            while (iter.valid()) {
                double[] vec = ((NumberVector)relation.get((DBIDRef)iter)).toArray();
                vectors.add(VMath.minusEquals((double[])vec, (double[])originV));
                iter.advance();
            }
            double[][] basis = this.generateOrthonormalBasis(vectors);
            if (basis == null) {
                --i;
                if (--remaining_retries >= 0) continue;
                throw new TooManyRetriesException("Too many retries in sampling, and always a linear dependant data set.");
            }
            DoubleDynamicHistogram histogram = new DoubleDynamicHistogram(50);
            double w = 1.0 / (double)currentids.size();
            DBIDIter iter2 = currentids.iter();
            while (iter2.valid()) {
                if (!sample.contains((DBIDRef)iter2)) {
                    double[] vec = VMath.minusEquals((double[])((NumberVector)relation.get((DBIDRef)iter2)).toArray(), (double[])originV);
                    double distance = this.deviation(vec, basis);
                    histogram.increment(distance, w);
                }
                iter2.advance();
            }
            double[] th = this.findAndEvaluateThreshold(histogram);
            if (!(th[1] > separation.goodness)) continue;
            separation.goodness = th[1];
            separation.threshold = th[0];
            separation.originV = originV;
            separation.basis = basis;
        }
        return separation;
    }

    private double[][] generateOrthonormalBasis(List<double[]> vectors) {
        double[] first = VMath.normalizeEquals((double[])vectors.get(0));
        double[][] ret = new double[first.length][vectors.size()];
        VMath.setCol((double[][])ret, (int)0, (double[])first);
        for (int i = 1; i < vectors.size(); ++i) {
            double[] v_i = vectors.get(i);
            double[] u_i = (double[])v_i.clone();
            for (int j = 0; j < i; ++j) {
                double[] v_j = VMath.getCol((double[][])ret, (int)j);
                double f = VMath.transposeTimes((double[])v_i, (double[])v_j);
                if (Double.isNaN(f)) {
                    if (LOG.isDebuggingFine()) {
                        LOG.debugFine((CharSequence)("Zero vector encountered? " + FormatUtil.format((double[])v_j)));
                    }
                    return null;
                }
                VMath.minusTimesEquals((double[])u_i, (double[])v_j, (double)f);
            }
            double len_u_i = VMath.euclideanLength((double[])u_i);
            if (len_u_i < Double.MIN_NORMAL) {
                if (LOG.isDebuggingFine()) {
                    LOG.debugFine((CharSequence)"Points not independent - no orthonormalization.");
                }
                return null;
            }
            VMath.timesEquals((double[])u_i, (double)(1.0 / len_u_i));
            VMath.setCol((double[][])ret, (int)i, (double[])u_i);
        }
        return ret;
    }

    private double[] findAndEvaluateThreshold(DoubleDynamicHistogram histogram) {
        int n = histogram.getNumBins();
        double[] p1 = new double[n];
        double[] p2 = new double[n];
        double[] mu1 = new double[n];
        double[] mu2 = new double[n];
        double[] sigma1 = new double[n];
        double[] sigma2 = new double[n];
        double[] jt = new double[n];
        MeanVariance mv = new MeanVariance();
        DoubleHistogram.Iter forward = histogram.iter();
        int i = 0;
        while (forward.valid()) {
            p1[i] = forward.getValue() + (i > 0 ? p1[i - 1] : 0.0);
            mv.put((double)i, forward.getValue());
            mu1[i] = mv.getMean();
            sigma1[i] = mv.getPopulationStddev();
            ++i;
            forward.advance();
        }
        mv = new MeanVariance();
        DoubleHistogram.Iter backwards = histogram.iter().seek(histogram.getNumBins() - 1);
        int j = n - 1;
        while (backwards.valid()) {
            p2[j] = backwards.getValue() + (j + 1 < n ? p2[j + 1] : 0.0);
            mv.put((double)j, backwards.getValue());
            mu2[j] = mv.getMean();
            sigma2[j] = mv.getPopulationStddev();
            --j;
            backwards.retract();
        }
        for (int i2 = 0; i2 < n; ++i2) {
            jt[i2] = 1.0 + 2.0 * (p1[i2] * (FastMath.log((double)sigma1[i2]) - FastMath.log((double)p1[i2])) + p2[i2] * (FastMath.log((double)sigma2[i2]) - FastMath.log((double)p2[i2])));
        }
        int bestpos = -1;
        double bestgoodness = Double.NEGATIVE_INFINITY;
        double devPrev = jt[1] - jt[0];
        for (int i3 = 1; i3 < jt.length - 1; ++i3) {
            double devCur = jt[i3 + 1] - jt[i3];
            if (devCur >= 0.0 && devPrev <= 0.0) {
                double goodness;
                int j2;
                double lowestMaxima = Double.POSITIVE_INFINITY;
                for (j2 = i3 - 1; j2 > 0; --j2) {
                    if (!(jt[j2 - 1] < jt[j2])) continue;
                    lowestMaxima = Math.min(lowestMaxima, jt[j2]);
                    break;
                }
                for (j2 = i3 + 1; j2 < n - 2; ++j2) {
                    if (!(jt[j2 + 1] < jt[j2])) continue;
                    lowestMaxima = Math.min(lowestMaxima, jt[j2]);
                    break;
                }
                double localDepth = lowestMaxima - jt[i3];
                double mud = mu1[i3] - mu2[i3];
                double discriminability = mud * mud / (sigma1[i3] * sigma1[i3] + sigma2[i3] * sigma2[i3]);
                if (Double.isNaN(discriminability)) {
                    discriminability = -1.0;
                }
                if ((goodness = localDepth * discriminability) > bestgoodness) {
                    bestgoodness = goodness;
                    bestpos = i3;
                }
            }
            devPrev = devCur;
        }
        return new double[]{histogram.iter().seek(bestpos).getRight(), bestgoodness};
    }

    public static class Par
    implements Parameterizer {
        public static final OptionID MAXDIM_ID = new OptionID("lmclus.maxdim", "Maximum linear manifold dimension to search.");
        public static final OptionID MINSIZE_ID = new OptionID("lmclus.minsize", "Minimum cluster size to allow.");
        public static final OptionID SAMPLINGL_ID = new OptionID("lmclus.sampling-level", "A number used to determine how many samples are taken in each search.");
        public static final OptionID THRESHOLD_ID = new OptionID("lmclus.threshold", "Threshold to determine if a cluster was found.");
        public static final OptionID RANDOM_ID = new OptionID("lmclus.seed", "Random generator seed.");
        private int maxdim = Integer.MAX_VALUE;
        private int minsize;
        private int samplingLevel;
        private double threshold;
        private RandomFactory rnd;

        public void configure(Parameterization config) {
            ((IntParameter)((IntParameter)new IntParameter(MAXDIM_ID).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT)).setOptional(true)).grab(config, x -> {
                this.maxdim = x;
            });
            ((IntParameter)new IntParameter(MINSIZE_ID).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT)).grab(config, x -> {
                this.minsize = x;
            });
            new IntParameter(SAMPLINGL_ID, 100).grab(config, x -> {
                this.samplingLevel = x;
            });
            new DoubleParameter(THRESHOLD_ID).grab(config, x -> {
                this.threshold = x;
            });
            new RandomParameter(RANDOM_ID).grab(config, x -> {
                this.rnd = x;
            });
        }

        public LMCLUS make() {
            return new LMCLUS(this.maxdim, this.minsize, this.samplingLevel, this.threshold, this.rnd);
        }
    }

    private static class Separation {
        double goodness = Double.NEGATIVE_INFINITY;
        double threshold = Double.NEGATIVE_INFINITY;
        double[][] basis = null;
        double[] originV = null;

        private Separation() {
        }
    }
}

