/*
 * Decompiled with CFR 0.152.
 */
package elki.clustering.correlation;

import elki.algorithm.DependencyDerivator;
import elki.clustering.ClusteringAlgorithm;
import elki.clustering.correlation.cash.CASHInterval;
import elki.clustering.correlation.cash.CASHIntervalSplit;
import elki.clustering.correlation.cash.ParameterizationFunction;
import elki.data.Cluster;
import elki.data.Clustering;
import elki.data.DoubleVector;
import elki.data.FeatureVector;
import elki.data.HyperBoundingBox;
import elki.data.NumberVector;
import elki.data.model.ClusterModel;
import elki.data.model.CorrelationAnalysisSolution;
import elki.data.model.LinearEquationModel;
import elki.data.model.Model;
import elki.data.spatial.SpatialComparable;
import elki.data.spatial.SpatialUtil;
import elki.data.type.SimpleTypeInformation;
import elki.data.type.TypeInformation;
import elki.data.type.TypeUtil;
import elki.data.type.VectorFieldTypeInformation;
import elki.database.ProxyDatabase;
import elki.database.datastore.DataStore;
import elki.database.datastore.DataStoreUtil;
import elki.database.datastore.WritableDataStore;
import elki.database.ids.DBIDIter;
import elki.database.ids.DBIDRef;
import elki.database.ids.DBIDUtil;
import elki.database.ids.DBIDs;
import elki.database.ids.HashSetModifiableDBIDs;
import elki.database.ids.ModifiableDBIDs;
import elki.database.relation.MaterializedRelation;
import elki.database.relation.Relation;
import elki.datasource.filter.normalization.NonNumericFeaturesException;
import elki.logging.Logging;
import elki.logging.progress.FiniteProgress;
import elki.math.linearalgebra.LinearEquationSystem;
import elki.math.linearalgebra.VMath;
import elki.math.linearalgebra.pca.CovarianceMatrixBuilder;
import elki.math.linearalgebra.pca.PCARunner;
import elki.math.linearalgebra.pca.StandardCovarianceMatrixBuilder;
import elki.math.linearalgebra.pca.filter.EigenPairFilter;
import elki.math.linearalgebra.pca.filter.FirstNEigenPairFilter;
import elki.result.Metadata;
import elki.utilities.datastructures.heap.ComparatorMaxHeap;
import elki.utilities.datastructures.heap.ObjectHeap;
import elki.utilities.documentation.Description;
import elki.utilities.documentation.Reference;
import elki.utilities.documentation.Title;
import elki.utilities.io.FormatUtil;
import elki.utilities.optionhandling.OptionID;
import elki.utilities.optionhandling.Parameterizer;
import elki.utilities.optionhandling.constraints.CommonConstraints;
import elki.utilities.optionhandling.constraints.ParameterConstraint;
import elki.utilities.optionhandling.parameterization.Parameterization;
import elki.utilities.optionhandling.parameters.DoubleParameter;
import elki.utilities.optionhandling.parameters.Flag;
import elki.utilities.optionhandling.parameters.IntParameter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import net.jafama.FastMath;

@Title(value="CASH: Robust clustering in arbitrarily oriented subspaces")
@Description(value="Subspace clustering algorithm based on the Hough transform.")
@Reference(authors="Elke Achtert, Christian B\u00f6hm, J\u00f6rn David, Peer Kr\u00f6ger, Arthur Zimek", title="Robust clustering in arbitrarily oriented subspaces", booktitle="Proc. 8th SIAM Int. Conf. on Data Mining (SDM'08)", url="https://doi.org/10.1137/1.9781611972788.69", bibkey="DBLP:conf/sdm/AchtertBDKZ08")
public class CASH
implements ClusteringAlgorithm<Clustering<Model>> {
    private static final Logging LOG = Logging.getLogger(CASH.class);
    protected int minPts;
    protected int maxLevel;
    protected int minDim;
    protected double jitter;
    protected boolean adjust;
    private int noiseDim;
    private ModifiableDBIDs processedIDs;
    private Relation<ParameterizationFunction> fulldatabase;

    public CASH(int minPts, int maxLevel, int minDim, double jitter, boolean adjust) {
        this.minPts = minPts;
        this.maxLevel = maxLevel;
        this.minDim = minDim;
        this.jitter = jitter;
        this.adjust = adjust;
    }

    public Clustering<Model> run(Relation<? extends NumberVector> rel) {
        this.fulldatabase = this.preprocess(rel);
        this.processedIDs = DBIDUtil.newHashSet((int)this.fulldatabase.size());
        this.noiseDim = CASH.dimensionality(this.fulldatabase);
        FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("CASH Clustering", this.fulldatabase.size(), LOG) : null;
        Clustering<Model> result = this.doRun(this.fulldatabase, progress);
        LOG.ensureCompleted(progress);
        if (LOG.isVerbose()) {
            StringBuilder msg = new StringBuilder(1000);
            for (Cluster c : result.getAllClusters()) {
                if (c.getModel() instanceof LinearEquationModel) {
                    LinearEquationModel s = (LinearEquationModel)c.getModel();
                    msg.append("\n Cluster: Dim: " + s.getLes().subspacedim() + " size: " + c.size());
                    continue;
                }
                msg.append("\n Cluster: " + c.getModel().getClass().getName() + " size: " + c.size());
            }
            LOG.verbose((CharSequence)msg.toString());
        }
        return result;
    }

    private Relation<ParameterizationFunction> preprocess(Relation<? extends NumberVector> vrel) {
        DBIDs ids = vrel.getDBIDs();
        SimpleTypeInformation type = new SimpleTypeInformation(ParameterizationFunction.class);
        WritableDataStore prep = DataStoreUtil.makeStorage((DBIDs)ids, (int)2, ParameterizationFunction.class);
        DBIDIter iter = ids.iter();
        while (iter.valid()) {
            prep.put((DBIDRef)iter, (Object)new ParameterizationFunction((NumberVector)vrel.get((DBIDRef)iter)));
            iter.advance();
        }
        return new MaterializedRelation(null, type, ids, (DataStore)prep);
    }

    private Clustering<Model> doRun(Relation<ParameterizationFunction> relation, FiniteProgress progress) {
        Clustering res = new Clustering();
        Metadata.of((Object)res).setLongName("CASH Clustering");
        int dim = CASH.dimensionality(relation);
        ComparatorMaxHeap heap = new ComparatorMaxHeap((Comparator)new Comparator<CASHInterval>(){

            @Override
            public int compare(CASHInterval o1, CASHInterval o2) {
                return Integer.compare(o1.priority(), o2.priority());
            }
        });
        HashSetModifiableDBIDs noiseIDs = DBIDUtil.newHashSet((DBIDs)relation.getDBIDs());
        this.initHeap((ObjectHeap<CASHInterval>)heap, relation, dim, (DBIDs)noiseIDs);
        if (LOG.isVerbose()) {
            LOG.verbose((CharSequence)("dim " + dim + " database.size " + relation.size()));
        }
        while (!heap.isEmpty()) {
            CASHInterval interval = this.determineNextIntervalAtMaxLevel((ObjectHeap<CASHInterval>)heap);
            if (LOG.isVerbose()) {
                LOG.verbose((CharSequence)("next interval in dim " + dim + ": " + interval));
            }
            if (interval == null) break;
            HashSetModifiableDBIDs clusterIDs = DBIDUtil.newHashSet();
            if (dim > this.minDim + 1) {
                double[][] basis_dim_minus_1;
                HashSetModifiableDBIDs ids;
                if (this.adjust) {
                    ids = DBIDUtil.newHashSet();
                    basis_dim_minus_1 = this.runDerivator(relation, dim, interval, (ModifiableDBIDs)ids);
                } else {
                    ids = interval.getIDs();
                    basis_dim_minus_1 = this.determineBasis(SpatialUtil.centroid((SpatialComparable)interval));
                }
                if (ids.size() != 0) {
                    MaterializedRelation<ParameterizationFunction> db = this.buildDB(dim, basis_dim_minus_1, (DBIDs)ids, relation);
                    Clustering<Model> res_dim_minus_1 = this.doRun((Relation<ParameterizationFunction>)db, progress);
                    for (Cluster cluster : res_dim_minus_1.getAllClusters()) {
                        res.addToplevelCluster(cluster);
                        noiseIDs.removeDBIDs(cluster.getIDs());
                        clusterIDs.addDBIDs(cluster.getIDs());
                        this.processedIDs.addDBIDs(cluster.getIDs());
                    }
                }
            } else {
                LinearEquationSystem les = this.runDerivator(relation, dim - 1, (DBIDs)interval.getIDs());
                Cluster c = new Cluster((DBIDs)interval.getIDs(), (Model)new LinearEquationModel(les));
                res.addToplevelCluster(c);
                noiseIDs.removeDBIDs((DBIDs)interval.getIDs());
                clusterIDs.addDBIDs((DBIDs)interval.getIDs());
                this.processedIDs.addDBIDs((DBIDs)interval.getIDs());
            }
            ArrayList<CASHInterval> heapVector = new ArrayList<CASHInterval>(heap.size());
            ObjectHeap.UnsortedIter iter = heap.unsortedIter();
            while (iter.valid()) {
                heapVector.add((CASHInterval)iter.get());
                iter.advance();
            }
            heap.clear();
            for (CASHInterval currentInterval : heapVector) {
                currentInterval.removeIDs((DBIDs)clusterIDs);
                if (currentInterval.getIDs().size() < this.minPts) continue;
                heap.add((Object)currentInterval);
            }
            if (progress == null) continue;
            progress.setProcessed(this.processedIDs.size(), LOG);
        }
        if (!noiseIDs.isEmpty()) {
            if (dim == this.noiseDim) {
                res.addToplevelCluster(new Cluster((DBIDs)noiseIDs, true, (Model)ClusterModel.CLUSTER));
                this.processedIDs.addDBIDs((DBIDs)noiseIDs);
            } else if (noiseIDs.size() >= this.minPts) {
                LinearEquationSystem les = this.runDerivator(this.fulldatabase, dim - 1, (DBIDs)noiseIDs);
                res.addToplevelCluster(new Cluster((DBIDs)noiseIDs, true, (Model)new LinearEquationModel(les)));
                this.processedIDs.addDBIDs((DBIDs)noiseIDs);
            }
        }
        if (LOG.isDebugging()) {
            StringBuilder msg = new StringBuilder();
            msg.append("noise fuer dim ").append(dim).append(": ").append(noiseIDs.size());
            for (Cluster c : res.getAllClusters()) {
                if (c.getModel() instanceof LinearEquationModel) {
                    msg.append("\n Cluster: Dim: ").append(((LinearEquationModel)c.getModel()).getLes().subspacedim());
                } else {
                    msg.append("\n Cluster: ").append(c.getModel().getClass().getName());
                }
                msg.append(" size: ").append(c.size());
            }
            LOG.debugFine((CharSequence)msg.toString());
        }
        if (progress != null) {
            progress.setProcessed(this.processedIDs.size(), LOG);
        }
        return res;
    }

    private static int dimensionality(Relation<ParameterizationFunction> relation) {
        return ((ParameterizationFunction)relation.get((DBIDRef)relation.iterDBIDs())).getDimensionality();
    }

    private void initHeap(ObjectHeap<CASHInterval> heap, Relation<ParameterizationFunction> relation, int dim, DBIDs ids) {
        CASHIntervalSplit split = new CASHIntervalSplit(relation, this.minPts);
        double[] minMax = this.determineMinMaxDistance(relation, dim);
        double d_min = minMax[0];
        double d_max = minMax[1];
        double dIntervalLength = d_max - d_min;
        int numDIntervals = (int)FastMath.ceil((double)(dIntervalLength / this.jitter));
        double dIntervalSize = dIntervalLength / (double)numDIntervals;
        double[] d_mins = new double[numDIntervals];
        double[] d_maxs = new double[numDIntervals];
        if (LOG.isVerbose()) {
            LOG.verbose((CharSequence)("d_min " + d_min + "\nd_max " + d_max + "\nnumDIntervals " + numDIntervals + "\ndIntervalSize " + dIntervalSize));
        }
        double[] alphaMin = new double[dim - 1];
        double[] alphaMax = new double[dim - 1];
        Arrays.fill(alphaMax, Math.PI);
        for (int i = 0; i < numDIntervals; ++i) {
            d_mins[i] = i == 0 ? d_min : d_maxs[i - 1];
            d_maxs[i] = i < numDIntervals - 1 ? d_mins[i] + dIntervalSize : d_max - d_mins[i];
            HyperBoundingBox alphaInterval = new HyperBoundingBox(alphaMin, alphaMax);
            ModifiableDBIDs intervalIDs = split.determineIDs(ids, alphaInterval, d_mins[i], d_maxs[i]);
            if (intervalIDs == null || intervalIDs.size() < this.minPts) continue;
            heap.add((Object)new CASHInterval(alphaMin, alphaMax, split, intervalIDs, -1, 0, d_mins[i], d_maxs[i]));
        }
        if (LOG.isDebuggingFiner()) {
            LOG.debugFiner((CharSequence)("heap.size: " + heap.size()));
        }
    }

    private MaterializedRelation<ParameterizationFunction> buildDB(int dim, double[][] basis, DBIDs ids, Relation<ParameterizationFunction> relation) {
        ProxyDatabase proxy = new ProxyDatabase(ids);
        SimpleTypeInformation type = new SimpleTypeInformation(ParameterizationFunction.class);
        WritableDataStore prep = DataStoreUtil.makeStorage((DBIDs)ids, (int)2, ParameterizationFunction.class);
        DBIDIter iter = ids.iter();
        while (iter.valid()) {
            prep.put((DBIDRef)iter, (Object)this.project(basis, (ParameterizationFunction)relation.get((DBIDRef)iter)));
            iter.advance();
        }
        if (LOG.isDebugging()) {
            LOG.debugFine((CharSequence)("db fuer dim " + (dim - 1) + ": " + ids.size()));
        }
        MaterializedRelation prel = new MaterializedRelation(null, type, ids, (DataStore)prep);
        proxy.addRelation((Relation)prel);
        return prel;
    }

    private ParameterizationFunction project(double[][] basis, ParameterizationFunction f) {
        double[] m = VMath.transposeTimes((double[][])basis, (double[])f.getColumnVector());
        return new ParameterizationFunction((NumberVector)DoubleVector.wrap((double[])m));
    }

    private double[][] determineBasis(double[] alpha) {
        int i;
        int dim = alpha.length;
        double[] nn = new double[dim + 1];
        for (int i2 = 0; i2 < nn.length; ++i2) {
            double alpha_i = i2 == alpha.length ? 0.0 : alpha[i2];
            nn[i2] = ParameterizationFunction.sinusProduct(0, i2, alpha) * FastMath.cos((double)alpha_i);
        }
        VMath.timesEquals((double[])nn, (double)(1.0 / VMath.euclideanLength((double[])nn)));
        double[][] basis = new double[dim][];
        int found = 0;
        for (i = 0; i < nn.length && found < dim; ++i) {
            double[] e_i = new double[nn.length];
            e_i[i] = 1.0;
            VMath.minusTimesEquals((double[])e_i, (double[])nn, (double)VMath.scalarProduct((double[])e_i, (double[])nn));
            double len = VMath.euclideanLength((double[])e_i);
            for (int j = 0; j < found && !(len < 1.0E-9); ++j) {
                VMath.minusTimesEquals((double[])e_i, (double[])basis[j], (double)VMath.scalarProduct((double[])e_i, (double[])basis[j]));
                len = VMath.euclideanLength((double[])e_i);
            }
            if (len < 1.0E-9) continue;
            VMath.timesEquals((double[])e_i, (double)(1.0 / len));
            basis[found++] = e_i;
        }
        if (found < dim) {
            for (i = found; i < dim; ++i) {
                basis[i] = new double[nn.length];
            }
        }
        return VMath.transpose((double[][])basis);
    }

    private CASHInterval determineNextIntervalAtMaxLevel(ObjectHeap<CASHInterval> heap) {
        CASHInterval next = this.doDetermineNextIntervalAtMaxLevel(heap);
        while (next == null) {
            if (heap.isEmpty()) {
                return null;
            }
            next = this.doDetermineNextIntervalAtMaxLevel(heap);
        }
        return next;
    }

    private CASHInterval doDetermineNextIntervalAtMaxLevel(ObjectHeap<CASHInterval> heap) {
        CASHInterval interval = (CASHInterval)heap.poll();
        int dim = interval.getDimensionality();
        while (interval.getLevel() < this.maxLevel || interval.getMaxSplitDimension() != dim - 1) {
            CASHInterval bestInterval;
            if (heap.size() % 10000 == 0 && LOG.isVerbose()) {
                LOG.verbose((CharSequence)("heap size " + heap.size()));
            }
            if (heap.size() >= 40000) {
                LOG.warning((CharSequence)"Heap size > 40.000! Stopping.");
                heap.clear();
                return null;
            }
            if (LOG.isDebuggingFiner()) {
                LOG.debugFiner((CharSequence)("split " + interval.toString() + " " + interval.getLevel() + "-" + interval.getMaxSplitDimension()));
            }
            interval.split();
            if (!interval.hasChildren()) {
                return null;
            }
            if (interval.getLeftChild() != null && interval.getRightChild() != null) {
                int comp = interval.getLeftChild().compareTo(interval.getRightChild());
                if (comp < 0) {
                    bestInterval = interval.getRightChild();
                    heap.add((Object)interval.getLeftChild());
                } else {
                    bestInterval = interval.getLeftChild();
                    heap.add((Object)interval.getRightChild());
                }
            } else {
                bestInterval = interval.getLeftChild() == null ? interval.getRightChild() : interval.getLeftChild();
            }
            interval = bestInterval;
        }
        return interval;
    }

    private double[] determineMinMaxDistance(Relation<ParameterizationFunction> relation, int dimensionality) {
        double[] min = new double[dimensionality - 1];
        double[] max = new double[dimensionality - 1];
        Arrays.fill(max, Math.PI);
        HyperBoundingBox box = new HyperBoundingBox(min, max);
        double d_min = Double.POSITIVE_INFINITY;
        double d_max = Double.NEGATIVE_INFINITY;
        DBIDIter iditer = relation.iterDBIDs();
        while (iditer.valid()) {
            ParameterizationFunction f = (ParameterizationFunction)relation.get((DBIDRef)iditer);
            HyperBoundingBox minMax = f.determineAlphaMinMax(box);
            double f_min = f.function(SpatialUtil.getMin((SpatialComparable)minMax));
            double f_max = f.function(SpatialUtil.getMax((SpatialComparable)minMax));
            d_min = Math.min(d_min, f_min);
            d_max = Math.max(d_max, f_max);
            iditer.advance();
        }
        return new double[]{d_min, d_max};
    }

    private double[][] runDerivator(Relation<ParameterizationFunction> relation, int dim, CASHInterval interval, ModifiableDBIDs outids) {
        CorrelationAnalysisSolution model = new DependencyDerivator<DoubleVector>(null, FormatUtil.NF4, new PCARunner((CovarianceMatrixBuilder)new StandardCovarianceMatrixBuilder()), (EigenPairFilter)new FirstNEigenPairFilter(dim - 1), 0, false).run(this.buildDerivatorDB(relation, (DBIDs)interval.getIDs()));
        double[][] weightMatrix = model.getSimilarityMatrix();
        double[] centroid = model.getCentroid();
        double eps = 0.25;
        outids.addDBIDs((DBIDs)interval.getIDs());
        DBIDIter iditer = relation.iterDBIDs();
        while (iditer.valid()) {
            double[] v = VMath.minusEquals((double[])((ParameterizationFunction)relation.get((DBIDRef)iditer)).getColumnVector(), (double[])centroid);
            double d = VMath.transposeTimesTimes((double[])v, (double[][])weightMatrix, (double[])v);
            if (d <= eps) {
                outids.add((DBIDRef)iditer);
            }
            iditer.advance();
        }
        double[][] basis = model.getStrongEigenvectors();
        return VMath.getMatrix((double[][])basis, (int)0, (int)basis.length, (int)0, (int)(dim - 1));
    }

    private LinearEquationSystem runDerivator(Relation<ParameterizationFunction> relation, int dimensionality, DBIDs ids) {
        try {
            CorrelationAnalysisSolution model = new DependencyDerivator<DoubleVector>(null, FormatUtil.NF4, new PCARunner((CovarianceMatrixBuilder)new StandardCovarianceMatrixBuilder()), (EigenPairFilter)new FirstNEigenPairFilter(dimensionality), 0, false).run(this.buildDerivatorDB(relation, ids));
            return model.getNormalizedLinearEquationSystem(null);
        }
        catch (NonNumericFeaturesException e) {
            throw new IllegalStateException("Error during normalization: " + e.getMessage(), e);
        }
    }

    private Relation<DoubleVector> buildDerivatorDB(Relation<ParameterizationFunction> relation, DBIDs ids) {
        VectorFieldTypeInformation type = new VectorFieldTypeInformation((FeatureVector.Factory)DoubleVector.FACTORY, CASH.dimensionality(relation));
        MaterializedRelation prep = new MaterializedRelation(null, (SimpleTypeInformation)type, ids);
        DBIDIter iter = ids.iter();
        while (iter.valid()) {
            prep.insert((DBIDRef)iter, (Object)DoubleVector.wrap((double[])((ParameterizationFunction)relation.get((DBIDRef)iter)).getColumnVector()));
            iter.advance();
        }
        return prep;
    }

    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array((TypeInformation[])new TypeInformation[]{TypeUtil.NUMBER_VECTOR_FIELD});
    }

    public static class Par
    implements Parameterizer {
        public static final OptionID MINPTS_ID = new OptionID("cash.minpts", "Threshold for minimum number of points in a cluster.");
        public static final OptionID MAXLEVEL_ID = new OptionID("cash.maxlevel", "The maximum level for splitting the hypercube.");
        public static final OptionID MINDIM_ID = new OptionID("cash.mindim", "The minimum dimensionality of the subspaces to be found.");
        public static final OptionID JITTER_ID = new OptionID("cash.jitter", "The maximum jitter for distance values.");
        public static final OptionID ADJUST_ID = new OptionID("cash.adjust", "Flag to indicate that an adjustment of the applied heuristic for choosing an interval is performed after an interval is selected.");
        protected int minPts;
        protected int maxLevel;
        protected int minDim;
        protected double jitter;
        protected boolean adjust;

        public void configure(Parameterization config) {
            ((IntParameter)new IntParameter(MINPTS_ID).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT)).grab(config, x -> {
                this.minPts = x;
            });
            ((IntParameter)new IntParameter(MAXLEVEL_ID).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT)).grab(config, x -> {
                this.maxLevel = x;
            });
            ((IntParameter)new IntParameter(MINDIM_ID, 1).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT)).grab(config, x -> {
                this.minDim = x;
            });
            ((DoubleParameter)new DoubleParameter(JITTER_ID).addConstraint((ParameterConstraint)CommonConstraints.GREATER_THAN_ZERO_DOUBLE)).grab(config, x -> {
                this.jitter = x;
            });
            new Flag(ADJUST_ID).grab(config, x -> {
                this.adjust = x;
            });
        }

        public CASH make() {
            return new CASH(this.minPts, this.maxLevel, this.minDim, this.jitter, this.adjust);
        }
    }
}

