/*
 * Decompiled with CFR 0.152.
 */
package elki.clustering.subspace;

import elki.clustering.subspace.SubspaceClusteringAlgorithm;
import elki.data.Cluster;
import elki.data.Clustering;
import elki.data.NumberVector;
import elki.data.Subspace;
import elki.data.model.SubspaceModel;
import elki.data.type.TypeInformation;
import elki.data.type.TypeUtil;
import elki.database.ids.ArrayModifiableDBIDs;
import elki.database.ids.DBIDArrayMIter;
import elki.database.ids.DBIDIter;
import elki.database.ids.DBIDRef;
import elki.database.ids.DBIDUtil;
import elki.database.ids.DBIDs;
import elki.database.ids.HashSetModifiableDBIDs;
import elki.database.ids.ModifiableDBIDs;
import elki.database.query.QueryBuilder;
import elki.database.query.distance.DistanceQuery;
import elki.database.relation.Relation;
import elki.database.relation.RelationUtil;
import elki.distance.Distance;
import elki.distance.subspace.SubspaceMaximumDistance;
import elki.logging.Logging;
import elki.logging.progress.AbstractProgress;
import elki.logging.progress.FiniteProgress;
import elki.logging.progress.IndefiniteProgress;
import elki.math.linearalgebra.Centroid;
import elki.result.Metadata;
import elki.utilities.datastructures.BitsUtil;
import elki.utilities.documentation.Reference;
import elki.utilities.documentation.Title;
import elki.utilities.optionhandling.OptionID;
import elki.utilities.optionhandling.Parameterizer;
import elki.utilities.optionhandling.constraints.CommonConstraints;
import elki.utilities.optionhandling.constraints.ParameterConstraint;
import elki.utilities.optionhandling.parameterization.Parameterization;
import elki.utilities.optionhandling.parameters.DoubleParameter;
import elki.utilities.optionhandling.parameters.RandomParameter;
import elki.utilities.random.RandomFactory;
import java.util.Random;
import net.jafama.FastMath;

@Title(value="DOC: Density-based Optimal projective Clustering")
@Reference(authors="C. M. Procopiuc, M. Jones, P. K. Agarwal, T. M. Murali", title="A Monte Carlo algorithm for fast projective clustering", booktitle="Proc. ACM SIGMOD Int. Conf. on Management of Data (SIGMOD '02)", url="https://doi.org/10.1145/564691.564739", bibkey="DBLP:conf/sigmod/ProcopiucJAM02")
public class DOC
implements SubspaceClusteringAlgorithm<SubspaceModel> {
    private static final Logging LOG = Logging.getLogger(DOC.class);
    protected double alpha;
    protected double beta;
    protected double w;
    protected RandomFactory rnd;

    public DOC(double alpha, double beta, double w, RandomFactory random) {
        this.alpha = alpha;
        this.beta = beta;
        this.w = w;
        this.rnd = random;
    }

    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array((TypeInformation[])new TypeInformation[]{TypeUtil.NUMBER_VECTOR_FIELD});
    }

    public Clustering<SubspaceModel> run(Relation<? extends NumberVector> relation) {
        Cluster<SubspaceModel> C;
        IndefiniteProgress cprogress;
        int d = RelationUtil.dimensionality(relation);
        ArrayModifiableDBIDs S = DBIDUtil.newArray((DBIDs)relation.getDBIDs());
        double r = Math.abs(FastMath.log((double)(2.0 * (double)d)) / FastMath.log((double)(this.beta * 0.5)));
        int n = (int)(2.0 / this.alpha);
        int m = (int)(FastMath.pow((double)(2.0 / this.alpha), (double)r) * FastMath.log((double)4.0));
        m = Math.min(m, Math.min(1000000, d * d));
        int minClusterSize = (int)(this.alpha * (double)S.size());
        Clustering<SubspaceModel> result = new Clustering<SubspaceModel>();
        Metadata.of(result).setLongName("DOC Clusters");
        IndefiniteProgress indefiniteProgress = cprogress = LOG.isVerbose() ? new IndefiniteProgress("Number of clusters", LOG) : null;
        while (S.size() > minClusterSize && (C = this.runDOC(relation, S, d, n, m, (int)r, minClusterSize)) != null) {
            result.addToplevelCluster(C);
            S.removeDBIDs(C.getIDs());
            if (cprogress == null) continue;
            cprogress.setProcessed(result.getAllClusters().size(), LOG);
        }
        if (S.size() > 0) {
            long[] alldims = BitsUtil.ones((int)d);
            result.addToplevelCluster(new Cluster<SubspaceModel>((DBIDs)S, true, new SubspaceModel(new Subspace(alldims), Centroid.make(relation, (DBIDs)S).getArrayRef())));
        }
        LOG.setCompleted(cprogress);
        return result;
    }

    protected Cluster<SubspaceModel> runDOC(Relation<? extends NumberVector> relation, ArrayModifiableDBIDs S, int d, int n, int m, int r, int minClusterSize) {
        DBIDs C = null;
        long[] D = null;
        double quality = Double.NEGATIVE_INFINITY;
        FiniteProgress iprogress = LOG.isVerbose() ? new FiniteProgress("Iteration progress for current cluster", m * n, LOG) : null;
        Random random = this.rnd.getSingleThreadedRandom();
        DBIDArrayMIter iter = S.iter();
        for (int i = 0; i < n; ++i) {
            iter.seek(random.nextInt(S.size()));
            for (int j = 0; j < m; ++j) {
                ModifiableDBIDs randomSet = DBIDUtil.randomSample((DBIDs)S, (int)r, (Random)random);
                long[] nD = BitsUtil.zero((int)d);
                for (int k = 0; k < d; ++k) {
                    if (!this.dimensionIsRelevant(k, relation, (DBIDs)randomSet)) continue;
                    BitsUtil.setI((long[])nD, (int)k);
                }
                if (BitsUtil.cardinality((long[])nD) > 0) {
                    DBIDs nC = this.findNeighbors((DBIDRef)iter, nD, S, relation);
                    if (LOG.isDebuggingFiner()) {
                        LOG.finer((CharSequence)("Testing a cluster candidate, |C| = " + nC.size() + ", |D| = " + BitsUtil.cardinality((long[])nD)));
                    }
                    if (nC.size() < minClusterSize) {
                        if (!LOG.isDebuggingFiner()) continue;
                        LOG.finer((CharSequence)"... but it's too small.");
                        continue;
                    }
                    double nQuality = this.computeClusterQuality(nC.size(), BitsUtil.cardinality((long[])nD));
                    if (nQuality > quality) {
                        if (LOG.isDebuggingFiner()) {
                            LOG.finer((CharSequence)("... and it's the best so far: " + nQuality + " vs. " + quality));
                        }
                        C = nC;
                        D = nD;
                        quality = nQuality;
                    } else if (LOG.isDebuggingFiner()) {
                        LOG.finer((CharSequence)"... but we already have a better one.");
                    }
                }
                LOG.incrementProcessed((AbstractProgress)iprogress);
            }
        }
        LOG.ensureCompleted(iprogress);
        return C != null ? this.makeCluster(relation, C, D) : null;
    }

    protected DBIDs findNeighbors(DBIDRef q, long[] nD, ArrayModifiableDBIDs S, Relation<? extends NumberVector> relation) {
        DistanceQuery dq = new QueryBuilder(relation, (Distance)new SubspaceMaximumDistance(nD)).distanceQuery();
        ArrayModifiableDBIDs nC = DBIDUtil.newArray();
        DBIDArrayMIter it = S.iter();
        while (it.valid()) {
            if (dq.distance(q, (DBIDRef)it) <= this.w) {
                nC.add((DBIDRef)it);
            }
            it.advance();
        }
        return nC;
    }

    protected boolean dimensionIsRelevant(int dimension, Relation<? extends NumberVector> relation, DBIDs points) {
        double min = Double.POSITIVE_INFINITY;
        double max = Double.NEGATIVE_INFINITY;
        DBIDIter iter = points.iter();
        while (iter.valid()) {
            double xV = ((NumberVector)relation.get((DBIDRef)iter)).doubleValue(dimension);
            min = xV < min ? xV : min;
            double d = max = xV > max ? xV : max;
            if (max - min > this.w) {
                return false;
            }
            iter.advance();
        }
        return true;
    }

    protected Cluster<SubspaceModel> makeCluster(Relation<? extends NumberVector> relation, DBIDs C, long[] D) {
        HashSetModifiableDBIDs ids = DBIDUtil.newHashSet((DBIDs)C);
        Cluster<SubspaceModel> cluster = new Cluster<SubspaceModel>((DBIDs)ids);
        cluster.setModel(new SubspaceModel(new Subspace(D), Centroid.make(relation, (DBIDs)ids).getArrayRef()));
        return cluster;
    }

    protected double computeClusterQuality(int clusterSize, int numRelevantDimensions) {
        return (double)clusterSize * FastMath.pow((double)(1.0 / this.beta), (double)numRelevantDimensions);
    }

    public static class Par
    implements Parameterizer {
        public static final OptionID ALPHA_ID = new OptionID("doc.alpha", "Minimum relative density for a set of points to be considered a cluster (|C|>=doc.alpha*|S|).");
        public static final OptionID BETA_ID = new OptionID("doc.beta", "Preference of cluster size versus number of relevant dimensions (higher value means higher priority on larger clusters).");
        public static final OptionID W_ID = new OptionID("doc.w", "Maximum extent of scattering of points along a single attribute for the attribute to be considered relevant.");
        public static final OptionID RANDOM_ID = new OptionID("doc.random-seed", "Random seed, for reproducible experiments.");
        protected double alpha;
        protected double beta;
        protected double w;
        protected RandomFactory random = RandomFactory.DEFAULT;

        public void configure(Parameterization config) {
            ((DoubleParameter)((DoubleParameter)new DoubleParameter(ALPHA_ID, 0.2).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE)).addConstraint((ParameterConstraint)CommonConstraints.LESS_EQUAL_ONE_DOUBLE)).grab(config, x -> {
                this.alpha = x;
            });
            ((DoubleParameter)((DoubleParameter)new DoubleParameter(BETA_ID, 0.8).addConstraint((ParameterConstraint)CommonConstraints.GREATER_THAN_ZERO_DOUBLE)).addConstraint((ParameterConstraint)CommonConstraints.LESS_THAN_ONE_DOUBLE)).grab(config, x -> {
                this.beta = x;
            });
            ((DoubleParameter)new DoubleParameter(W_ID, 0.05).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE)).grab(config, x -> {
                this.w = x;
            });
            new RandomParameter(RANDOM_ID).grab(config, x -> {
                this.random = x;
            });
        }

        public DOC make() {
            return new DOC(this.alpha, this.beta, this.w, this.random);
        }
    }
}

