/*
 * Decompiled with CFR 0.152.
 */
package elki.clustering.subspace;

import elki.clustering.dbscan.DBSCAN;
import elki.clustering.subspace.SubspaceClusteringAlgorithm;
import elki.data.Cluster;
import elki.data.Clustering;
import elki.data.NumberVector;
import elki.data.Subspace;
import elki.data.model.Model;
import elki.data.model.SubspaceModel;
import elki.data.type.TypeInformation;
import elki.data.type.TypeUtil;
import elki.database.ids.DBIDUtil;
import elki.database.ids.DBIDs;
import elki.database.ids.HashSetModifiableDBIDs;
import elki.database.ids.ModifiableDBIDs;
import elki.database.relation.ProxyView;
import elki.database.relation.Relation;
import elki.database.relation.RelationUtil;
import elki.distance.subspace.DimensionSelectingSubspaceDistance;
import elki.distance.subspace.SubspaceEuclideanDistance;
import elki.logging.Logging;
import elki.logging.progress.AbstractProgress;
import elki.logging.progress.FiniteProgress;
import elki.logging.progress.StepProgress;
import elki.math.linearalgebra.Centroid;
import elki.result.Metadata;
import elki.utilities.datastructures.BitsUtil;
import elki.utilities.documentation.Description;
import elki.utilities.documentation.Reference;
import elki.utilities.documentation.Title;
import elki.utilities.optionhandling.OptionID;
import elki.utilities.optionhandling.Parameterizer;
import elki.utilities.optionhandling.constraints.CommonConstraints;
import elki.utilities.optionhandling.constraints.ParameterConstraint;
import elki.utilities.optionhandling.parameterization.Parameterization;
import elki.utilities.optionhandling.parameters.DoubleParameter;
import elki.utilities.optionhandling.parameters.IntParameter;
import elki.utilities.optionhandling.parameters.ObjectParameter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.TreeMap;

@Title(value="SUBCLU: Density connected Subspace Clustering")
@Description(value="Algorithm to detect arbitrarily shaped and positioned clusters in subspaces. SUBCLU delivers for each subspace the same clusters DBSCAN would have found, when applied to this subspace seperately.")
@Reference(authors="Karin Kailing, Hans-Peter Kriegel, Peer Kr\u00f6ger", title="Density Connected Subspace Clustering for High Dimensional Data", booktitle="Proc. SIAM Int. Conf. on Data Mining (SDM'04)", url="https://doi.org/10.1137/1.9781611972740.23", bibkey="DBLP:conf/sdm/KroegerKK04")
public class SUBCLU<V extends NumberVector>
implements SubspaceClusteringAlgorithm<SubspaceModel> {
    private static final Logging LOG = Logging.getLogger(SUBCLU.class);
    protected DimensionSelectingSubspaceDistance<V> distance;
    protected double epsilon;
    protected int minpts;
    protected int mindim;

    public SUBCLU(DimensionSelectingSubspaceDistance<V> distance, double epsilon, int minpts, int mindim) {
        this.distance = distance;
        this.epsilon = epsilon;
        this.minpts = minpts;
        this.mindim = mindim;
    }

    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array((TypeInformation[])new TypeInformation[]{TypeUtil.NUMBER_VECTOR_FIELD});
    }

    public Clustering<SubspaceModel> run(Relation<V> relation) {
        int d;
        int dimensionality = RelationUtil.dimensionality(relation);
        if (dimensionality <= 1) {
            throw new IllegalStateException("SUBCLU needs multivariate data.");
        }
        StepProgress stepprog = LOG.isVerbose() ? new StepProgress(dimensionality) : null;
        TreeMap<Subspace, List<Cluster<Model>>> clusterMap = new TreeMap<Subspace, List<Cluster<Model>>>(Subspace.DIMENSION_COMPARATOR);
        LOG.beginStep(stepprog, 1, "Generate all 1-dimensional clusters.");
        ArrayList<Subspace> subspaces = new ArrayList<Subspace>();
        for (d = 0; d < dimensionality; ++d) {
            Subspace currentSubspace = new Subspace(d);
            List<Cluster<Model>> clusters = this.runDBSCAN(relation, null, currentSubspace);
            if (LOG.isDebuggingFiner()) {
                StringBuilder msg = new StringBuilder(1000).append(clusters.size()).append(" clusters in subspace ").append(currentSubspace.dimensionsToString()).append(':');
                for (Cluster cluster : clusters) {
                    msg.append("\n      ").append(cluster.getIDs());
                }
                LOG.debugFiner((CharSequence)msg.toString());
            }
            if (clusters.isEmpty()) continue;
            subspaces.add(currentSubspace);
            clusterMap.put(currentSubspace, clusters);
        }
        for (d = 2; d <= dimensionality; ++d) {
            if (stepprog != null) {
                stepprog.beginStep(d, "Generate " + d + "-dimensional clusters from " + (d - 1) + "-dimensional clusters.", LOG);
            }
            List<Subspace> candidates = this.generateSubspaceCandidates(subspaces);
            ArrayList<Subspace> s_d = new ArrayList<Subspace>();
            FiniteProgress substepprog = LOG.isVerbose() ? new FiniteProgress("Candidates of dimensionality " + d, candidates.size(), LOG) : null;
            for (Subspace subspace : candidates) {
                Object candidateClusters;
                Subspace bestSubspace = this.bestSubspace(subspaces, subspace, clusterMap);
                if (LOG.isDebuggingFine()) {
                    LOG.debugFine((CharSequence)("best subspace of " + subspace.dimensionsToString() + ": " + bestSubspace.dimensionsToString()));
                }
                ArrayList<Cluster<Model>> clusters = new ArrayList<Cluster<Model>>();
                for (Cluster<Model> cluster : clusterMap.get(bestSubspace)) {
                    if (cluster.size() < this.minpts || (candidateClusters = this.runDBSCAN(relation, cluster.getIDs(), subspace)).isEmpty()) continue;
                    clusters.addAll((Collection<Cluster<Model>>)candidateClusters);
                }
                if (LOG.isDebuggingFine() && !clusters.isEmpty()) {
                    StringBuilder msg = new StringBuilder(1000).append(clusters.size()).append(" cluster(s) in subspace ").append(subspace).append(':');
                    candidateClusters = clusters.iterator();
                    while (candidateClusters.hasNext()) {
                        Cluster c = (Cluster)candidateClusters.next();
                        msg.append("\n      ").append(c.getIDs());
                    }
                    LOG.debugFine((CharSequence)msg.toString());
                }
                if (!clusters.isEmpty()) {
                    s_d.add(subspace);
                    clusterMap.put(subspace, clusters);
                }
                LOG.incrementProcessed((AbstractProgress)substepprog);
            }
            LOG.ensureCompleted(substepprog);
            if (s_d.isEmpty()) {
                if (stepprog == null) break;
                for (int dim = d + 1; dim <= dimensionality; ++dim) {
                    stepprog.beginStep(dim, "Generation of " + dim + "-dimensional clusters not applicable, because no " + d + "-dimensional subspaces were found.", LOG);
                }
                break;
            }
            subspaces = s_d;
        }
        LOG.setCompleted(stepprog);
        int numClusters = 0;
        HashSetModifiableDBIDs noise = DBIDUtil.newHashSet((DBIDs)relation.getDBIDs());
        TreeMap<Subspace, HashSetModifiableDBIDs> filtered = new TreeMap<Subspace, HashSetModifiableDBIDs>(Subspace.DIMENSION_COMPARATOR);
        Clustering<SubspaceModel> result = new Clustering<SubspaceModel>();
        Metadata.of(result).setLongName("SUBCLU clustering");
        for (Subspace subspace : clusterMap.descendingKeySet()) {
            if (subspace.dimensionality() < this.mindim) continue;
            DBIDs blacklisted = (DBIDs)filtered.get(subspace);
            blacklisted = blacklisted != null ? blacklisted : DBIDUtil.EMPTYDBIDS;
            HashSetModifiableDBIDs blacklist = DBIDUtil.newHashSet((DBIDs)blacklisted);
            List<Cluster<Model>> clusters = clusterMap.get(subspace);
            for (Cluster<Model> cluster : clusters) {
                DBIDs ids = cluster.getIDs();
                ModifiableDBIDs newids = DBIDUtil.difference((DBIDs)ids, (DBIDs)blacklisted);
                Cluster<SubspaceModel> newCluster = new Cluster<SubspaceModel>((DBIDs)newids);
                newCluster.setModel(new SubspaceModel(subspace, Centroid.make(relation, (DBIDs)ids).getArrayRef()));
                newCluster.setName("cluster_" + numClusters++);
                result.addToplevelCluster(newCluster);
                blacklist.addDBIDs((DBIDs)newids);
                noise.removeDBIDs((DBIDs)newids);
            }
            if (subspace.dimensionality() <= this.mindim) continue;
            long[] tmp = BitsUtil.copy((long[])subspace.getDimensions());
            int pos = BitsUtil.nextSetBit((long[])tmp, (int)0);
            while (pos >= 0) {
                BitsUtil.clearI((long[])tmp, (int)pos);
                Subspace sub = new Subspace(BitsUtil.copy((long[])tmp));
                BitsUtil.setI((long[])tmp, (int)pos);
                ModifiableDBIDs bl = (ModifiableDBIDs)filtered.get(sub);
                if (bl != null) {
                    bl.addDBIDs((DBIDs)blacklist);
                } else {
                    filtered.put(sub, DBIDUtil.newHashSet((DBIDs)blacklist));
                }
                pos = BitsUtil.nextSetBit((long[])tmp, (int)(pos + 1));
            }
        }
        if (!noise.isEmpty()) {
            Cluster<SubspaceModel> newCluster = new Cluster<SubspaceModel>((DBIDs)noise);
            newCluster.setModel(new SubspaceModel(new Subspace(BitsUtil.zero((int)dimensionality)), Centroid.make(relation, (DBIDs)noise).getArrayRef()));
            newCluster.setName("noise");
            newCluster.setNoise(true);
            result.addToplevelCluster(newCluster);
        }
        return result;
    }

    private List<Cluster<Model>> runDBSCAN(Relation<V> relation, DBIDs ids, Subspace subspace) {
        this.distance.setSelectedDimensions(subspace.getDimensions());
        if (LOG.isVerbose()) {
            LOG.verbose((CharSequence)("Run DBSCAN on subspace " + subspace.dimensionsToString() + (ids == null ? "" : " on cluster of " + ids.size() + " objects.")));
        }
        relation = ids == null ? relation : new ProxyView(ids, (Relation)relation);
        DBSCAN<V> dbscan = new DBSCAN<V>(this.distance, this.epsilon, this.minpts);
        Clustering<Model> dbsres = dbscan.run((Relation<V>)relation);
        ArrayList<Cluster<Model>> clusters = new ArrayList<Cluster<Model>>();
        for (Cluster<Model> c : dbsres.getAllClusters()) {
            if (c.isNoise()) continue;
            clusters.add(c);
        }
        return clusters;
    }

    private List<Subspace> generateSubspaceCandidates(List<Subspace> subspaces) {
        StringBuilder msgFinest;
        if (subspaces.isEmpty()) {
            return Collections.emptyList();
        }
        StringBuilder stringBuilder = msgFinest = LOG.isDebuggingFinest() ? new StringBuilder(1000) : null;
        if (msgFinest != null) {
            msgFinest.append("subspaces ").append(subspaces).append('\n');
        }
        ArrayList<Subspace> candidates = new ArrayList<Subspace>();
        int d = subspaces.get(0).dimensionality() + 1;
        for (int i = 0; i < subspaces.size(); ++i) {
            Subspace s1 = subspaces.get(i);
            for (int j = i + 1; j < subspaces.size(); ++j) {
                Subspace candidate = s1.join(subspaces.get(j));
                if (candidate == null || d != 2 && !this.checkLower(candidate, subspaces)) continue;
                if (msgFinest != null) {
                    msgFinest.append("candidate: ").append(candidate.dimensionsToString()).append('\n');
                }
                candidates.add(candidate);
            }
        }
        if (msgFinest != null) {
            LOG.debugFinest((CharSequence)msgFinest.toString());
        }
        if (LOG.isDebugging()) {
            StringBuilder msg = new StringBuilder(1000).append(d).append("-dimensional candidate subspaces: ");
            for (Subspace candidate : candidates) {
                msg.append(candidate.dimensionsToString()).append(' ');
            }
            LOG.debug((CharSequence)msg.toString());
        }
        return candidates;
    }

    private boolean checkLower(Subspace candidate, List<Subspace> subspaces) {
        long[] dimensions = BitsUtil.copy((long[])candidate.getDimensions());
        int dim = BitsUtil.nextSetBit((long[])dimensions, (int)0);
        while (dim >= 0) {
            BitsUtil.clearI((long[])dimensions, (int)dim);
            if (!subspaces.contains(new Subspace(dimensions))) {
                return false;
            }
            BitsUtil.setI((long[])dimensions, (int)dim);
            dim = BitsUtil.nextSetBit((long[])dimensions, (int)(dim + 1));
        }
        return true;
    }

    private Subspace bestSubspace(List<Subspace> subspaces, Subspace candidate, TreeMap<Subspace, List<Cluster<Model>>> clusterMap) {
        Subspace bestSubspace = null;
        int min = Integer.MAX_VALUE;
        for (Subspace subspace : subspaces) {
            if (!subspace.isSubspace(candidate)) continue;
            int sum = 0;
            for (Cluster<Model> cluster : clusterMap.get(subspace)) {
                sum += cluster.size();
            }
            if (sum >= min) continue;
            min = sum;
            bestSubspace = subspace;
        }
        return bestSubspace;
    }

    public static class Par<V extends NumberVector>
    implements Parameterizer {
        public static final OptionID DISTANCE_FUNCTION_ID = new OptionID("subclu.distancefunction", "Distance function to determine the distance between database objects.");
        public static final OptionID EPSILON_ID = new OptionID("subclu.epsilon", "The maximum radius of the neighborhood to be considered.");
        public static final OptionID MINPTS_ID = new OptionID("subclu.minpts", "Threshold for minimum number of points in the epsilon-neighborhood of a point.");
        public static final OptionID MINDIM_ID = new OptionID("subclu.mindim", "Minimum dimensionality to generate clusters for.");
        protected DimensionSelectingSubspaceDistance<V> distance;
        protected double epsilon;
        protected int minpts;
        protected int mindim = 1;

        public void configure(Parameterization config) {
            new ObjectParameter(DISTANCE_FUNCTION_ID, DimensionSelectingSubspaceDistance.class, SubspaceEuclideanDistance.class).grab(config, x -> {
                this.distance = x;
            });
            new DoubleParameter(EPSILON_ID).grab(config, x -> {
                this.epsilon = x;
            });
            ((IntParameter)new IntParameter(MINPTS_ID).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT)).grab(config, x -> {
                this.minpts = x;
            });
            ((IntParameter)((IntParameter)new IntParameter(MINDIM_ID).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT)).setOptional(true)).grab(config, x -> {
                this.mindim = x;
            });
        }

        public SUBCLU<V> make() {
            return new SUBCLU<V>(this.distance, this.epsilon, this.minpts, this.mindim);
        }
    }
}

