/*
 * Decompiled with CFR 0.152.
 */
package elki.outlier.clustering;

import elki.Algorithm;
import elki.clustering.ClusteringAlgorithm;
import elki.clustering.kmeans.SortMeans;
import elki.data.Cluster;
import elki.data.Clustering;
import elki.data.NumberVector;
import elki.data.model.MeanModel;
import elki.data.model.Model;
import elki.data.model.ModelUtil;
import elki.data.type.TypeInformation;
import elki.data.type.TypeUtil;
import elki.database.Database;
import elki.database.datastore.DataStoreUtil;
import elki.database.datastore.DoubleDataStore;
import elki.database.datastore.WritableDoubleDataStore;
import elki.database.ids.DBIDIter;
import elki.database.ids.DBIDRef;
import elki.database.ids.DBIDs;
import elki.database.relation.DoubleRelation;
import elki.database.relation.MaterializedDoubleRelation;
import elki.database.relation.Relation;
import elki.distance.NumberVectorDistance;
import elki.distance.minkowski.EuclideanDistance;
import elki.logging.Logging;
import elki.logging.progress.StepProgress;
import elki.math.DoubleMinMax;
import elki.outlier.OutlierAlgorithm;
import elki.result.outlier.OutlierResult;
import elki.result.outlier.OutlierScoreMeta;
import elki.result.outlier.QuotientOutlierScoreMeta;
import elki.utilities.documentation.Reference;
import elki.utilities.documentation.Title;
import elki.utilities.optionhandling.OptionID;
import elki.utilities.optionhandling.Parameterizer;
import elki.utilities.optionhandling.constraints.CommonConstraints;
import elki.utilities.optionhandling.constraints.ParameterConstraint;
import elki.utilities.optionhandling.parameterization.Parameterization;
import elki.utilities.optionhandling.parameters.DoubleParameter;
import elki.utilities.optionhandling.parameters.ObjectParameter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

@Title(value="Discovering cluster-based local outliers")
@Reference(authors="Z. He, X. Xu, S. Deng", title="Discovering cluster-based local outliers", booktitle="Pattern Recognition Letters 24(9-10)", url="https://doi.org/10.1016/S0167-8655(03)00003-5", bibkey="DBLP:journals/prl/HeXD03")
public class CBLOF<O extends NumberVector>
implements OutlierAlgorithm {
    private static final Logging LOG = Logging.getLogger(CBLOF.class);
    protected NumberVectorDistance<? super O> distance;
    protected ClusteringAlgorithm<Clustering<MeanModel>> clusteringAlgorithm;
    protected double alpha;
    protected double beta;

    public CBLOF(NumberVectorDistance<? super O> distance, ClusteringAlgorithm<Clustering<MeanModel>> clusteringAlgorithm, double alpha, double beta) {
        this.distance = distance;
        this.clusteringAlgorithm = clusteringAlgorithm;
        this.alpha = alpha;
        this.beta = beta;
    }

    public OutlierResult run(Database database, Relation<O> relation) {
        StepProgress stepprog = LOG.isVerbose() ? new StepProgress("CBLOF", 3) : null;
        DBIDs ids = relation.getDBIDs();
        LOG.beginStep(stepprog, 1, "Computing clustering.");
        Clustering clustering = this.clusteringAlgorithm.autorun(database);
        LOG.beginStep(stepprog, 2, "Computing boundary between large and small clusters.");
        List clusters = clustering.getAllClusters();
        Collections.sort(clusters, new Comparator<Cluster<MeanModel>>(){

            @Override
            public int compare(Cluster<MeanModel> o1, Cluster<MeanModel> o2) {
                return Integer.compare(o2.size(), o1.size());
            }
        });
        int clusterBoundary = this.getClusterBoundary(relation, clusters);
        List largeClusters = clusters.subList(0, clusterBoundary + 1);
        List smallClusters = clusters.subList(clusterBoundary + 1, clusters.size());
        LOG.beginStep(stepprog, 3, "Computing Cluster-Based Local Outlier Factors (CBLOF).");
        WritableDoubleDataStore cblofs = DataStoreUtil.makeDoubleStorage((DBIDs)ids, (int)30);
        DoubleMinMax cblofMinMax = new DoubleMinMax();
        this.computeCBLOFs(relation, cblofs, cblofMinMax, largeClusters, smallClusters);
        LOG.setCompleted(stepprog);
        MaterializedDoubleRelation scoreResult = new MaterializedDoubleRelation("Cluster-Based Local Outlier Factor", ids, (DoubleDataStore)cblofs);
        QuotientOutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(cblofMinMax.getMin(), cblofMinMax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
        return new OutlierResult((OutlierScoreMeta)scoreMeta, (DoubleRelation)scoreResult);
    }

    private int getClusterBoundary(Relation<O> relation, List<? extends Cluster<MeanModel>> clusters) {
        int totalSize = relation.size();
        int clusterBoundary = clusters.size() - 1;
        int cumulativeSize = 0;
        for (int i = 0; i < clusters.size() - 1; ++i) {
            if ((double)(cumulativeSize += clusters.get(i).size()) >= (double)totalSize * this.alpha) {
                clusterBoundary = i;
                break;
            }
            if (!((double)clusters.get(i).size() / (double)clusters.get(i + 1).size() >= this.beta)) continue;
            clusterBoundary = i;
            break;
        }
        return clusterBoundary;
    }

    private void computeCBLOFs(Relation<O> relation, WritableDoubleDataStore cblofs, DoubleMinMax cblofMinMax, List<? extends Cluster<MeanModel>> largeClusters, List<? extends Cluster<MeanModel>> smallClusters) {
        ArrayList<NumberVector> largeClusterMeans = new ArrayList<NumberVector>(largeClusters.size());
        for (Cluster<MeanModel> cluster : largeClusters) {
            NumberVector mean = ModelUtil.getPrototypeOrCentroid((Model)cluster.getModel(), relation, (DBIDs)cluster.getIDs());
            largeClusterMeans.add(mean);
            DBIDIter iter = cluster.getIDs().iter();
            while (iter.valid()) {
                double cblof = this.computeLargeClusterCBLOF((NumberVector)relation.get((DBIDRef)iter), this.distance, mean, cluster);
                this.storeCBLOFScore(cblofs, cblofMinMax, cblof, iter);
                iter.advance();
            }
        }
        for (Cluster<MeanModel> cluster : smallClusters) {
            DBIDIter iter = cluster.getIDs().iter();
            while (iter.valid()) {
                double cblof = this.computeSmallClusterCBLOF((NumberVector)relation.get((DBIDRef)iter), this.distance, largeClusterMeans, cluster);
                this.storeCBLOFScore(cblofs, cblofMinMax, cblof, iter);
                iter.advance();
            }
        }
    }

    private void storeCBLOFScore(WritableDoubleDataStore cblofs, DoubleMinMax cblofMinMax, double cblof, DBIDIter iter) {
        cblofs.putDouble((DBIDRef)iter, cblof);
        cblofMinMax.put(cblof);
    }

    private double computeSmallClusterCBLOF(O obj, NumberVectorDistance<? super O> distance, List<NumberVector> largeClusterMeans, Cluster<MeanModel> cluster) {
        double nearestLargeClusterDistance = Double.MAX_VALUE;
        for (NumberVector clusterMean : largeClusterMeans) {
            double clusterDistance = distance.distance(obj, clusterMean);
            if (!(clusterDistance < nearestLargeClusterDistance)) continue;
            nearestLargeClusterDistance = clusterDistance;
        }
        return (double)cluster.size() * nearestLargeClusterDistance;
    }

    private double computeLargeClusterCBLOF(O obj, NumberVectorDistance<? super O> distance, NumberVector clusterMean, Cluster<MeanModel> cluster) {
        return (double)cluster.size() * distance.distance(obj, clusterMean);
    }

    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array((TypeInformation[])new TypeInformation[]{this.distance.getInputTypeRestriction()});
    }

    public static class Par<O extends NumberVector>
    implements Parameterizer {
        public static final OptionID CLUSTERING_ID = new OptionID("cblof.algorithm", "Clustering algorithm to use for detecting outliers.");
        public static final OptionID ALPHPA_ID = new OptionID("cblof.alpha", "The ratio of the data that should be included in the large clusters");
        public static final OptionID BETA_ID = new OptionID("cblof.beta", "The ratio of the data that should be included in the large clusters");
        protected ClusteringAlgorithm<Clustering<MeanModel>> clusteringAlgorithm;
        protected double alpha;
        protected double beta;
        protected NumberVectorDistance<? super O> distance;

        public void configure(Parameterization config) {
            new ObjectParameter(Algorithm.Utils.DISTANCE_FUNCTION_ID, NumberVectorDistance.class, EuclideanDistance.class).grab(config, x -> {
                this.distance = x;
            });
            ((DoubleParameter)((DoubleParameter)new DoubleParameter(ALPHPA_ID).addConstraint((ParameterConstraint)CommonConstraints.LESS_THAN_ONE_DOUBLE)).addConstraint((ParameterConstraint)CommonConstraints.GREATER_THAN_ZERO_DOUBLE)).grab(config, x -> {
                this.alpha = x;
            });
            ((DoubleParameter)new DoubleParameter(BETA_ID).addConstraint((ParameterConstraint)CommonConstraints.GREATER_THAN_ONE_DOUBLE)).grab(config, x -> {
                this.beta = x;
            });
            new ObjectParameter(CLUSTERING_ID, ClusteringAlgorithm.class, SortMeans.class).grab(config, x -> {
                this.clusteringAlgorithm = x;
            });
        }

        public CBLOF<O> make() {
            return new CBLOF<O>(this.distance, this.clusteringAlgorithm, this.alpha, this.beta);
        }
    }
}

