/*
 * Decompiled with CFR 0.152.
 */
package elki.outlier.meta;

import elki.data.NumberVector;
import elki.data.VectorUtil;
import elki.data.projection.NumericalFeatureSelection;
import elki.data.projection.Projection;
import elki.data.type.TypeInformation;
import elki.data.type.TypeUtil;
import elki.database.Database;
import elki.database.ProxyDatabase;
import elki.database.datastore.DataStoreUtil;
import elki.database.datastore.DoubleDataStore;
import elki.database.datastore.WritableDoubleDataStore;
import elki.database.ids.ArrayDBIDs;
import elki.database.ids.ArrayModifiableDBIDs;
import elki.database.ids.DBIDArrayIter;
import elki.database.ids.DBIDIter;
import elki.database.ids.DBIDRef;
import elki.database.ids.DBIDUtil;
import elki.database.ids.DBIDs;
import elki.database.relation.DoubleRelation;
import elki.database.relation.MaterializedDoubleRelation;
import elki.database.relation.ProjectedView;
import elki.database.relation.Relation;
import elki.database.relation.RelationUtil;
import elki.logging.Logging;
import elki.logging.progress.AbstractProgress;
import elki.logging.progress.FiniteProgress;
import elki.logging.progress.IndefiniteProgress;
import elki.math.DoubleMinMax;
import elki.math.statistics.tests.GoodnessOfFitTest;
import elki.math.statistics.tests.KolmogorovSmirnovTest;
import elki.outlier.OutlierAlgorithm;
import elki.outlier.lof.LOF;
import elki.result.outlier.BasicOutlierScoreMeta;
import elki.result.outlier.OutlierResult;
import elki.utilities.datastructures.BitsUtil;
import elki.utilities.datastructures.heap.Heap;
import elki.utilities.documentation.Description;
import elki.utilities.documentation.Reference;
import elki.utilities.documentation.Title;
import elki.utilities.optionhandling.OptionID;
import elki.utilities.optionhandling.Parameterizer;
import elki.utilities.optionhandling.constraints.CommonConstraints;
import elki.utilities.optionhandling.constraints.ParameterConstraint;
import elki.utilities.optionhandling.parameterization.Parameterization;
import elki.utilities.optionhandling.parameters.DoubleParameter;
import elki.utilities.optionhandling.parameters.IntParameter;
import elki.utilities.optionhandling.parameters.ObjectParameter;
import elki.utilities.optionhandling.parameters.RandomParameter;
import elki.utilities.random.RandomFactory;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Random;
import java.util.Set;
import java.util.TreeSet;
import net.jafama.FastMath;

@Title(value="HiCS: High Contrast Subspaces for Density-Based Outlier Ranking")
@Description(value="Algorithm to compute High Contrast Subspaces in a database as a pre-processing step for for density-based outlier ranking methods.")
@Reference(authors="F. Keller, E. M\u00fcller, K. B\u00f6hm", title="HiCS: High Contrast Subspaces for Density-Based Outlier Ranking", booktitle="Proc. IEEE 28th Int. Conf. on Data Engineering (ICDE 2012)", url="https://doi.org/10.1109/ICDE.2012.88", bibkey="DBLP:conf/icde/KellerMB12")
public class HiCS
implements OutlierAlgorithm {
    private static final Logging LOG = Logging.getLogger(HiCS.class);
    private static final int MAX_RETRIES = 100;
    private int m;
    private double alpha;
    private OutlierAlgorithm outlierAlgorithm;
    private GoodnessOfFitTest statTest;
    private int cutoff;
    private RandomFactory rnd;

    public HiCS(int m, double alpha, OutlierAlgorithm outlierAlgorithm, GoodnessOfFitTest statTest, int cutoff, RandomFactory rnd) {
        this.m = m;
        this.alpha = alpha;
        this.outlierAlgorithm = outlierAlgorithm;
        this.statTest = statTest;
        this.cutoff = cutoff;
        this.rnd = rnd;
    }

    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array((TypeInformation[])new TypeInformation[]{TypeUtil.NUMBER_VECTOR_FIELD});
    }

    public OutlierResult run(Relation<? extends NumberVector> relation) {
        DBIDs ids = relation.getDBIDs();
        ArrayList<ArrayDBIDs> subspaceIndex = this.buildOneDimIndexes(relation);
        Set<HiCSSubspace> subspaces = this.calculateSubspaces(relation, subspaceIndex, this.rnd.getSingleThreadedRandom());
        if (LOG.isVerbose()) {
            LOG.verbose((CharSequence)("Number of high-contrast subspaces: " + subspaces.size()));
        }
        ArrayList<DoubleRelation> results = new ArrayList<DoubleRelation>();
        FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Calculating Outlier scores for high Contrast subspaces", subspaces.size(), LOG) : null;
        for (HiCSSubspace dimset : subspaces) {
            if (LOG.isVerbose()) {
                LOG.verbose((CharSequence)("Performing outlier detection in subspace " + dimset));
            }
            ProxyDatabase pdb = new ProxyDatabase(ids);
            pdb.addRelation((Relation)new ProjectedView(relation, (Projection)new NumericalFeatureSelection(dimset.bits)));
            OutlierResult result = this.outlierAlgorithm.autorun((Database)pdb);
            results.add(result.getScores());
            LOG.incrementProcessed((AbstractProgress)prog);
        }
        LOG.ensureCompleted(prog);
        WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage((DBIDs)relation.getDBIDs(), (int)4);
        DoubleMinMax minmax = new DoubleMinMax();
        DBIDIter iditer = relation.iterDBIDs();
        while (iditer.valid()) {
            double sum = 0.0;
            for (DoubleRelation r : results) {
                double s = r.doubleValue((DBIDRef)iditer);
                if (Double.isNaN(s)) continue;
                sum += s;
            }
            scores.putDouble((DBIDRef)iditer, sum);
            minmax.put(sum);
            iditer.advance();
        }
        BasicOutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
        MaterializedDoubleRelation scoreres = new MaterializedDoubleRelation("HiCS", relation.getDBIDs(), (DoubleDataStore)scores);
        return new OutlierResult(meta, (DoubleRelation)scoreres);
    }

    private ArrayList<ArrayDBIDs> buildOneDimIndexes(Relation<? extends NumberVector> relation) {
        int dim = RelationUtil.dimensionality(relation);
        ArrayList<ArrayDBIDs> subspaceIndex = new ArrayList<ArrayDBIDs>(dim + 1);
        VectorUtil.SortDBIDsBySingleDimension comp = new VectorUtil.SortDBIDsBySingleDimension(relation);
        for (int i = 0; i < dim; ++i) {
            ArrayModifiableDBIDs amDBIDs = DBIDUtil.newArray((DBIDs)relation.getDBIDs());
            comp.setDimension(i);
            amDBIDs.sort((Comparator)comp);
            subspaceIndex.add((ArrayDBIDs)amDBIDs);
        }
        return subspaceIndex;
    }

    private Set<HiCSSubspace> calculateSubspaces(Relation<? extends NumberVector> relation, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
        FiniteProgress dprog;
        int dbdim = RelationUtil.dimensionality(relation);
        FiniteProgress finiteProgress = dprog = LOG.isVerbose() ? new FiniteProgress("Subspace dimensionality", dbdim, LOG) : null;
        if (dprog != null) {
            dprog.setProcessed(2, LOG);
        }
        TreeSet<HiCSSubspace> subspaceList = new TreeSet<HiCSSubspace>(HiCSSubspace.SORT_BY_SUBSPACE);
        Heap dDimensionalList = new Heap(this.cutoff, HiCSSubspace.SORT_BY_CONTRAST_ASC);
        FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Generating two-element subsets", dbdim * (dbdim - 1) >> 1, LOG) : null;
        for (int i = 0; i < dbdim; ++i) {
            for (int j = i + 1; j < dbdim; ++j) {
                HiCSSubspace ts = new HiCSSubspace(dbdim).set(i).set(j);
                this.calculateContrast(relation, ts, subspaceIndex, random);
                dDimensionalList.add((Object)ts, this.cutoff);
                LOG.incrementProcessed((AbstractProgress)prog);
            }
        }
        LOG.ensureCompleted(prog);
        IndefiniteProgress qprog = LOG.isVerbose() ? new IndefiniteProgress("Testing subspace candidates", LOG) : null;
        int d = 3;
        while (!dDimensionalList.isEmpty()) {
            if (dprog != null) {
                dprog.setProcessed(d, LOG);
            }
            ArrayList<HiCSSubspace> candidateList = new ArrayList<HiCSSubspace>(dDimensionalList.size());
            Heap.UnorderedIter it = dDimensionalList.unorderedIter();
            while (it.valid()) {
                subspaceList.add((HiCSSubspace)it.get());
                candidateList.add((HiCSSubspace)it.get());
                it.advance();
            }
            dDimensionalList.clear();
            Collections.sort(candidateList, HiCSSubspace.SORT_BY_SUBSPACE);
            for (int i = 0; i < candidateList.size() - 1; ++i) {
                for (int j = i + 1; j < candidateList.size(); ++j) {
                    HiCSSubspace joinedSet = new HiCSSubspace((HiCSSubspace)candidateList.get(i)).or((HiCSSubspace)candidateList.get(j));
                    if (joinedSet.dimensionality() != d) continue;
                    this.calculateContrast(relation, joinedSet, subspaceIndex, random);
                    dDimensionalList.add((Object)joinedSet, this.cutoff);
                    LOG.incrementProcessed((AbstractProgress)qprog);
                }
            }
            block6: for (HiCSSubspace cand : candidateList) {
                Heap.UnorderedIter it2 = dDimensionalList.unorderedIter();
                while (it2.valid()) {
                    if (((HiCSSubspace)it2.get()).contrast > cand.contrast) {
                        subspaceList.remove(cand);
                        continue block6;
                    }
                    it2.advance();
                }
            }
            ++d;
        }
        LOG.setCompleted(qprog);
        if (dprog != null) {
            dprog.setProcessed(dbdim, LOG);
            dprog.ensureCompleted(LOG);
        }
        return subspaceList;
    }

    private void calculateContrast(Relation<? extends NumberVector> relation, HiCSSubspace subspace, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
        int card = subspace.dimensionality();
        double alpha1 = FastMath.pow((double)this.alpha, (double)(1.0 / (double)card));
        int windowsize = (int)((double)relation.size() * alpha1);
        FiniteProgress prog = LOG.isDebugging() ? new FiniteProgress("Monte-Carlo iterations", this.m, LOG) : null;
        int retries = 0;
        double deviationSum = 0.0;
        for (int i = 0; i < this.m; ++i) {
            DBIDArrayIter iter;
            int chosen = -1;
            for (int tmp = random.nextInt(card); tmp >= 0; --tmp) {
                chosen = subspace.nextSetBit(chosen + 1);
            }
            DBIDs conditionalSample = relation.getDBIDs();
            int j = subspace.nextSetBit(0);
            while (j >= 0) {
                if (j != chosen) {
                    ArrayDBIDs sortedIndices = subspaceIndex.get(j);
                    ArrayModifiableDBIDs indexBlock = DBIDUtil.newArray((int)windowsize);
                    iter = sortedIndices.iter();
                    iter.seek(random.nextInt(relation.size() - windowsize));
                    for (int k = 0; k < windowsize; ++k) {
                        indexBlock.add((DBIDRef)iter);
                        iter.advance();
                    }
                    conditionalSample = DBIDUtil.intersection((DBIDs)conditionalSample, (DBIDs)indexBlock);
                }
                j = subspace.nextSetBit(j + 1);
            }
            if (conditionalSample.size() < 10) {
                ++retries;
                if (LOG.isDebugging()) {
                    LOG.debug((CharSequence)("Sample size very small. Retry no. " + retries));
                }
                if (retries >= 100) {
                    LOG.warning((CharSequence)("Too many retries, for small samples: " + retries));
                } else {
                    --i;
                    continue;
                }
            }
            double[] sampleValues = new double[conditionalSample.size()];
            int l = 0;
            DBIDIter iter2 = conditionalSample.iter();
            while (iter2.valid()) {
                sampleValues[l++] = ((NumberVector)relation.get((DBIDRef)iter2)).doubleValue(chosen);
                iter2.advance();
            }
            double[] fullValues = new double[relation.size()];
            int l2 = 0;
            iter = subspaceIndex.get(chosen).iter();
            while (iter.valid()) {
                fullValues[l2++] = ((NumberVector)relation.get((DBIDRef)iter)).doubleValue(chosen);
                iter.advance();
            }
            double contrast = this.statTest.deviation(fullValues, sampleValues);
            if (Double.isNaN(contrast)) {
                --i;
                LOG.warning((CharSequence)"Contrast was NaN");
                continue;
            }
            deviationSum += contrast;
            LOG.incrementProcessed((AbstractProgress)prog);
        }
        LOG.ensureCompleted(prog);
        subspace.contrast = deviationSum / (double)this.m;
    }

    public static class Par
    implements Parameterizer {
        public static final OptionID M_ID = new OptionID("hics.m", "The number of iterations in the Monte-Carlo processing.");
        public static final OptionID ALPHA_ID = new OptionID("hics.alpha", "The discriminance value that determines the size of the test statistic .");
        public static final OptionID ALGO_ID = new OptionID("hics.algo", "The Algorithm that performs the actual outlier detection on the resulting set of subspace");
        public static final OptionID TEST_ID = new OptionID("hics.test", "The statistical test that is used to calculate the deviation of two data samples");
        public static final OptionID LIMIT_ID = new OptionID("hics.limit", "The threshold that determines how many d-dimensional subspace candidates to retain in each step of the generation");
        public static final OptionID SEED_ID = new OptionID("hics.seed", "The random seed.");
        private int m = 50;
        private double alpha = 0.1;
        private OutlierAlgorithm outlierAlgorithm;
        private GoodnessOfFitTest statTest;
        private int cutoff = 400;
        private RandomFactory rnd;

        public void configure(Parameterization config) {
            ((IntParameter)new IntParameter(M_ID, 50).addConstraint((ParameterConstraint)CommonConstraints.GREATER_THAN_ONE_INT)).grab(config, x -> {
                this.m = x;
            });
            ((DoubleParameter)new DoubleParameter(ALPHA_ID, 0.1).addConstraint((ParameterConstraint)CommonConstraints.GREATER_THAN_ZERO_DOUBLE)).grab(config, x -> {
                this.alpha = x;
            });
            new ObjectParameter(ALGO_ID, OutlierAlgorithm.class, LOF.class).grab(config, x -> {
                this.outlierAlgorithm = x;
            });
            new ObjectParameter(TEST_ID, GoodnessOfFitTest.class, KolmogorovSmirnovTest.class).grab(config, x -> {
                this.statTest = x;
            });
            ((IntParameter)new IntParameter(LIMIT_ID, 100).addConstraint((ParameterConstraint)CommonConstraints.GREATER_THAN_ONE_INT)).grab(config, x -> {
                this.cutoff = x;
            });
            new RandomParameter(SEED_ID).grab(config, x -> {
                this.rnd = x;
            });
        }

        public HiCS make() {
            return new HiCS(this.m, this.alpha, this.outlierAlgorithm, this.statTest, this.cutoff, this.rnd);
        }
    }

    public static class HiCSSubspace {
        protected long[] bits;
        protected double contrast;
        public static final Comparator<HiCSSubspace> SORT_BY_CONTRAST_ASC = new Comparator<HiCSSubspace>(){

            @Override
            public int compare(HiCSSubspace o1, HiCSSubspace o2) {
                return o1.contrast == o2.contrast ? 0 : (o1.contrast > o2.contrast ? 1 : -1);
            }
        };
        public static final Comparator<HiCSSubspace> SORT_BY_CONTRAST_DESC = new Comparator<HiCSSubspace>(){

            @Override
            public int compare(HiCSSubspace o1, HiCSSubspace o2) {
                return o1.contrast == o2.contrast ? 0 : (o1.contrast < o2.contrast ? 1 : -1);
            }
        };
        public static final Comparator<HiCSSubspace> SORT_BY_SUBSPACE = new Comparator<HiCSSubspace>(){

            @Override
            public int compare(HiCSSubspace o1, HiCSSubspace o2) {
                int dim1 = o1.nextSetBit(0);
                int dim2 = o2.nextSetBit(0);
                while (dim1 >= 0 && dim2 >= 0) {
                    if (dim1 < dim2) {
                        return -1;
                    }
                    if (dim1 > dim2) {
                        return 1;
                    }
                    dim1 = o1.nextSetBit(dim1 + 1);
                    dim2 = o2.nextSetBit(dim2 + 1);
                }
                return 0;
            }
        };

        protected HiCSSubspace(int maxdim) {
            this.bits = BitsUtil.zero((int)maxdim);
        }

        protected HiCSSubspace(HiCSSubspace other) {
            this.bits = (long[])other.bits.clone();
        }

        public int dimensionality() {
            return BitsUtil.cardinality((long[])this.bits);
        }

        protected HiCSSubspace set(int i) {
            BitsUtil.setI((long[])this.bits, (int)i);
            return this;
        }

        protected HiCSSubspace or(HiCSSubspace other) {
            BitsUtil.orI((long[])this.bits, (long[])other.bits);
            return this;
        }

        public String toString() {
            StringBuilder buf = new StringBuilder(1000).append("[contrast=").append(this.contrast);
            int i = this.nextSetBit(0);
            while (i >= 0) {
                buf.append(' ').append(i);
                i = this.nextSetBit(i + 1);
            }
            return buf.append(']').toString();
        }

        public int nextSetBit(int start) {
            return BitsUtil.nextSetBit((long[])this.bits, (int)start);
        }
    }
}

