/*
 * Decompiled with CFR 0.152.
 */
package elki.outlier.density;

import elki.data.NumberVector;
import elki.data.type.TypeInformation;
import elki.data.type.TypeUtil;
import elki.database.datastore.DataStoreUtil;
import elki.database.datastore.DoubleDataStore;
import elki.database.datastore.WritableDoubleDataStore;
import elki.database.ids.ArrayModifiableDBIDs;
import elki.database.ids.DBIDArrayMIter;
import elki.database.ids.DBIDIter;
import elki.database.ids.DBIDRef;
import elki.database.ids.DBIDUtil;
import elki.database.ids.DBIDs;
import elki.database.relation.DoubleRelation;
import elki.database.relation.MaterializedDoubleRelation;
import elki.database.relation.Relation;
import elki.database.relation.RelationUtil;
import elki.logging.Logging;
import elki.logging.progress.AbstractProgress;
import elki.logging.progress.FiniteProgress;
import elki.logging.progress.StepProgress;
import elki.math.DoubleMinMax;
import elki.math.MathUtil;
import elki.outlier.OutlierAlgorithm;
import elki.result.outlier.BasicOutlierScoreMeta;
import elki.result.outlier.OutlierResult;
import elki.utilities.documentation.Reference;
import elki.utilities.optionhandling.OptionID;
import elki.utilities.optionhandling.Parameterizer;
import elki.utilities.optionhandling.constraints.CommonConstraints;
import elki.utilities.optionhandling.constraints.ParameterConstraint;
import elki.utilities.optionhandling.parameterization.Parameterization;
import elki.utilities.optionhandling.parameters.IntParameter;
import elki.utilities.optionhandling.parameters.RandomParameter;
import elki.utilities.random.RandomFactory;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Random;
import net.jafama.FastMath;

@Reference(authors="F. T. Liu, K. M. Ting, Z.-H. Zhou", title="Isolation-Based Anomaly Detection", booktitle="Transactions on Knowledge Discovery from Data (TKDD)", url="https://doi.org/10.1145/2133360.2133363", bibkey="DBLP:journals/tkdd/LiuTZ12")
public class IsolationForest
implements OutlierAlgorithm {
    private static final Logging LOG = Logging.getLogger(IsolationForest.class);
    protected int numTrees = 100;
    protected int subsampleSize = 256;
    private RandomFactory rnd;

    public IsolationForest(int numTrees, int subsampleSize, RandomFactory rnd) {
        this.numTrees = numTrees;
        this.subsampleSize = subsampleSize;
        this.rnd = rnd;
    }

    public OutlierResult run(Relation<? extends NumberVector> relation) {
        if (relation.size() < this.subsampleSize) {
            this.subsampleSize = relation.size();
        }
        StepProgress stepprog = LOG.isVerbose() ? new StepProgress(2) : null;
        LOG.beginStep(stepprog, 1, "Generating isolation trees.");
        FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Isolation forest construction", this.numTrees, LOG) : null;
        Random random = this.rnd.getSingleThreadedRandom();
        ArrayList<Node> trees = new ArrayList<Node>(this.numTrees);
        ForestBuilder builder = new ForestBuilder(relation, this.subsampleSize, random);
        for (int i = 0; i < this.numTrees; ++i) {
            trees.add(builder.newTree());
            LOG.incrementProcessed((AbstractProgress)prog);
        }
        LOG.ensureCompleted(prog);
        LOG.beginStep(stepprog, 2, "Computing isolation forest scores.");
        prog = LOG.isVerbose() ? new FiniteProgress("Isolation forest scores", relation.size(), LOG) : null;
        WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage((DBIDs)relation.getDBIDs(), (int)30);
        DoubleMinMax minmax = new DoubleMinMax();
        double f = -MathUtil.LOG2 / ((double)trees.size() * IsolationForest.c(this.subsampleSize));
        DBIDIter iter = relation.iterDBIDs();
        while (iter.valid()) {
            NumberVector v = (NumberVector)relation.get((DBIDRef)iter);
            double avgPathLength = 0.0;
            for (Node tree : trees) {
                avgPathLength += this.isolationScore(tree, v);
            }
            double score = FastMath.exp((double)(f * avgPathLength));
            scores.putDouble((DBIDRef)iter, score);
            minmax.put(score);
            LOG.incrementProcessed((AbstractProgress)prog);
            iter.advance();
        }
        LOG.ensureCompleted(prog);
        LOG.ensureCompleted((FiniteProgress)stepprog);
        BasicOutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
        MaterializedDoubleRelation rel = new MaterializedDoubleRelation("IsolationForest", relation.getDBIDs(), (DoubleDataStore)scores);
        return new OutlierResult(meta, (DoubleRelation)rel);
    }

    protected static double c(double n) {
        return n <= 1.0 ? 0.0 : 2.0 * (FastMath.log((double)(n - 1.0)) + 0.577215664901532) - 2.0 * (n - 1.0) / n;
    }

    protected double isolationScore(Node n, NumberVector v) {
        int depth = 0;
        while (n != null) {
            ++depth;
            if (n.dim < 0) {
                return IsolationForest.c(n.size) + (double)depth;
            }
            n = v.doubleValue(n.dim) <= n.split ? n.left : n.right;
        }
        throw new IllegalStateException("Encountered unexpected null.");
    }

    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array((TypeInformation[])new TypeInformation[]{TypeUtil.NUMBER_VECTOR_FIELD});
    }

    public static class Par
    implements Parameterizer {
        public static final OptionID NUM_TREES_ID = new OptionID("iforest.numtrees", "Number of trees to use.");
        public static final OptionID SUBSAMPLE_SIZE_ID = new OptionID("iforest.subsample", "Subsampling size.");
        public static final OptionID SEED_ID = new OptionID("iforest.seed", "The seed to use for initializing Random.");
        protected int numTrees = 100;
        protected int subsampleSize = 256;
        protected RandomFactory rnd;

        public void configure(Parameterization config) {
            ((IntParameter)new IntParameter(NUM_TREES_ID, 100).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT)).grab(config, x -> {
                this.numTrees = x;
            });
            ((IntParameter)new IntParameter(SUBSAMPLE_SIZE_ID, 256).addConstraint((ParameterConstraint)CommonConstraints.GREATER_THAN_ONE_INT)).grab(config, x -> {
                this.subsampleSize = x;
            });
            new RandomParameter(SEED_ID).grab(config, x -> {
                this.rnd = x;
            });
        }

        public IsolationForest make() {
            return new IsolationForest(this.numTrees, this.subsampleSize, this.rnd);
        }
    }

    protected static class Node {
        int dim;
        double split;
        int size;
        Node left;
        Node right;

        public Node(int dim, double split, int size, Node left, Node right) {
            this.dim = dim;
            this.split = split;
            this.size = size;
            this.left = left;
            this.right = right;
        }
    }

    protected static class ForestBuilder {
        Relation<? extends NumberVector> relation;
        ArrayModifiableDBIDs ids;
        DBIDArrayMIter iter;
        double[] min;
        double[] max;
        int[] active;
        int maxheight;
        Random rnd;
        int subsampleSize;

        protected ForestBuilder(Relation<? extends NumberVector> relation, int subsampleSize, Random random) {
            this.relation = relation;
            int dim = RelationUtil.dimensionality(relation);
            this.subsampleSize = subsampleSize;
            this.maxheight = (int)FastMath.ceil((float)FastMath.log2((int)subsampleSize));
            this.min = new double[dim];
            this.max = new double[dim];
            this.active = new int[dim];
            this.ids = DBIDUtil.newArray((DBIDs)relation.getDBIDs());
            this.iter = this.ids.iter();
            this.rnd = random;
        }

        protected Node newTree() {
            DBIDUtil.randomShuffle((ArrayModifiableDBIDs)this.ids, (Random)this.rnd, (int)this.subsampleSize);
            return this.build(0, this.subsampleSize, 0);
        }

        protected Node build(int s, int e, int h) {
            double v;
            int d;
            int size = e - s;
            if (h >= this.maxheight || size <= 1) {
                return new Node(-1, Double.NaN, size, null, null);
            }
            Arrays.fill(this.min, Double.MAX_VALUE);
            Arrays.fill(this.max, -1.7976931348623157E308);
            int dim = this.min.length;
            this.iter.seek(s);
            while (this.iter.getOffset() < e) {
                NumberVector o = (NumberVector)this.relation.get((DBIDRef)this.iter);
                for (d = 0; d < dim; ++d) {
                    v = o.doubleValue(d);
                    this.min[d] = v < this.min[d] ? v : this.min[d];
                    this.max[d] = v > this.max[d] ? v : this.max[d];
                }
                this.iter.advance();
            }
            int numactive = 0;
            for (int j = 0; j < dim; ++j) {
                if (!(this.min[j] < this.max[j])) continue;
                this.active[numactive++] = j;
            }
            if (numactive == 0) {
                return new Node(-1, Double.NaN, size, null, null);
            }
            d = this.active[this.rnd.nextInt(numactive)];
            v = this.min[d] + (this.max[d] - this.min[d]) * this.rnd.nextDouble();
            int i = s;
            int j = e - 1;
            while (i < j) {
                while (i < j && ((NumberVector)this.relation.get((DBIDRef)this.iter.seek(i))).doubleValue(d) <= v) {
                    ++i;
                }
                while (i < j && ((NumberVector)this.relation.get((DBIDRef)this.iter.seek(j))).doubleValue(d) > v) {
                    --j;
                }
                if (i >= j) continue;
                this.ids.swap(i++, j--);
            }
            return new Node(d, v, size, this.build(s, i, h + 1), this.build(i, e, h + 1));
        }
    }
}

