/*
 * Decompiled with CFR 0.152.
 */
package elki.algorithm.statistics;

import elki.Algorithm;
import elki.data.DoubleVector;
import elki.data.NumberVector;
import elki.data.type.TypeInformation;
import elki.data.type.TypeUtil;
import elki.database.ids.DBIDMIter;
import elki.database.ids.DBIDRef;
import elki.database.ids.DBIDUtil;
import elki.database.ids.DBIDs;
import elki.database.ids.ModifiableDBIDs;
import elki.database.query.QueryBuilder;
import elki.database.query.knn.KNNSearcher;
import elki.database.relation.Relation;
import elki.database.relation.RelationUtil;
import elki.distance.NumberVectorDistance;
import elki.distance.minkowski.EuclideanDistance;
import elki.logging.Logging;
import elki.logging.statistics.DoubleStatistic;
import elki.logging.statistics.LongStatistic;
import elki.logging.statistics.Statistic;
import elki.math.MathUtil;
import elki.math.MeanVariance;
import elki.math.statistics.distribution.BetaDistribution;
import elki.utilities.documentation.Reference;
import elki.utilities.exceptions.AbortException;
import elki.utilities.optionhandling.OptionID;
import elki.utilities.optionhandling.ParameterException;
import elki.utilities.optionhandling.Parameterizer;
import elki.utilities.optionhandling.WrongParameterValueException;
import elki.utilities.optionhandling.constraints.CommonConstraints;
import elki.utilities.optionhandling.constraints.ParameterConstraint;
import elki.utilities.optionhandling.parameterization.Parameterization;
import elki.utilities.optionhandling.parameters.DoubleListParameter;
import elki.utilities.optionhandling.parameters.IntParameter;
import elki.utilities.optionhandling.parameters.ObjectParameter;
import elki.utilities.optionhandling.parameters.Parameter;
import elki.utilities.optionhandling.parameters.RandomParameter;
import elki.utilities.random.RandomFactory;
import java.util.Arrays;
import java.util.Random;

/**
 * Hopkins statistic for measuring the clustering tendency of a numerical data
 * set.
 * <p>
 * For each repetition, {@link #sampleSize} objects are sampled from the data
 * and {@link #sampleSize} points are drawn uniformly from an axis-parallel box
 * (either user-specified or the bounding box of the data). With {@code w}
 * being the sum of {@code kdist^dim} over the sampled data objects and
 * {@code u} the same sum over the uniform points, the statistic is
 * {@code h = u / (u + w)}. The mean of {@code h} over all repetitions is then
 * evaluated against a Beta({@code sampleSize}, {@code sampleSize})
 * distribution to obtain the returned value {@code p}.
 * <p>
 * Intermediate values (means and, for more than one repetition, sample
 * standard deviations of {@code h}, {@code u}, {@code w}) are only reported
 * via statistics logging.
 */
@Reference(authors="B. Hopkins, J. G. Skellam", title="A new method for determining the type of distribution of plant individuals", booktitle="Annals of Botany, 18(2), 213-227", url="https://doi.org/10.1093/oxfordjournals.aob.a083391", bibkey="doi:10.1093/oxfordjournals.aob.a083391")
public class HopkinsStatisticClusteringTendency
implements Algorithm {
    /** Class logger; also the sink for all reported statistics. */
    private static final Logging LOG = Logging.getLogger(HopkinsStatisticClusteringTendency.class);

    /** Number of data objects and of uniform points drawn per repetition. */
    protected int sampleSize;

    /** Number of repetitions of the experiment that are averaged. */
    protected int rep;

    /** Which nearest neighbor (k-th) supplies the distance. */
    protected int k;

    /** Source of randomness for sampling and for generating uniform points. */
    protected RandomFactory random;

    /** Upper bounds of the sampling box; {@code null} or empty means "use the data maximum". */
    private double[] maxima = new double[0];

    /** Lower bounds of the sampling box; {@code null} or empty means "use the data minimum". */
    private double[] minima = new double[0];

    /** Distance function used for the nearest-neighbor queries. */
    protected NumberVectorDistance<? super NumberVector> distance;

    /**
     * Constructor.
     *
     * @param distance Distance function for the nearest-neighbor queries
     * @param samplesize Number of objects / uniform points per repetition
     * @param random Random generator factory
     * @param rep Number of repetitions
     * @param k Nearest neighbor to use
     * @param minima Lower bounds of the sampling box: one value per dimension,
     *        a single value for all dimensions, or {@code null}/empty to use
     *        the data minimum
     * @param maxima Upper bounds of the sampling box: one value per dimension,
     *        a single value for all dimensions, or {@code null}/empty to use
     *        the data maximum
     */
    public HopkinsStatisticClusteringTendency(NumberVectorDistance<? super NumberVector> distance, int samplesize, RandomFactory random, int rep, int k, double[] minima, double[] maxima) {
        this.distance = distance;
        this.sampleSize = samplesize;
        this.random = random;
        this.rep = rep;
        this.k = k;
        this.minima = minima;
        this.maxima = maxima;
    }

    /**
     * Input type restriction: a single number vector field.
     *
     * @return Array with the one required input type
     */
    @Override
    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
    }

    /**
     * Compute the Hopkins statistic {@link #rep} times and evaluate its mean.
     *
     * @param relation Data relation to analyze
     * @return {@code p}: the regularized incomplete Beta function at the mean
     *         {@code h} with both shape parameters set to {@link #sampleSize},
     *         mirrored to {@code 1 - value} when the mean {@code h} exceeds 0.5
     * @throws AbortException if user-specified bounds do not match the data
     *         dimensionality
     */
    public Double run(Relation<NumberVector> relation) {
        final int dim = RelationUtil.dimensionality(relation);
        final QueryBuilder<NumberVector> qb = new QueryBuilder<>(relation, this.distance);
        // Both searchers are requested for k + 1 neighbors, the largest k
        // that is asked of either of them below.
        final KNNSearcher<NumberVector> knnQuery = qb.kNNByObject(this.k + 1);
        final KNNSearcher<DBIDRef> intQuery = qb.kNNByDBID(this.k + 1);

        final double[] min = new double[dim];
        final double[] extend = new double[dim];
        this.initializeDataExtends(relation, dim, min, extend);

        // Everything except the returned value is reported through
        // statistics logging only.
        if (!LOG.isStatistics()) {
            LOG.warning("This algorithm must be used with at least logging level " + Logging.Level.STATISTICS);
        }

        final MeanVariance hmean = new MeanVariance();
        final MeanVariance umean = new MeanVariance();
        final MeanVariance wmean = new MeanVariance();
        // Repeat the experiment and aggregate; the mean of h is evaluated.
        for (int j = 0; j < this.rep; ++j) {
            // Real data first, then uniform data: the order fixes how the
            // random factory is consumed, so keep it.
            final double w = this.computeNNForRealData(intQuery, relation, dim);
            final double u = this.computeNNForUniformData(knnQuery, min, extend);
            final double h = u / (u + w);
            hmean.put(h);
            umean.put(u);
            wmean.put(w);
        }

        final String prefix = this.getClass().getName();
        LOG.statistics(new LongStatistic(prefix + ".samplesize", this.sampleSize));
        LOG.statistics(new LongStatistic(prefix + ".dim", dim));
        LOG.statistics(new LongStatistic(prefix + ".hopkins.nearest-neighbor", this.k));
        LOG.statistics(new DoubleStatistic(prefix + ".hopkins.h.mean", hmean.getMean()));
        LOG.statistics(new DoubleStatistic(prefix + ".hopkins.u.mean", umean.getMean()));
        LOG.statistics(new DoubleStatistic(prefix + ".hopkins.w.mean", wmean.getMean()));
        // A sample standard deviation needs at least two observations.
        if (this.rep > 1) {
            LOG.statistics(new DoubleStatistic(prefix + ".hopkins.h.std", hmean.getSampleStddev()));
            LOG.statistics(new DoubleStatistic(prefix + ".hopkins.u.std", umean.getSampleStddev()));
            LOG.statistics(new DoubleStatistic(prefix + ".hopkins.w.std", wmean.getSampleStddev()));
        }

        // Evaluate the mean statistic against Beta(sampleSize, sampleSize)
        // and report the smaller tail.
        final double x = hmean.getMean();
        final double ix = BetaDistribution.regularizedIncBeta(x, this.sampleSize, this.sampleSize);
        final double p = x > 0.5 ? 1.0 - ix : ix;
        LOG.statistics(new DoubleStatistic(prefix + ".hopkins.p", p));
        return p;
    }

    /**
     * Sum of {@code kdist^dim} over a random sample of the data objects.
     *
     * @param knnQuery kNN searcher addressed by object id
     * @param relation Data relation to sample from
     * @param dim Data dimensionality, used as the exponent
     * @return Aggregated value {@code w}
     */
    protected double computeNNForRealData(KNNSearcher<DBIDRef> knnQuery, Relation<NumberVector> relation, int dim) {
        double w = 0.0;
        final ModifiableDBIDs dataSampleIds = DBIDUtil.randomSample(relation.getDBIDs(), this.sampleSize, this.random);
        for (DBIDMIter iter = dataSampleIds.iter(); iter.valid(); iter.advance()) {
            // k + 1 here versus k for the uniform points: the query object is
            // a member of the relation -- presumably it is returned as its
            // own nearest neighbor and must be skipped.
            final double kdist = knnQuery.getKNN(iter, this.k + 1).getKNNDistance();
            w += MathUtil.powi(kdist, dim);
        }
        return w;
    }

    /**
     * Sum of {@code kdist^dim} over points drawn uniformly from the box
     * {@code [min, min + extend]}.
     *
     * @param knnQuery kNN searcher addressed by vector
     * @param min Lower corner of the sampling box
     * @param extend Side lengths of the sampling box
     * @return Aggregated value {@code u}
     */
    protected double computeNNForUniformData(KNNSearcher<NumberVector> knnQuery, double[] min, double[] extend) {
        final Random rand = this.random.getSingleThreadedRandom();
        final int dim = min.length;
        // Scratch buffer, overwritten and re-wrapped for every generated point.
        final double[] buf = new double[dim];
        double u = 0.0;
        for (int i = 0; i < this.sampleSize; ++i) {
            for (int d = 0; d < buf.length; ++d) {
                buf[d] = min[d] + rand.nextDouble() * extend[d];
            }
            final double kdist = knnQuery.getKNN(DoubleVector.wrap(buf), this.k).getKNNDistance();
            u += MathUtil.powi(kdist, dim);
        }
        return u;
    }

    /**
     * Determine the box from which uniform points are drawn.
     * <p>
     * Each bound is resolved independently: a user-specified bound (one value
     * per dimension, or a single value applied to all dimensions) is used if
     * present, otherwise the corresponding extremum of the data is used. The
     * data is only scanned when at least one bound is missing.
     *
     * @param relation Data relation, scanned only if a bound is not specified
     * @param dim Data dimensionality
     * @param min Output: lower corner of the box, length {@code dim}
     * @param extend Output: side lengths of the box ({@code max - min}),
     *        length {@code dim}
     * @throws AbortException if a specified bound has neither 1 nor
     *         {@code dim} values
     */
    protected void initializeDataExtends(Relation<NumberVector> relation, int dim, double[] min, double[] extend) {
        assert (min.length == dim && extend.length == dim);
        final boolean hasMin = this.minima != null && this.minima.length > 0;
        final boolean hasMax = this.maxima != null && this.maxima.length > 0;
        // Validate first, so bad parameters fail before any data scan.
        if (hasMin && this.minima.length != dim && this.minima.length != 1) {
            throw new AbortException("Invalid minima specified: expected " + dim + " got minima dimensionality: " + this.minima.length);
        }
        if (hasMax && this.maxima.length != dim && this.maxima.length != 1) {
            throw new AbortException("Invalid maxima specified: expected " + dim + " got maxima dimensionality: " + this.maxima.length);
        }
        // Only scan the data if at least one bound has to come from it.
        final double[][] minmax = (hasMin && hasMax) ? null : RelationUtil.computeMinMax(relation);
        for (int d = 0; d < dim; ++d) {
            // A single specified value is broadcast to every dimension.
            final double lo = hasMin ? this.minima[this.minima.length == 1 ? 0 : d] : minmax[0][d];
            final double hi = hasMax ? this.maxima[this.maxima.length == 1 ? 0 : d] : minmax[1][d];
            min[d] = lo;
            extend[d] = hi - lo;
        }
    }

    /**
     * Parameterization class.
     */
    public static class Par
    implements Parameterizer {
        /** Sample size option. */
        public static final OptionID SAMPLESIZE_ID = new OptionID("hopkins.samplesize", "Number of object / random samples to analyze.");

        /** Repetition count option. */
        public static final OptionID REP_ID = new OptionID("hopkins.rep", "The number of times to repeat the experiment (default: 1)");

        /** Random generator option. */
        public static final OptionID SEED_ID = new OptionID("hopkins.seed", "The random number generator.");

        /** Lower bounds option. */
        public static final OptionID MINIMA_ID = new OptionID("hopkins.min", "Minimum values in each dimension. If no value is specified, the minimum value in each dimension will be used. If only one value is specified, this value will be used for all dimensions.");

        /** Upper bounds option. */
        public static final OptionID MAXIMA_ID = new OptionID("hopkins.max", "Maximum values in each dimension. If no value is specified, the maximum value in each dimension will be used. If only one value is specified, this value will be used for all dimensions.");

        /** Nearest neighbor number option. */
        public static final OptionID K_ID = new OptionID("hopkins.k", "Nearest neighbor to use for the statistic");

        /** Distance function. */
        protected NumberVectorDistance<? super NumberVector> distance;

        /** Sample size; has no default, see {@link #configure}. */
        protected int sampleSize = 0;

        /** Number of repetitions. */
        protected int rep = 1;

        /** Nearest neighbor to use. */
        protected int k = 1;

        /** Random generator factory. */
        protected RandomFactory random;

        /** Upper bounds, {@code null} if not specified. */
        protected double[] maxima = null;

        /** Lower bounds, {@code null} if not specified. */
        protected double[] minima = null;

        /**
         * Read all options from the parameterization.
         *
         * @param config Parameterization to read from; also receives the
         *        error if minima and maxima differ in length
         */
        @Override
        public void configure(Parameterization config) {
            new ObjectParameter<NumberVectorDistance<? super NumberVector>>(Algorithm.Utils.DISTANCE_FUNCTION_ID, NumberVectorDistance.class, EuclideanDistance.class)
                .grab(config, x -> this.distance = x);
            new IntParameter(REP_ID, 1)
                .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT)
                .grab(config, x -> this.rep = x);
            new IntParameter(K_ID, 1)
                .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT)
                .grab(config, x -> this.k = x);
            new IntParameter(SAMPLESIZE_ID)
                .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT)
                .grab(config, x -> this.sampleSize = x);
            new RandomParameter(SEED_ID)
                .grab(config, x -> this.random = x);
            final DoubleListParameter minimaP = new DoubleListParameter(MINIMA_ID)
                .setOptional(true);
            minimaP.grab(config, x -> this.minima = x.clone());
            // Order matters: maxima become mandatory once minima were given,
            // so this must be evaluated after minima have been grabbed.
            final DoubleListParameter maximaP = new DoubleListParameter(MAXIMA_ID)
                .setOptional(this.minima == null);
            maximaP.grab(config, x -> this.maxima = x.clone());
            // Cross-parameter constraint that is checked by hand here.
            if (this.minima != null && this.maxima != null && this.minima.length != this.maxima.length) {
                config.reportError(new WrongParameterValueException(minimaP, "and", maximaP, "must have the same number of values."));
            }
        }

        /**
         * Instantiate the algorithm from the configured values.
         *
         * @return New algorithm instance
         */
        @Override
        public HopkinsStatisticClusteringTendency make() {
            return new HopkinsStatisticClusteringTendency(this.distance, this.sampleSize, this.random, this.rep, this.k, this.minima, this.maxima);
        }
    }
}

