/*
 * This file is part of ELKI:
 * Environment for Developing KDD-Applications Supported by Index-Structures
 *
 * Copyright (C) 2022
 * ELKI Development Team
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package elki.outlier.lof;

import elki.Algorithm;
import elki.data.spatial.SpatialComparable;
import elki.data.type.TypeInformation;
import elki.data.type.TypeUtil;
import elki.database.datastore.DataStoreFactory;
import elki.database.datastore.DataStoreUtil;
import elki.database.datastore.DoubleDataStore;
import elki.database.datastore.WritableDoubleDataStore;
import elki.database.ids.*;
import elki.database.query.QueryBuilder;
import elki.database.query.knn.KNNSearcher;
import elki.database.relation.DoubleRelation;
import elki.database.relation.MaterializedDoubleRelation;
import elki.database.relation.Relation;
import elki.database.relation.RelationUtil;
import elki.distance.Distance;
import elki.distance.minkowski.EuclideanDistance;
import elki.logging.Logging;
import elki.logging.progress.FiniteProgress;
import elki.logging.progress.StepProgress;
import elki.math.DoubleMinMax;
import elki.math.MathUtil;
import elki.math.statistics.distribution.GammaDistribution;
import elki.outlier.OutlierAlgorithm;
import elki.result.outlier.BasicOutlierScoreMeta;
import elki.result.outlier.OutlierResult;
import elki.result.outlier.OutlierScoreMeta;
import elki.utilities.documentation.Reference;
import elki.utilities.optionhandling.OptionID;
import elki.utilities.optionhandling.Parameterizer;
import elki.utilities.optionhandling.constraints.CommonConstraints;
import elki.utilities.optionhandling.parameterization.Parameterization;
import elki.utilities.optionhandling.parameters.IntParameter;
import elki.utilities.optionhandling.parameters.ObjectParameter;

import net.jafama.FastMath;

/**
 * Variance of Volume for outlier detection.
 * <p>
 * The volume is estimated by the distance to the k-nearest neighbor, then
 * the variance of volume is computed.
 * <p>
 * Unfortunately, this approach needs an enormous numerical precision, and may
 * not work for high-dimensional, non-normalized data. We therefore divide each
 * volume by the average across the data set. This means values are even less
 * comparable across data sets, but this avoids some of the numerical problems
 * of this method.
 * <p>
 * Degenerate inputs: if the sum of all volumes is zero (every k-distance is
 * 0), the normalization is skipped and the volumes remain 0; neighborhoods
 * with fewer than two members are assigned a variance of 0. Both cases would
 * otherwise yield NaN because of a division by zero.
 * <p>
 * Reference:
 * <p>
 * T. Hu, S. Y. Sung<br>
 * Detecting pattern-based outliers<br>
 * Pattern Recognition Letters 24(16)
 *
 * @author Erich Schubert
 * @since 0.7.0
 *
 * @has - - - KNNSearcher
 *
 * @param <O> the type of data objects handled by this algorithm
 */
@Reference(authors = "T. Hu, S. Y. Sung", //
    title = "Detecting pattern-based outliers", //
    booktitle = "Pattern Recognition Letters 24(16)", //
    url = "https://doi.org/10.1016/S0167-8655(03)00165-X", //
    bibkey = "DBLP:journals/prl/HuS03")
public class VarianceOfVolume<O extends SpatialComparable> implements OutlierAlgorithm {
  /**
   * The logger for this class.
   */
  private static final Logging LOG = Logging.getLogger(VarianceOfVolume.class);

  /**
   * Distance function used.
   */
  protected Distance<? super O> distance;

  /**
   * The number of neighbors to query (plus the query point!)
   */
  protected int kplus;

  /**
   * Constructor.
   *
   * @param k number of neighbors to use for comparison
   * @param distance the neighborhood distance function
   */
  public VarianceOfVolume(int k, Distance<? super O> distance) {
    super();
    this.distance = distance;
    this.kplus = k + 1; // + query point
  }

  @Override
  public TypeInformation[] getInputTypeRestriction() {
    return TypeUtil.array(distance.getInputTypeRestriction());
  }

  /**
   * Runs the VOV algorithm on the given database.
   * <p>
   * Works in three steps: obtain a kNN searcher, compute the (normalized)
   * volume of every object, then compute the variance of these volumes within
   * each neighborhood.
   *
   * @param relation Data to process
   * @return VOV outlier result
   */
  public OutlierResult run(Relation<O> relation) {
    StepProgress stepprog = LOG.isVerbose() ? new StepProgress("VOV", 3) : null;
    DBIDs ids = relation.getDBIDs();
    int dim = RelationUtil.dimensionality(relation);

    LOG.beginStep(stepprog, 1, "Materializing nearest-neighbor sets.");
    KNNSearcher<DBIDRef> knnq = new QueryBuilder<>(relation, distance).precomputed().kNNByDBID(kplus);

    // Compute Volumes
    LOG.beginStep(stepprog, 2, "Computing Volumes.");
    WritableDoubleDataStore vols = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    computeVolumes(knnq, dim, ids, vols);

    // compute VOV of each object
    LOG.beginStep(stepprog, 3, "Computing Variance of Volumes (VOV).");
    WritableDoubleDataStore vovs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB);
    // track the minimum and maximum score for the result metadata.
    DoubleMinMax vovminmax = new DoubleMinMax();
    computeVOVs(knnq, ids, vols, vovs, vovminmax);

    LOG.setCompleted(stepprog);

    // Build result representation.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("Variance of Volume", ids, vovs);
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(vovminmax.getMin(), vovminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
    return new OutlierResult(scoreMeta, scoreResult);
  }

  /**
   * Compute volumes.
   * <p>
   * For every object the k-distance is raised to the power of the
   * dimensionality (after multiplying with a dimension-dependent constant).
   * Afterwards, all volumes are multiplied by <code>ids.size() / sum</code>,
   * i.e., divided by their average. This rescaling is skipped when the sum of
   * volumes is zero, in which case all volumes remain 0.
   *
   * @param knnq KNN query
   * @param dim Data dimensionality
   * @param ids IDs to process
   * @param vols Volume storage
   */
  private void computeVolumes(KNNSearcher<DBIDRef> knnq, int dim, DBIDs ids, WritableDoubleDataStore vols) {
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Volume", ids.size(), LOG) : null;
    // Constant factor applied to the radius before taking the dim-th power.
    double scaleconst = MathUtil.SQRTPI * FastMath.pow(GammaDistribution.gamma(1 + dim * .5), -1. / dim);
    boolean warned = false;
    double sum = 0.;
    for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
      double dk = knnq.getKNN(iter, kplus).getKNNDistance();
      double vol = dk > 0 ? MathUtil.powi(dk * scaleconst, dim) : 0.;
      if(vol == Double.POSITIVE_INFINITY && !warned) {
        LOG.warning("Variance of Volumes has hit double precision limits, results are not reliable.");
        warned = true;
      }
      vols.putDouble(iter, vol);
      sum += vol;
      LOG.incrementProcessed(prog);
    }
    // Volumes are never negative, so sum == 0 means that every volume is 0.
    // Scaling would then compute 0 * (n / 0) = NaN for every object; leave the
    // volumes at 0 instead.
    if(sum > 0) {
      final double scaling = ids.size() / sum;
      for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        vols.putDouble(iter, vols.doubleValue(iter) * scaling);
      }
    }
    LOG.ensureCompleted(prog);
  }

  /**
   * Compute variance of volumes.
   * <p>
   * For each object, the sample variance (divisor <code>n - 1</code>) of the
   * volumes of all objects in its kNN result is stored as score. If the sum
   * of squared deviations is not finite, the score is set to positive
   * infinity and a warning is logged once. Neighborhoods with fewer than two
   * members get a variance of 0.
   *
   * @param knnq KNN query
   * @param ids IDs to process
   * @param vols Volumes
   * @param vovs Variance of Volume storage
   * @param vovminmax Score minimum/maximum tracker
   */
  private void computeVOVs(KNNSearcher<DBIDRef> knnq, DBIDs ids, DoubleDataStore vols, WritableDoubleDataStore vovs, DoubleMinMax vovminmax) {
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Variance of Volume", ids.size(), LOG) : null;
    boolean warned = false;
    for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
      KNNList knns = knnq.getKNN(iter, kplus);
      final int n = knns.size();
      DoubleDBIDListIter it = knns.iter();
      // First pass: mean volume of the neighborhood.
      double vbar = 0.;
      for(; it.valid(); it.advance()) {
        vbar += vols.doubleValue(it);
      }
      vbar /= n; // Average
      // Second pass: sum of squared deviations from the mean.
      double vov = 0.;
      for(it.seek(0); it.valid(); it.advance()) {
        double v = vols.doubleValue(it) - vbar;
        vov += v * v;
      }
      if(!(vov < Double.POSITIVE_INFINITY)) {
        // Infinite or NaN: report as infinite, warn only once.
        if(!warned) {
          LOG.warning("Variance of Volumes has hit double precision limits, results are not reliable.");
          warned = true;
        }
        vov = Double.POSITIVE_INFINITY;
      }
      else {
        // With fewer than two members, n - 1 is not positive and the division
        // would give NaN (0/0); such a neighborhood has no variance.
        vov = n > 1 ? vov / (n - 1) : 0.;
      }
      vovs.putDouble(iter, vov);
      // update minimum and maximum
      vovminmax.put(vov);
      LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
  }

  /**
   * Parameterization class.
   *
   * @author Erich Schubert
   *
   * @hidden
   *
   * @param <O> Object type
   */
  public static class Par<O extends SpatialComparable> implements Parameterizer {
    /**
     * Parameter to specify the number of nearest neighbors of an object to be
     * considered for computing its VOV score, must be an integer greater than
     * or equal to 1.
     */
    public static final OptionID K_ID = new OptionID("vov.k", "The number of nearest neighbors (not including the query point) of an object to be considered for computing its VOV score.");

    /**
     * The distance function to use.
     */
    protected Distance<? super O> distance;

    /**
     * The neighborhood size to use.
     */
    protected int k = 2;

    @Override
    public void configure(Parameterization config) {
      new ObjectParameter<Distance<? super O>>(Algorithm.Utils.DISTANCE_FUNCTION_ID, Distance.class, EuclideanDistance.class) //
          .grab(config, x -> distance = x);
      new IntParameter(K_ID) //
          .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT) //
          .grab(config, x -> k = x);
    }

    @Override
    public VarianceOfVolume<O> make() {
      return new VarianceOfVolume<>(k, distance);
    }
  }
}
