/*
 * Decompiled with CFR 0.152.
 */
package elki.algorithm.statistics;

import elki.Algorithm;
import elki.clustering.trivial.ByLabelOrAllInOneClustering;
import elki.data.Cluster;
import elki.data.type.TypeInformation;
import elki.data.type.TypeUtil;
import elki.database.Database;
import elki.database.ids.ArrayModifiableDBIDs;
import elki.database.ids.DBID;
import elki.database.ids.DBIDArrayMIter;
import elki.database.ids.DBIDIter;
import elki.database.ids.DBIDRef;
import elki.database.ids.DBIDUtil;
import elki.database.ids.DoubleDBIDPair;
import elki.database.ids.HashSetModifiableDBIDs;
import elki.database.query.QueryBuilder;
import elki.database.query.distance.DistanceQuery;
import elki.database.relation.Relation;
import elki.distance.Distance;
import elki.distance.minkowski.EuclideanDistance;
import elki.logging.Logging;
import elki.logging.progress.AbstractProgress;
import elki.logging.progress.FiniteProgress;
import elki.logging.progress.StepProgress;
import elki.math.DoubleMinMax;
import elki.math.MeanVariance;
import elki.result.HistogramResult;
import elki.result.Metadata;
import elki.utilities.datastructures.histogram.AbstractObjDynamicHistogram;
import elki.utilities.datastructures.histogram.ObjHistogram;
import elki.utilities.documentation.Description;
import elki.utilities.documentation.Title;
import elki.utilities.exceptions.EmptyDataException;
import elki.utilities.optionhandling.OptionID;
import elki.utilities.optionhandling.Parameterizer;
import elki.utilities.optionhandling.constraints.CommonConstraints;
import elki.utilities.optionhandling.constraints.ParameterConstraint;
import elki.utilities.optionhandling.parameterization.Parameterization;
import elki.utilities.optionhandling.parameters.Flag;
import elki.utilities.optionhandling.parameters.IntParameter;
import elki.utilities.optionhandling.parameters.ObjectParameter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.TreeSet;
import net.jafama.FastMath;

@Title(value="Distance Histogram")
@Description(value="Computes a histogram over the distances occurring in the data set.")
public class DistanceStatisticsWithClasses<O>
implements Algorithm {
    private static final Logging LOG = Logging.getLogger(DistanceStatisticsWithClasses.class);
    protected Distance<? super O> distance;
    protected int numbin;
    protected boolean sampling = false;
    protected boolean exact = false;

    public DistanceStatisticsWithClasses(Distance<? super O> distance, int numbins, boolean exact, boolean sampling) {
        this.distance = distance;
        this.numbin = numbins;
        this.exact = exact;
        this.sampling = sampling;
    }

    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array((TypeInformation[])new TypeInformation[]{this.distance.getInputTypeRestriction()});
    }

    public HistogramResult run(Database database, Relation<O> relation) {
        DistanceQuery dq = new QueryBuilder(relation, this.distance).distanceQuery();
        StepProgress stepprog = LOG.isVerbose() ? new StepProgress("Distance statistics", 2) : null;
        DoubleMinMax gminmax = new DoubleMinMax();
        List split = new ByLabelOrAllInOneClustering().autorun(database).getAllClusters();
        DoubleMinMax giminmax = new DoubleMinMax();
        DoubleMinMax gominmax = new DoubleMinMax();
        MeanVariance mimin = new MeanVariance();
        MeanVariance mimax = new MeanVariance();
        MeanVariance midif = new MeanVariance();
        MeanVariance momin = new MeanVariance();
        MeanVariance momax = new MeanVariance();
        MeanVariance modif = new MeanVariance();
        LOG.beginStep(stepprog, 1, "Prepare histogram.");
        if (this.exact) {
            gminmax = this.exactMinMax(relation, dq);
        } else if (this.sampling) {
            gminmax = this.sampleMinMax(relation, dq);
        }
        Object histogram = gminmax.isValid() ? new ObjHistogram(this.numbin, gminmax.getMin(), gminmax.getMax(), () -> new long[2]) : new AbstractObjDynamicHistogram<long[]>(this.numbin){

            protected long[] downsample(Object[] data, int start, int end, int size) {
                long[] ret = new long[2];
                for (int i = start; i < end; ++i) {
                    long[] existing = (long[])data[i];
                    if (existing == null) continue;
                    for (int c = 0; c < 2; ++c) {
                        int n = c;
                        ret[n] = ret[n] + existing[c];
                    }
                }
                return ret;
            }

            protected long[] aggregate(long[] first, long[] second) {
                for (int c = 0; c < 2; ++c) {
                    int n = c;
                    first[n] = first[n] + second[c];
                }
                return first;
            }

            protected long[] cloneForCache(long[] data) {
                return (long[])data.clone();
            }

            protected long[] makeObject() {
                return new long[2];
            }
        };
        LOG.beginStep(stepprog, 2, "Build histogram.");
        FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Distance computations", relation.size(), LOG) : null;
        for (Cluster c1 : split) {
            DBIDIter id1 = c1.getIDs().iter();
            while (id1.valid()) {
                DoubleMinMax iminmax = new DoubleMinMax();
                DBIDIter iter2 = c1.getIDs().iter();
                while (iter2.valid()) {
                    if (!DBIDUtil.equal((DBIDRef)id1, (DBIDRef)iter2)) {
                        double d = dq.distance((DBIDRef)id1, (DBIDRef)iter2);
                        long[] lArray = (long[])histogram.get(d);
                        lArray[0] = lArray[0] + 1L;
                        iminmax.put(d);
                    }
                    iter2.advance();
                }
                mimin.put(iminmax.getMin());
                mimax.put(iminmax.getMax());
                midif.put(iminmax.getDiff());
                giminmax.put(iminmax.getMin());
                giminmax.put(iminmax.getMax());
                DoubleMinMax ominmax = new DoubleMinMax();
                for (Cluster c2 : split) {
                    if (c2 == c1) continue;
                    DBIDIter iter22 = c2.getIDs().iter();
                    while (iter22.valid()) {
                        if (!DBIDUtil.equal((DBIDRef)id1, (DBIDRef)iter22)) {
                            double d = dq.distance((DBIDRef)id1, (DBIDRef)iter22);
                            long[] lArray = (long[])histogram.get(d);
                            lArray[1] = lArray[1] + 1L;
                            ominmax.put(d);
                        }
                        iter22.advance();
                    }
                }
                momin.put(ominmax.getMin());
                momax.put(ominmax.getMax());
                modif.put(ominmax.getDiff());
                gominmax.put(ominmax.getMin());
                gominmax.put(ominmax.getMax());
                LOG.incrementProcessed((AbstractProgress)progress);
                id1.advance();
            }
        }
        LOG.ensureCompleted(progress);
        gminmax.put(gominmax);
        LOG.setCompleted(stepprog);
        long inum = 0L;
        long onum = 0L;
        ObjHistogram.Iter iter = histogram.iter();
        while (iter.valid()) {
            inum += ((long[])iter.getValue())[0];
            onum += ((long[])iter.getValue())[1];
            iter.advance();
        }
        long bnum = inum + onum;
        ArrayList<double[]> binstat = new ArrayList<double[]>(this.numbin);
        ObjHistogram.Iter iter2 = histogram.iter();
        while (iter2.valid()) {
            long[] value = (long[])iter2.getValue();
            double icof = inum == 0L ? 0.0 : (double)value[0] / (double)inum / histogram.getBinsize();
            double icaf = (double)value[0] / (double)bnum / histogram.getBinsize();
            double ocof = onum == 0L ? 0.0 : (double)value[1] / (double)onum / histogram.getBinsize();
            double ocaf = (double)value[1] / (double)bnum / histogram.getBinsize();
            binstat.add(new double[]{iter2.getCenter(), icof, icaf, ocof, ocaf});
            iter2.advance();
        }
        HistogramResult result = new HistogramResult(binstat);
        Metadata.of((Object)result).setLongName("Distance Histogram");
        result.addHeader("Absolute minimum distance (abs): " + gminmax.getMin());
        result.addHeader("Absolute maximum distance (abs): " + gminmax.getMax());
        result.addHeader("In-Cluster minimum distance (abs, avg, stddev): " + giminmax.getMin() + " " + mimin.getMean() + " " + mimin.getSampleStddev());
        result.addHeader("In-Cluster maximum distance (abs, avg, stddev): " + giminmax.getMax() + " " + mimax.getMean() + " " + mimax.getSampleStddev());
        result.addHeader("Other-Cluster minimum distance (abs, avg, stddev): " + gominmax.getMin() + " " + momin.getMean() + " " + momin.getSampleStddev());
        result.addHeader("Other-Cluster maximum distance (abs, avg, stddev): " + gominmax.getMax() + " " + momax.getMean() + " " + momax.getSampleStddev());
        result.addHeader("Column description: bin center, in-cluster only frequency, in-cluster all frequency, other-cluster only frequency, other cluster all frequency");
        result.addHeader("In-cluster value count: " + inum + " other cluster value count: " + onum);
        return result;
    }

    private DoubleMinMax sampleMinMax(Relation<O> relation, DistanceQuery<O> distance) {
        int size = relation.size();
        Random rnd = new Random();
        int k = (int)Math.max(25.0, FastMath.pow((double)relation.size(), (double)0.2));
        TreeSet<DoubleDBIDPair> minhotset = new TreeSet<DoubleDBIDPair>();
        TreeSet<DoubleDBIDPair> maxhotset = new TreeSet<DoubleDBIDPair>(Collections.reverseOrder());
        int randomsize = (int)Math.max(25.0, FastMath.pow((double)relation.size(), (double)0.2));
        double rprob = (double)randomsize / (double)size;
        ArrayModifiableDBIDs randomset = DBIDUtil.newArray((int)randomsize);
        DBIDIter iter = relation.iterDBIDs();
        if (!iter.valid()) {
            throw new EmptyDataException();
        }
        DBID firstid = DBIDUtil.deref((DBIDRef)iter);
        iter.advance();
        minhotset.add(DBIDUtil.newPair((double)Double.MAX_VALUE, (DBIDRef)firstid));
        maxhotset.add(DBIDUtil.newPair((double)Double.MIN_VALUE, (DBIDRef)firstid));
        while (iter.valid()) {
            ArrayList<DoubleDBIDPair> np = new ArrayList<DoubleDBIDPair>(k * 2 + randomsize * 2);
            for (DoubleDBIDPair pair : minhotset) {
                if (DBIDUtil.equal((DBIDRef)iter, (DBIDRef)pair)) continue;
                double d = distance.distance((DBIDRef)iter, (DBIDRef)pair);
                np.add(DBIDUtil.newPair((double)d, (DBIDRef)iter));
                np.add(DBIDUtil.newPair((double)d, (DBIDRef)pair));
            }
            DBIDArrayMIter iter2 = randomset.iter();
            while (iter2.valid()) {
                double d = distance.distance((DBIDRef)iter, (DBIDRef)iter2);
                np.add(DBIDUtil.newPair((double)d, (DBIDRef)iter));
                np.add(DBIDUtil.newPair((double)d, (DBIDRef)iter2));
                iter2.advance();
            }
            minhotset.addAll(np);
            DistanceStatisticsWithClasses.shrinkHeap(minhotset, k);
            ArrayList<DoubleDBIDPair> np2 = new ArrayList<DoubleDBIDPair>(k * 2 + randomsize * 2);
            for (DoubleDBIDPair pair : minhotset) {
                if (DBIDUtil.equal((DBIDRef)iter, (DBIDRef)pair)) continue;
                double d = distance.distance((DBIDRef)iter, (DBIDRef)pair);
                np2.add(DBIDUtil.newPair((double)d, (DBIDRef)iter));
                np2.add(DBIDUtil.newPair((double)d, (DBIDRef)pair));
            }
            DBIDArrayMIter iter22 = randomset.iter();
            while (iter22.valid()) {
                double d = distance.distance((DBIDRef)iter, (DBIDRef)iter22);
                np.add(DBIDUtil.newPair((double)d, (DBIDRef)iter));
                np.add(DBIDUtil.newPair((double)d, (DBIDRef)iter22));
                iter22.advance();
            }
            maxhotset.addAll(np2);
            DistanceStatisticsWithClasses.shrinkHeap(maxhotset, k);
            if (randomset.size() < randomsize) {
                randomset.add((DBIDRef)iter);
            } else if (rnd.nextDouble() < rprob) {
                randomset.set((int)Math.floor(rnd.nextDouble() * (double)randomsize), (DBIDRef)iter);
            }
            iter.advance();
        }
        return new DoubleMinMax(((DoubleDBIDPair)minhotset.first()).doubleValue(), ((DoubleDBIDPair)maxhotset.first()).doubleValue());
    }

    private DoubleMinMax exactMinMax(Relation<O> relation, DistanceQuery<O> distance) {
        FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Exact fitting distance computations", relation.size(), LOG) : null;
        DoubleMinMax minmax = new DoubleMinMax();
        DBIDIter iditer = relation.iterDBIDs();
        while (iditer.valid()) {
            DBIDIter iditer2 = relation.iterDBIDs();
            while (iditer2.valid()) {
                if (!DBIDUtil.equal((DBIDRef)iditer, (DBIDRef)iditer2)) {
                    minmax.put(distance.distance((DBIDRef)iditer, (DBIDRef)iditer2));
                }
                iditer2.advance();
            }
            LOG.incrementProcessed((AbstractProgress)progress);
            iditer.advance();
        }
        LOG.ensureCompleted(progress);
        return minmax;
    }

    private static void shrinkHeap(TreeSet<DoubleDBIDPair> hotset, int k) {
        HashSetModifiableDBIDs seenids = DBIDUtil.newHashSet((int)(2 * k));
        int cnt = 0;
        Iterator<DoubleDBIDPair> i = hotset.iterator();
        while (i.hasNext()) {
            DoubleDBIDPair p = i.next();
            if (cnt > k || seenids.contains((DBIDRef)p)) {
                i.remove();
                continue;
            }
            seenids.add((DBIDRef)p);
            ++cnt;
        }
    }

    public static class Par<O>
    implements Parameterizer {
        public static final OptionID EXACT_ID = new OptionID("diststat.exact", "In a first pass, compute the exact minimum and maximum, at the cost of O(2*n*n) instead of O(n*n). The number of resulting bins is guaranteed to be as requested.");
        public static final OptionID SAMPLING_ID = new OptionID("diststat.sampling", "Enable sampling of O(n) size to determine the minimum and maximum distances approximately. The resulting number of bins can be larger than the given n.");
        public static final OptionID HISTOGRAM_BINS_ID = new OptionID("diststat.bins", "Number of bins to use in the histogram. By default, it is only guaranteed to be within 1*n and 2*n of the given number.");
        protected Distance<? super O> distance;
        protected int numbin = 20;
        protected boolean sampling = false;
        protected boolean exact = false;

        public void configure(Parameterization config) {
            new ObjectParameter(Algorithm.Utils.DISTANCE_FUNCTION_ID, Distance.class, EuclideanDistance.class).grab(config, x -> {
                this.distance = x;
            });
            ((IntParameter)new IntParameter(HISTOGRAM_BINS_ID, 20).addConstraint((ParameterConstraint)CommonConstraints.GREATER_THAN_ONE_INT)).grab(config, x -> {
                this.numbin = x;
            });
            new Flag(EXACT_ID).grab(config, x -> {
                this.exact = x;
            });
            if (!this.exact) {
                new Flag(SAMPLING_ID).grab(config, x -> {
                    this.sampling = x;
                });
            }
        }

        public DistanceStatisticsWithClasses<O> make() {
            return new DistanceStatisticsWithClasses<O>(this.distance, this.numbin, this.exact, this.sampling);
        }
    }
}

