/*
 * Decompiled with CFR 0.152.
 */
package elki.clustering.kmedoids;

import elki.clustering.kmedoids.PAM;
import elki.clustering.kmedoids.initialization.KMedoidsInitialization;
import elki.data.Clustering;
import elki.data.model.MedoidModel;
import elki.database.datastore.DataStoreUtil;
import elki.database.datastore.WritableIntegerDataStore;
import elki.database.ids.ArrayDBIDs;
import elki.database.ids.ArrayModifiableDBIDs;
import elki.database.ids.DBIDArrayIter;
import elki.database.ids.DBIDIter;
import elki.database.ids.DBIDMIter;
import elki.database.ids.DBIDRef;
import elki.database.ids.DBIDUtil;
import elki.database.ids.DBIDs;
import elki.database.ids.HashSetModifiableDBIDs;
import elki.database.query.QueryBuilder;
import elki.database.query.distance.DistanceQuery;
import elki.database.relation.Relation;
import elki.distance.Distance;
import elki.logging.Logging;
import elki.logging.progress.AbstractProgress;
import elki.logging.progress.FiniteProgress;
import elki.logging.statistics.DoubleStatistic;
import elki.logging.statistics.Statistic;
import elki.utilities.documentation.Reference;
import elki.utilities.documentation.References;
import elki.utilities.optionhandling.OptionID;
import elki.utilities.optionhandling.constraints.CommonConstraints;
import elki.utilities.optionhandling.constraints.ParameterConstraint;
import elki.utilities.optionhandling.parameterization.Parameterization;
import elki.utilities.optionhandling.parameters.DoubleParameter;
import elki.utilities.optionhandling.parameters.Flag;
import elki.utilities.optionhandling.parameters.IntParameter;
import elki.utilities.optionhandling.parameters.RandomParameter;
import elki.utilities.random.RandomFactory;
import it.unimi.dsi.fastutil.longs.Long2DoubleOpenHashMap;
import java.util.Random;

@References(value={@Reference(authors="L. Kaufman, P. J. Rousseeuw", title="Clustering Large Data Sets", booktitle="Pattern Recognition in Practice", url="https://doi.org/10.1016/B978-0-444-87877-9.50039-X", bibkey="doi:10.1016/B978-0-444-87877-9.50039-X"), @Reference(authors="L. Kaufman, P. J. Rousseeuw", title="Clustering Large Applications (Program CLARA)", booktitle="Finding Groups in Data: An Introduction to Cluster Analysis", url="https://doi.org/10.1002/9780470316801.ch3", bibkey="doi:10.1002/9780470316801.ch3")})
public class CLARA<V>
extends PAM<V> {
    private static final Logging LOG = Logging.getLogger(CLARA.class);
    double sampling;
    int numsamples;
    boolean keepmed;
    RandomFactory random;

    /**
     * Constructor.
     *
     * @param distance distance function, forwarded to the PAM superclass
     * @param k number of clusters, forwarded to the PAM superclass
     * @param maxiter maximum number of iterations, forwarded to the PAM superclass
     * @param initializer medoid initialization, forwarded to the PAM superclass
     * @param numsamples number of random samples to process
     * @param sampling sample size; values up to 1.0 are used as a fraction of the
     *        data set size, larger values as an absolute number of objects
     * @param keepmed whether the best medoids found so far are added to the
     *        following samples
     * @param random factory for the random generator used for sampling
     */
    public CLARA(Distance<? super V> distance, int k, int maxiter, KMedoidsInitialization<V> initializer, int numsamples, double sampling, boolean keepmed, RandomFactory random) {
        super(distance, k, maxiter, initializer);
        // Sampling configuration.
        this.sampling = sampling;
        this.numsamples = numsamples;
        this.keepmed = keepmed;
        // Source of randomness for drawing the samples.
        this.random = random;
    }

    /**
     * Run CLARA on a relation, using the configured number of clusters and a
     * distance query built from the configured distance function.
     *
     * @param relation relation to cluster
     * @return clustering result
     */
    @Override
    public Clustering<MedoidModel> run(Relation<V> relation) {
        DistanceQuery<? super V> query = (DistanceQuery<? super V>)new QueryBuilder(relation, this.distance).distanceQuery();
        return this.run(relation, this.k, query);
    }

    /**
     * Run CLARA with an explicit number of clusters and distance query.
     * <p>
     * Draws {@code numsamples} random samples. For each sample, PAM is run on
     * the sample through a caching distance query, every object outside the
     * sample is assigned to its nearest medoid, and the medoids and assignment
     * with the lowest score (PAM result plus the distance sum of the remaining
     * objects) are kept.
     *
     * @param relation relation to cluster
     * @param k number of medoids to choose
     * @param distQ distance query; wrapped by a cache for the sample and used
     *        directly for assigning the remaining objects
     * @return clustering built from the best sample
     * @throws IllegalStateException if no sample was processed
     */
    @Override
    public Clustering<MedoidModel> run(Relation<V> relation, int k, DistanceQuery<? super V> distQ) {
        DBIDs ids = relation.getDBIDs();
        // Values up to 1.0 are a fraction of the data set, larger values an absolute count.
        int samplesize = Math.min(ids.size(), (int)(this.sampling <= 1.0 ? this.sampling * (double)ids.size() : this.sampling));
        if (samplesize < 3 * k) {
            LOG.warning((CharSequence)"The sampling size is set to a very small value, it should be much larger than k.");
        }
        // Number of unordered pairs in the sample. This must be computed in
        // long: the int product samplesize * (samplesize - 1) overflows for
        // sample sizes above 46341 and would yield a negative size hint.
        long pairs = (long)samplesize * (long)(samplesize - 1) / 2L;
        // NOTE(review): the cap on the pre-sizing hint is a judgement call; the
        // value is only passed on as the expected size of the cache map.
        final int maxCacheHint = 1 << 24;
        int cachesize = (int)Math.min(pairs, (long)maxCacheHint);
        CachedDistanceQuery<V> cachedQ = new CachedDistanceQuery<V>(distQ, cachesize);
        double best = Double.POSITIVE_INFINITY;
        ArrayModifiableDBIDs bestmedoids = null;
        WritableIntegerDataStore bestclusters = null;
        Random rnd = this.random.getSingleThreadedRandom();
        FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Processing random samples", this.numsamples, LOG) : null;
        for (int j = 0; j < this.numsamples; ++j) {
            // With keepmed, the best medoids so far are forced into the new sample.
            DBIDs rids = CLARA.randomSample(ids, samplesize, rnd, this.keepmed ? bestmedoids : null);
            // The cache is only valid for one sample.
            cachedQ.clear();
            ArrayModifiableDBIDs medoids = DBIDUtil.newArray((DBIDs)this.initializer.chooseInitialMedoids(k, rids, cachedQ));
            WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage((DBIDs)ids, (int)3, (int)-1);
            // PAM on the sample (cached distances), then the remaining objects (uncached).
            double score = new PAM.Instance(cachedQ, rids, assignment).run(medoids, this.maxiter) + CLARA.assignRemainingToNearestCluster((ArrayDBIDs)medoids, ids, rids, assignment, distQ);
            if (LOG.isStatistics()) {
                LOG.statistics((Statistic)new DoubleStatistic(this.getClass().getName() + ".sample-" + j + ".cost", score));
            }
            if (score < best) {
                best = score;
                bestmedoids = medoids;
                bestclusters = assignment;
            }
            if (cachedQ.hasUncachedQueries()) {
                LOG.warning((CharSequence)"Some distance queries were not cached; maybe the initialization is not optimized for k-medoids.");
            }
            LOG.incrementProcessed((AbstractProgress)prog);
        }
        LOG.ensureCompleted(prog);
        if (LOG.isStatistics()) {
            LOG.statistics((Statistic)new DoubleStatistic(this.getClass().getName() + ".final-cost", best));
        }
        // No sample improved on the initial infinite cost (e.g. numsamples <= 0).
        if (bestmedoids == null) {
            throw new IllegalStateException("numsamples must be larger than 0.");
        }
        return CLARA.wrapResult(ids, bestclusters, bestmedoids, "CLARA Clustering");
    }

    /**
     * Draw a random sample of the desired size.
     * <p>
     * If {@code previous} is given, its members are always part of the result
     * and the sample is filled up with randomly drawn objects.
     *
     * @param ids objects to sample from
     * @param samplesize desired sample size
     * @param rnd random generator
     * @param previous objects that must be contained in the sample, or
     *        {@code null} to draw a plain random sample
     * @return the sample
     */
    static DBIDs randomSample(DBIDs ids, int samplesize, Random rnd, DBIDs previous) {
        if (previous != null) {
            HashSetModifiableDBIDs chosen = DBIDUtil.newHashSet(samplesize);
            chosen.addDBIDs(previous);
            int missing = samplesize - previous.size();
            chosen.addDBIDs(DBIDUtil.randomSample(ids, missing, rnd));
            // The random draw may overlap with the previous objects, leaving
            // the set short; top it up from a second, full-sized draw.
            if (chosen.size() < samplesize) {
                for (DBIDMIter extra = DBIDUtil.randomSample(ids, samplesize, rnd).iter(); chosen.size() < samplesize && extra.valid(); extra.advance()) {
                    chosen.add(extra);
                }
            }
            return chosen;
        }
        return DBIDUtil.randomSample(ids, samplesize, rnd);
    }

    /**
     * Assign every object that is not part of the sample to its nearest medoid.
     * <p>
     * The objects to process are taken from {@code ids}, i.e. the same id set
     * the caller manages the assignment for, rather than from the relation of
     * the distance query.
     *
     * @param means medoids; the stored cluster number is the position in this array
     * @param ids all object ids to consider
     * @param rids sampled objects, which are skipped
     * @param assignment cluster assignment to write to
     * @param distQ distance query
     * @return sum of the distances of the processed objects to their nearest
     *         medoid (if {@code means} is empty, each processed object is
     *         assigned index 0 and contributes positive infinity)
     */
    protected static double assignRemainingToNearestCluster(ArrayDBIDs means, DBIDs ids, DBIDs rids, WritableIntegerDataStore assignment, DistanceQuery<?> distQ) {
        // Convert to a set representation for the membership tests below.
        DBIDs sampled = DBIDUtil.ensureSet(rids);
        double distsum = 0.0;
        DBIDArrayIter miter = means.iter();
        for (DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) {
            if (sampled.contains(iditer)) {
                continue;
            }
            double mindist = Double.POSITIVE_INFINITY;
            int minIndex = 0;
            int i = 0;
            // Linear scan over all medoids, reusing the same array iterator.
            for (miter.seek(0); miter.valid(); miter.advance()) {
                double dist = distQ.distance(iditer, miter);
                if (dist < mindist) {
                    minIndex = i;
                    mindist = dist;
                }
                ++i;
            }
            distsum += mindist;
            assignment.put(iditer, minIndex);
        }
        return distsum;
    }

    /**
     * Parameterization class for CLARA; extends the PAM parameters with the
     * sampling options.
     */
    public static class Par<V>
    extends PAM.Par<V> {
        /** Option for the number of samples (iterations) to run. */
        public static final OptionID NUMSAMPLES_ID = new OptionID("clara.samples", "Number of samples (iterations) to run.");
        /** Option for the size of each sample. */
        public static final OptionID SAMPLESIZE_ID = new OptionID("clara.samplesize", "The size of the sample.");
        /** Flag to draw independent samples instead of keeping the previous best medoids. */
        public static final OptionID NOKEEPMED_ID = new OptionID("clara.independent", "Draw independent samples (default is to keep the previous best medoids in the sample).");
        /** Option for the random generator. */
        public static final OptionID RANDOM_ID = new OptionID("clara.random", "Random generator seed.");
        /** Sample size as given by the user. */
        double sampling;
        /** Number of samples to draw. */
        int numsamples;
        /** Whether the previous best medoids are kept in the next sample. */
        boolean keepmed;
        /** Random generator factory. */
        RandomFactory random;

        /**
         * Read the PAM options first, then the CLARA specific ones.
         *
         * @param config parameterization to read from
         */
        @Override
        public void configure(Parameterization config) {
            super.configure(config);

            // Number of samples: at least one, default 5.
            IntParameter samplesParam = (IntParameter)new IntParameter(NUMSAMPLES_ID, 5).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT);
            samplesParam.grab(config, x -> this.numsamples = x);

            // Sample size: positive, default depends on k (already read by the superclass).
            double defaultSize = (double)(40 + 2 * this.k);
            DoubleParameter sizeParam = (DoubleParameter)new DoubleParameter(SAMPLESIZE_ID, defaultSize).addConstraint((ParameterConstraint)CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
            sizeParam.grab(config, x -> this.sampling = x);

            // The flag is only offered when more than one sample is drawn.
            if (this.numsamples > 1) {
                Flag independentFlag = new Flag(NOKEEPMED_ID);
                independentFlag.grab(config, x -> this.keepmed = !x);
            }

            RandomParameter randomParam = new RandomParameter(RANDOM_ID);
            randomParam.grab(config, x -> this.random = x);
        }

        /**
         * Instantiate the algorithm from the collected parameters.
         *
         * @return new CLARA instance
         */
        @Override
        public CLARA<V> make() {
            return new CLARA(this.distance, this.k, this.maxiter, this.initializer, this.numsamples, this.sampling, this.keepmed, this.random);
        }
    }

    /**
     * Distance query wrapper that caches id-to-id distances in a hash map.
     * <p>
     * Only {@link #distance(DBIDRef, DBIDRef)} is cached. The object-based
     * variants are forwarded to the inner query and counted, so that callers
     * can detect that the cache was bypassed.
     */
    protected static class CachedDistanceQuery<V>
    implements DistanceQuery<V> {
        /** Wrapped distance query that computes the actual distances. */
        DistanceQuery<V> inner;
        /** Cache from packed id pair to distance; NaN marks a missing entry. */
        Long2DoubleOpenHashMap cache;
        /** Number of queries that bypassed the cache since the last {@link #clear()}. */
        int bad;

        /**
         * Constructor.
         *
         * @param inner distance query to wrap
         * @param size size passed to the constructor of the cache map
         */
        public CachedDistanceQuery(DistanceQuery<V> inner, int size) {
            this.inner = inner;
            this.cache = new Long2DoubleOpenHashMap(size);
            // NaN is returned for keys that are not in the cache.
            this.cache.defaultReturnValue(Double.NaN);
        }

        /**
         * Check whether any query bypassed the cache since the last clear.
         *
         * @return {@code true} if at least one uncached query was counted
         */
        public boolean hasUncachedQueries() {
            return this.bad > 0;
        }

        /**
         * Drop all cached distances and reset the counter of uncached queries.
         */
        public void clear() {
            this.cache.clear();
            this.bad = 0;
        }

        /**
         * Cached distance between two objects given by id.
         * <p>
         * Identical ids yield 0 without querying. Otherwise the pair is put
         * into the order given by {@code DBIDUtil.compare} (smaller first), so
         * that both argument orders share one cache entry. A NaN result of the
         * inner query cannot be told apart from a missing entry and is
         * therefore recomputed on every call.
         *
         * @param id1 first object
         * @param id2 second object
         * @return distance
         */
        public double distance(DBIDRef id1, DBIDRef id2) {
            if (DBIDUtil.equal(id1, id2)) {
                return 0.0;
            }
            if (DBIDUtil.compare(id1, id2) > 0) {
                DBIDRef swap = id1;
                id1 = id2;
                id2 = swap;
            }
            // Pack both indexes into one long key. The low word is masked so
            // that a negative index cannot sign-extend into the high word and
            // make different pairs collide.
            long high = (long)id1.internalGetIndex() << 32;
            long low = (long)id2.internalGetIndex() & 0xFFFFFFFFL;
            long idx = high | low;
            double v = this.cache.get(idx);
            if (Double.isNaN(v)) {
                v = this.inner.distance(id1, id2);
                this.cache.put(idx, v);
            }
            return v;
        }

        /**
         * Uncached distance from an object to an id; counted as a cache bypass.
         *
         * @param o1 first object
         * @param id2 second object id
         * @return distance from the inner query
         */
        public double distance(V o1, DBIDRef id2) {
            ++this.bad;
            return this.inner.distance(o1, id2);
        }

        /**
         * Uncached distance from an id to an object; counted as a cache bypass.
         *
         * @param id1 first object id
         * @param o2 second object
         * @return distance from the inner query
         */
        public double distance(DBIDRef id1, V o2) {
            ++this.bad;
            return this.inner.distance(id1, o2);
        }

        /**
         * Uncached distance between two objects; counted as a cache bypass.
         *
         * @param o1 first object
         * @param o2 second object
         * @return distance from the inner query
         */
        public double distance(V o1, V o2) {
            ++this.bad;
            return this.inner.distance(o1, o2);
        }

        /**
         * Distance function of the wrapped query.
         *
         * @return distance function
         */
        public Distance<? super V> getDistance() {
            return this.inner.getDistance();
        }

        /**
         * Relation of the wrapped query.
         *
         * @return relation
         */
        public Relation<? extends V> getRelation() {
            return this.inner.getRelation();
        }
    }
}

