/*
 * Decompiled with CFR 0.152.
 */
package elki.clustering;

import elki.Algorithm;
import elki.clustering.ClusteringAlgorithm;
import elki.data.Cluster;
import elki.data.Clustering;
import elki.data.model.PrototypeModel;
import elki.data.model.SimplePrototypeModel;
import elki.data.type.TypeInformation;
import elki.data.type.TypeUtil;
import elki.database.ids.ArrayModifiableDBIDs;
import elki.database.ids.DBIDMIter;
import elki.database.ids.DBIDRef;
import elki.database.ids.DBIDUtil;
import elki.database.ids.DBIDVar;
import elki.database.ids.DBIDs;
import elki.database.ids.HashSetModifiableDBIDs;
import elki.database.query.QueryBuilder;
import elki.database.query.distance.DistanceQuery;
import elki.database.relation.Relation;
import elki.distance.Distance;
import elki.distance.minkowski.EuclideanDistance;
import elki.logging.Logging;
import elki.logging.progress.FiniteProgress;
import elki.result.Metadata;
import elki.utilities.documentation.Reference;
import elki.utilities.exceptions.AbortException;
import elki.utilities.optionhandling.OptionID;
import elki.utilities.optionhandling.ParameterException;
import elki.utilities.optionhandling.Parameterizer;
import elki.utilities.optionhandling.WrongParameterValueException;
import elki.utilities.optionhandling.parameterization.Parameterization;
import elki.utilities.optionhandling.parameters.DoubleParameter;
import elki.utilities.optionhandling.parameters.ObjectParameter;
import elki.utilities.optionhandling.parameters.Parameter;
import java.util.ArrayList;

@Reference(authors="A. McCallum, K. Nigam, L. H. Ungar", title="Efficient Clustering of High Dimensional Data Sets with Application to Reference Matching", booktitle="Proc. 6th ACM SIGKDD Int. Conf. on Knowledge Discovery and Data Mining", url="https://doi.org/10.1145/347090.347123", bibkey="DBLP:conf/kdd/McCallumNU00")
public class CanopyPreClustering<O>
implements ClusteringAlgorithm<Clustering<PrototypeModel<O>>> {
    private static final Logging LOG = Logging.getLogger(CanopyPreClustering.class);
    private Distance<? super O> distance;
    private double t1;
    private double t2;

    public CanopyPreClustering(Distance<? super O> distance, double t1, double t2) {
        this.distance = distance;
        this.t1 = t1;
        this.t2 = t2;
    }

    public Clustering<PrototypeModel<O>> run(Relation<O> relation) {
        if (!(this.t1 >= this.t2)) {
            throw new AbortException("T1 must be at least as large as T2.");
        }
        DistanceQuery dq = new QueryBuilder(relation, this.distance).distanceQuery();
        HashSetModifiableDBIDs ids = DBIDUtil.newHashSet((DBIDs)relation.getDBIDs());
        ArrayList clusters = new ArrayList();
        int size = relation.size();
        FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Canopy clustering", size, LOG) : null;
        DBIDVar first = DBIDUtil.newVar();
        while (!ids.isEmpty()) {
            ids.pop(first);
            ArrayModifiableDBIDs cids = DBIDUtil.newArray();
            cids.add((DBIDRef)first);
            DBIDMIter iter = ids.iter();
            while (iter.valid()) {
                double dist = dq.distance((DBIDRef)first, (DBIDRef)iter);
                if (!(dist > this.t1)) {
                    cids.add((DBIDRef)iter);
                    if (dist <= this.t2) {
                        iter.remove();
                    }
                }
                iter.advance();
            }
            clusters.add(new Cluster<SimplePrototypeModel<Object>>((DBIDs)cids, new SimplePrototypeModel<Object>(relation.get((DBIDRef)first))));
            if (prog == null) continue;
            prog.setProcessed(size - ids.size(), LOG);
        }
        LOG.ensureCompleted(prog);
        Clustering<PrototypeModel<O>> clustering = new Clustering<PrototypeModel<O>>(clusters);
        Metadata.of(clustering).setLongName("Canopy clustering");
        return clustering;
    }

    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array((TypeInformation[])new TypeInformation[]{this.distance.getInputTypeRestriction()});
    }

    public static class Par<O>
    implements Parameterizer {
        public static final OptionID T1_ID = new OptionID("canopy.t1", "Inclusion threshold for canopy clustering. t1 >= t2!");
        public static final OptionID T2_ID = new OptionID("canopy.t2", "Removal threshold for canopy clustering. t1 >= t2!");
        private double t1;
        private double t2;
        protected Distance<? super O> distance;

        public void configure(Parameterization config) {
            new ObjectParameter(Algorithm.Utils.DISTANCE_FUNCTION_ID, Distance.class, EuclideanDistance.class).grab(config, x -> {
                this.distance = x;
            });
            DoubleParameter t1P = new DoubleParameter(T1_ID);
            t1P.grab(config, x -> {
                this.t1 = x;
            });
            DoubleParameter t2P = new DoubleParameter(T2_ID);
            t2P.grab(config, x -> {
                this.t2 = x;
            });
            if (this.t1 < this.t2) {
                config.reportError((ParameterException)new WrongParameterValueException((Parameter)t1P, "must be larger than", (Parameter)t2P, ""));
            }
        }

        public CanopyPreClustering<O> make() {
            return new CanopyPreClustering<O>(this.distance, this.t1, this.t2);
        }
    }
}

