/*
 * Decompiled with CFR 0.152.
 */
package elki.clustering;

import elki.clustering.ClusteringAlgorithm;
import elki.data.Cluster;
import elki.data.Clustering;
import elki.data.NumberVector;
import elki.data.model.EMModel;
import elki.data.model.MeanModel;
import elki.data.type.TypeInformation;
import elki.data.type.TypeUtil;
import elki.database.ids.ArrayModifiableDBIDs;
import elki.database.ids.DBIDIter;
import elki.database.ids.DBIDRef;
import elki.database.ids.DBIDUtil;
import elki.database.ids.DBIDs;
import elki.database.ids.ModifiableDBIDs;
import elki.database.relation.Relation;
import elki.database.relation.RelationUtil;
import elki.index.tree.betula.CFTree;
import elki.index.tree.betula.features.ClusterFeature;
import elki.logging.Logging;
import elki.logging.statistics.DoubleStatistic;
import elki.logging.statistics.Statistic;
import elki.math.linearalgebra.VMath;
import elki.result.Metadata;
import elki.utilities.Priority;
import elki.utilities.documentation.Reference;
import elki.utilities.optionhandling.OptionID;
import elki.utilities.optionhandling.Parameterizer;
import elki.utilities.optionhandling.parameterization.Parameterization;
import elki.utilities.optionhandling.parameters.Flag;
import it.unimi.dsi.fastutil.objects.Reference2ObjectOpenHashMap;
import java.util.Map;

@Priority(value=-100)
@Reference(authors="Andreas Lang and Erich Schubert", title="BETULA: Fast Clustering of Large Data with Improved BIRCH CF-Trees", booktitle="Information Systems", url="https://doi.org/10.1016/j.is.2021.101918", bibkey="DBLP:journals/is/LangS22")
public class BetulaLeafPreClustering
implements ClusteringAlgorithm<Clustering<MeanModel>> {
    private static final Logging LOG = Logging.getLogger(BetulaLeafPreClustering.class);
    CFTree.Factory<?> cffactory;
    boolean storeIds = false;

    public BetulaLeafPreClustering(CFTree.Factory<?> cffactory, boolean storeIds) {
        this.cffactory = cffactory;
        this.storeIds = storeIds;
    }

    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array((TypeInformation[])new TypeInformation[]{TypeUtil.NUMBER_VECTOR_FIELD});
    }

    /*
     * WARNING - void declaration
     */
    public Clustering<MeanModel> run(Relation<NumberVector> relation) {
        int dim = RelationUtil.dimensionality(relation);
        CFTree<?> tree = this.cffactory.newTree(relation.getDBIDs(), relation, this.storeIds);
        Reference2ObjectOpenHashMap idmap = new Reference2ObjectOpenHashMap(tree.numLeaves());
        if (this.storeIds) {
            CFTree.LeafIterator<?> it = tree.leafIterator();
            while (it.valid()) {
                idmap.put(it.get(), tree.getDBIDs((ClusterFeature)it.get()));
                it.advance();
            }
        } else {
            DBIDIter iter = relation.iterDBIDs();
            while (iter.valid()) {
                void var7_7;
                Object cf = tree.findLeaf((NumberVector)relation.get((DBIDRef)iter));
                ModifiableDBIDs modifiableDBIDs = (ModifiableDBIDs)idmap.get(cf);
                if (modifiableDBIDs == null) {
                    ArrayModifiableDBIDs arrayModifiableDBIDs = DBIDUtil.newArray((int)cf.getWeight());
                    idmap.put(cf, arrayModifiableDBIDs);
                }
                var7_7.add((DBIDRef)iter);
                iter.advance();
            }
        }
        Clustering<MeanModel> result = new Clustering<MeanModel>();
        for (Map.Entry entry : idmap.entrySet()) {
            ClusterFeature leaf = (ClusterFeature)entry.getKey();
            double[] center = leaf.toArray();
            double[] variance = new double[dim];
            for (int i = 0; i < dim; ++i) {
                variance[i] = leaf.variance(i);
            }
            result.addToplevelCluster(new Cluster<EMModel>((DBIDs)entry.getValue(), new EMModel(center, VMath.diagonal((double[])variance))));
        }
        DoubleStatistic varstat = new DoubleStatistic(this.getClass().getName() + ".varsum");
        double d = 0.0;
        CFTree.LeafIterator<?> iter = tree.leafIterator();
        while (iter.valid()) {
            d += iter.get().sumdev();
            iter.advance();
        }
        LOG.statistics((Statistic)varstat.setDouble(d));
        Metadata.of(result).setLongName("BETULA Leaf Nodes");
        return result;
    }

    public static class Par
    implements Parameterizer {
        public static final OptionID STORE_IDS_ID = new OptionID("betula.storeids", "Store IDs when building the tree, and use when assigning to leaves.");
        CFTree.Factory<?> cffactory;
        boolean storeIds = false;

        public void configure(Parameterization config) {
            this.cffactory = (CFTree.Factory)config.tryInstantiate(CFTree.Factory.class);
            new Flag(STORE_IDS_ID).grab(config, x -> {
                this.storeIds = x;
            });
        }

        public BetulaLeafPreClustering make() {
            return new BetulaLeafPreClustering(this.cffactory, this.storeIds);
        }
    }
}

