/*
 * Decompiled with CFR 0.152.
 */
package elki.clustering.hierarchical.extraction;

import elki.Algorithm;
import elki.clustering.ClusteringAlgorithm;
import elki.clustering.hierarchical.ClusterMergeHistory;
import elki.clustering.hierarchical.HierarchicalClusteringAlgorithm;
import elki.data.Cluster;
import elki.data.Clustering;
import elki.data.model.Model;
import elki.data.type.TypeInformation;
import elki.database.Database;
import elki.database.ids.ArrayModifiableDBIDs;
import elki.database.ids.DBIDRef;
import elki.database.ids.DBIDUtil;
import elki.database.ids.DBIDVar;
import elki.database.ids.DBIDs;
import elki.database.ids.ModifiableDBIDs;
import elki.logging.Logging;
import elki.logging.progress.AbstractProgress;
import elki.logging.progress.FiniteProgress;
import elki.result.Metadata;
import elki.utilities.Priority;
import elki.utilities.documentation.Reference;
import elki.utilities.optionhandling.OptionID;
import elki.utilities.optionhandling.Parameterizer;
import elki.utilities.optionhandling.constraints.CommonConstraints;
import elki.utilities.optionhandling.constraints.ParameterConstraint;
import elki.utilities.optionhandling.parameterization.Parameterization;
import elki.utilities.optionhandling.parameters.IntParameter;
import elki.utilities.optionhandling.parameters.ObjectParameter;
import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap;
import java.util.ArrayList;

@Reference(authors="Erich Schubert, Michael Gertz", title="Semantic Word Clouds with Background Corpus Normalization and t-distributed Stochastic Neighbor Embedding", booktitle="ArXiV preprint, 1708.03569", url="http://arxiv.org/abs/1708.03569", bibkey="DBLP:journals/corr/abs-1708-03569")
@Priority(value=206)
public class ClustersWithNoiseExtraction
implements ClusteringAlgorithm<Clustering<Model>> {
    private static final Logging LOG = Logging.getLogger(ClustersWithNoiseExtraction.class);
    private int numCl = 1;
    private int minClSize = 1;
    private HierarchicalClusteringAlgorithm algorithm;

    public ClustersWithNoiseExtraction(HierarchicalClusteringAlgorithm algorithm, int numCl, int minClSize) {
        this.algorithm = algorithm;
        this.numCl = numCl;
        this.minClSize = minClSize;
    }

    @Override
    public Clustering<Model> autorun(Database database) {
        return this.run(this.algorithm.autorun(database));
    }

    public Clustering<Model> run(ClusterMergeHistory merges) {
        Clustering<Model> result = new Instance(merges).run();
        Metadata.hierarchyOf(result).addChild((Object)merges);
        return result;
    }

    public TypeInformation[] getInputTypeRestriction() {
        return this.algorithm.getInputTypeRestriction();
    }

    public static class Par
    implements Parameterizer {
        public static final OptionID K_ID = new OptionID("extract.k", "The number of clusters to extract.");
        public static final OptionID MINCLUSTERSIZE_ID = new OptionID("extract.minclsize", "The minimum cluster size.");
        int numCl = 1;
        int minClSize = 1;
        HierarchicalClusteringAlgorithm algorithm;

        public void configure(Parameterization config) {
            new ObjectParameter(Algorithm.Utils.ALGORITHM_ID, HierarchicalClusteringAlgorithm.class).grab(config, x -> {
                this.algorithm = x;
            });
            ((IntParameter)new IntParameter(K_ID).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT)).grab(config, x -> {
                this.numCl = x;
            });
            ((IntParameter)new IntParameter(MINCLUSTERSIZE_ID).addConstraint((ParameterConstraint)CommonConstraints.GREATER_EQUAL_ONE_INT)).grab(config, x -> {
                this.minClSize = x;
            });
        }

        public ClustersWithNoiseExtraction make() {
            return new ClustersWithNoiseExtraction(this.algorithm, this.numCl, this.minClSize);
        }
    }

    protected class Instance {
        protected ClusterMergeHistory merges;

        public Instance(ClusterMergeHistory merges) {
            this.merges = merges;
        }

        public Clustering<Model> run() {
            int n = this.merges.size();
            int curGood = 0;
            int bestCl = n;
            int bestOff = n - bestCl - 1;
            FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Finding best threshold", this.merges.numMerges(), LOG) : null;
            int m = this.merges.numMerges();
            for (int i = 0; i < m; ++i) {
                int a = this.merges.getMergeA(i);
                int b = this.merges.getMergeB(i);
                int sa = a < n ? 1 : this.merges.getSize(a - n);
                int sb = b < n ? 1 : this.merges.getSize(b - n);
                int sc = this.merges.getSize(i);
                if ((curGood += (sa >= ClustersWithNoiseExtraction.this.minClSize ? -1 : 0) + (sb >= ClustersWithNoiseExtraction.this.minClSize ? -1 : 0) + (sc >= ClustersWithNoiseExtraction.this.minClSize ? 1 : 0)) == ClustersWithNoiseExtraction.this.numCl || Math.abs(curGood - ClustersWithNoiseExtraction.this.numCl) < Math.abs(bestCl - ClustersWithNoiseExtraction.this.numCl)) {
                    bestCl = curGood;
                    bestOff = i;
                }
                LOG.incrementProcessed((AbstractProgress)progress);
            }
            LOG.ensureCompleted(progress);
            if (bestCl != ClustersWithNoiseExtraction.this.numCl) {
                LOG.warning((CharSequence)("Could not find a result with exactly " + ClustersWithNoiseExtraction.this.numCl + " clusters (+ noise), generating " + bestCl + " clusters instead."));
            }
            progress = LOG.isVerbose() ? new FiniteProgress("Performing cluster merges", bestOff, LOG) : null;
            Int2IntOpenHashMap leafMap = new Int2IntOpenHashMap(this.merges.size());
            leafMap.defaultReturnValue(-1);
            ArrayList<ArrayModifiableDBIDs> clusterMembers = new ArrayList<ArrayModifiableDBIDs>(this.merges.size() - bestCl + 2);
            clusterMembers.add(DBIDUtil.newArray());
            DBIDVar tmp = DBIDUtil.newVar();
            for (int i = bestOff; i >= 0; --i) {
                int a = this.merges.getMergeA(i);
                int b = this.merges.getMergeB(i);
                int c = leafMap.remove(i + n);
                ArrayModifiableDBIDs ci = null;
                if (c < 0 && this.merges.getSize(i) < ClustersWithNoiseExtraction.this.minClSize) {
                    c = 0;
                }
                if (c < 0) {
                    c = clusterMembers.size();
                    ci = DBIDUtil.newArray();
                    clusterMembers.add(ci);
                } else {
                    ci = (ModifiableDBIDs)clusterMembers.get(c);
                }
                if (b < n) {
                    ci.add((DBIDRef)this.merges.assignVar(b, tmp));
                } else {
                    leafMap.put(b, c);
                }
                if (a < n) {
                    ci.add((DBIDRef)this.merges.assignVar(a, tmp));
                } else {
                    leafMap.put(a, c);
                }
                LOG.incrementProcessed((AbstractProgress)progress);
            }
            LOG.ensureCompleted(progress);
            ArrayList toplevel = new ArrayList(bestCl + 1);
            if (!((ModifiableDBIDs)clusterMembers.get(0)).isEmpty()) {
                toplevel.add(new Cluster("Noise", (DBIDs)clusterMembers.get(0), true));
            }
            for (int i = 1; i < clusterMembers.size(); ++i) {
                toplevel.add(new Cluster((DBIDs)clusterMembers.get(i)));
            }
            Clustering<Model> dendrogram = new Clustering<Model>(toplevel);
            Metadata.of(dendrogram).setLongName("Hierarchical Clustering");
            return dendrogram;
        }
    }
}

