/*
 * Decompiled with CFR 0.152.
 */
package elki.clustering.trivial;

import elki.clustering.ClusteringAlgorithm;
import elki.clustering.trivial.ReferenceClustering;
import elki.data.Cluster;
import elki.data.Clustering;
import elki.data.model.ClusterModel;
import elki.data.model.Model;
import elki.data.type.NoSupportedDataTypeException;
import elki.data.type.TypeInformation;
import elki.data.type.TypeUtil;
import elki.database.Database;
import elki.database.ids.ArrayModifiableDBIDs;
import elki.database.ids.DBID;
import elki.database.ids.DBIDIter;
import elki.database.ids.DBIDRef;
import elki.database.ids.DBIDUtil;
import elki.database.ids.DBIDs;
import elki.database.ids.HashSetModifiableDBIDs;
import elki.database.ids.ModifiableDBIDs;
import elki.database.relation.Relation;
import elki.result.Metadata;
import elki.utilities.Priority;
import elki.utilities.documentation.Description;
import elki.utilities.documentation.Title;
import elki.utilities.optionhandling.OptionID;
import elki.utilities.optionhandling.Parameterizer;
import elki.utilities.optionhandling.parameterization.Parameterization;
import elki.utilities.optionhandling.parameters.Flag;
import elki.utilities.optionhandling.parameters.PatternParameter;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;

/**
 * Pseudo clustering algorithm that groups objects by a pre-assigned label.
 *
 * <p>Every distinct label string becomes one top-level cluster containing all
 * objects that carry this label. Labels that occur on at most one object are
 * not turned into clusters of their own; their objects are collected in a
 * single cluster named {@code "Noise"} that is flagged as noise. Optionally,
 * clusters whose label matches a user-supplied pattern are flagged as noise,
 * too.
 *
 * <p>In "multiple" mode, the string representation of each label is split at
 * single blanks and the object is assigned to the cluster of every resulting
 * token.
 */
@Title(value="Clustering by label")
@Description(value="Cluster points by a (pre-assigned!) label. For comparing results with a reference clustering.")
@Priority(value=-100)
public class ByLabelClustering
implements ClusteringAlgorithm<Clustering<Model>> {
    /** Whether an object may be assigned to several clusters (blank-separated labels). */
    private final boolean multiple;

    /** Pattern identifying noise labels; {@code null} disables pattern-based noise detection. */
    private final Pattern noisepattern;

    /**
     * Constructor.
     *
     * @param multiple if {@code true}, labels are split at blanks and the object
     *        is assigned to one cluster per token
     * @param noisepattern pattern to recognize noise clusters by their label,
     *        may be {@code null}
     */
    public ByLabelClustering(boolean multiple, Pattern noisepattern) {
        this.multiple = multiple;
        this.noisepattern = noisepattern;
    }

    /**
     * Constructor with default settings: single assignment, no noise pattern.
     */
    public ByLabelClustering() {
        this(false, null);
    }

    /**
     * Input type restriction of this algorithm.
     *
     * @return an array holding {@link TypeUtil#GUESSED_LABEL} as its only entry
     */
    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array(TypeUtil.GUESSED_LABEL);
    }

    /**
     * Run the algorithm on a database, choosing the label relation automatically.
     *
     * <p>A relation matching {@link TypeUtil#CLASSLABEL} is tried first; if the
     * database reports that no such relation is available, the relation matching
     * the first entry of {@link #getInputTypeRestriction()} is used instead.
     *
     * @param database database to process
     * @return clustering by label
     */
    @Override
    public Clustering<Model> autorun(Database database) {
        try {
            return this.run(database.getRelation(TypeUtil.CLASSLABEL));
        }
        catch (NoSupportedDataTypeException e) {
            // Deliberate fallback, not an error: retry with the more generic label type.
            return this.run(database.getRelation(this.getInputTypeRestriction()[0]));
        }
    }

    /**
     * Build the clustering from a label relation.
     *
     * @param relation relation whose values (via {@code toString()}) are used as labels
     * @return clustering with one top-level cluster per label that occurs on more
     *         than one object, plus a {@code "Noise"} cluster for all remaining
     *         objects (only if there are any)
     */
    public Clustering<Model> run(Relation<?> relation) {
        Map<String, DBIDs> labelMap = this.multiple ? this.multipleAssignment(relation) : this.singleAssignment(relation);
        ArrayModifiableDBIDs noiseids = DBIDUtil.newArray();
        ReferenceClustering<Model> result = new ReferenceClustering<Model>();
        Metadata.of(result).setLongName("By Label Clustering");
        for (Map.Entry<String, DBIDs> entry : labelMap.entrySet()) {
            String label = entry.getKey();
            DBIDs ids = entry.getValue();
            // Labels seen on at most one object do not form a cluster of their own.
            if (ids.size() <= 1) {
                noiseids.addDBIDs(ids);
                continue;
            }
            Cluster<Model> c = new Cluster<Model>(label, ids, ClusterModel.CLUSTER);
            // The label may be null (objects without a label are grouped under the
            // null key); a null label can never match the noise pattern.
            if (this.noisepattern != null && label != null && this.noisepattern.matcher(label).find()) {
                c.setNoise(true);
            }
            result.addToplevelCluster(c);
        }
        if (noiseids.size() > 0) {
            Cluster<Model> c = new Cluster<Model>("Noise", noiseids, ClusterModel.CLUSTER);
            c.setNoise(true);
            result.addToplevelCluster(c);
        }
        return result;
    }

    /**
     * Assign every object to exactly one label: the string representation of its
     * value in the relation. Objects without a value are grouped under the
     * {@code null} key.
     *
     * @param data label relation
     * @return map from label to the ids carrying this label
     */
    private Map<String, DBIDs> singleAssignment(Relation<?> data) {
        Map<String, DBIDs> labelMap = new HashMap<String, DBIDs>();
        for (DBIDIter iditer = data.iterDBIDs(); iditer.valid(); iditer.advance()) {
            Object val = data.get(iditer);
            this.assign(labelMap, val != null ? val.toString() : null, iditer);
        }
        return labelMap;
    }

    /**
     * Assign every object to one label per blank-separated token of the string
     * representation of its value. Objects without a value are grouped under the
     * {@code null} key, consistent with {@link #singleAssignment(Relation)}.
     *
     * @param data label relation
     * @return map from label token to the ids carrying this token
     */
    private Map<String, DBIDs> multipleAssignment(Relation<?> data) {
        Map<String, DBIDs> labelMap = new HashMap<String, DBIDs>();
        for (DBIDIter iditer = data.iterDBIDs(); iditer.valid(); iditer.advance()) {
            Object val = data.get(iditer);
            if (val == null) {
                // No label at all: nothing to split.
                this.assign(labelMap, null, iditer);
                continue;
            }
            for (String label : val.toString().split(" ")) {
                this.assign(labelMap, label, iditer);
            }
        }
        return labelMap;
    }

    /**
     * Add an id to the id set stored for a label.
     *
     * <p>The first id of a label is stored directly as a dereferenced
     * {@link DBID}; only when a second assignment arrives is it replaced by a
     * hash set holding both ids.
     *
     * @param labelMap map from label to ids, modified in place
     * @param label label to assign to, may be {@code null}
     * @param id id to add
     */
    private void assign(Map<String, DBIDs> labelMap, String label, DBIDRef id) {
        // Stored values are never null, so a null result means "label not seen yet".
        DBIDs exist = labelMap.get(label);
        if (exist == null) {
            labelMap.put(label, DBIDUtil.deref(id));
        } else if (exist instanceof DBID) {
            HashSetModifiableDBIDs n = DBIDUtil.newHashSet();
            n.add((DBID)exist);
            n.add(id);
            labelMap.put(label, n);
        } else {
            // No size assertion here: the set may hold a single id when the same
            // object was assigned to this label twice (repeated token).
            assert (exist instanceof HashSetModifiableDBIDs);
            ((ModifiableDBIDs)exist).add(id);
        }
    }

    /**
     * Parameterization class.
     */
    public static class Par
    implements Parameterizer {
        /** Flag enabling assignment of one object to multiple clusters. */
        public static final OptionID MULTIPLE_ID = new OptionID("bylabelclustering.multiple", "Flag to indicate that multiple cluster assignment is possible. If an assignment to multiple clusters is desired, the labels indicating the clusters need to be separated by blanks.");

        /** Optional pattern to recognize noise clusters by their label. */
        public static final OptionID NOISE_ID = new OptionID("bylabelclustering.noise", "Pattern to recognize noise classes by their label.");

        /** Value of the {@link #MULTIPLE_ID} flag. */
        protected boolean multiple;

        /** Value of the {@link #NOISE_ID} parameter; stays {@code null} if not given. */
        protected Pattern noisepat;

        /**
         * Read the flag and the optional noise pattern from the parameterization.
         *
         * @param config parameterization to read from
         */
        public void configure(Parameterization config) {
            new Flag(MULTIPLE_ID).grab(config, x -> {
                this.multiple = x;
            });
            new PatternParameter(NOISE_ID).setOptional(true).grab(config, x -> {
                this.noisepat = x;
            });
        }

        /**
         * Instantiate the algorithm with the configured parameters.
         *
         * @return new algorithm instance
         */
        public ByLabelClustering make() {
            return new ByLabelClustering(this.multiple, this.noisepat);
        }
    }
}

