/*
 * Decompiled with CFR 0.152.
 */
package elki.outlier.clustering;

import elki.clustering.em.EM;
import elki.clustering.em.models.EMClusterModelFactory;
import elki.clustering.em.models.MultivariateGaussianModelFactory;
import elki.data.NumberVector;
import elki.data.type.TypeInformation;
import elki.data.type.TypeUtil;
import elki.database.datastore.DataStoreUtil;
import elki.database.datastore.DoubleDataStore;
import elki.database.datastore.WritableDataStore;
import elki.database.datastore.WritableDoubleDataStore;
import elki.database.ids.DBIDIter;
import elki.database.ids.DBIDRef;
import elki.database.ids.DBIDs;
import elki.database.relation.DoubleRelation;
import elki.database.relation.MaterializedDoubleRelation;
import elki.database.relation.Relation;
import elki.logging.Logging;
import elki.logging.statistics.DoubleStatistic;
import elki.logging.statistics.LongStatistic;
import elki.logging.statistics.Statistic;
import elki.math.DoubleMinMax;
import elki.outlier.OutlierAlgorithm;
import elki.result.outlier.InvertedOutlierScoreMeta;
import elki.result.outlier.OutlierResult;
import elki.result.outlier.OutlierScoreMeta;
import elki.utilities.documentation.Description;
import elki.utilities.documentation.Title;
import elki.utilities.optionhandling.Parameterizer;
import elki.utilities.optionhandling.constraints.CommonConstraints;
import elki.utilities.optionhandling.constraints.ParameterConstraint;
import elki.utilities.optionhandling.parameterization.Parameterization;
import elki.utilities.optionhandling.parameters.DoubleParameter;
import elki.utilities.optionhandling.parameters.IntParameter;
import elki.utilities.optionhandling.parameters.ObjectParameter;
import java.util.List;

/**
 * Outlier detection built on the EM clustering helpers of {@link EM}.
 * <p>
 * {@link #run(Relation)} asks the configured model factory for {@link #k}
 * initial models, then alternates {@link EM#recomputeCovarianceMatrices} and
 * {@link EM#assignProbabilitiesToInstances} until a stopping rule fires. The
 * per-object values that the assignment step writes into the
 * {@code loglikelihoods} store are returned as the outlier scores, wrapped in
 * an {@link InvertedOutlierScoreMeta}.
 *
 * @param <V> vector type of the input relation
 */
@Title("EM Outlier: Outlier Detection based on the generic EM clustering")
@Description("The outlier score assigned is based on the highest cluster probability obtained from EM clustering.")
public class EMOutlier<V extends NumberVector>
implements OutlierAlgorithm {
    /**
     * Class logger. Bound to this class (not to {@link EM}) so that the
     * statistics emitted here are controlled by this algorithm's own logger.
     */
    private static final Logging LOG = Logging.getLogger(EMOutlier.class);

    /** Number of initial models requested from the model factory. */
    protected int k;

    /** Threshold on log-likelihood changes used by the stopping rule. */
    protected double delta;

    /** Factory producing the initial cluster models. */
    protected EMClusterModelFactory<? super V, ?> mfactory;

    /** Iteration count below which the stopping rule is not evaluated. */
    protected int miniter;

    /** Bound on the iteration counter; a negative value disables the bound. */
    protected int maxiter;

    /** Prior value forwarded to {@link EM#recomputeCovarianceMatrices}. */
    protected double prior = 0.0;

    /** Not referenced anywhere in this class; kept for existing users of the constant. */
    protected static final double MIN_LOGLIKELIHOOD = -100000.0;

    /**
     * Constructor.
     *
     * @param k number of initial models to build
     * @param delta threshold on log-likelihood changes for the stopping rule
     * @param mfactory factory producing the initial cluster models
     * @param miniter iteration count below which the loop never stops early
     * @param maxiter bound on the iteration counter; negative for no bound
     * @param prior prior value forwarded to the covariance re-estimation step
     */
    public EMOutlier(int k, double delta, EMClusterModelFactory<? super V, ?> mfactory, int miniter, int maxiter, double prior) {
        this.k = k;
        this.delta = delta;
        this.mfactory = mfactory;
        this.miniter = miniter;
        this.maxiter = maxiter;
        this.prior = prior;
    }

    /**
     * Input type restriction of this algorithm.
     *
     * @return a single-element array holding {@code TypeUtil.NUMBER_VECTOR_FIELD}
     */
    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array(new TypeInformation[] { TypeUtil.NUMBER_VECTOR_FIELD });
    }

    /**
     * Runs the EM iterations on the relation and wraps the resulting
     * per-object log-likelihood store as an outlier result.
     *
     * @param relation input relation; must not be empty
     * @return outlier result backed by the per-object log-likelihood store
     * @throws IllegalArgumentException if the relation contains no objects
     */
    public OutlierResult run(Relation<V> relation) {
        if (relation.size() == 0) {
            throw new IllegalArgumentException("database empty: must contain elements");
        }
        // The element type of this list is not imported in this file, so the
        // list stays raw; the EM helpers below accept it via unchecked conversion.
        List models = this.mfactory.buildInitialModels(relation, this.k);
        // NOTE(review): 10 is a storage-hint bit mask whose symbolic flag names
        // are not visible in this file -- confirm against the data store factory.
        WritableDataStore<double[]> probClusterIGivenX = DataStoreUtil.makeStorage(relation.getDBIDs(), 10, double[].class);
        WritableDoubleDataStore loglikelihoods = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), 10, Double.NEGATIVE_INFINITY);

        double loglikelihood = EM.assignProbabilitiesToInstances(relation, models, probClusterIGivenX, loglikelihoods);
        DoubleStatistic likestat = new DoubleStatistic(this.getClass().getName() + ".loglikelihood");
        LOG.statistics(likestat.setDouble(loglikelihood));

        int it = 0;
        int lastimprovement = 0;
        double bestloglikelihood = Double.NEGATIVE_INFINITY;
        // The counter starts at 1; a negative maxiter removes the upper bound.
        for (++it; it < this.maxiter || this.maxiter < 0; it++) {
            final double oldloglikelihood = loglikelihood;
            EM.recomputeCovarianceMatrices(relation, probClusterIGivenX, models, this.prior);
            loglikelihood = EM.assignProbabilitiesToInstances(relation, models, probClusterIGivenX, loglikelihoods);
            LOG.statistics(likestat.setDouble(loglikelihood));
            // Remember the last iteration that beat the best value by more than delta.
            if (loglikelihood - bestloglikelihood > this.delta) {
                lastimprovement = it;
                bestloglikelihood = loglikelihood;
            }
            // Once miniter is reached, stop when the change since the previous
            // iteration is within delta, or when the last improvement happened
            // before the first half of the iterations performed so far.
            if (it >= this.miniter && (Math.abs(loglikelihood - oldloglikelihood) <= this.delta || lastimprovement < (it >> 1))) {
                break;
            }
        }
        LOG.statistics(new LongStatistic(EMOutlier.class.getSimpleName() + ".iterations", it));

        // Observed score range for the result metadata.
        DoubleMinMax mm = new DoubleMinMax();
        for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
            mm.put(loglikelihoods.doubleValue(iditer));
        }
        DoubleRelation scoreres = new MaterializedDoubleRelation("EM Loglikelihoods", relation.getDBIDs(), loglikelihoods);
        // NOTE(review): the last two arguments look like the theoretical score
        // bounds; confirm that 1.0 is the intended upper bound for these values.
        OutlierScoreMeta meta = new InvertedOutlierScoreMeta(mm.getMin(), mm.getMax(), Double.NEGATIVE_INFINITY, 1.0);
        return new OutlierResult(meta, scoreres);
    }

    /**
     * Parameterization class; reads the options declared on {@code EM.Par}
     * and builds an {@link EMOutlier} from them.
     *
     * @param <V> vector type of the input relation
     */
    public static class Par<V extends NumberVector>
    implements Parameterizer {
        /** Number of initial models. */
        protected int k;

        /** Threshold on log-likelihood changes. */
        protected double delta;

        /** Factory producing the initial cluster models. */
        protected EMClusterModelFactory<V, ?> mfactory;

        /** Minimum iteration count; 1 unless the option is given. */
        protected int miniter = 1;

        /** Iteration bound; -1 (no bound) unless the option is given. */
        protected int maxiter = -1;

        /** Prior value; 0 unless the option is given. */
        double prior = 0.0;

        /**
         * Reads all options from the given parameterization.
         *
         * @param config parameterization to read from
         */
        public void configure(Parameterization config) {
            new IntParameter(EM.Par.K_ID)
                .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT)
                .grab(config, x -> this.k = x);
            // Explicit type argument so the lambda receives a model factory
            // rather than a plain Object from a raw ObjectParameter.
            new ObjectParameter<EMClusterModelFactory<V, ?>>(EM.Par.MODEL_ID, EMClusterModelFactory.class, MultivariateGaussianModelFactory.class)
                .grab(config, x -> this.mfactory = x);
            new DoubleParameter(EM.Par.DELTA_ID, 1.0E-7)
                .addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE)
                .grab(config, x -> this.delta = x);
            new IntParameter(EM.Par.MINITER_ID)
                .addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_INT)
                .setOptional(true)
                .grab(config, x -> this.miniter = x);
            new IntParameter(EM.Par.MAXITER_ID)
                .addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_INT)
                .setOptional(true)
                .grab(config, x -> this.maxiter = x);
            new DoubleParameter(EM.Par.PRIOR_ID)
                .setOptional(true)
                .addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE)
                .grab(config, x -> this.prior = x);
        }

        /**
         * Builds the configured algorithm instance.
         *
         * @return new {@link EMOutlier} using the values read by {@link #configure}
         */
        public EMOutlier<V> make() {
            return new EMOutlier<V>(this.k, this.delta, this.mfactory, this.miniter, this.maxiter, this.prior);
        }
    }
}

