/*
 * Decompiled with CFR 0.152.
 */
package elki.datasource.filter.normalization.columnwise;

import elki.data.NumberVector;
import elki.data.type.SimpleTypeInformation;
import elki.data.type.TypeInformation;
import elki.data.type.TypeUtil;
import elki.data.type.VectorFieldTypeInformation;
import elki.datasource.bundle.MultipleObjectsBundle;
import elki.datasource.filter.FilterUtil;
import elki.datasource.filter.normalization.Normalization;
import elki.logging.Logging;
import elki.math.statistics.distribution.Distribution;
import elki.math.statistics.distribution.UniformDistribution;
import elki.math.statistics.distribution.estimator.DistributionEstimator;
import elki.math.statistics.distribution.estimator.meta.BestFitEstimator;
import elki.math.statistics.tests.KolmogorovSmirnovTest;
import elki.utilities.datastructures.arraylike.NumberArrayAdapter;
import elki.utilities.optionhandling.OptionID;
import elki.utilities.optionhandling.Parameterizer;
import elki.utilities.optionhandling.parameterization.Parameterization;
import elki.utilities.optionhandling.parameters.ClassListParameter;
import elki.utilities.optionhandling.parameters.ObjectListParameter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * Normalization that replaces every attribute value of a number vector by the
 * value of a fitted distribution's cumulative distribution function (CDF).
 * <p>
 * For each column of the bundle that is a number vector field, one
 * distribution is fitted per dimension using the configured estimators. If
 * more than one estimator is configured, the candidate with the smallest
 * statistic returned by {@code KolmogorovSmirnovTest.simpleTest} on the sorted
 * CDF values is used.
 *
 * @param <V> vector type
 */
public class AttributeWiseCDFNormalization<V extends NumberVector>
implements Normalization<V> {
    /** Class logger. */
    private static final Logging LOG = Logging.getLogger(AttributeWiseCDFNormalization.class);

    /** Distribution estimators to try for every dimension. */
    protected List<? extends DistributionEstimator<?>> estimators;

    /**
     * Distributions chosen for the column processed most recently by
     * {@link #filter}, one entry per dimension.
     */
    protected List<Distribution> dists;

    /** Vector factory guessed for the column processed most recently. */
    protected NumberVector.Factory<V> factory;

    /**
     * Constructor.
     *
     * @param estimators distribution estimators to try
     */
    public AttributeWiseCDFNormalization(List<? extends DistributionEstimator<?>> estimators) {
        this.estimators = estimators;
    }

    /**
     * Fits one distribution per dimension of every number vector field column
     * and replaces each vector by the vector of its per-dimension CDF values.
     * The replacement vectors are written back into the list returned by
     * {@code objects.getColumn(r)}; columns of other types are skipped.
     *
     * @param objects bundle to normalize
     * @return the same bundle instance that was passed in
     * @throws IllegalStateException if no distribution could be fitted for
     *         some dimension
     */
    public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
        if (objects.dataLength() == 0) {
            return objects;
        }
        for (int r = 0; r < objects.metaLength(); ++r) {
            SimpleTypeInformation<?> type = objects.meta(r);
            List<?> column = objects.getColumn(r);
            if (!TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(type)) {
                continue;
            }
            // The type check above is what justifies the two unchecked casts.
            @SuppressWarnings("unchecked")
            List<V> castColumn = (List<V>) column;
            @SuppressWarnings("unchecked")
            VectorFieldTypeInformation<V> castType = (VectorFieldTypeInformation<V>) type;
            this.factory = FilterUtil.guessFactory(castType);

            int dim = castType.getDimensionality();
            this.dists = new ArrayList<>(dim);
            // Scratch space for comparing candidates; findBestFit only touches
            // it when there is more than one estimator.
            double[] test = this.estimators.size() > 1 ? new double[castColumn.size()] : null;

            // Fitting scan: the adapter exposes one dimension at a time.
            Adapter adapter = new Adapter();
            for (int d = 0; d < dim; ++d) {
                adapter.dim = d;
                Distribution dist = this.findBestFit(castColumn, adapter, d, test);
                if (dist == null) {
                    // Without this check the null would be stored and only
                    // surface as a bare NullPointerException in the scan below.
                    throw new IllegalStateException("No distribution could be fitted for dimension " + d + " of column " + r + " (" + this.estimators.size() + " estimator(s) configured).");
                }
                // An all-zero attribute fitted as uniform is mapped through
                // Uniform(0, 1) instead.
                // NOTE(review): presumably to avoid a degenerate fitted
                // interval - confirm against the estimator's behavior.
                if (dist instanceof UniformDistribution && this.constantZero(castColumn, adapter)) {
                    dist = new UniformDistribution(0.0, 1.0);
                }
                this.dists.add(dist);
            }

            // Normalization scan: the buffer is reused for every object.
            double[] buf = new double[dim];
            for (int i = 0; i < objects.dataLength(); ++i) {
                V obj = castColumn.get(i);
                for (int d = 0; d < dim; ++d) {
                    buf[d] = this.dists.get(d).cdf(obj.doubleValue(d));
                }
                castColumn.set(i, this.factory.newNumberVector(buf));
            }
        }
        return objects;
    }

    /**
     * Finds the best fitting distribution for one dimension.
     * <p>
     * With exactly one estimator its estimate is returned directly, and any
     * exception it throws propagates. Otherwise every estimator is tried:
     * candidates whose CDF yields NaN or an infinite value for some object, or
     * whose fitting throws an {@link ArithmeticException}, are skipped, and
     * among the rest the one with the smallest
     * {@code KolmogorovSmirnovTest.simpleTest} statistic wins (the first such
     * candidate on ties).
     *
     * @param col column data
     * @param adapter adapter already set to dimension {@code d}
     * @param d dimension being fitted
     * @param test scratch buffer, overwritten and sorted; only accessed when
     *        more than one estimator is configured
     * @return best fitting distribution, or {@code null} if no estimator is
     *         configured or no candidate was usable
     */
    protected Distribution findBestFit(List<V> col, Adapter adapter, int d, double[] test) {
        if (this.estimators.size() == 1) {
            return this.estimators.get(0).estimate(col, adapter);
        }
        Distribution best = null;
        double bestq = Double.POSITIVE_INFINITY;
        trials: for (DistributionEstimator<?> est : this.estimators) {
            try {
                Distribution dist = est.estimate(col, adapter);
                for (int i = 0; i < test.length; ++i) {
                    test[i] = dist.cdf(col.get(i).doubleValue(d));
                    if (Double.isNaN(test[i])) {
                        LOG.warning("Got NaN after fitting " + est + ": " + dist);
                        continue trials;
                    }
                    if (Double.isInfinite(test[i])) {
                        LOG.warning("Got infinite value after fitting " + est + ": " + dist);
                        continue trials;
                    }
                }
                // The CDF values are sorted before being handed to the test.
                Arrays.sort(test);
                double q = KolmogorovSmirnovTest.simpleTest(test);
                if (LOG.isVeryVerbose()) {
                    LOG.veryverbose("Estimator " + est + " (" + dist + ") has maximum deviation " + q + " for dimension " + d);
                }
                if (best == null || q < bestq) {
                    best = dist;
                    bestq = q;
                }
            }
            catch (ArithmeticException e) {
                // Deliberate best effort: a failing estimator is only skipped.
                if (LOG.isVeryVerbose()) {
                    LOG.veryverbose("Fitting distribution " + est + " failed: " + e.getMessage());
                }
            }
        }
        if (LOG.isVerbose()) {
            LOG.verbose("Best fit for dimension " + d + ": " + best);
        }
        return best;
    }

    /**
     * Tests whether every value the adapter exposes for the column is zero.
     *
     * @param column column data
     * @param adapter adapter set to the dimension to inspect
     * @return {@code true} if all values equal zero (also for an empty column)
     */
    protected boolean constantZero(List<V> column, Adapter adapter) {
        int s = adapter.size(column);
        for (int i = 0; i < s; ++i) {
            // getDouble avoids boxing; Adapter.get delegates to it anyway.
            if (adapter.getDouble(column, i) != 0.0) {
                return false;
            }
        }
        return true;
    }

    /**
     * Describes this normalization: the class name followed by the simple
     * class names of the configured estimators, separated by commas.
     *
     * @return textual description
     */
    @Override
    public String toString() {
        StringBuilder result = new StringBuilder(1000).append("normalization class: ").append(this.getClass().getName()).append('\n').append("normalization distributions: ");
        for (DistributionEstimator<?> est : this.estimators) {
            result.append(est.getClass().getSimpleName()).append(',');
        }
        // Drop the trailing comma left by the loop.
        if (!this.estimators.isEmpty()) {
            result.setLength(result.length() - 1);
        }
        return result.toString();
    }

    /**
     * Parameterization class.
     *
     * @param <V> vector type
     */
    public static class Par<V extends NumberVector>
    implements Parameterizer {
        /** Option for the list of distribution estimators to try. */
        public static final OptionID DISTRIBUTIONS_ID = new OptionID("normalize.distributions", "A list of the distribution estimators to try.");

        /** Estimators obtained from the parameterization. */
        private List<? extends DistributionEstimator<?>> estimators;

        /**
         * Reads the estimator list option; defaults to
         * {@code BestFitEstimator}.
         *
         * @param config parameterization to read from
         */
        public void configure(Parameterization config) {
            // NOTE(review): the raw types and cast are as recovered by the
            // decompiler - confirm against the ObjectListParameter API.
            ((ClassListParameter)new ObjectListParameter(DISTRIBUTIONS_ID, DistributionEstimator.class).setDefaultValue(Arrays.asList(BestFitEstimator.class))).grab(config, x -> {
                this.estimators = x;
            });
        }

        /**
         * Creates the normalization from the configured estimators.
         *
         * @return new normalization instance
         */
        public AttributeWiseCDFNormalization<V> make() {
            return new AttributeWiseCDFNormalization<>(this.estimators);
        }
    }

    /**
     * Array adapter that exposes a single dimension of a list of vectors as a
     * numeric array.
     */
    protected static class Adapter
    implements NumberArrayAdapter<Double, List<? extends NumberVector>> {
        /** Dimension to read; set by the caller before the adapter is used. */
        int dim;

        /** Constructor. */
        protected Adapter() {
        }

        /**
         * @param array list of vectors
         * @return number of vectors in the list
         */
        public int size(List<? extends NumberVector> array) {
            return array.size();
        }

        /**
         * @param array list of vectors
         * @param off index of the vector
         * @return boxed value of dimension {@link #dim} of that vector
         */
        public Double get(List<? extends NumberVector> array, int off) throws IndexOutOfBoundsException {
            return this.getDouble(array, off);
        }

        /**
         * @param array list of vectors
         * @param off index of the vector
         * @return value of dimension {@link #dim} of that vector as double
         */
        public double getDouble(List<? extends NumberVector> array, int off) throws IndexOutOfBoundsException {
            return array.get(off).doubleValue(this.dim);
        }

        /**
         * @param array list of vectors
         * @param off index of the vector
         * @return value of dimension {@link #dim} of that vector as long
         */
        public long getLong(List<? extends NumberVector> array, int off) throws IndexOutOfBoundsException {
            return array.get(off).longValue(this.dim);
        }
    }
}

