/*
 * Decompiled with CFR 0.152.
 */
package ai.databand.parameters;

import ai.databand.log.HistogramRequest;
import ai.databand.schema.histograms.ColumnSummary;
import ai.databand.schema.histograms.NumericSummary;
import ai.databand.schema.histograms.Summary;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.functions;
import org.apache.spark.sql.types.BooleanType;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.FractionalType;
import org.apache.spark.sql.types.IntegralType;
import org.apache.spark.sql.types.NumericType;
import org.apache.spark.sql.types.StringType;
import org.apache.spark.sql.types.StructField;
import scala.collection.Iterable;
import scala.collection.JavaConverters;
import scala.collection.Seq;

public class Histogram {
    private static final int MAX_NUMERIC_BUCKETS_COUNT = 20;
    private static final int MAX_CATEGORICAL_BUCKETS_COUNT = 50;
    private final String dfKey;
    private final Dataset<?> dataset;
    private final HistogramRequest req;
    private final Map<String, Object> result;
    private final Map<String, Summary> summaries;

    public Histogram(String key, Dataset<?> dataset, HistogramRequest histogramRequest) {
        this.dfKey = key;
        this.dataset = dataset.alias(String.format("%s_%s", "DBND_INTERNAL", "HISTOGRAM"));
        this.req = histogramRequest;
        this.result = new HashMap<String, Object>(1);
        this.summaries = new HashMap<String, Summary>(1);
    }

    protected <T> Seq<T> seq(List<T> list) {
        return ((Iterable)JavaConverters.collectionAsScalaIterableConverter(list).asScala()).toSeq();
    }

    public Map<String, Object> metricValues() {
        this.result.put(String.format("%s.stats", this.dfKey), this.summary());
        if (this.req.isEnabled() && !this.req.isOnlyStats()) {
            HashMap<String, Object> histograms = new HashMap<String, Object>(1);
            if (this.req.isIncludeAllNumeric()) {
                histograms.putAll(this.numericHistograms());
            }
            if (this.req.isIncludeAllString()) {
                histograms.putAll(this.categoricalHistograms(StringType.class));
            }
            if (this.req.isIncludeAllBoolean()) {
                histograms.putAll(this.categoricalHistograms(BooleanType.class));
            }
            this.result.put(String.format("%s.histograms", this.dfKey), histograms);
        }
        return this.result;
    }

    public Map<String, Map<String, Object>> summary() {
        Dataset summaryDf = this.dataset.summary(new String[0]);
        HashMap<String, Integer> colToIdx = new HashMap<String, Integer>();
        for (int i = 0; i < summaryDf.columns().length; ++i) {
            colToIdx.put(summaryDf.columns()[i], i);
        }
        List rawSummary = summaryDf.collectAsList();
        HashMap<String, StructField[]> summary = new HashMap<String, StructField[]>(1);
        for (StructField[] row : rawSummary) {
            summary.put(row.get(0).toString(), row);
        }
        ArrayList<String> exprs = new ArrayList<String>(1);
        for (StructField c : this.dataset.schema().fields()) {
            if (!this.isSimpleType(c.dataType()) || this.req.isExcluded(c.name())) continue;
            Column col = functions.col((String)c.name());
            exprs.add(String.format("count(DISTINCT `%s`) AS `%s_%s`", c.name(), c.name(), "distinct"));
            exprs.add(functions.count((Column)col).alias(String.format("%s_%s", c.name(), "non-null")).toString());
            exprs.add(functions.count((Column)functions.when((Column)col.isNull(), (Object)1)).alias(String.format("%s_%s", c.name(), "count_null")).toString());
        }
        Dataset countsDf = this.dataset.selectExpr(this.seq(exprs));
        Row rawCounts = (Row)countsDf.collectAsList().get(0);
        String[] countsColumns = countsDf.columns();
        HashMap<String, Object> counts = new HashMap<String, Object>(1);
        for (int i = 0; i < countsColumns.length; ++i) {
            counts.put(countsColumns[i], rawCounts.get(i));
        }
        HashMap<String, Map<String, Object>> stats = new HashMap<String, Map<String, Object>>(1);
        for (StructField c : this.dataset.schema().fields()) {
            if (!this.isSimpleType(c.dataType()) || this.req.isExcluded(c.name())) continue;
            Summary columnSummary = null;
            long nonNull = Long.parseLong(counts.get(String.format("%s_%s", c.name(), "non-null")).toString());
            long countNull = Long.parseLong(counts.get(String.format("%s_%s", c.name(), "count_null")).toString());
            if (c.dataType() instanceof NumericType) {
                int idx = (Integer)colToIdx.get(c.name());
                columnSummary = new NumericSummary(new ColumnSummary(nonNull + countNull, Long.parseLong(counts.get(String.format("%s_%s", c.name(), "distinct")).toString()), nonNull, countNull, c.dataType() instanceof FractionalType ? "double" : "integer"), Double.parseDouble(((Row)summary.get("max")).get(idx).toString()), Double.parseDouble(((Row)summary.get("mean")).get(idx).toString()), Double.parseDouble(((Row)summary.get("min")).get(idx).toString()), Double.parseDouble(((Row)summary.get("stddev")).get(idx).toString()), Double.parseDouble(((Row)summary.get("25%")).get(idx).toString()), Double.parseDouble(((Row)summary.get("50%")).get(idx).toString()), Double.parseDouble(((Row)summary.get("75%")).get(idx).toString()));
            } else if (c.dataType() instanceof StringType || c.dataType() instanceof BooleanType) {
                columnSummary = new ColumnSummary(nonNull + countNull, Long.parseLong(counts.get(String.format("%s_%s", c.name(), "distinct")).toString()), nonNull, countNull, c.dataType() instanceof StringType ? "string" : "boolean");
            }
            Map<String, Object> columnSummaryMap = columnSummary.toMap();
            stats.put(c.name(), columnSummaryMap);
            for (Map.Entry<String, Object> entry : columnSummaryMap.entrySet()) {
                this.result.put(String.format("%s.%s.%s", this.dfKey, c.name(), entry.getKey()), entry.getValue());
            }
            this.summaries.put(c.name(), columnSummary);
        }
        return stats;
    }

    public Map<String, Summary> getSummaries() {
        return this.summaries;
    }

    protected boolean isSimpleType(DataType dt) {
        return dt instanceof NumericType || dt instanceof StringType || dt instanceof BooleanType;
    }

    protected Map<String, Object[][]> numericHistograms() {
        ArrayList<Column> numericColumns = new ArrayList<Column>(1);
        ArrayList<String> histogramsExpr = new ArrayList<String>(1);
        HashMap<String, Object[]> namedBuckets = new HashMap<String, Object[]>(1);
        for (StructField c : this.dataset.schema().fields()) {
            int i;
            if (!(c.dataType() instanceof NumericType) || this.req.isExcluded(c.name())) continue;
            numericColumns.add(functions.col((String)c.name()));
            long distinct = (Long)this.result.get(String.format("%s.%s.%s", this.dfKey, c.name(), "distinct"));
            double minv = (Double)this.result.get(String.format("%s.%s.%s", this.dfKey, c.name(), "min"));
            double maxv = (Double)this.result.get(String.format("%s.%s.%s", this.dfKey, c.name(), "max"));
            int bucketsCount = (int)Math.min(distinct, 20L);
            double inc = c.dataType() instanceof IntegralType ? (double)((int)((maxv - minv) / (double)bucketsCount)) : (maxv - minv) * 1.0 / (double)bucketsCount;
            Object[] buckets = new Object[bucketsCount + 1];
            for (i = 0; i < bucketsCount; ++i) {
                buckets[i] = (double)i * inc + minv;
            }
            buckets[bucketsCount] = maxv;
            namedBuckets.put(c.name(), buckets);
            for (i = 0; i < buckets.length - 1; ++i) {
                histogramsExpr.add(functions.count((Column)functions.when((Column)functions.col((String)c.name()).geq(buckets[i]).and(i == buckets.length - 2 ? functions.col((String)c.name()).leq(buckets[i + 1]) : functions.col((String)c.name()).lt(buckets[i + 1])), (Object)1)).alias(String.format("%s_%s", c.name(), i)).toString());
            }
        }
        Dataset histogramsDf = this.dataset.select(this.seq(numericColumns)).selectExpr(this.seq(histogramsExpr));
        Row histograms = (Row)histogramsDf.collectAsList().get(0);
        HashMap<String, Object[][]> histogramsResult = new HashMap<String, Object[][]>(1);
        for (String column : namedBuckets.keySet()) {
            Object[] buckets = (Object[])namedBuckets.get(column);
            Object[] bucketCounts = new Object[buckets.length];
            for (int i = 0; i < buckets.length - 1; ++i) {
                bucketCounts[i] = histograms.getAs(String.format("%s_%s", column, i));
            }
            histogramsResult.put(column, new Object[][]{bucketCounts, buckets});
        }
        return histogramsResult;
    }

    List<Dataset<Row>> columnsOfType(Class<?> dataType) {
        return Arrays.stream(this.dataset.schema().fields()).filter(f -> dataType.isInstance(f.dataType())).filter(f -> !this.req.isExcluded(f.name())).map(f -> this.dataset.select(f.name(), new String[0])).collect(Collectors.toList());
    }

    protected Map<String, List<List<Object>>> categoricalHistograms(Class<?> dataType) {
        List<Dataset<Row>> columnsDf = this.columnsOfType(dataType);
        if (columnsDf.isEmpty()) {
            return Collections.emptyMap();
        }
        Dataset valueCounts = null;
        for (Dataset<Row> column : columnsDf) {
            String string = column.schema().names()[0];
            Dataset columnCounts = column.groupBy(string, new String[0]).count().orderBy(new Column[]{functions.desc((String)"count")}).withColumn("column_name", functions.lit((Object)string)).limit(49);
            if (valueCounts == null) {
                valueCounts = columnCounts;
                continue;
            }
            valueCounts = valueCounts.union(columnCounts);
        }
        HashMap<String, List<List<Object>>> histogramsResult = new HashMap<String, List<List<Object>>>(1);
        for (Row row : valueCounts.collectAsList()) {
            if (row.get(0) == null) continue;
            String bucket = row.get(0).toString();
            long count = row.getLong(1);
            String columnName = row.getString(2);
            List columnHistogram = histogramsResult.computeIfAbsent(columnName, c -> {
                ArrayList pair = new ArrayList(2);
                pair.add(new ArrayList(1));
                pair.add(new ArrayList(1));
                return pair;
            });
            ((List)columnHistogram.get(0)).add(count);
            ((List)columnHistogram.get(1)).add(bucket);
        }
        for (Map.Entry entry : histogramsResult.entrySet()) {
            Summary summary = this.summaries.get(entry.getKey());
            long distinct = summary.getDistinct();
            if (distinct < 50L) continue;
            long total = summary.getCount();
            long histogramSumCount = ((List)((List)entry.getValue()).get(0)).stream().mapToLong(f -> (Long)f).sum();
            long othersCount = total - histogramSumCount;
            ((List)((List)entry.getValue()).get(0)).add(othersCount);
            ((List)((List)entry.getValue()).get(1)).add("_others");
        }
        return histogramsResult;
    }
}

