/*
 * Decompiled with CFR 0.152.
 */
package com.linkedin.feathr.offline.join.util;

import com.linkedin.feathr.offline.join.util.FrequentItemEstimator;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.expressions.UserDefinedFunction;
import org.apache.spark.sql.functions$;
import org.apache.spark.util.sketch.CountMinSketch;
import scala.Function1;
import scala.Predef$;
import scala.Serializable;
import scala.collection.Seq;
import scala.reflect.ScalaSignature;
import scala.reflect.api.TypeTags;
import scala.reflect.runtime.package$;
import scala.runtime.BoxesRunTime;

@ScalaSignature(bytes="\u0006\u0001\u00014Q!\u0003\u0006\u0001\u001dYAQ!\t\u0001\u0005\u0002\rBq!\n\u0001C\u0002\u0013%a\u0005\u0003\u0004+\u0001\u0001\u0006Ia\n\u0005\bW\u0001\u0011\r\u0011\"\u0003'\u0011\u0019a\u0003\u0001)A\u0005O!9Q\u0006\u0001b\u0001\n\u0013q\u0003B\u0002\u001a\u0001A\u0003%q\u0006C\u00034\u0001\u0011\u0005CGA\u0012D_VtG/T5o'.,Go\u00195Ge\u0016\fX/\u001a8u\u0013R,W.R:uS6\fGo\u001c:\u000b\u0005-a\u0011\u0001B;uS2T!!\u0004\b\u0002\t)|\u0017N\u001c\u0006\u0003\u001fA\tqa\u001c4gY&tWM\u0003\u0002\u0012%\u00051a-Z1uQJT!a\u0005\u000b\u0002\u00111Lgn[3eS:T\u0011!F\u0001\u0004G>l7c\u0001\u0001\u0018;A\u0011\u0001dG\u0007\u00023)\t!$A\u0003tG\u0006d\u0017-\u0003\u0002\u001d3\t1\u0011I\\=SK\u001a\u0004\"AH\u0010\u000e\u0003)I!\u0001\t\u0006\u0003+\u0019\u0013X-];f]RLE/Z7FgRLW.\u0019;pe\u00061A(\u001b8jiz\u001a\u0001\u0001F\u0001%!\tq\u0002!A\u0002faN,\u0012a\n\t\u00031!J!!K\r\u0003\r\u0011{WO\u00197f\u0003\u0011)\u0007o\u001d\u0011\u0002\u0015\r|gNZ5eK:\u001cW-A\u0006d_:4\u0017\u000eZ3oG\u0016\u0004\u0013\u0001B:fK\u0012,\u0012a\f\t\u00031AJ!!M\r\u0003\u0007%sG/A\u0003tK\u0016$\u0007%A\u000bfgRLW.\u0019;f\rJ,\u0017/^3oi&#X-\\:\u0015\tUz\u0015k\u0017\t\u0003m1s!aN%\u000f\u0005a2eBA\u001dD\u001d\tQ\u0004I\u0004\u0002<}5\tAH\u0003\u0002>E\u00051AH]8pizJ\u0011aP\u0001\u0004_J<\u0017BA!C\u0003\u0019\t\u0007/Y2iK*\tq(\u0003\u0002E\u000b\u0006)1\u000f]1sW*\u0011\u0011IQ\u0005\u0003\u000f\"\u000b1a]9m\u0015\t!U)\u0003\u0002K\u0017\u00069\u0001/Y2lC\u001e,'BA$I\u0013\tieJA\u0005ECR\fgI]1nK*\u0011!j\u0013\u0005\u0006!\"\u0001\r!N\u0001\bS:\u0004X\u000f\u001e#g\u0011\u0015\u0011\u0006\u00021\u0001T\u00031!\u0018M]4fi\u000e{G.^7o!\t!\u0006L\u0004\u0002V-B\u00111(G\u0005\u0003/f\ta\u0001\u0015:fI\u00164\u0017BA-[\u0005\u0019\u0019FO]5oO*\u0011q+\u0007\u0005\u00069\"\u0001\r!X\u0001\u000eMJ,\u0017\u000f\u00165sKNDw\u000e\u001c3\u0011\u0005aq\u0016BA0\u001a\u0005\u00151En\\1u\u0001")
public class CountMinSketchFrequentItemEstimator
implements FrequentItemEstimator {
    private final double eps;
    private final double confidence;
    private final int seed;

    private double eps() {
        return this.eps;
    }

    private double confidence() {
        return this.confidence;
    }

    private int seed() {
        return this.seed;
    }

    @Override
    public Dataset<Row> estimateFrequentItems(Dataset<Row> inputDf, String targetColumn, float freqThreshold) {
        CountMinSketch minSketch = inputDf.stat().countMinSketch(targetColumn, this.eps(), this.confidence(), this.seed());
        UserDefinedFunction countUDF = functions$.MODULE$.udf((Function1 & java.io.Serializable & Serializable)item -> BoxesRunTime.boxToLong((long)minSketch.estimateCount(item)), ((TypeTags)package$.MODULE$.universe()).TypeTag().Long(), ((TypeTags)package$.MODULE$.universe()).TypeTag().Any());
        String internalCountColumnName = "_feathr_estimate_count";
        long totalCount = inputDf.count();
        Dataset freqDf = inputDf.withColumn(internalCountColumnName, countUDF.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.expr(targetColumn)}))).filter(functions$.MODULE$.expr(new StringBuilder(8).append(internalCountColumnName).append("*1.0/").append(totalCount).append(" > ").append(freqThreshold).toString())).select(targetColumn, (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0])).distinct();
        return freqDf;
    }

    public CountMinSketchFrequentItemEstimator() {
        this.eps = 0.01;
        this.confidence = 0.95;
        this.seed = 7;
    }
}

