/*
 * Decompiled with CFR 0.152.
 */
package org.apache.spark.sql.delta;

import java.io.Serializable;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.Path;
import org.apache.spark.Partitioner;
import org.apache.spark.rdd.RDD;
import org.apache.spark.rdd.RDD$;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Dataset$;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan;
import org.apache.spark.sql.delta.DeltaLog;
import org.apache.spark.sql.delta.Snapshot;
import org.apache.spark.sql.delta.UpsertBF$;
import org.apache.spark.sql.delta.UpsertTableInDeltaConf;
import org.apache.spark.sql.delta.actions.AddFile;
import org.apache.spark.sql.delta.actions.RemoveFile;
import org.apache.spark.sql.delta.commands.UpsertTableInDelta$;
import org.apache.spark.sql.delta.files.TahoeBatchFileIndex;
import org.apache.spark.sql.delta.sources.BFItem;
import org.apache.spark.sql.execution.datasources.FileIndex;
import org.apache.spark.sql.execution.datasources.HadoopFsRelation;
import org.apache.spark.sql.execution.datasources.LogicalRelation$;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.sources.BaseRelation;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import scala.Array$;
import scala.Function0;
import scala.Function1;
import scala.Function2;
import scala.None$;
import scala.Option;
import scala.Predef$;
import scala.Tuple2;
import scala.collection.GenTraversableOnce;
import scala.collection.Iterator;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.SeqLike;
import scala.collection.TraversableLike;
import scala.collection.TraversableOnce;
import scala.collection.immutable.;
import scala.collection.immutable.List;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.Set;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayBuffer;
import scala.collection.mutable.ArrayOps;
import scala.math.Ordering;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.reflect.api.JavaUniverse;
import scala.reflect.api.Mirror;
import scala.reflect.api.TypeCreator;
import scala.reflect.api.TypeTags;
import scala.reflect.api.Types;
import scala.reflect.api.Universe;
import scala.reflect.runtime.package$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import tech.mlsql.common.BloomFilter;

@ScalaSignature(bytes="\u0006\u0001I3A!\u0002\u0004\u0001#!A\u0001\u0004\u0001B\u0001B\u0003%\u0011\u0004\u0003\u0005\u001e\u0001\t\u0005\t\u0015!\u0003\u001f\u0011\u0015I\u0003\u0001\"\u0001+\u0011\u0015q\u0003\u0001\"\u00010\u0005!)\u0006o]3si\n3%BA\u0004\t\u0003\u0015!W\r\u001c;b\u0015\tI!\"A\u0002tc2T!a\u0003\u0007\u0002\u000bM\u0004\u0018M]6\u000b\u00055q\u0011AB1qC\u000eDWMC\u0001\u0010\u0003\ry'oZ\u0002\u0001'\t\u0001!\u0003\u0005\u0002\u0014-5\tACC\u0001\u0016\u0003\u0015\u00198-\u00197b\u0013\t9BC\u0001\u0004B]f\u0014VMZ\u0001\u000bkB\u001cXM\u001d;D_:4\u0007C\u0001\u000e\u001c\u001b\u00051\u0011B\u0001\u000f\u0007\u0005Y)\u0006o]3siR\u000b'\r\\3J]\u0012+G\u000e^1D_:4\u0017!\u0002:v]&#\u0007CA\u0010'\u001d\t\u0001C\u0005\u0005\u0002\")5\t!E\u0003\u0002$!\u00051AH]8pizJ!!\n\u000b\u0002\rA\u0013X\rZ3g\u0013\t9\u0003F\u0001\u0004TiJLgn\u001a\u0006\u0003KQ\ta\u0001P5oSRtDcA\u0016-[A\u0011!\u0004\u0001\u0005\u00061\r\u0001\r!\u0007\u0005\u0006;\r\u0001\rAH\u0001\u0019O\u0016tWM]1uK\n3ei\u001c:QCJ\fX/\u001a;GS2,G\u0003\u0002\u00194w1\u0003\"aE\u0019\n\u0005I\"\"\u0001B+oSRDQ\u0001\u000e\u0003A\u0002U\nAb]8ve\u000e,7k\u00195f[\u0006\u0004\"AN\u001d\u000e\u0003]R!\u0001\u000f\u0005\u0002\u000bQL\b/Z:\n\u0005i:$AC*ueV\u001cG\u000fV=qK\")A\b\u0002a\u0001{\u0005A\u0011\r\u001a3GS2,7\u000fE\u0002?\u0007\u001as!aP!\u000f\u0005\u0005\u0002\u0015\"A\u000b\n\u0005\t#\u0012a\u00029bG.\fw-Z\u0005\u0003\t\u0016\u00131aU3r\u0015\t\u0011E\u0003\u0005\u0002H\u00156\t\u0001J\u0003\u0002J\r\u00059\u0011m\u0019;j_:\u001c\u0018BA&I\u0005\u001d\tE\r\u001a$jY\u0016DQ!\u0014\u0003A\u00029\u000bA\u0002Z3mKR,GMR5mKN\u00042AP\"P!\t9\u0005+\u0003\u0002R\u0011\nQ!+Z7pm\u00164\u0015\u000e\\3")
public class UpsertBF {
    /** Upsert configuration; this class reads its DeltaLog, SparkSession, initial-load flag, bloom-filter error rate and id-column list. */
    private final UpsertTableInDeltaConf upsertConf;
    /** Run identifier; appended as a suffix to the name of the bloom-filter index directory written by generateBFForParquetFile. */
    private final String runId;

    /**
     * Writes a bloom-filter index (one BFItem record per data file) as parquet under
     * {@code <dataPath>/_bf_index_<currentVersion + 1>_<runId>}.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>If the index directory of the current version ({@code <dataPath>/_bf_index_<currentVersion>})
     *       exists, its entries are copied to the new directory, except those whose file name is the
     *       path of one of {@code deletedFiles}.</li>
     *   <li>If that directory does not exist and the table already has a committed version
     *       (version &gt; -1), every file of the current snapshot that is not in {@code deletedFiles}
     *       is added to the set of files to index.</li>
     *   <li>The rows of the files to index are shuffled so that each file lands in its own partition,
     *       one BFItem is built per non-empty partition, and the result is appended to the new
     *       directory as a single partition.</li>
     * </ol>
     *
     * @param sourceSchema data schema used to read the files when {@code upsertConf.isInitial()} is true
     * @param addFiles     files added by the current operation; always indexed
     * @param deletedFiles files removed by the current operation; their entries are dropped from the index
     */
    public void generateBFForParquetFile(StructType sourceSchema, Seq<AddFile> addFiles, Seq<RemoveFile> deletedFiles) {
        DeltaLog deltaLog = this.upsertConf.deltaLog();
        Snapshot snapshot = deltaLog.snapshot();
        SparkSession sparkSession = this.upsertConf.sparkSession();
        boolean isInitial = this.upsertConf.isInitial();
        // Target directory: "_bf_index_" + (version narrowed to int, then + 1) + "_" + runId.
        Path newBFPathFs = new Path(deltaLog.dataPath(), new StringBuilder(11).append("_bf_index_").append((int)deltaLog.snapshot().version() + 1).append("_").append(this.runId).toString());
        String newBFPath = newBFPathFs.toUri().getPath();
        // Directory looked up for the current version; note it carries no runId suffix.
        Path bfPathFs = new Path(deltaLog.dataPath(), new StringBuilder(10).append("_bf_index_").append(deltaLog.snapshot().version()).toString());
        String bfPath = bfPathFs.toUri().getPath();
        if (deltaLog.fs().exists(bfPathFs)) {
            // Carry over the existing index, minus the entries of files deleted by this operation.
            deltaLog.fs().mkdirs(newBFPathFs);
            Set deletePaths = ((TraversableOnce)deletedFiles.map((Function1 & Serializable & scala.Serializable)f -> f.path(), Seq$.MODULE$.canBuildFrom())).toSet();
            JavaUniverse $u = package$.MODULE$.universe();
            JavaUniverse.JavaMirror $m = package$.MODULE$.universe().runtimeMirror(UpsertBF.class.getClassLoader());
            // Local TypeCreator that resolves the BFItem type; it feeds the TypeTag of the product encoder below.
            public final class Org_apache_spark_sql_delta_UpsertBF$$typecreator4$1
            extends TypeCreator {
                public <U extends Universe> Types.TypeApi apply(Mirror<U> $m$untyped) {
                    Universe $u = $m$untyped.universe();
                    Mirror<U> $m = $m$untyped;
                    return $m.staticClass("org.apache.spark.sql.delta.sources.BFItem").asType().toTypeConstructor();
                }

                public Org_apache_spark_sql_delta_UpsertBF$$typecreator4$1(UpsertBF $outer) {
                }
            }
            // Read old index -> single partition -> typed as BFItem -> drop deleted files -> append to the new directory.
            sparkSession.read().parquet(bfPath).repartition(1).as(this.upsertConf.sparkSession().implicits().newProductEncoder(((TypeTags)$u).TypeTag().apply((Mirror)$m, (TypeCreator)new Org_apache_spark_sql_delta_UpsertBF$$typecreator4$1(null)))).filter((Function1 & Serializable & scala.Serializable)f -> BoxesRunTime.boxToBoolean((boolean)UpsertBF.$anonfun$generateBFForParquetFile$2(deletePaths, f))).write().mode(SaveMode.Append).parquet(newBFPath);
        }
        Seq realAddFiles = addFiles;
        // No index for the current version although the table has commits: index every live file of the
        // snapshot as well (collected to the driver), excluding the files being deleted.
        if (!deltaLog.fs().exists(bfPathFs) && deltaLog.snapshot().version() > -1L) {
            realAddFiles = (Seq)realAddFiles.$plus$plus((GenTraversableOnce)new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])deltaLog.snapshot().allFiles().collect())), Seq$.MODULE$.canBuildFrom());
            realAddFiles = (Seq)realAddFiles.filterNot((Function1 & Serializable & scala.Serializable)addfile -> BoxesRunTime.boxToBoolean((boolean)UpsertBF.$anonfun$generateBFForParquetFile$3(deletedFiles, addfile)));
        }
        // Path of the table's data directory; used below to cut the table-relative part out of input file names.
        String deltaPathPrefix = deltaLog.snapshot().deltaLog().dataPath().toUri().getPath();
        // Non-initial runs read through DeltaLog.createDataFrame; initial runs use the local helper that reads
        // with sourceSchema. Either way a FILE_NAME column holding input_file_name() is added.
        Dataset df = !isInitial ? deltaLog.createDataFrame(snapshot, realAddFiles, false, deltaLog.createDataFrame$default$4()).withColumn(UpsertTableInDelta$.MODULE$.FILE_NAME(), functions$.MODULE$.input_file_name()) : UpsertBF.createDataFrame$1(realAddFiles, false, UpsertBF.createDataFrame$default$3$1(), sparkSession, deltaLog, snapshot, sourceSchema).withColumn(UpsertTableInDelta$.MODULE$.FILE_NAME(), functions$.MODULE$.input_file_name());
        String FILE_NAME = UpsertTableInDelta$.MODULE$.FILE_NAME();
        // schemaNames and dfSchema (below) are computed the same way: the column names of df.
        Seq schemaNames = (Seq)df.schema().map((Function1 & Serializable & scala.Serializable)f -> f.name(), Seq$.MODULE$.canBuildFrom());
        double errorRate = this.upsertConf.bfErrorRate();
        Seq<String> idColsList = this.upsertConf.idColsList();
        Seq dfSchema = (Seq)df.schema().map((Function1 & Serializable & scala.Serializable)f -> f.name(), Seq$.MODULE$.canBuildFrom());
        // AddFile.path -> position in realAddFiles; the position is used as the partition id.
        Map fileWithIndex = ((TraversableOnce)((TraversableLike)realAddFiles.zipWithIndex(Seq$.MODULE$.canBuildFrom())).map((Function1 & Serializable & scala.Serializable)f -> new Tuple2((Object)((AddFile)f._1()).path(), (Object)BoxesRunTime.boxToInteger((int)f._2$mcI$sp())), Seq$.MODULE$.canBuildFrom())).toMap(Predef$.MODULE$.$conforms());
        int fileNum = fileWithIndex.size();
        // Key every row with getColStrs(row, Seq(FILE_NAME), dfSchema) -- presumably the row's input file name,
        // TODO confirm against UpsertTableInDelta.getColStrs -- then shuffle with the custom partitioner and
        // drop the key again.
        RDD qual$1 = RDD$.MODULE$.rddToPairRDDFunctions(df.rdd().map((Function1 & Serializable & scala.Serializable)row -> new Tuple2((Object)UpsertTableInDelta$.MODULE$.getColStrs((Row)row, (Seq<String>)((Seq)Seq$.MODULE$.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{FILE_NAME}))), (Seq<String>)dfSchema), row), ClassTag$.MODULE$.apply(Tuple2.class)), ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(Row.class), (Ordering)Ordering.String$.MODULE$).partitionBy(new Partitioner(null, fileNum, fileWithIndex, deltaPathPrefix){
            private final int fileNum$1;
            private final Map fileWithIndex$1;
            private final String deltaPathPrefix$1;

            // One partition per entry of fileWithIndex.
            public int numPartitions() {
                return this.fileNum$1;
            }

            // Splits the key on deltaPathPrefix, takes the last piece, strips a leading "/" and looks the
            // result up in fileWithIndex; Map.apply is used, so the lookup assumes the key is present.
            public int getPartition(Object key) {
                return BoxesRunTime.unboxToInt((Object)this.fileWithIndex$1.apply((Object)new StringOps(Predef$.MODULE$.augmentString((String)new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])StringUtils.splitByWholeSeparator((String)key.toString(), (String)this.deltaPathPrefix$1))).last())).stripPrefix("/")));
            }
            {
                this.fileNum$1 = fileNum$1;
                this.fileWithIndex$1 = fileWithIndex$1;
                this.deltaPathPrefix$1 = deltaPathPrefix$1;
            }
        }).map((Function1 & Serializable & scala.Serializable)f -> (Row)f._2(), ClassTag$.MODULE$.apply(Row.class));
        // Per partition: build the BFItem for the rows of that partition (see $anonfun$generateBFForParquetFile$11).
        Function2 & Serializable & scala.Serializable x$1 = (Function2 & Serializable & scala.Serializable)(index, iter) -> UpsertBF.$anonfun$generateBFForParquetFile$11(FILE_NAME, idColsList, schemaNames, errorRate, deltaPathPrefix, BoxesRunTime.unboxToInt((Object)index), iter);
        boolean x$2 = qual$1.mapPartitionsWithIndex$default$2();
        RDD qual$2 = qual$1.mapPartitionsWithIndex((Function2)x$1, x$2, ClassTag$.MODULE$.apply(BFItem.class));
        // The BFItem records are collapsed into a single partition before being written.
        int x$3 = 1;
        Ordering x$4 = qual$2.repartition$default$2(x$3);
        JavaUniverse $u = package$.MODULE$.universe();
        JavaUniverse.JavaMirror $m = package$.MODULE$.universe().runtimeMirror(UpsertBF.class.getClassLoader());
        JavaUniverse $u2 = package$.MODULE$.universe();
        JavaUniverse.JavaMirror $m2 = package$.MODULE$.universe().runtimeMirror(UpsertBF.class.getClassLoader());
        // Two more local TypeCreators resolving BFItem: one for rddToDatasetHolder, one for the as(...) call.
        public final class Org_apache_spark_sql_delta_UpsertBF$$typecreator9$1
        extends TypeCreator {
            public <U extends Universe> Types.TypeApi apply(Mirror<U> $m$untyped) {
                Universe $u = $m$untyped.universe();
                Mirror<U> $m = $m$untyped;
                return $m.staticClass("org.apache.spark.sql.delta.sources.BFItem").asType().toTypeConstructor();
            }

            public Org_apache_spark_sql_delta_UpsertBF$$typecreator9$1(UpsertBF $outer) {
            }
        }
        public final class Org_apache_spark_sql_delta_UpsertBF$$typecreator13$1
        extends TypeCreator {
            public <U extends Universe> Types.TypeApi apply(Mirror<U> $m$untyped) {
                Universe $u = $m$untyped.universe();
                Mirror<U> $m = $m$untyped;
                return $m.staticClass("org.apache.spark.sql.delta.sources.BFItem").asType().toTypeConstructor();
            }

            public Org_apache_spark_sql_delta_UpsertBF$$typecreator13$1(UpsertBF $outer) {
            }
        }
        // RDD[BFItem] -> Dataset -> DataFrame -> Dataset[BFItem], appended as parquet to the new index directory
        // (the same directory that received the carried-over entries above).
        this.upsertConf.sparkSession().implicits().rddToDatasetHolder(qual$2.repartition(x$3, x$4), this.upsertConf.sparkSession().implicits().newProductEncoder(((TypeTags)$u).TypeTag().apply((Mirror)$m, (TypeCreator)new Org_apache_spark_sql_delta_UpsertBF$$typecreator9$1(null)))).toDF().as(this.upsertConf.sparkSession().implicits().newProductEncoder(((TypeTags)$u2).TypeTag().apply((Mirror)$m2, (TypeCreator)new Org_apache_spark_sql_delta_UpsertBF$$typecreator13$1(null)))).write().mode(SaveMode.Append).parquet(newBFPath);
        // Unused local holding the unit value (likely a decompilation artifact); it has no effect.
        BoxedUnit rdd = BoxedUnit.UNIT;
    }

    /**
     * Filter predicate applied to the entries of the previous bloom-filter index.
     *
     * @param deletePaths$1 paths of the files removed by the current operation
     * @param f             index entry being examined
     * @return {@code true} when the entry's file name is not among {@code deletePaths$1}, i.e. the entry is kept
     */
    public static final /* synthetic */ boolean $anonfun$generateBFForParquetFile$2(Set deletePaths$1, BFItem f) {
        Object indexedFile = (Object)f.fileName();
        boolean wasDeleted = deletePaths$1.contains(indexedFile);
        return !wasDeleted;
    }

    /**
     * Predicate telling whether an added file is also listed among the deleted files.
     *
     * <p>The paths are compared with {@code BoxesRunTime.equals}, the same null-safe equality
     * that Scala's {@code Seq.contains} applies. The scan stops at the first match and no
     * intermediate sequence of paths is allocated, which matters because this predicate runs
     * once per candidate file.
     *
     * @param deletedFiles$1 the {@code RemoveFile} actions of the current operation
     * @param addfile        candidate file
     * @return {@code true} when some element of {@code deletedFiles$1} has the same path as {@code addfile}
     */
    public static final /* synthetic */ boolean $anonfun$generateBFForParquetFile$3(Seq deletedFiles$1, AddFile addfile) {
        Object candidatePath = (Object)addfile.path();
        return deletedFiles$1.exists((Function1 & Serializable & scala.Serializable)removed -> BoxesRunTime.boxToBoolean(BoxesRunTime.equals((Object)((RemoveFile)removed).path(), candidatePath)));
    }

    /**
     * Builds a DataFrame over the given files through a {@code HadoopFsRelation} that reads them with
     * {@code sourceSchema$1} as the data schema, using the file format and format options of the
     * delta log's current snapshot.
     *
     * @param addFiles        files to expose through the relation
     * @param isStreaming     forwarded to the logical relation; also selects the fallback action type
     * @param actionTypeOpt   optional action type; when empty, "streaming" or "batch" is used
     * @param sparkSession$1  session the relation and the resulting Dataset are bound to
     * @param deltaLog$1      delta log supplying data path, file format and format options
     * @param snapshot$1      snapshot handed to the file index
     * @param sourceSchema$1  schema used to read the files
     * @return the Dataset created from the logical relation
     */
    private static final Dataset createDataFrame$1(Seq addFiles, boolean isStreaming, Option actionTypeOpt, SparkSession sparkSession$1, DeltaLog deltaLog$1, Snapshot snapshot$1, StructType sourceSchema$1) {
        // Fallback used only when no explicit action type was supplied.
        Function0 fallbackActionType = (Function0 & Serializable & scala.Serializable)() -> isStreaming ? "streaming" : "batch";
        String resolvedActionType = (String)actionTypeOpt.getOrElse(fallbackActionType);
        FileIndex addFilesIndex = (FileIndex)new TahoeBatchFileIndex(sparkSession$1, resolvedActionType, addFiles, deltaLog$1, deltaLog$1.dataPath(), snapshot$1);
        // Schema with zero fields, passed as the relation's second argument.
        StructType emptySchema = new StructType((StructField[])Array$.MODULE$.apply((Seq)Nil$.MODULE$, ClassTag$.MODULE$.apply(StructField.class)));
        BaseRelation fsRelation = (BaseRelation)new HadoopFsRelation(addFilesIndex, emptySchema, sourceSchema$1, (Option)None$.MODULE$, deltaLog$1.snapshot().fileFormat(), deltaLog$1.snapshot().metadata().format().options(), sparkSession$1);
        LogicalPlan relationPlan = (LogicalPlan)LogicalRelation$.MODULE$.apply(fsRelation, isStreaming);
        return Dataset$.MODULE$.ofRows(sparkSession$1, relationPlan);
    }

    /**
     * Returns {@code false}. Judging by its name this is the default value for the second
     * parameter ({@code isStreaming}) of {@code createDataFrame$1}; it is not referenced
     * anywhere in the visible part of this file.
     */
    private static final boolean createDataFrame$default$2$1() {
        return false;
    }

    /**
     * Returns the empty option ({@code None}). generateBFForParquetFile passes this value as the
     * third argument ({@code actionTypeOpt}) of {@code createDataFrame$1}.
     */
    private static final Option createDataFrame$default$3$1() {
        return None$.MODULE$;
    }

    public static final /* synthetic */ Iterator $anonfun$generateBFForParquetFile$11(String FILE_NAME$1, Seq idColsList$1, Seq schemaNames$1, double errorRate$1, String deltaPathPrefix$1, int index, Iterator iter) {
        Iterator iterator;
        ArrayBuffer buffer = new ArrayBuffer();
        String fileName = null;
        int numEntries = 0;
        while (iter.hasNext()) {
            Row row = (Row)iter.next();
            if (fileName == null) {
                fileName = (String)row.getAs(FILE_NAME$1);
            }
            ++numEntries;
            buffer.$plus$eq((Object)UpsertTableInDelta$.MODULE$.getKey(row, (Seq<String>)idColsList$1, (Seq<String>)schemaNames$1));
        }
        if (numEntries > 0) {
            BloomFilter bf = new BloomFilter(numEntries, errorRate$1);
            buffer.foreach((Function1 & Serializable & scala.Serializable)rowId -> {
                bf.add(rowId);
                return BoxedUnit.UNIT;
            });
            iterator = new .colon.colon((Object)new BFItem(new StringOps(Predef$.MODULE$.augmentString((String)new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])StringUtils.splitByWholeSeparator((String)fileName, (String)deltaPathPrefix$1))).last())).stripPrefix("/"), bf.serializeToString(), bf.size(), new StringBuilder(1).append((double)bf.size() / 8.0 / (double)1024 / (double)1024).append("m").toString()), (List)Nil$.MODULE$).iterator();
        } else {
            iterator = Nil$.MODULE$.iterator();
        }
        return iterator;
    }

    /**
     * Stores the two collaborators; no other work is done at construction time.
     *
     * @param upsertConf upsert configuration read by generateBFForParquetFile
     * @param runId      identifier appended to the name of the bloom-filter index directory
     */
    public UpsertBF(UpsertTableInDeltaConf upsertConf, String runId) {
        this.upsertConf = upsertConf;
        this.runId = runId;
    }
}

