/*
 * Decompiled with CFR 0.152.
 */
package ai.tripl.arc.util;

import ai.tripl.arc.api.API;
import ai.tripl.arc.api.API$Extract$;
import ai.tripl.arc.config.Error;
import ai.tripl.arc.util.ArcSchema$;
import ai.tripl.arc.util.ExtractUtils;
import ai.tripl.arc.util.log.logger.Logger;
import java.io.Serializable;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.Row$;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder;
import org.apache.spark.sql.catalyst.encoders.RowEncoder$;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.MetadataBuilder;
import org.apache.spark.sql.types.StructType;
import scala.Array$;
import scala.Function1;
import scala.Function2;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Option$;
import scala.Predef$;
import scala.Tuple2;
import scala.collection.BufferedIterator;
import scala.collection.GenSeq;
import scala.collection.MapLike;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.immutable.;
import scala.collection.immutable.List;
import scala.collection.immutable.Map;
import scala.collection.immutable.Map$;
import scala.collection.immutable.Nil$;
import scala.collection.mutable.ArrayOps;
import scala.math.Ordering$;
import scala.reflect.ClassTag$;
import scala.runtime.BoxesRunTime;
import scala.util.Either;
import scala.util.Left;
import scala.util.Right;

/**
 * Helper routines for extract stages: resolving the schema to apply to an
 * extract, and decorating an extracted dataset with Arc's internal metadata
 * columns ({@code _filename}, {@code _index}).
 *
 * <p>NOTE(review): this is CFR decompiler output; the {@code MODULE$} singleton
 * layout suggests it is the compiled form of a Scala {@code object}. Several
 * expressions below (for example {@code new .colon.colon(...)}) are decompiler
 * artefacts rather than hand-written Java.
 */
public final class ExtractUtils$ {
    /** The singleton instance. Assigned by the private constructor, which the static initializer runs once. */
    public static ExtractUtils$ MODULE$;

    static {
        new ExtractUtils$();
    }

    /**
     * Resolves the schema for an extract.
     *
     * <ul>
     *   <li>{@code Right(columns)}: an empty column list yields {@code None};
     *       otherwise the columns are converted with
     *       {@code API.Extract.toStructType} and wrapped in an {@code Option}.</li>
     *   <li>{@code Left(view)}: the table/view with that name is read through
     *       {@code spark.table(view)} and parsed with
     *       {@code ArcSchema.parseArcSchemaDataFrame}. A successful parse is
     *       converted and wrapped as above (without the empty-list check).</li>
     * </ul>
     *
     * @param schema either the name of a view holding the schema (left) or the already-parsed columns (right)
     * @param spark  session used to read the schema view
     * @param logger logger handed to the schema parser
     * @return the resolved schema, or {@code None} when an explicit but empty column list was supplied
     * @throws Exception  if the schema view parses with errors; the message lists them
     * @throws MatchError if {@code schema} (or the parse result) is neither a {@code Left} nor a {@code Right},
     *                    e.g. {@code null}
     */
    public Option<StructType> getSchema(Either<String, List<API.ExtractColumn>> schema, SparkSession spark, Logger logger) {
        if (schema instanceof Right) {
            List cols = (List)((Right)schema).value();
            // An explicitly empty column list means "no schema".
            if (Nil$.MODULE$.equals(cols)) {
                return None$.MODULE$;
            }
            return Option$.MODULE$.apply((Object)API$Extract$.MODULE$.toStructType((List<API.ExtractColumn>)cols));
        }
        if (schema instanceof Left) {
            String view = (String)((Left)schema).value();
            Either<List<Error.StageError>, List<API.ExtractColumn>> parseResult = ArcSchema$.MODULE$.parseArcSchemaDataFrame((Dataset<Row>)spark.table(view), logger);
            if (parseResult instanceof Right) {
                List cols = (List)((Right)parseResult).value();
                return Option$.MODULE$.apply((Object)API$Extract$.MODULE$.toStructType((List<API.ExtractColumn>)cols));
            }
            if (parseResult instanceof Left) {
                List errors = (List)((Left)parseResult).value();
                // Message previously read "... to cannot be parsed ..."; the stray "to" has been removed.
                throw new Exception("Schema view '" + view + "' cannot be parsed as it has errors: " + errors.mkString(", ") + ".");
            }
            throw new MatchError(parseResult);
        }
        throw new MatchError(schema);
    }

    /**
     * Adds Arc's internal metadata columns to a batch dataset.
     *
     * <p>The input is returned untouched when either {@code input} or
     * {@code arcContext} is streaming, or when {@code input} already contains
     * any of the columns {@code _filename}, {@code _index} or
     * {@code _monotonically_increasing_id}.
     *
     * <p>Otherwise three columns are added, each carrying {@code internal=true}
     * metadata: {@code _monotonically_increasing_id}, {@code _filename}
     * (from {@code input_file_name()}) and {@code _partition_id}
     * (from {@code spark_partition_id()}).
     *
     * <ul>
     *   <li>{@code contiguousIndex == false}: only {@code _partition_id} is
     *       dropped, so the result keeps {@code _monotonically_increasing_id}
     *       and {@code _filename} and has no {@code _index} column.</li>
     *   <li>{@code contiguousIndex == true}: the per-(file, partition) min/max
     *       ids are collected, turned into contiguous ranges per file, and every
     *       row's id is rewritten into that range. The rewritten value is
     *       exposed as {@code _index}; {@code _monotonically_increasing_id}
     *       and {@code _partition_id} are dropped.</li>
     * </ul>
     *
     * @param input           dataset to decorate
     * @param contiguousIndex whether to compute a gap-free per-file {@code _index}
     *                        (this runs an extra aggregation and {@code collect()})
     * @param spark           Spark session (not referenced directly in this method body)
     * @param arcContext      Arc context, consulted for its streaming flag
     * @return the decorated dataset, or {@code input} itself when no columns are added
     */
    public Dataset<Row> addInternalColumns(Dataset<Row> input, boolean contiguousIndex, SparkSession spark, API.ARCContext arcContext) {
        if (!input.isStreaming() && !arcContext.isStreaming()) {
            // Only decorate when none of the internal column names is already present in the input.
            if (new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])input.columns())).intersect((GenSeq)new .colon.colon((Object)"_filename", (List)new .colon.colon((Object)"_index", (List)new .colon.colon((Object)"_monotonically_increasing_id", (List)Nil$.MODULE$)))))).isEmpty()) {
                // Add the raw id, the source file name and the Spark partition id, all flagged internal=true.
                Dataset enrichedInput = input.withColumn("_monotonically_increasing_id", functions$.MODULE$.monotonically_increasing_id().as("_monotonically_increasing_id", new MetadataBuilder().putBoolean("internal", true).putString("description", "An Arc internal field describing where in _filename this row was originally sourced from.").build())).withColumn("_filename", functions$.MODULE$.input_file_name().as("_filename", new MetadataBuilder().putBoolean("internal", true).putString("description", "An Arc internal field describing where this row was originally sourced from.").build())).withColumn("_partition_id", functions$.MODULE$.spark_partition_id().as("_partition_id", new MetadataBuilder().putBoolean("internal", true).build()));
                if (contiguousIndex) {
                    // Build summary: filename -> (partitionId -> Partition).
                    // Step 1: aggregate min/max id per (_filename, _partition_id) and collect() the rows
                    //         into a local array of Partition objects (last constructor argument 0L;
                    //         presumably the argument order is filename, partitionId, min, max, offset).
                    // Step 2: group those by filename and remap each file's partitions (lambda below).
                    Map summary = (Map)new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])enrichedInput.groupBy((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.col("_filename"), functions$.MODULE$.col("_partition_id")})).agg(functions$.MODULE$.min(functions$.MODULE$.col("_monotonically_increasing_id")), (Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[]{functions$.MODULE$.max(functions$.MODULE$.col("_monotonically_increasing_id"))})).collect())).map((Function1 & Serializable & scala.Serializable)row -> new ExtractUtils.Partition(row.getString(0), Predef$.MODULE$.int2Integer(row.getInt(1)), row.getLong(2), row.getLong(3), 0L), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(ExtractUtils.Partition.class))))).groupBy((Function1 & Serializable & scala.Serializable)x$1 -> x$1.filename()).map((Function1 & Serializable & scala.Serializable)x0$1 -> {
                        Tuple2 tuple2 = x0$1;
                        if (tuple2 != null) {
                            String filename = (String)tuple2._1();
                            ExtractUtils.Partition[] partitions = (ExtractUtils.Partition[])tuple2._2();
                            // Sort this file's partitions by partition id, then scanLeft from an all-zero seed so
                            // each partition's new range starts right after the previous partition's new max.
                            return new Tuple2((Object)filename, new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])partitions)).sortBy((Function1 & Serializable & scala.Serializable)x$2 -> x$2.partitionId(), Ordering$.MODULE$.ordered((Function1)Predef$.MODULE$.$conforms())))).scanLeft((Object)new ExtractUtils.Partition("", Predef$.MODULE$.int2Integer(0), 0L, 0L, 0L), (Function2 & Serializable & scala.Serializable)(x0$2, x1$1) -> {
                                Tuple2 tuple2 = new Tuple2(x0$2, x1$1);
                                if (tuple2 != null) {
                                    ExtractUtils.Partition previousPartition = (ExtractUtils.Partition)tuple2._1();
                                    ExtractUtils.Partition partition = (ExtractUtils.Partition)tuple2._2();
                                    // New range: starts at previous max + 1 and keeps the original width (max - min).
                                    // The last argument keeps the low 33 bits of the original min; Spark documents
                                    // monotonically_increasing_id as partition id in the upper 31 bits and the
                                    // record number within the partition in the lower 33 bits.
                                    return new ExtractUtils.Partition(partition.filename(), partition.partitionId(), previousPartition.max() + 1L, previousPartition.max() + 1L + (partition.max() - partition.min()), partition.min() & 0x1FFFFFFFFL);
                                }
                                throw new MatchError((Object)tuple2);
                            // drop(1) discards the scan seed; the remainder is keyed by partition id (first entry per id).
                            }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(ExtractUtils.Partition.class))))).drop(1))).groupBy((Function1 & Serializable & scala.Serializable)x$3 -> x$3.partitionId()).map((Function1 & Serializable & scala.Serializable)x0$3 -> {
                                Tuple2 tuple2 = x0$3;
                                if (tuple2 != null) {
                                    Integer partitionId = (Integer)tuple2._1();
                                    ExtractUtils.Partition[] partitionGroup = (ExtractUtils.Partition[])tuple2._2();
                                    return new Tuple2((Object)partitionId, new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])partitionGroup)).head());
                                }
                                throw new MatchError((Object)tuple2);
                            }, Map$.MODULE$.canBuildFrom()));
                        }
                        throw new MatchError((Object)tuple2);
                    }, Map$.MODULE$.canBuildFrom());
                    // The rewritten rows keep the schema of enrichedInput, so reuse it for the encoder.
                    ExpressionEncoder typedEncoder = RowEncoder$.MODULE$.apply(enrichedInput.schema());
                    return enrichedInput.mapPartitions((Function1 & Serializable & scala.Serializable)partition -> {
                        BufferedIterator bufferedPartition = partition.buffered();
                        boolean bl = bufferedPartition.hasNext();
                        // Empty partition: nothing to rewrite.
                        if (!bl) {
                            return partition;
                        }
                        if (bl) {
                            // Peek at the first row to resolve the field positions once per partition.
                            // The partition id is read from this first row and reused for every row.
                            Row head = (Row)bufferedPartition.head();
                            int filenameIndex = head.fieldIndex("_filename");
                            int partitionId = head.getInt(head.fieldIndex("_partition_id"));
                            int monotonicallyIncreasingIdIndex = head.fieldIndex("_monotonically_increasing_id");
                            return bufferedPartition.map((Function1 & Serializable & scala.Serializable)row -> {
                                // Look up this row's (filename, partition) summary; both Option.get() calls
                                // throw if the entry is missing.
                                ExtractUtils.Partition partitionSummary = (ExtractUtils.Partition)((MapLike)summary.get((Object)row.getString(filenameIndex)).get()).get((Object)Predef$.MODULE$.int2Integer(partitionId)).get();
                                // Position of the row relative to the first row of this file within the partition.
                                long rowNumber = (row.getLong(monotonicallyIncreasingIdIndex) & 0x1FFFFFFFFL) - partitionSummary.offset();
                                // Overwrite the id in place with rowNumber shifted into the partition's new range.
                                return Row$.MODULE$.fromSeq((Seq)row.toSeq().updated(monotonicallyIncreasingIdIndex, (Object)BoxesRunTime.boxToLong((long)(rowNumber + partitionSummary.min())), Seq$.MODULE$.canBuildFrom()));
                            });
                        }
                        // Unreachable for a boolean; residue of the decompiled pattern match.
                        throw new MatchError((Object)BoxesRunTime.boxToBoolean((boolean)bl));
                    // Expose the rewritten id as _index, re-attach the _filename metadata, drop the helper columns.
                    }, (Encoder)typedEncoder).withColumn("_index", functions$.MODULE$.col("_monotonically_increasing_id").as("_index", new MetadataBuilder().putBoolean("internal", true).putString("description", "An Arc internal field describing where in _filename this row was originally sourced from.").build())).withColumn("_filename", functions$.MODULE$.col("_filename").as("_filename", new MetadataBuilder().putBoolean("internal", true).putString("description", "An Arc internal field describing where this row was originally sourced from.").build())).drop("_monotonically_increasing_id").drop("_partition_id");
                }
                // Non-contiguous mode: keep the raw _monotonically_increasing_id, drop only the partition id.
                return enrichedInput.drop("_partition_id");
            }
            // Internal columns already present: leave the input as is.
            return input;
        }
        // Streaming input or streaming context: leave the input as is.
        return input;
    }

    /** Private: the only instance is created by the static initializer and published through {@code MODULE$}. */
    private ExtractUtils$() {
        MODULE$ = this;
    }
}

