/*
 * Decompiled with CFR 0.152.
 */
package ai.starlake.job.infer;

import ai.starlake.config.Settings;
import ai.starlake.config.SparkEnv;
import ai.starlake.config.SparkEnv$;
import ai.starlake.schema.handlers.InferSchemaHandler$;
import ai.starlake.schema.model.Attribute;
import ai.starlake.schema.model.Domain;
import ai.starlake.schema.model.Format;
import ai.starlake.schema.model.Metadata;
import ai.starlake.schema.model.Metadata$;
import ai.starlake.schema.model.Mode;
import ai.starlake.schema.model.Partition;
import ai.starlake.schema.model.Schema;
import ai.starlake.schema.model.Sink;
import ai.starlake.schema.model.WriteMode;
import java.io.Serializable;
import java.util.regex.Pattern;
import org.apache.hadoop.fs.Path;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders$;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import scala.Array$;
import scala.Function0;
import scala.Function1;
import scala.MatchError;
import scala.Option;
import scala.Option$;
import scala.Predef$;
import scala.Some;
import scala.Tuple2;
import scala.collection.LinearSeqOptimized;
import scala.collection.Seq;
import scala.collection.TraversableLike;
import scala.collection.immutable.;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayOps;
import scala.io.Codec$;
import scala.io.Source$;
import scala.math.Ordering;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.java8.JFunction0;
import scala.util.Try;
import scala.util.Try$;
import scala.util.matching.Regex;

@ScalaSignature(bytes="\u0006\u0001\u0005ub\u0001B\b\u0011\u0001eA\u0001\u0002\t\u0001\u0003\u0002\u0003\u0006Y!\t\u0005\u0006O\u0001!\t\u0001\u000b\u0005\u0006[\u0001!\tA\f\u0005\bu\u0001\u0011\r\u0011\"\u0003<\u0011\u0019y\u0004\u0001)A\u0005y!9\u0001\t\u0001b\u0001\n\u0013\t\u0005B\u0002(\u0001A\u0003%!\tC\u0003P\u0001\u0011\u0005\u0001\u000bC\u0003_\u0001\u0011\u0005q\fC\u0003l\u0001\u0011\u0005A\u000eC\u0003o\u0001\u0011\u0005q\u000eC\u0003r\u0001\u0011\u0005!\u000fC\u0003u\u0001\u0011\u0005Q\u000f\u0003\u0004\u0012\u0001\u0011\u0005\u0011\u0011\u0004\u0002\u000f\u0013:4WM]*dQ\u0016l\u0017MS8c\u0015\t\t\"#A\u0003j]\u001a,'O\u0003\u0002\u0014)\u0005\u0019!n\u001c2\u000b\u0005U1\u0012\u0001C:uCJd\u0017m[3\u000b\u0003]\t!!Y5\u0004\u0001M\u0011\u0001A\u0007\t\u00037yi\u0011\u0001\b\u0006\u0002;\u0005)1oY1mC&\u0011q\u0004\b\u0002\u0007\u0003:L(+\u001a4\u0002\u0011M,G\u000f^5oON\u0004\"AI\u0013\u000e\u0003\rR!\u0001\n\u000b\u0002\r\r|gNZ5h\u0013\t13E\u0001\u0005TKR$\u0018N\\4t\u0003\u0019a\u0014N\\5u}Q\t\u0011\u0006\u0006\u0002+YA\u00111\u0006A\u0007\u0002!!)\u0001E\u0001a\u0002C\u0005!a.Y7f+\u0005y\u0003C\u0001\u00198\u001d\t\tT\u0007\u0005\u0002395\t1G\u0003\u000251\u00051AH]8pizJ!A\u000e\u000f\u0002\rA\u0013X\rZ3g\u0013\tA\u0014H\u0001\u0004TiJLgn\u001a\u0006\u0003mq\t\u0001b\u001d9be.,eN^\u000b\u0002yA\u0011!%P\u0005\u0003}\r\u0012\u0001b\u00159be.,eN^\u0001\ngB\f'o[#om\u0002\nqa]3tg&|g.F\u0001C!\t\u0019E*D\u0001E\u0015\t)e)A\u0002tc2T!a\u0012%\u0002\u000bM\u0004\u0018M]6\u000b\u0005%S\u0015AB1qC\u000eDWMC\u0001L\u0003\ry'oZ\u0005\u0003\u001b\u0012\u0013Ab\u00159be.\u001cVm]:j_:\f\u0001b]3tg&|g\u000eI\u0001\te\u0016\fGMR5mKR\u0011\u0011\u000b\u0016\t\u0004\u0007J{\u0013BA*E\u0005\u001d!\u0015\r^1tKRDQ!\u0016\u0005A\u0002Y\u000bA\u0001]1uQB\u0011q\u000bX\u0007\u00021*\u0011\u0011LW\u0001\u0003MNT!a\u0017%\u0002\r!\fGm\\8q\u0013\ti\u0006L\u0001\u0003QCRD\u0017!D4fi\u001a{'/\\1u\r&dW\r\u0006\u00020A\")\u0011-\u0003a\u0001E\u0006)A.\u001b8fgB\u00191\r[\u0018\u000f\u0
005\u00114gB\u0001\u001af\u0013\u0005i\u0012BA4\u001d\u0003\u001d\u0001\u0018mY6bO\u0016L!!\u001b6\u0003\t1K7\u000f\u001e\u0006\u0003Or\tAbZ3u'\u0016\u0004\u0018M]1u_J$\"aL7\t\u000b\u0005T\u0001\u0019\u00012\u0002-\u001d,G\u000fR8nC&tG)\u001b:fGR|'/\u001f(b[\u0016$\"a\f9\t\u000bU[\u0001\u0019\u0001,\u0002!\u001d,GoU2iK6\f\u0007+\u0019;uKJtGCA\u0018t\u0011\u0015)F\u00021\u0001W\u0003e\u0019'/Z1uK\u0012\u000bG/\u0019$sC6,w+\u001b;i\r>\u0014X.\u0019;\u0015\u000fY\fI!a\u0003\u0002\u0010A\u0019q/a\u0001\u000f\u0007a\f\tA\u0004\u0002z\u007f:\u0011!P \b\u0003wvt!A\r?\n\u0003-K!!\u0013&\n\u0005\u001dC\u0015BA#G\u0013\t9G)\u0003\u0003\u0002\u0006\u0005\u001d!!\u0003#bi\u00064%/Y7f\u0015\t9G\tC\u0003b\u001b\u0001\u0007!\r\u0003\u0004\u0002\u000e5\u0001\raL\u0001\tI\u0006$\u0018\rU1uQ\"9\u0011\u0011C\u0007A\u0002\u0005M\u0011A\u00025fC\u0012,'\u000fE\u0002\u001c\u0003+I1!a\u0006\u001d\u0005\u001d\u0011un\u001c7fC:$B\"a\u0007\u0002.\u0005E\u0012QGA\u001c\u0003w\u0001b!!\b\u0002$\u0005\u001dRBAA\u0010\u0015\r\t\t\u0003H\u0001\u0005kRLG.\u0003\u0003\u0002&\u0005}!a\u0001+ssB\u00191$!\u000b\n\u0007\u0005-BD\u0001\u0003V]&$\bBBA\u0018\u001d\u0001\u0007q&\u0001\u0006e_6\f\u0017N\u001c(b[\u0016Da!a\r\u000f\u0001\u0004y\u0013AC:dQ\u0016l\u0017MT1nK\"1\u0011Q\u0002\bA\u0002=Ba!!\u000f\u000f\u0001\u0004y\u0013\u0001C:bm\u0016\u0004\u0016\r\u001e5\t\u000f\u0005Ea\u00021\u0001\u0002\u0014\u0001")
public class InferSchemaJob {
    private final Settings settings;
    private final SparkEnv sparkEnv;
    private final SparkSession session;

    /**
     * Returns the fixed name of this job.
     *
     * @return the literal {@code "InferSchema"}
     */
    public String name() {
        final String jobName = "InferSchema";
        return jobName;
    }

    /**
     * Accessor for the {@link SparkEnv} held by this job.
     *
     * @return the value of the {@code sparkEnv} field
     */
    private SparkEnv sparkEnv() {
        final SparkEnv env = this.sparkEnv;
        return env;
    }

    /**
     * Accessor for the {@link SparkSession} held by this job.
     *
     * @return the value of the {@code session} field
     */
    private SparkSession session() {
        final SparkSession spark = this.session;
        return spark;
    }

    /**
     * Loads the file at {@code path} as a dataset of text lines.
     *
     * @param path location of the file; its string form is handed to {@code textFile}
     * @return the dataset returned by {@code session().read().textFile(...)}
     */
    public Dataset<String> readFile(Path path) {
        SparkSession spark = this.session();
        return spark.read().textFile(path.toString());
    }

    /**
     * Guesses the format of a file from the first and last entries of {@code lines}.
     *
     * <p>The checks run in this order and the first hit wins:
     * <ol>
     *   <li>first entry matches <code>\{.*</code> and last entry matches <code>.*\}</code>: {@code "JSON"}</li>
     *   <li>first entry matches <code>\[.*</code> and last entry matches <code>.*\]</code>: {@code "ARRAY_JSON"}</li>
     *   <li>first entry matches {@code <.*} and last entry matches {@code .*>}: {@code "XML"}</li>
     *   <li>anything else: {@code "DSV"}</li>
     * </ol>
     *
     * @param lines lines of the file; {@code head()} and {@code last()} are read from it
     * @return one of {@code "JSON"}, {@code "ARRAY_JSON"}, {@code "XML"} or {@code "DSV"}
     */
    public String getFormatFile(List<String> lines) {
        String opening = (String)lines.head();
        String closing = (String)lines.last();
        Regex objectStart = new StringOps(Predef$.MODULE$.augmentString("\\{.*")).r();
        Regex arrayStart = new StringOps(Predef$.MODULE$.augmentString("\\[.*")).r();
        Regex objectEnd = new StringOps(Predef$.MODULE$.augmentString(".*\\}")).r();
        Regex arrayEnd = new StringOps(Predef$.MODULE$.augmentString(".*\\]")).r();
        Regex markupStart = new StringOps(Predef$.MODULE$.augmentString("<.*")).r();
        Regex markupEnd = new StringOps(Predef$.MODULE$.augmentString(".*>")).r();
        if (InferSchemaJob.matchesWithoutGroups(objectStart, opening) && InferSchemaJob.matchesWithoutGroups(objectEnd, closing)) {
            return "JSON";
        }
        if (InferSchemaJob.matchesWithoutGroups(arrayStart, opening) && InferSchemaJob.matchesWithoutGroups(arrayEnd, closing)) {
            return "ARRAY_JSON";
        }
        if (InferSchemaJob.matchesWithoutGroups(markupStart, opening) && InferSchemaJob.matchesWithoutGroups(markupEnd, closing)) {
            return "XML";
        }
        return "DSV";
    }

    /**
     * Tells whether {@code regex.unapplySeq(text)} yields a non-empty option holding a
     * non-null sequence of exactly zero captured groups.
     * NOTE(review): unapplySeq is presumably a whole-string match - confirm against the Scala Regex docs.
     */
    private static boolean matchesWithoutGroups(Regex regex, String text) {
        Option groups = regex.unapplySeq((CharSequence)text);
        return !groups.isEmpty() && groups.get() != null && ((LinearSeqOptimized)groups.get()).lengthCompare(0) == 0;
    }

    /**
     * Picks the most frequent "non-content" character of the first line as the separator.
     *
     * <p>Letters, digits, spaces, quotes, parentheses, a handful of punctuation marks and
     * accented letters are stripped from the first line. The remaining characters are
     * grouped and counted, and the character with the highest count is returned.
     *
     * <p>NOTE(review): if stripping leaves no character at all, {@code maxBy} presumably
     * fails on the empty list - confirm and decide whether a default is wanted.
     *
     * @param lines lines of the file; only {@code head()} is read
     * @return the winning character as a one-character string
     */
    public String getSeparator(List<String> lines) {
        String headerLine = (String)lines.head();
        // Whatever survives this replacement is a separator candidate.
        char[] candidates = headerLine.replaceAll("[A-Za-z0-9 \"'()@?!\u00e9\u00e8\u00ee\u00e0\u00c0\u00c9\u00c8\u00e7+]", "").toCharArray();
        Tuple2 winner = (Tuple2)new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])new ArrayOps.ofChar(Predef$.MODULE$.charArrayOps(candidates))
            .map((Function1 & Serializable & scala.Serializable)ch -> InferSchemaJob.$anonfun$getSeparator$1(BoxesRunTime.unboxToChar((Object)ch)), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)))))
            .groupBy((Function1 & Serializable & scala.Serializable)pair -> BoxesRunTime.boxToCharacter((char)pair._1$mcC$sp()))
            .mapValues((Function1 & Serializable & scala.Serializable)group -> BoxesRunTime.boxToInteger((int)InferSchemaJob.$anonfun$getSeparator$3(group)))
            .toList()
            .maxBy((Function1 & Serializable & scala.Serializable)entry -> BoxesRunTime.boxToInteger((int)InferSchemaJob.$anonfun$getSeparator$4(entry)), (Ordering)Ordering.Int$.MODULE$);
        if (winner == null) {
            throw new MatchError((Object)winner);
        }
        return ((Object)BoxesRunTime.boxToCharacter((char)winner._1$mcC$sp())).toString();
    }

    /**
     * Returns the string form of {@code path} with its final component removed.
     *
     * <p>Only the trailing occurrence of the file name is stripped. The previous
     * implementation used {@code String.replace}, which removes every occurrence of the
     * name and therefore mangled paths whose directories contain the file name
     * (for example {@code /sales/sales} became {@code //}).
     *
     * @param path path of the data file
     * @return {@code path.toString()} without the trailing {@code path.getName()}
     */
    public String getDomainDirectoryName(Path path) {
        String fullPath = path.toString();
        String fileName = path.getName();
        if (fullPath.endsWith(fileName)) {
            return fullPath.substring(0, fullPath.length() - fileName.length());
        }
        // The string form does not end with the name: keep the historical behavior.
        return fullPath.replace(fileName, "");
    }

    /**
     * Returns the text used as the schema pattern for {@code path}.
     *
     * @param path path of the data file
     * @return the result of {@code path.getName()}
     */
    public String getSchemaPattern(Path path) {
        final String fileName = path.getName();
        return fileName;
    }

    /**
     * Loads {@code dataPath} with a reader chosen from the format detected by
     * {@link #getFormatFile}.
     *
     * <ul>
     *   <li>{@code "ARRAY_JSON"}: every file is read whole via {@code wholeTextFiles},
     *       and the contents are parsed with the JSON reader</li>
     *   <li>{@code "JSON"}: {@code format("json")}</li>
     *   <li>{@code "XML"}: {@code format("com.databricks.spark.xml")}</li>
     *   <li>{@code "DSV"}: {@code format("com.databricks.spark.csv")} with the given
     *       {@code header} flag and the delimiter from {@link #getSeparator}</li>
     * </ul>
     * Every reader is configured with {@code inferSchema = true}.
     *
     * @param lines    lines of the file, used for format and separator detection
     * @param dataPath location handed to the Spark reader
     * @param header   value of the {@code "header"} option (DSV only)
     * @return the loaded data frame
     * @throws MatchError if the detected format is none of the four above
     */
    public Dataset<Row> createDataFrameWithFormat(List<String> lines, String dataPath, boolean header) {
        String detected = this.getFormatFile(lines);
        if ("ARRAY_JSON".equals(detected)) {
            // wholeTextFiles yields (file, content) pairs; only the content is kept.
            RDD contents = this.session().sparkContext().wholeTextFiles(dataPath, this.session().sparkContext().wholeTextFiles$default$2()).map((Function1 & Serializable & scala.Serializable)entry -> {
                Tuple2 fileAndContent = entry;
                if (fileAndContent == null) {
                    throw new MatchError((Object)fileAndContent);
                }
                return (String)fileAndContent._2();
            }, ClassTag$.MODULE$.apply(String.class));
            return this.session().read().option("inferSchema", true).json(this.session().createDataset(contents, Encoders$.MODULE$.STRING()));
        }
        if ("JSON".equals(detected)) {
            return this.session().read().format("json").option("inferSchema", true).load(dataPath);
        }
        if ("XML".equals(detected)) {
            return this.session().read().format("com.databricks.spark.xml").option("inferSchema", true).load(dataPath);
        }
        if ("DSV".equals(detected)) {
            return this.session().read()
                .format("com.databricks.spark.csv")
                .option("header", header)
                .option("inferSchema", true)
                .option("delimiter", this.getSeparator(lines))
                .option("parserLib", "UNIVOCITY")
                .load(dataPath);
        }
        throw new MatchError((Object)detected);
    }

    public Try<BoxedUnit> infer(String domainName, String schemaName, String dataPath, String savePath, boolean header) {
        return Try$.MODULE$.apply((Function0)(JFunction0.mcV.sp & Serializable & scala.Serializable)() -> {
            String format;
            Path path = new Path(dataPath);
            List lines = (List)((TraversableLike)Source$.MODULE$.fromFile(path.toString(), Codec$.MODULE$.fallbackSystemCodec()).getLines().toList().map((Function1 & Serializable & scala.Serializable)x$5 -> x$5.trim(), List$.MODULE$.canBuildFrom())).filter((Function1 & Serializable & scala.Serializable)x$6 -> BoxesRunTime.boxToBoolean((boolean)InferSchemaJob.$anonfun$infer$3(x$6)));
            Dataset<Row> dataframeWithFormat = this.createDataFrameWithFormat((List<String>)lines, dataPath, header);
            String string = format = this.getFormatFile((List<String>)lines);
            String string2 = "ARRAY_JSON";
            boolean array = !(string != null ? !string.equals(string2) : string2 != null);
            boolean withHeader = header;
            String separator = this.getSeparator((List<String>)lines);
            InferSchemaHandler$ inferSchema = InferSchemaHandler$.MODULE$;
            List<Attribute> attributes = inferSchema.createAttributes(dataframeWithFormat.schema(), $this.settings);
            Metadata metadata = inferSchema.createMetaData(format, (Option<Object>)Option$.MODULE$.apply((Object)BoxesRunTime.boxToBoolean((boolean)array)), (Option<Object>)Option$.MODULE$.apply((Object)BoxesRunTime.boxToBoolean((boolean)withHeader)), (Option<String>)Option$.MODULE$.apply((Object)separator));
            Schema schema = inferSchema.createSchema(schemaName, Pattern.compile(this.getSchemaPattern(path)), attributes, (Option<Metadata>)new Some((Object)metadata));
            Some x$1 = new Some((Object)this.getDomainDirectoryName(path));
            Option<Mode> x$2 = Metadata$.MODULE$.apply$default$1();
            Option<Format> x$3 = Metadata$.MODULE$.apply$default$2();
            Option<String> x$4 = Metadata$.MODULE$.apply$default$3();
            Option<Object> x$52 = Metadata$.MODULE$.apply$default$4();
            Option<Object> x$62 = Metadata$.MODULE$.apply$default$5();
            Option<Object> x$7 = Metadata$.MODULE$.apply$default$6();
            Option<String> x$8 = Metadata$.MODULE$.apply$default$7();
            Option<String> x$9 = Metadata$.MODULE$.apply$default$8();
            Option<String> x$10 = Metadata$.MODULE$.apply$default$9();
            Option<WriteMode> x$11 = Metadata$.MODULE$.apply$default$10();
            Option<Partition> x$12 = Metadata$.MODULE$.apply$default$11();
            Option<Sink> x$13 = Metadata$.MODULE$.apply$default$12();
            Option<String> x$14 = Metadata$.MODULE$.apply$default$13();
            Option<Seq<String>> x$15 = Metadata$.MODULE$.apply$default$14();
            Option<Map<String, String>> x$16 = Metadata$.MODULE$.apply$default$15();
            Option<List<String>> x$17 = Metadata$.MODULE$.apply$default$17();
            Option<String> x$18 = Metadata$.MODULE$.apply$default$18();
            Option<Map<String, String>> x$19 = Metadata$.MODULE$.apply$default$19();
            Option<String> x$20 = Metadata$.MODULE$.apply$default$20();
            Option<Map<String, String>> x$21 = Metadata$.MODULE$.apply$default$21();
            Domain domain = inferSchema.createDomain(domainName, (Option<Metadata>)new Some((Object)new Metadata(x$2, x$3, x$4, x$52, x$62, x$7, x$8, x$9, x$10, x$11, x$12, x$13, x$14, x$15, x$16, (Option<String>)x$1, x$17, x$18, x$19, x$20, x$21)), (List<Schema>)new .colon.colon((Object)schema, (List)Nil$.MODULE$));
            inferSchema.generateYaml(domain, savePath, $this.settings);
        });
    }

    /**
     * Lambda body used by {@code getSeparator}: pairs a character with the count 1.
     *
     * @param x$1 the character to wrap
     * @return a {@code (char, int)} tuple holding {@code x$1} and {@code 1}
     */
    public static final /* synthetic */ Tuple2 $anonfun$getSeparator$1(char x$1) {
        final int singleOccurrence = 1;
        return new Tuple2.mcCI.sp(x$1, singleOccurrence);
    }

    /**
     * Lambda body used by {@code getSeparator}: size of one group of tuples.
     *
     * @param x$3 the tuples grouped under one character
     * @return the length of {@code x$3}
     */
    public static final /* synthetic */ int $anonfun$getSeparator$3(Tuple2[] x$3) {
        final int occurrences = x$3.length;
        return occurrences;
    }

    /**
     * Lambda body used by {@code getSeparator}: extracts the integer second element of a tuple.
     *
     * @param x0$1 a tuple whose second element is read as an int
     * @return the second element of {@code x0$1}
     * @throws MatchError if {@code x0$1} is {@code null}
     */
    public static final /* synthetic */ int $anonfun$getSeparator$4(Tuple2 x0$1) {
        if (x0$1 == null) {
            throw new MatchError((Object)x0$1);
        }
        return x0$1._2$mcI$sp();
    }

    /**
     * Lambda body used by {@code infer} to filter lines: keeps strings for which
     * {@code StringOps.nonEmpty()} is true.
     *
     * @param x$6 the line to test
     * @return the result of {@code nonEmpty()} on the wrapped string
     */
    public static final /* synthetic */ boolean $anonfun$infer$3(String x$6) {
        StringOps wrapped = new StringOps(Predef$.MODULE$.augmentString(x$6));
        return wrapped.nonEmpty();
    }

    public InferSchemaJob(Settings settings) {
        this.settings = settings;
        this.sparkEnv = new SparkEnv(this.name(), SparkEnv$.MODULE$.$lessinit$greater$default$2(), settings);
        this.session = this.sparkEnv().session();
    }
}

