/*
 * Decompiled with CFR 0.152.
 */
package com.ebiznext.comet.job.infer;

import com.ebiznext.comet.config.Settings;
import com.ebiznext.comet.config.SparkEnv;
import com.ebiznext.comet.schema.handlers.InferSchemaHandler$;
import com.ebiznext.comet.schema.model.Attribute;
import com.ebiznext.comet.schema.model.Domain;
import com.ebiznext.comet.schema.model.Metadata;
import com.ebiznext.comet.schema.model.Schema;
import java.io.Serializable;
import java.util.regex.Pattern;
import org.apache.hadoop.fs.Path;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders$;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import scala.Array$;
import scala.Function0;
import scala.Function1;
import scala.MatchError;
import scala.Option;
import scala.Option$;
import scala.Predef$;
import scala.Some;
import scala.Tuple2;
import scala.collection.LinearSeqOptimized;
import scala.collection.TraversableLike;
import scala.collection.immutable.$colon$colon;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayOps;
import scala.io.BufferedSource;
import scala.io.Codec$;
import scala.io.Source$;
import scala.math.Ordering;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.java8.JFunction0;
import scala.util.Try;
import scala.util.Try$;
import scala.util.matching.Regex;

@ScalaSignature(bytes="\u0006\u0001\u0005\u0005c\u0001B\b\u0011\u0001mA\u0001B\t\u0001\u0003\u0002\u0003\u0006Ya\t\u0005\u0006S\u0001!\tA\u000b\u0005\u0006_\u0001!\t\u0001\r\u0005\by\u0001\u0011\r\u0011\"\u0003>\u0011\u0019\t\u0005\u0001)A\u0005}!9!\t\u0001b\u0001\n\u0013\u0019\u0005B\u0002)\u0001A\u0003%A\tC\u0003R\u0001\u0011\u0005!\u000bC\u0003a\u0001\u0011\u0005\u0011\rC\u0003n\u0001\u0011\u0005a\u000eC\u0003q\u0001\u0011\u0005\u0011\u000fC\u0003t\u0001\u0011\u0005A\u000fC\u0003w\u0001\u0011\u0005q\u000f\u0003\u0004\u0012\u0001\u0011\u0005\u0011Q\u0004\u0002\u000f\u0013:4WM]*dQ\u0016l\u0017MS8c\u0015\t\t\"#A\u0003j]\u001a,'O\u0003\u0002\u0014)\u0005\u0019!n\u001c2\u000b\u0005U1\u0012!B2p[\u0016$(BA\f\u0019\u0003!)'-\u001b>oKb$(\"A\r\u0002\u0007\r|Wn\u0001\u0001\u0014\u0005\u0001a\u0002CA\u000f!\u001b\u0005q\"\"A\u0010\u0002\u000bM\u001c\u0017\r\\1\n\u0005\u0005r\"AB!osJ+g-\u0001\u0005tKR$\u0018N\\4t!\t!s%D\u0001&\u0015\t1C#\u0001\u0004d_:4\u0017nZ\u0005\u0003Q\u0015\u0012\u0001bU3ui&twm]\u0001\u0007y%t\u0017\u000e\u001e \u0015\u0003-\"\"\u0001\f\u0018\u0011\u00055\u0002Q\"\u0001\t\t\u000b\t\u0012\u00019A\u0012\u0002\t9\fW.Z\u000b\u0002cA\u0011!'\u000f\b\u0003g]\u0002\"\u0001\u000e\u0010\u000e\u0003UR!A\u000e\u000e\u0002\rq\u0012xn\u001c;?\u0013\tAd$\u0001\u0004Qe\u0016$WMZ\u0005\u0003um\u0012aa\u0015;sS:<'B\u0001\u001d\u001f\u0003!\u0019\b/\u0019:l\u000b:4X#\u0001 
\u0011\u0005\u0011z\u0014B\u0001!&\u0005!\u0019\u0006/\u0019:l\u000b:4\u0018!C:qCJ\\WI\u001c<!\u0003\u001d\u0019Xm]:j_:,\u0012\u0001\u0012\t\u0003\u000b:k\u0011A\u0012\u0006\u0003\u000f\"\u000b1a]9m\u0015\tI%*A\u0003ta\u0006\u00148N\u0003\u0002L\u0019\u00061\u0011\r]1dQ\u0016T\u0011!T\u0001\u0004_J<\u0017BA(G\u00051\u0019\u0006/\u0019:l'\u0016\u001c8/[8o\u0003!\u0019Xm]:j_:\u0004\u0013\u0001\u0003:fC\u00124\u0015\u000e\\3\u0015\u0005M3\u0006cA#Uc%\u0011QK\u0012\u0002\b\t\u0006$\u0018m]3u\u0011\u00159\u0006\u00021\u0001Y\u0003\u0011\u0001\u0018\r\u001e5\u0011\u0005esV\"\u0001.\u000b\u0005mc\u0016A\u00014t\u0015\ti&*\u0001\u0004iC\u0012|w\u000e]\u0005\u0003?j\u0013A\u0001U1uQ\u0006iq-\u001a;G_Jl\u0017\r\u001e$jY\u0016$\"!\r2\t\u000b\rL\u0001\u0019\u00013\u0002\u000b1Lg.Z:\u0011\u0007\u0015T\u0017G\u0004\u0002gQ:\u0011AgZ\u0005\u0002?%\u0011\u0011NH\u0001\ba\u0006\u001c7.Y4f\u0013\tYGN\u0001\u0003MSN$(BA5\u001f\u000319W\r^*fa\u0006\u0014\u0018\r^8s)\t\tt\u000eC\u0003d\u0015\u0001\u0007A-\u0001\fhKR$u.\\1j]\u0012K'/Z2u_JLh*Y7f)\t\t$\u000fC\u0003X\u0017\u0001\u0007\u0001,\u0001\thKR\u001c6\r[3nCB\u000bG\u000f^3s]R\u0011\u0011'\u001e\u0005\u0006/2\u0001\r\u0001W\u0001\u001aGJ,\u0017\r^3ECR\fgI]1nK^KG\u000f\u001b$pe6\fG\u000fF\u0004y\u0003\u001b\ty!a\u0005\u0011\u0007e\f9AD\u0002{\u0003\u000bq1a_A\u0002\u001d\ra\u0018\u0011\u0001\b\u0003{~t!\u0001\u000e@\n\u00035K!a\u0013'\n\u0005%S\u0015BA$I\u0013\tIg)\u0003\u0003\u0002\n\u0005-!!\u0003#bi\u00064%/Y7f\u0015\tIg\tC\u0003d\u001b\u0001\u0007A\r\u0003\u0004\u0002\u00125\u0001\r!M\u0001\tI\u0006$\u0018\rU1uQ\"9\u0011QC\u0007A\u0002\u0005]\u0011A\u00025fC\u0012,'\u000fE\u0002\u001e\u00033I1!a\u0007\u001f\u0005\u001d\u0011un\u001c7fC:$B\"a\b\u00022\u0005U\u0012\u0011HA\u001e\u0003\u007f\u0001b!!\t\u0002(\u0005-RBAA\u0012\u0015\r\t)CH\u0001\u0005kRLG.\u0003\u0003\u0002*\u0005\r\"a\u0001+ssB\u0019Q$!\f\n\u0007\u0005=bD\u0001\u0003V]&$\bBBA\u001a\u001d\u0001\u0007\u0011'\u0001\u0006e_6\f\u0017N\u001c(b[\u0016Da!a\u000e\u000f\u0001\
u0004\t\u0014AC:dQ\u0016l\u0017MT1nK\"1\u0011\u0011\u0003\bA\u0002EBa!!\u0010\u000f\u0001\u0004\t\u0014\u0001C:bm\u0016\u0004\u0016\r\u001e5\t\u000f\u0005Ua\u00021\u0001\u0002\u0018\u0001")
public class InferSchemaJob {
    private final Settings settings;
    private final SparkEnv sparkEnv;
    private final SparkSession session;

    public String name() {
        return "InferSchema";
    }

    private SparkEnv sparkEnv() {
        return this.sparkEnv;
    }

    private SparkSession session() {
        return this.session;
    }

    public Dataset<String> readFile(Path path) {
        return this.session().read().textFile(path.toString());
    }

    /*
     * Enabled force condition propagation
     * Lifted jumps to return sites
     */
    public String getFormatFile(List<String> lines) {
        String firstLine = (String)lines.head();
        String lastLine = (String)lines.last();
        Regex jsonRegexStart = new StringOps(Predef$.MODULE$.augmentString("\\{.*")).r();
        Regex jsonArrayRegexStart = new StringOps(Predef$.MODULE$.augmentString("\\[.*")).r();
        Regex jsonRegexEnd = new StringOps(Predef$.MODULE$.augmentString(".*\\}")).r();
        Regex jsonArrayRegexEnd = new StringOps(Predef$.MODULE$.augmentString(".*\\]")).r();
        Tuple2 tuple2 = new Tuple2((Object)firstLine, (Object)lastLine);
        if (tuple2 != null) {
            Option option;
            String string = (String)tuple2._1();
            String string2 = (String)tuple2._2();
            Option option2 = jsonRegexStart.unapplySeq((CharSequence)string);
            if (!option2.isEmpty() && option2.get() != null && ((LinearSeqOptimized)option2.get()).lengthCompare(0) == 0 && !(option = jsonRegexEnd.unapplySeq((CharSequence)string2)).isEmpty() && option.get() != null && ((LinearSeqOptimized)option.get()).lengthCompare(0) == 0) {
                return "JSON";
            }
        }
        if (tuple2 == null) return "DSV";
        String string = (String)tuple2._1();
        String string3 = (String)tuple2._2();
        Option option = jsonArrayRegexStart.unapplySeq((CharSequence)string);
        if (option.isEmpty()) return "DSV";
        if (option.get() == null) return "DSV";
        if (((LinearSeqOptimized)option.get()).lengthCompare(0) != 0) return "DSV";
        Option option3 = jsonArrayRegexEnd.unapplySeq((CharSequence)string3);
        if (option3.isEmpty()) return "DSV";
        if (option3.get() == null) return "DSV";
        if (((LinearSeqOptimized)option3.get()).lengthCompare(0) != 0) return "DSV";
        return "ARRAY_JSON";
    }

    public String getSeparator(List<String> lines) {
        String firstLine = (String)lines.head();
        Tuple2 separator = (Tuple2)new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])new ArrayOps.ofChar(Predef$.MODULE$.charArrayOps(firstLine.replaceAll("[A-Za-z0-9 \"'()@?!\u00e9\u00e8\u00ee\u00e0\u00c0\u00c9\u00c8\u00e7+]", "").toCharArray())).map((Function1 & Serializable & scala.Serializable)x$1 -> InferSchemaJob.$anonfun$getSeparator$1(BoxesRunTime.unboxToChar((Object)x$1)), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class))))).groupBy((Function1 & Serializable & scala.Serializable)x$2 -> BoxesRunTime.boxToCharacter((char)x$2._1$mcC$sp())).mapValues((Function1 & Serializable & scala.Serializable)x$3 -> BoxesRunTime.boxToInteger((int)InferSchemaJob.$anonfun$getSeparator$3(x$3))).toList().maxBy((Function1 & Serializable & scala.Serializable)x$4 -> BoxesRunTime.boxToInteger((int)x$4._2$mcI$sp()), (Ordering)Ordering.Int$.MODULE$);
        return ((Object)BoxesRunTime.boxToCharacter((char)separator._1$mcC$sp())).toString();
    }

    public String getDomainDirectoryName(Path path) {
        return path.toString().replace(path.getName(), "");
    }

    public String getSchemaPattern(Path path) {
        return path.getName();
    }

    public Dataset<Row> createDataFrameWithFormat(List<String> lines, String dataPath, boolean header) {
        Dataset dataset;
        String formatFile = this.getFormatFile(lines);
        String string = formatFile;
        if ("ARRAY_JSON".equals(string)) {
            RDD jsonRDD = this.session().sparkContext().wholeTextFiles(dataPath, this.session().sparkContext().wholeTextFiles$default$2()).map((Function1 & Serializable & scala.Serializable)x0$1 -> {
                String content;
                Tuple2 tuple2 = x0$1;
                if (tuple2 == null) {
                    throw new MatchError((Object)tuple2);
                }
                String string = content = (String)tuple2._2();
                return string;
            }, ClassTag$.MODULE$.apply(String.class));
            dataset = this.session().read().option("inferSchema", true).json(this.session().createDataset(jsonRDD, Encoders$.MODULE$.STRING()));
        } else if ("JSON".equals(string)) {
            dataset = this.session().read().format("json").option("inferSchema", true).load(dataPath);
        } else if ("DSV".equals(string)) {
            dataset = this.session().read().format("com.databricks.spark.csv").option("header", header).option("inferSchema", true).option("delimiter", this.getSeparator(lines)).option("parserLib", "UNIVOCITY").load(dataPath);
        } else {
            throw new MatchError((Object)string);
        }
        return dataset;
    }

    public Try<BoxedUnit> infer(String domainName, String schemaName, String dataPath, String savePath, boolean header) {
        return Try$.MODULE$.apply((Function0)(JFunction0.mcV.sp & Serializable & scala.Serializable)() -> {
            String format;
            Path path = new Path(dataPath);
            List lines = (List)((TraversableLike)Source$.MODULE$.fromFile(path.toString(), Codec$.MODULE$.fallbackSystemCodec()).getLines().toList().map((Function1 & Serializable & scala.Serializable)x$5 -> x$5.trim(), List$.MODULE$.canBuildFrom())).filter((Function1 & Serializable & scala.Serializable)x$6 -> BoxesRunTime.boxToBoolean((boolean)InferSchemaJob.$anonfun$infer$3(x$6)));
            Dataset<Row> dataframeWithFormat = this.createDataFrameWithFormat((List<String>)lines, dataPath, header);
            String string = format = this.getFormatFile((List<String>)lines);
            String string2 = "ARRAY_JSON";
            boolean array = !(string != null ? !string.equals(string2) : string2 != null);
            boolean withHeader = header;
            String separator = this.getSeparator((List<String>)lines);
            InferSchemaHandler$ inferSchema = InferSchemaHandler$.MODULE$;
            List<Attribute> attributes = inferSchema.createAttributes(dataframeWithFormat.schema(), $this.settings);
            Metadata metadata = inferSchema.createMetaData(format, (Option<Object>)Option$.MODULE$.apply((Object)BoxesRunTime.boxToBoolean((boolean)array)), (Option<Object>)Option$.MODULE$.apply((Object)BoxesRunTime.boxToBoolean((boolean)withHeader)), (Option<String>)Option$.MODULE$.apply((Object)separator));
            Schema schema = inferSchema.createSchema(schemaName, Pattern.compile(this.getSchemaPattern(path)), attributes, (Option<Metadata>)new Some((Object)metadata));
            String x$1 = domainName;
            String x$2 = this.getDomainDirectoryName(path);
            .colon.colon x$3 = new .colon.colon((Object)schema, (List)Nil$.MODULE$);
            Option<Metadata> x$4 = inferSchema.createDomain$default$3();
            Domain domain = inferSchema.createDomain(x$1, x$2, x$4, (List<Schema>)x$3);
            inferSchema.generateYaml(domain, savePath, $this.settings);
        });
    }

    public static final /* synthetic */ Tuple2 $anonfun$getSeparator$1(char x$1) {
        return new Tuple2.mcCI.sp(x$1, 1);
    }

    public static final /* synthetic */ int $anonfun$getSeparator$3(Tuple2[] x$3) {
        return new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])x$3)).size();
    }

    public static final /* synthetic */ boolean $anonfun$infer$3(String x$6) {
        return new StringOps(Predef$.MODULE$.augmentString(x$6)).nonEmpty();
    }

    public InferSchemaJob(Settings settings) {
        this.settings = settings;
        this.sparkEnv = new SparkEnv(this.name(), settings);
        this.session = this.sparkEnv().session();
    }
}

