/*
 * Decompiled with CFR 0.152.
 */
package ai.h2o.sparkling.examples;

import ai.h2o.sparkling.ml.algos.H2OGBM;
import ai.h2o.sparkling.ml.params.H2OAlgoSharedTreeParams;
import ai.h2o.sparkling.ml.params.H2OCommonSupervisedParams;
import java.io.File;
import org.apache.spark.h2o.H2OContext$;
import org.apache.spark.ml.Pipeline;
import org.apache.spark.ml.PipelineModel;
import org.apache.spark.ml.PipelineStage;
import org.apache.spark.ml.feature.RegexTokenizer;
import org.apache.spark.ml.feature.StopWordsRemover;
import org.apache.spark.ml.feature.Word2Vec;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.Row$;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.SparkSession$;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.StringType$;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructField$;
import org.apache.spark.sql.types.StructType;
import scala.Function0;
import scala.Function1;
import scala.Predef$;
import scala.Serializable;
import scala.StringContext;
import scala.Tuple2;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.TraversableOnce;
import scala.collection.immutable.Map;
import scala.collection.mutable.StringBuilder;
import scala.reflect.ClassTag$;

/**
 * Singleton entry point for the "Craigslist Job Titles" example.
 *
 * <p>Loads a CSV of job titles, fits a four-stage Spark ML pipeline
 * (regex tokenizer, stop-word remover, Word2Vec, H2O GBM) on it, and then
 * predicts and prints the category of two sample job titles.
 *
 * <p>NOTE(review): this is decompiler output; the {@code $} class-name suffix and
 * the {@code MODULE$} field look like the compiled form of a Scala {@code object}
 * — confirm against the original source before editing the code itself.
 */
public final class CraigslistJobTitlesApp$ {
    /** The single instance of this class; assigned by the private constructor. */
    public static final CraigslistJobTitlesApp$ MODULE$;

    static {
        // The instance is not stored here: the private constructor writes it to MODULE$.
        new CraigslistJobTitlesApp$();
    }

    /**
     * Runs the example end to end: obtains a Spark session, loads the training
     * table, fits the pipeline, and for two hard-coded job titles predicts the
     * category, asserts it matches the expected one, and prints the result.
     *
     * <p>The Spark session is not explicitly stopped in this method.
     *
     * @param args command-line arguments; not read by this method
     */
    public void main(String[] args) {
        SparkSession spark = SparkSession$.MODULE$.builder().appName("Craigslist Job Titles").getOrCreate();
        Dataset<Row> titlesTable = this.loadTitlesTable(spark);
        PipelineModel model = this.fitModelPipeline(titlesTable);
        this.show(this.predictAndAssert(spark, "school teacher having holidays every month", model, "education"));
        this.show(this.predictAndAssert(spark, "Financial accountant CPA preferred", model, "accounting"));
    }

    /**
     * Reads the job-titles CSV into a data frame.
     *
     * <p>The relative path {@code ./examples/smalldata/craigslistJobTitles.csv} is
     * turned into an absolute path via {@link File#getAbsolutePath()} and prefixed
     * with {@code file://}. The CSV is read with the {@code header} and
     * {@code inferSchema} options both set to {@code "true"}.
     *
     * @param spark session used to read the file
     * @return the data frame produced by the CSV reader
     */
    public Dataset<Row> loadTitlesTable(SparkSession spark) {
        String titlesDataPath = "./examples/smalldata/craigslistJobTitles.csv";
        // Decompiled form of a string interpolation: "file://" + <absolute path> + "".
        // NOTE(review): plain concatenation presumably assumes a POSIX-style absolute
        // path (leading '/'); confirm whether other path formats must be supported.
        String titlesDataFile = new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"file://", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{new File(titlesDataPath).getAbsolutePath()}));
        return spark.read().option("inferSchema", "true").option("header", "true").csv(titlesDataFile);
    }

    /**
     * Builds and fits the text-classification pipeline.
     *
     * <p>Stages, in order:
     * <ol>
     *   <li>{@code RegexTokenizer}: reads column {@code jobtitle}, writes
     *       {@code tokenized}; pattern {@code [a-zA-Z]+}, gaps disabled,
     *       minimum token length 2.</li>
     *   <li>{@code StopWordsRemover}: reads the tokenizer output, writes
     *       {@code jobtitles_tokenized}; case-insensitive.</li>
     *   <li>{@code Word2Vec}: reads the stop-word remover output, writes
     *       {@code word2vec}.</li>
     *   <li>{@code H2OGBM}: features from the Word2Vec output column, label column
     *       {@code category}, 50 trees, max depth 6, split ratio 0.8,
     *       distribution {@code "AUTO"}, detailed prediction column enabled.</li>
     * </ol>
     *
     * @param train data frame to fit on; the stages reference the columns
     *              {@code jobtitle} and {@code category}
     * @return the fitted pipeline model
     */
    public PipelineModel fitModelPipeline(Dataset<Row> train) {
        // With gaps disabled the pattern is set to describe the tokens themselves
        // (runs of ASCII letters) — see the RegexTokenizer documentation for setGaps.
        RegexTokenizer tokenizer = ((RegexTokenizer)new RegexTokenizer().setInputCol("jobtitle").setOutputCol("tokenized")).setMinTokenLength(2).setGaps(false).setPattern("[a-zA-Z]+");
        StopWordsRemover stopWordsRemover = new StopWordsRemover().setInputCol(tokenizer.getOutputCol()).setOutputCol("jobtitles_tokenized").setCaseSensitive(false);
        Word2Vec word2Vec = new Word2Vec().setInputCol(stopWordsRemover.getOutputCol()).setOutputCol("word2vec");
        // Return value is discarded; presumably called only for its side effect of
        // making an H2O context available before the GBM stage is created — confirm.
        H2OContext$.MODULE$.getOrCreate();
        // The intermediate casts are decompiler artifacts of the chained setters
        // returning different parameter-trait types.
        H2OGBM gbm = (H2OGBM)((H2OCommonSupervisedParams)((H2OAlgoSharedTreeParams)((H2OAlgoSharedTreeParams)new H2OGBM().setFeaturesCol(word2Vec.getOutputCol())).setNtrees(50).setSplitRatio(0.8)).setMaxDepth(6).setDistribution("AUTO").setColumnsToCategorical("category", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0]))).setLabelCol("category").setWithDetailedPredictionCol(true);
        Pipeline pipeline = new Pipeline().setStages((PipelineStage[])((Object[])new PipelineStage[]{tokenizer, stopWordsRemover, word2Vec, gbm}));
        return pipeline.fit(train);
    }

    /**
     * Predicts the category of a job title and asserts that it equals the
     * expected category.
     *
     * <p>The check is delegated to Scala's {@code Predef.assert} with a lazily
     * built message of the form
     * {@code "Expected category was: <expected>, but predicted: <predicted>"}.
     * NOTE(review): {@code Predef.assert} is expected to throw when the condition
     * is false — confirm the exact exception type against the Scala library docs.
     *
     * @param spark    session passed through to {@link #predict}
     * @param jobTitle job title text to classify
     * @param model    fitted pipeline model used for the prediction
     * @param expected category the prediction is compared against
     * @return the prediction tuple returned by {@link #predict}, unchanged
     */
    public Tuple2<String, Map<String, Object>> predictAndAssert(SparkSession spark, String jobTitle, PipelineModel model, String expected) {
        Tuple2<String, Map<String, Object>> prediction = this.predict(spark, jobTitle, model);
        Object object = prediction._1();
        String string = expected;
        // Null-safe equality: the condition is true when both values are null or
        // when the predicted category equals the expected one.
        // The second argument is a Function0 that builds the failure message on demand.
        Predef$.MODULE$.assert(!(object != null ? !object.equals(string) : string != null), (Function0)new Serializable(expected, prediction){
            public static final long serialVersionUID = 0L;
            // Values captured from the enclosing method.
            private final String expected$1;
            private final Tuple2 prediction$1;

            /** Builds the assertion failure message from the captured values. */
            public final String apply() {
                return new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"Expected category was: ", ", but predicted: ", ""})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{this.expected$1, this.prediction$1._1()}));
            }
            {
                this.expected$1 = expected$1;
                this.prediction$1 = prediction$1;
            }
        });
        return prediction;
    }

    /**
     * Runs the fitted pipeline on a single job title.
     *
     * <p>Wraps the title in a one-row, one-column data frame (column
     * {@code jobtitle}, string, declared non-nullable), transforms it with the
     * model, and reads two values from the first row of the result: the
     * {@code prediction} column as a string and the
     * {@code detailed_prediction.probabilities} field as a map.
     *
     * @param spark    session used to create the RDD and the data frame
     * @param jobTitle job title text to classify
     * @param model    fitted pipeline model to apply
     * @return a tuple of (predicted category, probabilities map)
     */
    public Tuple2<String, Map<String, Object>> predict(SparkSession spark, String jobTitle, PipelineModel model) {
        // The fourth StructField argument comes from the companion's default-value
        // accessor (apply$default$4), i.e. the original call left it at its default.
        StructType titleSchema = new StructType((StructField[])((Object[])new StructField[]{new StructField("jobtitle", (DataType)StringType$.MODULE$, false, StructField$.MODULE$.apply$default$4())}));
        // Single-element RDD of the title, mapped to a single-field Row. The second
        // parallelize argument is likewise taken from its default-value accessor.
        RDD titleRDD = spark.sparkContext().parallelize((Seq)Seq$.MODULE$.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{jobTitle})), spark.sparkContext().parallelize$default$2(), ClassTag$.MODULE$.apply(String.class)).map((Function1)new Serializable(){
            public static final long serialVersionUID = 0L;

            /** Wraps one title string in a single-field Row. */
            public final Row apply(String x$1) {
                return Row$.MODULE$.apply((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{x$1}));
            }
        }, ClassTag$.MODULE$.apply(Row.class));
        Dataset titleDF = spark.createDataFrame(titleRDD, titleSchema);
        Dataset prediction = model.transform(titleDF);
        // NOTE(review): the two select(...).head() calls below are separate actions
        // on the same transformed data frame; confirm whether this evaluates the
        // pipeline twice and whether that matters here.
        String predictedCategory = ((Row)prediction.select("prediction", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0])).head()).getString(0);
        Map probabilities = ((Row)prediction.select("detailed_prediction.probabilities", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0])).head()).getMap(0).toMap(Predef$.MODULE$.$conforms());
        return new Tuple2((Object)predictedCategory, (Object)probabilities);
    }

    /**
     * Prints a prediction: the first tuple element, then {@code ": "}, then the
     * entries of the second element joined by {@code mkString} with prefix
     * {@code "\n["}, separator {@code "\n "} and suffix {@code "]\n"}.
     *
     * @param pred tuple of (predicted category, probabilities map) to print
     */
    public void show(Tuple2<String, Map<String, Object>> pred) {
        Predef$.MODULE$.println((Object)new StringBuilder().append((Object)((String)pred._1())).append((Object)": ").append((Object)((TraversableOnce)pred._2()).mkString("\n[", "\n ", "]\n")).toString());
    }

    /** Private so the only instance is the one created by the static initializer; publishes it in {@code MODULE$}. */
    private CraigslistJobTitlesApp$() {
        MODULE$ = this;
    }
}

