/*
 * Decompiled with CFR 0.152.
 */
package ai.h2o.sparkling.examples;

import ai.h2o.sparkling.H2OContext$;
import ai.h2o.sparkling.ml.algos.H2OGBM;
import ai.h2o.sparkling.ml.params.H2OCommonParams;
import ai.h2o.sparkling.ml.params.H2OGBMParams;
import java.io.File;
import java.io.Serializable;
import org.apache.spark.ml.Pipeline;
import org.apache.spark.ml.PipelineModel;
import org.apache.spark.ml.PipelineStage;
import org.apache.spark.ml.feature.RegexTokenizer;
import org.apache.spark.ml.feature.StopWordsRemover;
import org.apache.spark.ml.feature.Word2Vec;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.Row$;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.SparkSession$;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.StringType$;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructField$;
import org.apache.spark.sql.types.StructType;
import scala.Array$;
import scala.Function0;
import scala.Function1;
import scala.Predef$;
import scala.Tuple2;
import scala.collection.GenIterable;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.TraversableOnce;
import scala.collection.immutable.Map;
import scala.collection.mutable.ArrayOps;
import scala.reflect.ClassTag$;
import scala.runtime.BoxesRunTime;

public final class CraigslistJobTitlesApp$ {
    public static CraigslistJobTitlesApp$ MODULE$;

    static {
        new CraigslistJobTitlesApp$();
    }

    public void main(String[] args) {
        SparkSession spark = SparkSession$.MODULE$.builder().appName("Craigslist Job Titles").getOrCreate();
        Dataset<Row> titlesTable = this.loadTitlesTable(spark);
        PipelineModel model = this.fitModelPipeline(titlesTable);
        this.show(this.predictAndAssert(spark, "school teacher having holidays every month", model, "education"));
        this.show(this.predictAndAssert(spark, "Financial accountant CPA preferred", model, "accounting"));
    }

    public Dataset<Row> loadTitlesTable(SparkSession spark) {
        String titlesDataPath = "./examples/smalldata/craigslistJobTitles.csv";
        String titlesDataFile = new StringBuilder(7).append("file://").append(new File(titlesDataPath).getAbsolutePath()).toString();
        return spark.read().option("inferSchema", "true").option("header", "true").csv(titlesDataFile);
    }

    public PipelineModel fitModelPipeline(Dataset<Row> train) {
        RegexTokenizer tokenizer = ((RegexTokenizer)new RegexTokenizer().setInputCol("jobtitle").setOutputCol("tokenized")).setMinTokenLength(2).setGaps(false).setPattern("[a-zA-Z]+");
        StopWordsRemover stopWordsRemover = new StopWordsRemover().setInputCol(tokenizer.getOutputCol()).setOutputCol("jobtitles_tokenized").setCaseSensitive(false);
        Word2Vec word2Vec = new Word2Vec().setInputCol(stopWordsRemover.getOutputCol()).setOutputCol("word2vec");
        H2OContext$.MODULE$.getOrCreate();
        H2OGBM gbm = (H2OGBM)((H2OGBMParams)((H2OCommonParams)((H2OGBMParams)((H2OCommonParams)((H2OGBMParams)new H2OGBM().setFeaturesCol(word2Vec.getOutputCol())).setNtrees(50)).setSplitRatio(0.8)).setMaxDepth(6).setDistribution("AUTO")).setColumnsToCategorical("category", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0]))).setLabelCol("category");
        Pipeline pipeline = new Pipeline().setStages((PipelineStage[])((Object[])new PipelineStage[]{tokenizer, stopWordsRemover, word2Vec, gbm}));
        return pipeline.fit(train);
    }

    public Tuple2<String, Map<String, Object>> predictAndAssert(SparkSession spark, String jobTitle, PipelineModel model, String expected) {
        Tuple2<String, Map<String, Object>> prediction = this.predict(spark, jobTitle, model);
        Object object = prediction._1();
        String string = expected;
        Predef$.MODULE$.assert(!(object != null ? !object.equals(string) : string != null), (Function0 & Serializable & scala.Serializable)() -> new StringBuilder(40).append("Expected category was: ").append(expected).append(", but predicted: ").append(prediction._1()).toString());
        return prediction;
    }

    public Tuple2<String, Map<String, Object>> predict(SparkSession spark, String jobTitle, PipelineModel model) {
        StructType titleSchema = new StructType((StructField[])((Object[])new StructField[]{new StructField("jobtitle", (DataType)StringType$.MODULE$, false, StructField$.MODULE$.apply$default$4())}));
        RDD titleRDD = spark.sparkContext().parallelize((Seq)Seq$.MODULE$.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{jobTitle})), spark.sparkContext().parallelize$default$2(), ClassTag$.MODULE$.apply(String.class)).map((Function1 & Serializable & scala.Serializable)x$1 -> Row$.MODULE$.apply((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{x$1})), ClassTag$.MODULE$.apply(Row.class));
        Dataset titleDF = spark.createDataFrame(titleRDD, titleSchema);
        Dataset prediction = model.transform(titleDF);
        String predictedCategory = ((Row)prediction.select("prediction", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0])).head()).getString(0);
        Dataset probabilitiesDF = prediction.select("detailed_prediction.probabilities.*", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0]));
        String[] probabilityNames = (String[])new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])probabilitiesDF.schema().fields())).map((Function1 & Serializable & scala.Serializable)x$2 -> x$2.name(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)));
        Seq probabilityValues = (Seq)((Row)probabilitiesDF.head()).toSeq().map((Function1 & Serializable & scala.Serializable)x$3 -> BoxesRunTime.boxToDouble((double)CraigslistJobTitlesApp$.$anonfun$predict$3(x$3)), Seq$.MODULE$.canBuildFrom());
        Map probabilities = new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])probabilityNames)).zip((GenIterable)probabilityValues, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class))))).toMap(Predef$.MODULE$.$conforms());
        return new Tuple2((Object)predictedCategory, (Object)probabilities);
    }

    public void show(Tuple2<String, Map<String, Object>> pred) {
        Predef$.MODULE$.println((Object)new StringBuilder(2).append((String)pred._1()).append(": ").append(((TraversableOnce)pred._2()).mkString("\n[", "\n ", "]\n")).toString());
    }

    public static final /* synthetic */ double $anonfun$predict$3(Object x$3) {
        return BoxesRunTime.unboxToDouble((Object)x$3);
    }

    private CraigslistJobTitlesApp$() {
        MODULE$ = this;
    }
}

