/*
 * Decompiled with CFR 0.152.
 */
package net.sansa_stack.rdf.spark.io;

import net.sansa_stack.rdf.spark.io.NTripleReader$;
import net.sansa_stack.rdf.spark.io.ParquetTransformer$;
import net.sansa_stack.rdf.spark.io.SQLSchema;
import net.sansa_stack.rdf.spark.io.SQLSchemaDefault$;
import org.apache.jena.graph.Triple;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.Row$;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.StringType$;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructField$;
import org.apache.spark.sql.types.StructType;
import scala.Function1;
import scala.Predef$;
import scala.Serializable;
import scala.StringContext;
import scala.collection.Seq;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.StringBuilder;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxesRunTime;

@ScalaSignature(bytes="\u0006\u0001\u0005]a\u0001B\u0001\u0003\u00015\u0011!\u0003U1scV,G\u000f\u0016:b]N4wN]7fe*\u00111\u0001B\u0001\u0003S>T!!\u0002\u0004\u0002\u000bM\u0004\u0018M]6\u000b\u0005\u001dA\u0011a\u0001:eM*\u0011\u0011BC\u0001\fg\u0006t7/Y0ti\u0006\u001c7NC\u0001\f\u0003\rqW\r^\u0002\u0001'\t\u0001a\u0002\u0005\u0002\u0010%5\t\u0001CC\u0001\u0012\u0003\u0015\u00198-\u00197b\u0013\t\u0019\u0002C\u0001\u0004B]f\u0014VM\u001a\u0005\t+\u0001\u0011)\u0019!C\u0001-\u000591/Z:tS>tW#A\f\u0011\u0005a\u0001S\"A\r\u000b\u0005iY\u0012aA:rY*\u0011Q\u0001\b\u0006\u0003;y\ta!\u00199bG\",'\"A\u0010\u0002\u0007=\u0014x-\u0003\u0002\"3\ta1\u000b]1sWN+7o]5p]\"A1\u0005\u0001B\u0001B\u0003%q#\u0001\u0005tKN\u001c\u0018n\u001c8!\u0011\u0015)\u0003\u0001\"\u0001'\u0003\u0019a\u0014N\\5u}Q\u0011q%\u000b\t\u0003Q\u0001i\u0011A\u0001\u0005\u0006+\u0011\u0002\ra\u0006\u0005\u0006W\u0001!\t\u0001L\u0001\u0010Kb\u0004xN\u001d;BgB\u000b'/];fiR\u0019Q\u0006M\u001d\u0011\u0005=q\u0013BA\u0018\u0011\u0005\u0011)f.\u001b;\t\u000bER\u0003\u0019\u0001\u001a\u0002\u0013%t\u0007/\u001e;QCRD\u0007CA\u001a7\u001d\tyA'\u0003\u00026!\u00051\u0001K]3eK\u001aL!a\u000e\u001d\u0003\rM#(/\u001b8h\u0015\t)\u0004\u0003C\u0003;U\u0001\u0007!'\u0001\u0006pkR\u0004X\u000f\u001e)bi\"DQ\u0001\u0010\u0001\u0005\u0002u\n\u0011\u0002\u001e:b]N4wN]7\u0015\u00075rt\bC\u00032w\u0001\u0007!\u0007C\u0003;w\u0001\u0007!\u0007C\u0003B\u0001\u0011\u0005!)\u0001\u0007ue&\u0004H.Z:UC\ndW\rF\u0001.\u0011\u001d!\u0005A1A\u0005\u0002\u0015\u000bQA]3hKb,\u0012A\u0012\t\u0003\u000f2k\u0011\u0001\u0013\u0006\u0003\u0013*\u000bA\u0001\\1oO*\t1*\u0001\u0003kCZ\f\u0017BA\u001cI\u0011\u0019q\u0005\u0001)A\u0005\r\u00061!/Z4fq\u0002BQ\u0001\u0015\u0001\u0005\u0002E\u000bac\u0019:fCR,GK]5qY\u0016\u001cH+\u00192mK\"Kg/\u001a\u000b\u0004%\u0012L\u0007CA*b\u001d\t!vL\u0004\u0002V=:\u0011a+\u0018\b\u0003/rs!\u0001W.\u000e\u0003eS!A\u0017\u0007\u0002\rq\u0012xn\u001c;?\u0013\u0005y\u0012BA\u000f\u001f\u0013\t)A$\u0003\u0002\u001b7%\u0011\u0001-G\u0001\ba\u0006\u001c7.Y4f\u0013\t\u00117MA\u0005ECR\fgI]1nK*\u0011\u0001-\u0007\u0005\bK>\u0003\n\u00111\u0001g\u0003\u0019\u00198\r[3nCB\u0011\u0001fZ\u0005\u0003Q\n\u0011\u0011bU)M'\u000eDW-\\1\t\u000b)|\u0005\u0019\u0001\u001a\u0002\u001dQ\f'\r\\3ESJ,7\r^8ss\"9A\u000eAI\u0001\n\u0003i\u0017\u0001I2sK\u0006$X\r\u0016:ja2,7\u000fV1cY\u0016D\u0015N^3%I\u00164\u0017-\u001e7uIE*\u0012A\u001c\u0016\u0003M>\\\u0013\u0001\u001d\t\u0003cZl\u0011A\u001d\u0006\u0003gR\f\u0011\"\u001e8dQ\u0016\u001c7.\u001a3\u000b\u0005U\u0004\u0012AC1o]>$\u0018\r^5p]&\u0011qO\u001d\u0002\u0012k:\u001c\u0007.Z2lK\u00124\u0016M]5b]\u000e,w!B=\u0003\u0011\u0003Q\u0018A\u0005)beF,X\r\u001e+sC:\u001chm\u001c:nKJ\u0004\"\u0001K>\u0007\u000b\u0005\u0011\u0001\u0012\u0001?\u0014\u0005mt\u0001\"B\u0013|\t\u0003qH#\u0001>\t\u000f\u0005\u00051\u0010\"\u0001\u0002\u0004\u0005)\u0011\r\u001d9msR\u0019q%!\u0002\t\u000bUy\b\u0019A\f\t\u000f\u0005%1\u0010\"\u0001\u0002\f\u0005!Q.Y5o)\ri\u0013Q\u0002\u0005\t\u0003\u001f\t9\u00011\u0001\u0002\u0012\u0005!\u0011M]4t!\u0011y\u00111\u0003\u001a\n\u0007\u0005U\u0001CA\u0003BeJ\f\u0017\u0010")
public class ParquetTransformer {
    private final SparkSession session;
    private final String regex;

    public static void main(String[] stringArray) {
        ParquetTransformer$.MODULE$.main(stringArray);
    }

    public static ParquetTransformer apply(SparkSession sparkSession) {
        return ParquetTransformer$.MODULE$.apply(sparkSession);
    }

    public SparkSession session() {
        return this.session;
    }

    public void exportAsParquet(String inputPath, String outputPath) {
        StructType schema = new StructType().add(new StructField("s", (DataType)StringType$.MODULE$, false, StructField$.MODULE$.apply$default$4())).add(new StructField("p", (DataType)StringType$.MODULE$, false, StructField$.MODULE$.apply$default$4())).add(new StructField("o", (DataType)StringType$.MODULE$, false, StructField$.MODULE$.apply$default$4()));
        RDD<Triple> rdd = NTripleReader$.MODULE$.load(this.session(), inputPath, NTripleReader$.MODULE$.load$default$3(), NTripleReader$.MODULE$.load$default$4(), NTripleReader$.MODULE$.load$default$5(), NTripleReader$.MODULE$.load$default$6());
        Dataset df = this.session().createDataFrame(rdd.map((Function1)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final Row apply(Triple t) {
                return Row$.MODULE$.apply((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{t.getSubject().toString(), t.getPredicate().toString(), t.getObject().toString()}));
            }
        }, ClassTag$.MODULE$.apply(Row.class)), schema);
        df.sort("s", (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0])).write().mode(SaveMode.Overwrite).partitionBy((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"p"})).parquet(outputPath);
    }

    public void transform(String inputPath, String outputPath) {
        String x$1 = inputPath;
        SQLSchema x$2 = this.createTriplesTableHive$default$1();
        Dataset<Row> triplesTable = this.createTriplesTableHive(x$2, x$1);
        triplesTable.printSchema();
        Dataset tmp = this.session().sql("SELECT * FROM triples");
        Predef$.MODULE$.println((Object)new StringBuilder().append((Object)"#triples: ").append((Object)BoxesRunTime.boxToLong((long)tmp.count())).toString());
        tmp.show(10);
        tmp.repartition(4).write().mode(SaveMode.Overwrite).format("parquet").saveAsTable("ntriples");
    }

    public void triplesTable() {
    }

    public String regex() {
        return this.regex;
    }

    public Dataset<Row> createTriplesTableHive(SQLSchema schema, String tableDirectory) {
        return this.session().sql(new StringOps(Predef$.MODULE$.augmentString(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"\n         |CREATE EXTERNAL TABLE IF NOT EXISTS ", "\n         |(", " STRING, ", " STRING, ", " STRING)\n         |ROW FORMAT  SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'\n         |WITH SERDEPROPERTIES(\n         |\"input.regex\" = '", "'\n         |)\n         |LOCATION '", "'\n      "})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{schema.triplesTable(), schema.subjectCol(), schema.predicateCol(), schema.objectCol(), this.regex(), tableDirectory})))).stripMargin());
    }

    public SQLSchema createTriplesTableHive$default$1() {
        return SQLSchemaDefault$.MODULE$;
    }

    public ParquetTransformer(SparkSession session) {
        this.session = session;
        this.regex = "(\\\\S+)\\\\s+(\\\\S+)\\\\s+(.*)";
    }
}

