/*
 * Decompiled with CFR 0.152.
 */
package com.linkedin.feathr.offline.source.dataloader;

import com.linkedin.feathr.common.exception.ErrorLabel;
import com.linkedin.feathr.common.exception.FeathrInputDataException;
import com.linkedin.feathr.offline.config.location.DataLocation;
import com.linkedin.feathr.offline.generation.SparkIOUtils$;
import com.linkedin.feathr.offline.job.DataSourceUtils$;
import com.linkedin.feathr.offline.job.LocalFeatureJoinJob$;
import com.linkedin.feathr.offline.source.dataloader.DataLoader;
import com.linkedin.feathr.offline.source.dataloader.DataLoaderHandler;
import com.linkedin.feathr.offline.util.DelimiterUtils$;
import com.linkedin.feathr.offline.util.FeathrUtils$;
import java.io.Serializable;
import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.logging.log4j.Logger;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.internal.SQLConf$;
import scala.Function0;
import scala.Function1;
import scala.None$;
import scala.Option;
import scala.Predef;
import scala.Predef$;
import scala.Some;
import scala.Tuple2;
import scala.collection.Seq;
import scala.collection.immutable.List;
import scala.collection.immutable.Map;
import scala.collection.immutable.MapLike;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayOps;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.ObjectRef;
import scala.runtime.java8.JFunction0;
import scala.util.control.Breaks$;

@ScalaSignature(bytes="\u0006\u0001\u0005]a!\u0002\u0007\u000e\u0001EI\u0002\u0002\u0003\u0013\u0001\u0005\u0003\u0005\u000b\u0011\u0002\u0014\t\u0011I\u0002!\u0011!Q\u0001\nMB\u0001B\u000f\u0001\u0003\u0002\u0003\u0006Ia\u000f\u0005\u0006\u0015\u0002!\ta\u0013\u0005\b!\u0002\u0011\r\u0011\"\u0001R\u0011\u0019)\u0006\u0001)A\u0005%\"9a\u000b\u0001b\u0001\n\u0003\t\u0006BB,\u0001A\u0003%!\u000bC\u0003Y\u0001\u0011\u0005\u0013\fC\u0003a\u0001\u0011\u0005\u0013\rC\u0003q\u0001\u0011\u0005\u0011OA\bCCR\u001c\u0007\u000eR1uC2{\u0017\rZ3s\u0015\tqq\"\u0001\u0006eCR\fGn\\1eKJT!\u0001E\t\u0002\rM|WO]2f\u0015\t\u00112#A\u0004pM\u001ad\u0017N\\3\u000b\u0005Q)\u0012A\u00024fCRD'O\u0003\u0002\u0017/\u0005AA.\u001b8lK\u0012LgNC\u0001\u0019\u0003\r\u0019w.\\\n\u0004\u0001i\u0001\u0003CA\u000e\u001f\u001b\u0005a\"\"A\u000f\u0002\u000bM\u001c\u0017\r\\1\n\u0005}a\"AB!osJ+g\r\u0005\u0002\"E5\tQ\"\u0003\u0002$\u001b\tQA)\u0019;b\u0019>\fG-\u001a:\u0002\u0005M\u001c8\u0001\u0001\t\u0003OAj\u0011\u0001\u000b\u0006\u0003S)\n1a]9m\u0015\tYC&A\u0003ta\u0006\u00148N\u0003\u0002.]\u00051\u0011\r]1dQ\u0016T\u0011aL\u0001\u0004_J<\u0017BA\u0019)\u00051\u0019\u0006/\u0019:l'\u0016\u001c8/[8o\u0003!awnY1uS>t\u0007C\u0001\u001b9\u001b\u0005)$B\u0001\u001a7\u0015\t9\u0014#\u0001\u0004d_:4\u0017nZ\u0005\u0003sU\u0012A\u0002R1uC2{7-\u0019;j_:\f!\u0003Z1uC2{\u0017\rZ3s\u0011\u0006tG\r\\3sgB\u0019A\bR$\u000f\u0005u\u0012eB\u0001 
B\u001b\u0005y$B\u0001!&\u0003\u0019a$o\\8u}%\tQ$\u0003\u0002D9\u00059\u0001/Y2lC\u001e,\u0017BA#G\u0005\u0011a\u0015n\u001d;\u000b\u0005\rc\u0002CA\u0011I\u0013\tIUBA\tECR\fGj\\1eKJD\u0015M\u001c3mKJ\fa\u0001P5oSRtD\u0003\u0002'N\u001d>\u0003\"!\t\u0001\t\u000b\u0011\"\u0001\u0019\u0001\u0014\t\u000bI\"\u0001\u0019A\u001a\t\u000bi\"\u0001\u0019A\u001e\u0002\u001bI,GO]=XC&$H+[7f+\u0005\u0011\u0006CA\u000eT\u0013\t!FDA\u0002J]R\faB]3uef<\u0016-\u001b;US6,\u0007%A\nj]&$\u0018.\u00197Ok6|eMU3ue&,7/\u0001\u000bj]&$\u0018.\u00197Ok6|eMU3ue&,7\u000fI\u0001\u000bY>\fGmU2iK6\fG#\u0001.\u0011\u0005msV\"\u0001/\u000b\u0005uc\u0013\u0001B1we>L!a\u0018/\u0003\rM\u001b\u0007.Z7b\u00035aw.\u00193ECR\fgI]1nKR\t!\r\u0005\u0002d[:\u0011A\r\u001c\b\u0003K.t!A\u001a6\u000f\u0005\u001dLgB\u0001 i\u0013\u0005y\u0013BA\u0017/\u0013\tYC&\u0003\u0002*U%\u00111\tK\u0005\u0003]>\u0014\u0011\u0002R1uC\u001a\u0013\u0018-\\3\u000b\u0005\rC\u0013A\u00067pC\u0012$\u0015\r^1Ge\u0006lWmV5uQJ+GO]=\u0015\u000b\t\u0014x0a\u0005\t\u000bM\\\u0001\u0019\u0001;\u0002!\u0011\fG/Y%P!\u0006\u0014\u0018-\\3uKJ\u001c\b\u0003B;zyrt!A^<\u0011\u0005yb\u0012B\u0001=\u001d\u0003\u0019\u0001&/\u001a3fM&\u0011!p\u001f\u0002\u0004\u001b\u0006\u0004(B\u0001=\u001d!\t)X0\u0003\u0002\u007fw\n11\u000b\u001e:j]\u001eDq!!\u0001\f\u0001\u0004\t\u0019!A\u0004k_\n\u001cuN\u001c4\u0011\t\u0005\u0015\u0011qB\u0007\u0003\u0003\u000fQA!!\u0003\u0002\f\u00051Q.\u00199sK\u0012T1!!\u0004-\u0003\u0019A\u0017\rZ8pa&!\u0011\u0011CA\u0004\u0005\u001dQuNY\"p]\u001aDa!!\u0006\f\u0001\u0004\u0011\u0016!\u0002:fiJL\b")
/**
 * Loads a batch data set, identified by a {@link DataLocation}, as an Avro {@link Schema}
 * or as a Spark {@code Dataset<Row>}.
 *
 * <p>DataFrame loading first consults the supplied {@link DataLoaderHandler}s, then the
 * location's own loader, then a plain CSV read, and finally retries the whole sequence a
 * bounded number of times, sleeping {@link #retryWaitTime()} milliseconds between attempts.
 *
 * <p>This source is decompiled Scala; the {@code log}/{@code bitmap$trans$0} pair below is
 * the compiler's encoding of a transient lazily-initialised logger.
 */
public class BatchDataLoader
implements DataLoader {
    private final SparkSession ss;
    private final DataLocation location;
    private final List<DataLoaderHandler> dataLoaderHandlers;
    /** Pause between load attempts, in milliseconds (passed straight to {@code Thread.sleep}). */
    private final int retryWaitTime;
    /** Retry budget captured at construction time; only used in the final error message. */
    private final int initialNumOfRetries;
    private transient Logger log;
    /** Set to {@code true} once {@link #log} has been initialised. */
    private volatile transient boolean bitmap$trans$0;

    /**
     * Initialises the logger under this instance's monitor if it has not been initialised yet.
     *
     * @return the logger
     */
    private Logger log$lzycompute() {
        BatchDataLoader batchDataLoader = this;
        synchronized (batchDataLoader) {
            if (!this.bitmap$trans$0) {
                this.log = DataLoader.log$(this);
                this.bitmap$trans$0 = true;
            }
        }
        return this.log;
    }

    /**
     * Returns the logger, creating it on first use.
     *
     * @return the logger obtained from {@code DataLoader.log$}
     */
    @Override
    public Logger log() {
        return !this.bitmap$trans$0 ? this.log$lzycompute() : this.log;
    }

    /**
     * @return the wait time between load attempts, in milliseconds
     */
    public int retryWaitTime() {
        return this.retryWaitTime;
    }

    /**
     * @return the retry budget that was configured when this loader was constructed
     */
    public int initialNumOfRetries() {
        return this.initialNumOfRetries;
    }

    /**
     * Reads the Avro schema from the first file directly under the location whose name ends
     * with {@code .avro}.
     *
     * @return the schema of that Avro data file
     * @throws FeathrInputDataException if the location contains no {@code .avro} file
     */
    @Override
    public Schema loadSchema() {
        Configuration conf = this.ss.sparkContext().hadoopConfiguration();
        Path inputDir = new Path(this.location.getPath());
        // Resolve the file system from the path itself rather than FileSystem.get(conf):
        // the latter always yields the default file system and fails for locations that
        // live on a different scheme. Unqualified paths still resolve to the default.
        FileSystem fs = inputDir.getFileSystem(conf);
        FileStatus[] status = fs.listStatus(inputDir);
        // Pick the first entry that looks like an Avro file, in listing order.
        FileStatus firstAvroFile = null;
        for (FileStatus candidate : status) {
            if (BatchDataLoader.$anonfun$loadSchema$1(candidate)) {
                firstAvroFile = candidate;
                break;
            }
        }
        if (firstAvroFile == null) {
            throw new FeathrInputDataException(ErrorLabel.FEATHR_USER_ERROR, new StringBuilder(75).append("Load the Avro schema for Avro data set in HDFS but no avro files found in ").append(this.location.getPath()).append(".").toString());
        }
        String dataFileName = firstAvroFile.getPath().getName();
        String dataFilePath = new Path(this.location.getPath(), dataFileName).toString();
        return DataSourceUtils$.MODULE$.getSchemaFromAvroDataFile(dataFilePath, new JobConf(conf));
    }

    /**
     * Loads the location as a DataFrame with no extra IO parameters, using the retry budget
     * from the SQL conf when the Spark context is local and from the Feathr job parameter
     * otherwise.
     *
     * @return the loaded data set
     */
    @Override
    public Dataset<Row> loadDataFrame() {
        // The job parameter is parsed unconditionally, exactly as in the original code.
        int retry = new StringOps(Predef$.MODULE$.augmentString(FeathrUtils$.MODULE$.getFeathrJobParam(this.ss.sparkContext().getConf(), FeathrUtils$.MODULE$.MAX_DATA_LOAD_RETRY()))).toInt();
        int retryCount = this.ss.sparkContext().isLocal() ? BoxesRunTime.unboxToInt((Object)SQLConf$.MODULE$.get().getConf(LocalFeatureJoinJob$.MODULE$.MAX_DATA_LOAD_RETRY())) : retry;
        return this.loadDataFrameWithRetry((Map<String, String>)((Map)Predef$.MODULE$.Map().apply((Seq)Nil$.MODULE$)), new JobConf(this.ss.sparkContext().hadoopConfiguration()), retryCount);
    }

    /**
     * Loads the location as a DataFrame, retrying on failure.
     *
     * <p>Each attempt proceeds as follows:
     * <ol>
     *   <li>the first handler whose {@code validatePath} accepts the path creates the frame;</li>
     *   <li>if no handler accepts it, {@code location.loadDf} is used;</li>
     *   <li>if either of those throws, the path is read as CSV with a header row and the
     *       delimiter taken from {@code spark.feathr.inputFormat.csvOptions.sep};</li>
     *   <li>if the CSV read throws as well and {@code retry > 0}, the method sleeps
     *       {@link #retryWaitTime()} ms and recurses with {@code retry - 1}.</li>
     * </ol>
     *
     * @param dataIOParameters extra IO parameters; they override the split-size entry derived
     *                         from {@code spark.feathr.input.split.size}
     * @param jobConf          Hadoop job configuration handed to the handlers
     * @param retry            number of retries still allowed after this attempt
     * @return the loaded data set
     * @throws FeathrInputDataException when the last allowed attempt fails; the failures of
     *                                  that attempt are attached as suppressed exceptions
     */
    public Dataset<Row> loadDataFrameWithRetry(Map<String, String> dataIOParameters, JobConf jobConf, int retry) {
        SparkConf sparkConf = this.ss.sparkContext().getConf();
        String inputSplitSize = sparkConf.get("spark.feathr.input.split.size", "");
        // Caller-supplied parameters are added last so they win over the default split size.
        Map dataIOParametersWithSplitSize = ((MapLike)Predef$.MODULE$.Map().apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Tuple2[]{Predef.ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc((Object)SparkIOUtils$.MODULE$.SPLIT_SIZE()), (Object)inputSplitSize)}))).$plus$plus(dataIOParameters);
        String dataPath = this.location.getPath();
        this.log().info(new StringBuilder(40).append("Loading ").append(this.location).append(" as DataFrame, using parameters ").append(dataIOParametersWithSplitSize).toString());
        String csvDelimiterOption = DelimiterUtils$.MODULE$.checkDelimiterOption(this.ss.sqlContext().getConf("spark.feathr.inputFormat.csvOptions.sep", ","));
        try {
            ObjectRef dfOpt = ObjectRef.create((Object)None$.MODULE$);
            // breakable/break lets the first accepting handler stop the iteration early.
            Breaks$.MODULE$.breakable((Function0)(JFunction0.mcV.sp & Serializable & scala.Serializable)() -> this.dataLoaderHandlers.foreach((Function1 & Serializable & scala.Serializable)dataLoaderHandler -> {
                BatchDataLoader.$anonfun$loadDataFrameWithRetry$2(dataPath, dfOpt, dataIOParametersWithSplitSize, jobConf, (DataLoaderHandler)dataLoaderHandler);
                return BoxedUnit.UNIT;
            }));
            Option option = (Option)dfOpt.elem;
            if (option instanceof Some) {
                Dataset handlerFrame = (Dataset)((Some)option).value();
                return handlerFrame;
            }
            // No handler claimed the path: fall back to the location's own loader.
            Dataset locationFrame = this.location.loadDf(this.ss, (Map<String, String>)dataIOParametersWithSplitSize);
            return locationFrame;
        }
        catch (Throwable primaryFailure) {
            // Previously this failure vanished without a trace; record it before falling back.
            this.log().info("Loading " + this.location + " failed with " + primaryFailure + ", falling back to the CSV reader");
            try {
                return this.ss.read().format("csv").option("header", "true").option("delimiter", csvDelimiterOption).load(dataPath);
            }
            catch (Exception fallbackFailure) {
                if (retry > 0) {
                    // Only announce a retry when one is actually going to happen.
                    this.log().info(new StringBuilder(40).append("Loading ").append(this.location).append(" failed, retrying for ").append(retry).append("-th time..").toString());
                    Thread.sleep(this.retryWaitTime());
                    return this.loadDataFrameWithRetry(dataIOParameters, jobConf, retry - 1);
                }
                FeathrInputDataException exhausted = new FeathrInputDataException(ErrorLabel.FEATHR_USER_ERROR, new StringBuilder(52).append("Failed to load ").append(dataPath).append(" after ").append(this.initialNumOfRetries()).append(" retries").append(" and retry time of ").append(this.retryWaitTime()).append("ms.").toString());
                // Keep both underlying failures reachable from the exception the caller sees.
                exhausted.addSuppressed(primaryFailure);
                exhausted.addSuppressed(fallbackFailure);
                throw exhausted;
            }
        }
    }

    /**
     * Predicate used by {@link #loadSchema()}: whether the file name ends with {@code .avro}.
     */
    public static final /* synthetic */ boolean $anonfun$loadSchema$1(FileStatus x$1) {
        return x$1.getPath().getName().endsWith(".avro");
    }

    /**
     * Per-handler step of {@link #loadDataFrameWithRetry}: prints the handler, and if it
     * accepts the path, stores the frame it creates in {@code dfOpt$1} and breaks out of the
     * enclosing {@code breakable} block.
     */
    public static final /* synthetic */ void $anonfun$loadDataFrameWithRetry$2(String dataPath$1, ObjectRef dfOpt$1, Map dataIOParametersWithSplitSize$1, JobConf jobConf$1, DataLoaderHandler dataLoaderHandler) {
        Predef$.MODULE$.println((Object)new StringBuilder(27).append("Applying dataLoaderHandler ").append(dataLoaderHandler).toString());
        if (BoxesRunTime.unboxToBoolean((Object)dataLoaderHandler.validatePath().apply((Object)dataPath$1))) {
            dfOpt$1.elem = new Some(dataLoaderHandler.createDataFrame().apply((Object)dataPath$1, (Object)dataIOParametersWithSplitSize$1, (Object)jobConf$1));
            throw Breaks$.MODULE$.break();
        }
    }

    /**
     * Creates a loader for one location.
     *
     * @param ss                 Spark session used for configuration lookup and reading
     * @param location           the data location to load
     * @param dataLoaderHandlers handlers consulted, in order, before the built-in loaders
     */
    public BatchDataLoader(SparkSession ss, DataLocation location, List<DataLoaderHandler> dataLoaderHandlers) {
        this.ss = ss;
        this.location = location;
        this.dataLoaderHandlers = dataLoaderHandlers;
        DataLoader.$init$(this);
        this.retryWaitTime = new StringOps(Predef$.MODULE$.augmentString(FeathrUtils$.MODULE$.getFeathrJobParam(ss.sparkContext().getConf(), FeathrUtils$.MODULE$.DATA_LOAD_WAIT_IN_MS()))).toInt();
        // Non-local runs take the budget from the Feathr job parameter, local runs from the SQL conf.
        this.initialNumOfRetries = !ss.sparkContext().isLocal() ? new StringOps(Predef$.MODULE$.augmentString(FeathrUtils$.MODULE$.getFeathrJobParam(ss.sparkContext().getConf(), FeathrUtils$.MODULE$.MAX_DATA_LOAD_RETRY()))).toInt() : BoxesRunTime.unboxToInt((Object)SQLConf$.MODULE$.get().getConf(LocalFeatureJoinJob$.MODULE$.MAX_DATA_LOAD_RETRY()));
    }
}

