/*
 * Decompiled with CFR 0.152.
 */
package io.openlineage.spark.agent.lifecycle;

import io.openlineage.client.OpenLineage;
import io.openlineage.client.utils.DatasetIdentifier;
import io.openlineage.spark.agent.util.PathUtils;
import io.openlineage.spark.api.AbstractInputDatasetBuilder;
import io.openlineage.spark.api.OpenLineageContext;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.stream.Collectors;
import lombok.NonNull;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.spark.rdd.HadoopRDD;
import org.apache.spark.rdd.NewHadoopRDD;
import org.apache.spark.rdd.RDD;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Builds OpenLineage input datasets from the file input paths configured on a
 * {@link HadoopRDD} (old {@code mapred} API) or a {@link NewHadoopRDD}
 * (new {@code mapreduce} API).
 *
 * <p>Each configured input path is converted to a {@link URI}, passed through
 * {@link #getDatasetUri(URI)}, and turned into one {@link OpenLineage.InputDataset}.
 */
public class HadoopRDDInputDatasetBuilder
extends AbstractInputDatasetBuilder<RDD<?>> {
    private static final Logger log = LoggerFactory.getLogger(HadoopRDDInputDatasetBuilder.class);

    /**
     * Creates a builder bound to the given OpenLineage context.
     *
     * @param context the OpenLineage context handed to the superclass; must not be {@code null}
     * @throws NullPointerException if {@code context} is {@code null}
     */
    public HadoopRDDInputDatasetBuilder(@NonNull OpenLineageContext context) {
        super(context);
        // Explicit check backing the @NonNull contract declared on the parameter.
        if (context == null) {
            throw new NullPointerException("context is marked non-null but is null");
        }
    }

    /**
     * Tells whether this builder handles the given RDD.
     *
     * @param x the RDD to test
     * @return {@code true} if {@code x} is a {@link HadoopRDD} or a {@link NewHadoopRDD}
     */
    @Override
    public boolean isDefinedAt(RDD<?> x) {
        return x instanceof HadoopRDD || x instanceof NewHadoopRDD;
    }

    /**
     * Produces one input dataset per input path found on the given RDD.
     *
     * @param x the RDD whose input paths are inspected
     * @return the input datasets, in the order returned by {@link #findInputs(RDD)};
     *     empty when no input paths could be determined
     */
    public Collection<OpenLineage.InputDataset> apply(RDD<?> x) {
        return this.findInputs(x).stream()
                .map(this::buildInputDataset)
                .collect(Collectors.toList());
    }

    /**
     * Builds an input dataset whose name and namespace come from the
     * {@link DatasetIdentifier} that {@link PathUtils#fromURI(URI)} derives from the URI.
     *
     * @param uri the dataset location
     * @return the input dataset carrying the derived name and namespace
     */
    protected OpenLineage.InputDataset buildInputDataset(URI uri) {
        DatasetIdentifier identifier = PathUtils.fromURI(uri);
        return new OpenLineage.InputDatasetBuilder()
                .name(identifier.getName())
                .namespace(identifier.getNamespace())
                .build();
    }

    /**
     * Collects the dataset URIs for every input path configured on the RDD.
     *
     * @param rdd the RDD whose input paths are inspected
     * @return a new mutable list with one URI per input path, each passed through
     *     {@link #getDatasetUri(URI)}; empty when {@link #getInputPaths(RDD)} returns
     *     {@code null} or an empty array
     */
    protected List<URI> findInputs(RDD<?> rdd) {
        Path[] inputPaths = this.getInputPaths(rdd);
        if (inputPaths == null) {
            return new ArrayList<>();
        }
        List<URI> result = new ArrayList<>(inputPaths.length);
        for (Path path : inputPaths) {
            result.add(this.getDatasetUri(path.toUri()));
        }
        return result;
    }

    /**
     * Reads the file input paths from the Hadoop configuration attached to the RDD.
     *
     * @param rdd the RDD to inspect
     * @return the configured input paths, or {@code null} when {@code rdd} is neither a
     *     {@link HadoopRDD} nor a {@link NewHadoopRDD}, or when an {@link IOException}
     *     is raised while resolving them (the failure is logged, not propagated)
     */
    protected Path[] getInputPaths(RDD<?> rdd) {
        if (rdd instanceof HadoopRDD) {
            // Old mapred API: the paths are read straight from the RDD's JobConf.
            return FileInputFormat.getInputPaths(((HadoopRDD<?, ?>) rdd).getJobConf());
        }
        if (rdd instanceof NewHadoopRDD) {
            try {
                // New mapreduce API needs a JobContext; Job.getInstance replaces the
                // deprecated Job(Configuration) constructor.
                Job job = Job.getInstance(((NewHadoopRDD<?, ?>) rdd).getConf());
                return org.apache.hadoop.mapreduce.lib.input.FileInputFormat.getInputPaths(job);
            } catch (IOException e) {
                // Best effort: report the failure and fall through to the null result.
                log.error("Openlineage spark agent could not get input paths", e);
            }
        }
        return null;
    }

    /**
     * Maps an input path URI to the URI used to identify the dataset.
     *
     * <p>This implementation returns the argument unchanged; subclasses may override it.
     *
     * @param pathUri the URI of an input path
     * @return the URI to use for the dataset
     */
    protected URI getDatasetUri(URI pathUri) {
        return pathUri;
    }
}

