/*
 * Decompiled with CFR 0.152.
 */
package org.apache.hudi.utilities.sources.helpers;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import org.apache.avro.Schema;
import org.apache.hudi.AvroConversionUtils;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.testutils.HoodieSparkClientTestHarness;
import org.apache.hudi.utilities.schema.FilebasedSchemaProvider;
import org.apache.hudi.utilities.sources.helpers.CloudObjectMetadata;
import org.apache.hudi.utilities.sources.helpers.CloudObjectsSelectorCommon;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.types.StructType;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

public class TestCloudObjectsSelectorCommon
extends HoodieSparkClientTestHarness {
    @BeforeEach
    void setUp() {
        this.initSparkContexts();
    }

    @AfterEach
    public void teardown() throws Exception {
        this.cleanupResources();
    }

    @Test
    public void emptyMetadataReturnsEmptyOption() {
        CloudObjectsSelectorCommon cloudObjectsSelectorCommon = new CloudObjectsSelectorCommon(new TypedProperties());
        Option result = cloudObjectsSelectorCommon.loadAsDataset(this.sparkSession, Collections.emptyList(), "json", Option.empty(), 1);
        Assertions.assertFalse((boolean)result.isPresent());
    }

    @Test
    public void filesFromMetadataRead() {
        CloudObjectsSelectorCommon cloudObjectsSelectorCommon = new CloudObjectsSelectorCommon(new TypedProperties());
        List<CloudObjectMetadata> input = Collections.singletonList(new CloudObjectMetadata("src/test/resources/data/partitioned/country=US/state=CA/data.json", 1L));
        Option result = cloudObjectsSelectorCommon.loadAsDataset(this.sparkSession, input, "json", Option.empty(), 1);
        Assertions.assertTrue((boolean)result.isPresent());
        Assertions.assertEquals((long)1L, (long)((Dataset)result.get()).count());
        Row expected = RowFactory.create((Object[])new Object[]{"some data"});
        Assertions.assertEquals(Collections.singletonList(expected), (Object)((Dataset)result.get()).collectAsList());
    }

    @Test
    public void partitionValueAddedToRow() {
        List<CloudObjectMetadata> input = Collections.singletonList(new CloudObjectMetadata("src/test/resources/data/partitioned/country=US/state=CA/data.json", 1L));
        TypedProperties properties = new TypedProperties();
        properties.put((Object)"hoodie.streamer.source.cloud.data.partition.fields.from.path", (Object)"country,state");
        CloudObjectsSelectorCommon cloudObjectsSelectorCommon = new CloudObjectsSelectorCommon(properties);
        Option result = cloudObjectsSelectorCommon.loadAsDataset(this.sparkSession, input, "json", Option.empty(), 1);
        Assertions.assertTrue((boolean)result.isPresent());
        Assertions.assertEquals((long)1L, (long)((Dataset)result.get()).count());
        Row expected = RowFactory.create((Object[])new Object[]{"some data", "US", "CA"});
        Assertions.assertEquals(Collections.singletonList(expected), (Object)((Dataset)result.get()).collectAsList());
    }

    @Test
    public void loadDatasetWithSchema() {
        TypedProperties props = new TypedProperties();
        TestCloudObjectsSelectorCommon.class.getClassLoader().getResource("schema/sample_data_schema.avsc");
        String schemaFilePath = TestCloudObjectsSelectorCommon.class.getClassLoader().getResource("schema/sample_data_schema.avsc").getPath();
        props.put((Object)"hoodie.streamer.schemaprovider.source.schema.file", (Object)schemaFilePath);
        props.put((Object)"hoodie.streamer.schema.provider.class.name", (Object)FilebasedSchemaProvider.class.getName());
        props.put((Object)"hoodie.streamer.source.cloud.data.partition.fields.from.path", (Object)"country,state");
        CloudObjectsSelectorCommon cloudObjectsSelectorCommon = new CloudObjectsSelectorCommon(props);
        List<CloudObjectMetadata> input = Collections.singletonList(new CloudObjectMetadata("src/test/resources/data/partitioned/country=US/state=CA/data.json", 1L));
        Option result = cloudObjectsSelectorCommon.loadAsDataset(this.sparkSession, input, "json", Option.of((Object)new FilebasedSchemaProvider(props, this.jsc)), 1);
        Assertions.assertTrue((boolean)result.isPresent());
        Assertions.assertEquals((long)1L, (long)((Dataset)result.get()).count());
        Row expected = RowFactory.create((Object[])new Object[]{"some data", "US", "CA"});
        Assertions.assertEquals(Collections.singletonList(expected), (Object)((Dataset)result.get()).collectAsList());
    }

    @Test
    void loadDatasetWithSchemaAndAliasFields() {
        TypedProperties props = new TypedProperties();
        TestCloudObjectsSelectorCommon.class.getClassLoader().getResource("schema/sample_data_schema.avsc");
        String schemaFilePath = TestCloudObjectsSelectorCommon.class.getClassLoader().getResource("schema/sample_data_schema.avsc").getPath();
        props.put((Object)"hoodie.deltastreamer.schemaprovider.source.schema.file", (Object)schemaFilePath);
        props.put((Object)"hoodie.deltastreamer.schema.provider.class.name", (Object)FilebasedSchemaProvider.class.getName());
        props.put((Object)"hoodie.deltastreamer.source.cloud.data.partition.fields.from.path", (Object)"country,state");
        props.put((Object)"hoodie.streamer.source.cloud.data.reader.coalesce.aliases", (Object)"true");
        CloudObjectsSelectorCommon cloudObjectsSelectorCommon = new CloudObjectsSelectorCommon(props);
        List<CloudObjectMetadata> input = Collections.singletonList(new CloudObjectMetadata("src/test/resources/data/partitioned/country=US/state=TX/old_data.json", 1L));
        Option result = cloudObjectsSelectorCommon.loadAsDataset(this.sparkSession, input, "json", Option.of((Object)new FilebasedSchemaProvider(props, this.jsc)), 1);
        Assertions.assertTrue((boolean)result.isPresent());
        Assertions.assertEquals((long)1L, (long)((Dataset)result.get()).count());
        Row expected = RowFactory.create((Object[])new Object[]{"some data", "US", "TX"});
        Assertions.assertEquals(Collections.singletonList(expected), (Object)((Dataset)result.get()).collectAsList());
    }

    @Test
    public void loadDatasetWithSchemaAndRepartition() {
        TypedProperties props = new TypedProperties();
        TestCloudObjectsSelectorCommon.class.getClassLoader().getResource("schema/sample_data_schema.avsc");
        String schemaFilePath = TestCloudObjectsSelectorCommon.class.getClassLoader().getResource("schema/sample_data_schema.avsc").getPath();
        props.put((Object)"hoodie.streamer.schemaprovider.source.schema.file", (Object)schemaFilePath);
        props.put((Object)"hoodie.streamer.schema.provider.class.name", (Object)FilebasedSchemaProvider.class.getName());
        props.put((Object)"hoodie.streamer.source.cloud.data.partition.fields.from.path", (Object)"country,state");
        props.put((Object)"hoodie.streamer.source.cloud.data.partition.max.size", (Object)"1");
        List<CloudObjectMetadata> input = Arrays.asList(new CloudObjectMetadata("src/test/resources/data/partitioned/country=US/state=CA/data.json", 1000L), new CloudObjectMetadata("src/test/resources/data/partitioned/country=US/state=TX/data.json", 1000L), new CloudObjectMetadata("src/test/resources/data/partitioned/country=IND/state=TS/data.json", 1000L));
        CloudObjectsSelectorCommon cloudObjectsSelectorCommon = new CloudObjectsSelectorCommon(props);
        Option result = cloudObjectsSelectorCommon.loadAsDataset(this.sparkSession, input, "json", Option.of((Object)new FilebasedSchemaProvider(props, this.jsc)), 30);
        Assertions.assertTrue((boolean)result.isPresent());
        List<Row> expected = Arrays.asList(RowFactory.create((Object[])new Object[]{"some data", "US", "CA"}), RowFactory.create((Object[])new Object[]{"some data", "US", "TX"}), RowFactory.create((Object[])new Object[]{"some data", "IND", "TS"}));
        List actual = ((Dataset)result.get()).collectAsList();
        Assertions.assertEquals(new HashSet<Row>(expected), new HashSet(actual));
    }

    @Test
    void loadDatasetWithSchemaAndCoalesceAliases() {
        TypedProperties props = new TypedProperties();
        TestCloudObjectsSelectorCommon.class.getClassLoader().getResource("schema/sample_data_schema.avsc");
        String schemaFilePath = TestCloudObjectsSelectorCommon.class.getClassLoader().getResource("schema/sample_data_schema.avsc").getPath();
        props.put((Object)"hoodie.deltastreamer.schemaprovider.source.schema.file", (Object)schemaFilePath);
        props.put((Object)"hoodie.deltastreamer.schema.provider.class.name", (Object)FilebasedSchemaProvider.class.getName());
        props.put((Object)"hoodie.deltastreamer.source.cloud.data.partition.fields.from.path", (Object)"country,state");
        props.put((Object)"hoodie.streamer.source.cloud.data.partition.max.size", (Object)"1");
        props.put((Object)"hoodie.streamer.source.cloud.data.reader.coalesce.aliases", (Object)"true");
        List<CloudObjectMetadata> input = Arrays.asList(new CloudObjectMetadata("src/test/resources/data/partitioned/country=US/state=CA/data.json", 1000L), new CloudObjectMetadata("src/test/resources/data/partitioned/country=US/state=TX/old_data.json", 1000L), new CloudObjectMetadata("src/test/resources/data/partitioned/country=IND/state=TS/data.json", 1000L));
        CloudObjectsSelectorCommon cloudObjectsSelectorCommon = new CloudObjectsSelectorCommon(props);
        Option result = cloudObjectsSelectorCommon.loadAsDataset(this.sparkSession, input, "json", Option.of((Object)new FilebasedSchemaProvider(props, this.jsc)), 30);
        Assertions.assertTrue((boolean)result.isPresent());
        List<Row> expected = Arrays.asList(RowFactory.create((Object[])new Object[]{"some data", "US", "CA"}), RowFactory.create((Object[])new Object[]{"some data", "US", "TX"}), RowFactory.create((Object[])new Object[]{"some data", "IND", "TS"}));
        List actual = ((Dataset)result.get()).collectAsList();
        Assertions.assertEquals(new HashSet<Row>(expected), new HashSet(actual));
    }

    @Test
    void loadDatasetWithNestedSchemaAndCoalesceAliases() throws IOException {
        TypedProperties props = new TypedProperties();
        TestCloudObjectsSelectorCommon.class.getClassLoader().getResource("schema/nested_data_schema.avsc");
        String schemaFilePath = TestCloudObjectsSelectorCommon.class.getClassLoader().getResource("schema/nested_data_schema.avsc").getPath();
        props.put((Object)"hoodie.deltastreamer.schemaprovider.source.schema.file", (Object)schemaFilePath);
        props.put((Object)"hoodie.deltastreamer.schema.provider.class.name", (Object)FilebasedSchemaProvider.class.getName());
        props.put((Object)"hoodie.streamer.source.cloud.data.partition.max.size", (Object)"1");
        props.put((Object)"hoodie.streamer.source.cloud.data.reader.coalesce.aliases", (Object)"true");
        List<CloudObjectMetadata> input = Arrays.asList(new CloudObjectMetadata("src/test/resources/data/nested_data_1.json", 1000L), new CloudObjectMetadata("src/test/resources/data/nested_data_2.json", 1000L), new CloudObjectMetadata("src/test/resources/data/nested_data_3.json", 1000L));
        CloudObjectsSelectorCommon cloudObjectsSelectorCommon = new CloudObjectsSelectorCommon(props);
        Option result = cloudObjectsSelectorCommon.loadAsDataset(this.sparkSession, input, "json", Option.of((Object)new FilebasedSchemaProvider(props, this.jsc)), 30);
        Assertions.assertTrue((boolean)result.isPresent());
        Row address1 = RowFactory.create((Object[])new Object[]{"123 Main St", "Springfield", "12345", RowFactory.create((Object[])new Object[]{"India", "IN"})});
        Row person1 = RowFactory.create((Object[])new Object[]{"John", "Doe", RowFactory.create((Object[])new Object[]{1990, 5, 15}), address1});
        Row address2 = RowFactory.create((Object[])new Object[]{"456 Elm St", "Shelbyville", "67890", RowFactory.create((Object[])new Object[]{"Spain", "SPN"})});
        Row person2 = RowFactory.create((Object[])new Object[]{"Jane", "Smith", RowFactory.create((Object[])new Object[]{1992, 9, 2}), address2});
        Row address3 = RowFactory.create((Object[])new Object[]{"789 Maple Ave", "Paris", "98765", RowFactory.create((Object[])new Object[]{"France", "FRA"})});
        Row person3 = RowFactory.create((Object[])new Object[]{"John", "James", RowFactory.create((Object[])new Object[]{1985, 6, 15}), address3});
        List<Row> expected = Arrays.asList(person1, person2, person3);
        List actual = ((Dataset)result.get()).collectAsList();
        Assertions.assertEquals(new HashSet<Row>(expected), new HashSet(actual));
        Schema schema = new Schema.Parser().parse(new File(schemaFilePath));
        StructType expectedSchema = AvroConversionUtils.convertAvroSchemaToStructType((Schema)schema);
        Assertions.assertEquals((Object)expectedSchema, (Object)((Dataset)result.get()).schema(), (String)"output dataset schema should match source schema");
    }

    @Test
    public void partitionKeyNotPresentInPath() {
        List<CloudObjectMetadata> input = Collections.singletonList(new CloudObjectMetadata("src/test/resources/data/partitioned/country=US/state=CA/data.json", 1L));
        TypedProperties properties = new TypedProperties();
        properties.put((Object)"hoodie.deltastreamer.source.cloud.data.reader.comma.separated.path.format", (Object)"false");
        properties.put((Object)"hoodie.deltastreamer.source.cloud.data.partition.fields.from.path", (Object)"unknown");
        CloudObjectsSelectorCommon cloudObjectsSelectorCommon = new CloudObjectsSelectorCommon(properties);
        Option result = cloudObjectsSelectorCommon.loadAsDataset(this.sparkSession, input, "json", Option.empty(), 1);
        Assertions.assertTrue((boolean)result.isPresent());
        Assertions.assertEquals((long)1L, (long)((Dataset)result.get()).count());
        Row expected = RowFactory.create((Object[])new Object[]{"some data", null});
        Assertions.assertEquals(Collections.singletonList(expected), (Object)((Dataset)result.get()).collectAsList());
    }
}

