/*
 * Decompiled with CFR 0.152.
 */
package org.apache.hudi.utilities.sources.helpers;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.testutils.HoodieSparkClientTestHarness;
import org.apache.hudi.utilities.schema.FilebasedSchemaProvider;
import org.apache.hudi.utilities.sources.helpers.CloudObjectMetadata;
import org.apache.hudi.utilities.sources.helpers.CloudObjectsSelectorCommon;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

/**
 * Tests for {@link CloudObjectsSelectorCommon#loadAsDataset}, run against the JSON
 * fixtures under {@code src/test/resources/data/partitioned} using the Spark
 * session provided by {@link HoodieSparkClientTestHarness}.
 */
public class TestCloudObjectsSelectorCommon extends HoodieSparkClientTestHarness {
    /** JSON fixture located under the {@code country=US/state=CA} directory. */
    private static final String US_CA_DATA_FILE = "src/test/resources/data/partitioned/country=US/state=CA/data.json";
    /** JSON fixture located under the {@code country=US/state=TX} directory. */
    private static final String US_TX_DATA_FILE = "src/test/resources/data/partitioned/country=US/state=TX/data.json";
    /** JSON fixture located under the {@code country=IND/state=TS} directory. */
    private static final String IND_TS_DATA_FILE = "src/test/resources/data/partitioned/country=IND/state=TS/data.json";
    /** Classpath location of the Avro schema handed to {@link FilebasedSchemaProvider}. */
    private static final String SCHEMA_RESOURCE = "schema/sample_data_schema.avsc";
    /** Property naming the partition fields that should be derived from the file path. */
    private static final String PARTITION_FIELDS_FROM_PATH = "hoodie.streamer.source.cloud.data.partition.fields.from.path";
    /** File format argument passed to every {@code loadAsDataset} call in this class. */
    private static final String JSON_FORMAT = "json";

    /** Initializes the Spark contexts supplied by the test harness before each test. */
    @BeforeEach
    void setUp() {
        this.initSparkContexts();
    }

    /**
     * Releases the harness resources after each test.
     *
     * @throws Exception if the harness cleanup fails
     */
    @AfterEach
    public void teardown() throws Exception {
        this.cleanupResources();
    }

    /** An empty metadata list must produce an empty {@link Option}. */
    @Test
    public void emptyMetadataReturnsEmptyOption() {
        CloudObjectsSelectorCommon cloudObjectsSelectorCommon = new CloudObjectsSelectorCommon(new TypedProperties());
        Option<Dataset<Row>> result = cloudObjectsSelectorCommon.loadAsDataset(
            this.sparkSession, Collections.emptyList(), JSON_FORMAT, Option.empty(), 1);
        Assertions.assertFalse(result.isPresent());
    }

    /** With default properties, the single fixture file is loaded as one row holding only its data column. */
    @Test
    public void filesFromMetadataRead() {
        CloudObjectsSelectorCommon cloudObjectsSelectorCommon = new CloudObjectsSelectorCommon(new TypedProperties());
        Option<Dataset<Row>> result = cloudObjectsSelectorCommon.loadAsDataset(
            this.sparkSession, singleUsCaFile(), JSON_FORMAT, Option.empty(), 1);
        assertSingleRow(RowFactory.create("some data"), result);
    }

    /** When {@code country,state} are configured as path partition fields, their values are appended to the row. */
    @Test
    public void partitionValueAddedToRow() {
        TypedProperties properties = new TypedProperties();
        properties.put(PARTITION_FIELDS_FROM_PATH, "country,state");
        CloudObjectsSelectorCommon cloudObjectsSelectorCommon = new CloudObjectsSelectorCommon(properties);
        Option<Dataset<Row>> result = cloudObjectsSelectorCommon.loadAsDataset(
            this.sparkSession, singleUsCaFile(), JSON_FORMAT, Option.empty(), 1);
        assertSingleRow(RowFactory.create("some data", "US", "CA"), result);
    }

    /** Same expectation as {@link #partitionValueAddedToRow()}, but with a file-based schema provider supplied. */
    @Test
    public void loadDatasetWithSchema() {
        TypedProperties props = schemaProviderProps();
        CloudObjectsSelectorCommon cloudObjectsSelectorCommon = new CloudObjectsSelectorCommon(props);
        Option<Dataset<Row>> result = cloudObjectsSelectorCommon.loadAsDataset(
            this.sparkSession, singleUsCaFile(), JSON_FORMAT, Option.of(new FilebasedSchemaProvider(props, this.jsc)), 1);
        assertSingleRow(RowFactory.create("some data", "US", "CA"), result);
    }

    /**
     * Loads three fixture files with a schema provider, a partition max size of
     * {@code "1"} and {@code 30} as the final {@code loadAsDataset} argument.
     * NOTE(review): the final argument is presumably a partition count - confirm
     * against {@code loadAsDataset}.
     */
    @Test
    public void loadDatasetWithSchemaAndRepartition() {
        TypedProperties props = schemaProviderProps();
        props.put("hoodie.streamer.source.cloud.data.partition.max.size", "1");
        List<CloudObjectMetadata> input = Arrays.asList(
            new CloudObjectMetadata(US_CA_DATA_FILE, 1000L),
            new CloudObjectMetadata(US_TX_DATA_FILE, 1000L),
            new CloudObjectMetadata(IND_TS_DATA_FILE, 1000L));
        CloudObjectsSelectorCommon cloudObjectsSelectorCommon = new CloudObjectsSelectorCommon(props);
        Option<Dataset<Row>> result = cloudObjectsSelectorCommon.loadAsDataset(
            this.sparkSession, input, JSON_FORMAT, Option.of(new FilebasedSchemaProvider(props, this.jsc)), 30);
        Assertions.assertTrue(result.isPresent());
        List<Row> expected = Arrays.asList(
            RowFactory.create("some data", "US", "CA"),
            RowFactory.create("some data", "US", "TX"),
            RowFactory.create("some data", "IND", "TS"));
        List<Row> actual = result.get().collectAsList();
        // Compared as sets so the assertion does not depend on the order of the collected rows.
        Assertions.assertEquals(new HashSet<>(expected), new HashSet<>(actual));
    }

    /** A configured partition field that does not occur in the file path is expected to come back as {@code null}. */
    @Test
    public void partitionKeyNotPresentInPath() {
        TypedProperties properties = new TypedProperties();
        // NOTE(review): these two keys use the "hoodie.deltastreamer." prefix while every other test
        // in this class uses "hoodie.streamer." - presumably deliberate coverage of the older key
        // names; confirm before normalizing.
        properties.put("hoodie.deltastreamer.source.cloud.data.reader.comma.separated.path.format", "false");
        properties.put("hoodie.deltastreamer.source.cloud.data.partition.fields.from.path", "unknown");
        CloudObjectsSelectorCommon cloudObjectsSelectorCommon = new CloudObjectsSelectorCommon(properties);
        Option<Dataset<Row>> result = cloudObjectsSelectorCommon.loadAsDataset(
            this.sparkSession, singleUsCaFile(), JSON_FORMAT, Option.empty(), 1);
        assertSingleRow(RowFactory.create("some data", null), result);
    }

    /** Returns a one-element metadata list for the US/CA fixture, declared with size {@code 1}. */
    private static List<CloudObjectMetadata> singleUsCaFile() {
        return Collections.singletonList(new CloudObjectMetadata(US_CA_DATA_FILE, 1L));
    }

    /**
     * Builds the properties shared by the schema-provider tests: the schema file
     * path, the schema provider class name, and {@code country,state} as the
     * partition fields taken from the path.
     *
     * @return a fresh, mutable {@link TypedProperties} instance
     */
    private static TypedProperties schemaProviderProps() {
        java.net.URL schemaUrl = TestCloudObjectsSelectorCommon.class.getClassLoader().getResource(SCHEMA_RESOURCE);
        // Fail with a descriptive message instead of a bare NullPointerException when the fixture is absent.
        Assertions.assertNotNull(schemaUrl, SCHEMA_RESOURCE + " is missing from the test classpath");
        TypedProperties props = new TypedProperties();
        props.put("hoodie.streamer.schemaprovider.source.schema.file", schemaUrl.getPath());
        props.put("hoodie.streamer.schema.provider.class.name", FilebasedSchemaProvider.class.getName());
        props.put(PARTITION_FIELDS_FROM_PATH, "country,state");
        return props;
    }

    /**
     * Asserts that {@code result} is present, reports a count of one, and
     * collects to exactly {@code expected}.
     *
     * @param expected the only row the dataset should contain
     * @param result the value returned by {@code loadAsDataset}
     */
    private static void assertSingleRow(Row expected, Option<Dataset<Row>> result) {
        Assertions.assertTrue(result.isPresent());
        Dataset<Row> dataset = result.get();
        Assertions.assertEquals(1L, dataset.count());
        Assertions.assertEquals(Collections.singletonList(expected), dataset.collectAsList());
    }
}

