/*
 * Decompiled with CFR 0.152.
 */
package org.apache.hudi.utilities.sources;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Properties;
import java.util.stream.Collectors;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.hudi.client.SparkRDDWriteClient;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.model.HoodieAvroPayload;
import org.apache.hudi.common.model.HoodieAvroRecord;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
import org.apache.hudi.common.testutils.SchemaTestUtil;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.common.util.collection.Triple;
import org.apache.hudi.config.HoodieArchivalConfig;
import org.apache.hudi.config.HoodieCleanConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.testutils.Assertions;
import org.apache.hudi.testutils.SparkClientFunctionalTestHarness;
import org.apache.hudi.utilities.UtilHelpers;
import org.apache.hudi.utilities.config.CloudSourceConfig;
import org.apache.hudi.utilities.ingestion.HoodieIngestionMetrics;
import org.apache.hudi.utilities.schema.FilebasedSchemaProvider;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.hudi.utilities.sources.S3EventsHoodieIncrSource;
import org.apache.hudi.utilities.sources.Source;
import org.apache.hudi.utilities.sources.helpers.CloudDataFetcher;
import org.apache.hudi.utilities.sources.helpers.CloudObjectsSelectorCommon;
import org.apache.hudi.utilities.sources.helpers.IncrSourceHelper;
import org.apache.hudi.utilities.sources.helpers.QueryInfo;
import org.apache.hudi.utilities.sources.helpers.QueryRunner;
import org.apache.hudi.utilities.sources.helpers.TestCloudObjectsSelectorCommon;
import org.apache.hudi.utilities.streamer.DefaultStreamContext;
import org.apache.hudi.utilities.streamer.SourceProfile;
import org.apache.hudi.utilities.streamer.SourceProfileSupplier;
import org.apache.hudi.utilities.streamer.StreamContext;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.CsvSource;
import org.junit.jupiter.params.provider.ValueSource;
import org.mockito.ArgumentCaptor;
import org.mockito.Mock;
import org.mockito.Mockito;
import org.mockito.junit.jupiter.MockitoExtension;
import org.mockito.verification.VerificationMode;

@ExtendWith(value={MockitoExtension.class})
public class TestS3EventsHoodieIncrSource
extends SparkClientFunctionalTestHarness {
    /** Avro schema for S3 event metadata records, read from the {@code /streamer-config/s3-metadata.avsc} resource. */
    private static final Schema S3_METADATA_SCHEMA = SchemaTestUtil.getSchemaFromResource(
        TestS3EventsHoodieIncrSource.class, "/streamer-config/s3-metadata.avsc", true);
    /** Serializes the generated S3 event maps into JSON strings. */
    private ObjectMapper mapper = new ObjectMapper();
    /** Bucket name stamped on every JSON S3 event generated by these tests. */
    private static final String MY_BUCKET = "some-bucket";
    /** Extension given to the extra objects that the extension-based test mixes in with the data files. */
    private static final String IGNORE_FILE_EXTENSION = ".ignore";
    /** Schema provider handed to the stream context; initialised in {@link #setUp()}. */
    private Option<SchemaProvider> schemaProvider;

    // Collaborators below are mocked by the Mockito extension and stubbed per test.
    @Mock
    QueryRunner mockQueryRunner;
    @Mock
    CloudObjectsSelectorCommon mockCloudObjectsSelectorCommon;
    @Mock
    SourceProfileSupplier sourceProfileSupplier;
    @Mock
    QueryInfo queryInfo;
    @Mock
    HoodieIngestionMetrics metrics;

    /** Java wrapper around the harness Spark context; initialised in {@link #setUp()}. */
    private JavaSparkContext jsc;
    /** Meta client for the table at the harness base path; initialised in {@link #setUp()}. */
    private HoodieTableMetaClient metaClient;

    /**
     * Prepares the per-test fixtures: the Java Spark context, the table meta client and a
     * file-based schema provider pointing at the {@code schema/sample_gcs_data.avsc} resource.
     *
     * @throws IOException declared for the harness calls used while initialising the meta client
     */
    @BeforeEach
    public void setUp() throws IOException {
        this.jsc = JavaSparkContext.fromSparkContext(this.spark().sparkContext());
        this.metaClient = this.getHoodieMetaClient(this.storageConf(), this.basePath());
        String schemaFilePath = TestCloudObjectsSelectorCommon.class.getClassLoader().getResource("schema/sample_gcs_data.avsc").getPath();
        TypedProperties props = new TypedProperties();
        props.put("hoodie.streamer.schemaprovider.source.schema.file", schemaFilePath);
        props.put("hoodie.streamer.schema.provider.class.name", FilebasedSchemaProvider.class.getName());
        // No (Object) cast here: it would make Option.of produce Option<Object>, which is not
        // assignable to Option<SchemaProvider>.
        this.schemaProvider = Option.of(new FilebasedSchemaProvider(props, this.jsc));
    }

    /**
     * Turns each (object key, size, commit time) triple into the JSON text of an S3 event
     * for {@code MY_BUCKET}, preserving the input order.
     *
     * @param filePathSizeAndCommitTime triples of object key, object size and commit time
     * @return one JSON string per input triple
     * @throws RuntimeException wrapping the {@link JsonProcessingException} if serialization fails
     */
    private List<String> getSampleS3ObjectKeys(List<Triple<String, Long, String>> filePathSizeAndCommitTime) {
        List<String> eventJsons = new ArrayList<>();
        for (Triple<String, Long, String> entry : filePathSizeAndCommitTime) {
            try {
                eventJsons.add(this.generateS3EventMetadata(entry.getMiddle(), MY_BUCKET, entry.getLeft(), entry.getRight()));
            } catch (JsonProcessingException e) {
                throw new RuntimeException(e);
            }
        }
        return eventJsons;
    }

    /**
     * Builds a Spark dataset by parsing the generated S3 event JSON strings.
     *
     * @param filePathSizeAndCommitTime triples of object key, object size and commit time
     * @return the dataset read from a two-partition RDD of the JSON events
     */
    private Dataset<Row> generateDataset(List<Triple<String, Long, String>> filePathSizeAndCommitTime) {
        List<String> eventJsons = this.getSampleS3ObjectKeys(filePathSizeAndCommitTime);
        JavaRDD<String> eventRdd = this.jsc.parallelize(eventJsons, 2);
        return this.spark().read().json(eventRdd);
    }

    /**
     * Serializes one S3 event to JSON with the shape
     * {@code {"s3": {"object": {"size", "key"}, "bucket": {"name"}}, "_hoodie_commit_time": ...}}.
     *
     * @param objectSize value stored under {@code s3.object.size}
     * @param bucketName value stored under {@code s3.bucket.name}
     * @param objectKey  value stored under {@code s3.object.key}
     * @param commitTime value stored under {@code _hoodie_commit_time}
     * @return the JSON text produced by the shared {@code ObjectMapper}
     * @throws JsonProcessingException if the mapper cannot serialize the event
     */
    private String generateS3EventMetadata(Long objectSize, String bucketName, String objectKey, String commitTime) throws JsonProcessingException {
        HashMap<String, Object> objectMetadata = new HashMap<>();
        objectMetadata.put("size", objectSize);
        objectMetadata.put("key", objectKey);
        HashMap<String, Object> bucketMetadata = new HashMap<>();
        bucketMetadata.put("name", bucketName);
        // All nested maps share the HashMap<String, Object> type so both the object and the
        // bucket sub-maps can be stored under "s3" without a type mismatch.
        HashMap<String, Object> s3Metadata = new HashMap<>();
        s3Metadata.put("object", objectMetadata);
        s3Metadata.put("bucket", bucketMetadata);
        HashMap<String, Object> eventMetadata = new HashMap<>();
        eventMetadata.put("s3", s3Metadata);
        eventMetadata.put("_hoodie_commit_time", commitTime);
        return this.mapper.writeValueAsString(eventMetadata);
    }

    /**
     * Builds a Hoodie record holding one S3 event as an Avro payload that follows
     * {@code S3_METADATA_SCHEMA}. The record key is the object key and the partition path
     * is the bucket name.
     *
     * @param commitTime value stored in the {@code _hoodie_commit_time} field
     * @param bucketName bucket name, also used as the partition path
     * @param objectKey  object key, also used as the record key
     * @param objectSize value stored in the {@code s3.object.size} field
     * @return the record wrapping the Avro payload
     */
    private HoodieRecord generateS3EventMetadata(String commitTime, String bucketName, String objectKey, Long objectSize) {
        GenericData.Record eventRec = new GenericData.Record(S3_METADATA_SCHEMA);

        // The nested fields are unions; branch 1 is used as the record schema
        // (presumably branch 0 is null -- confirm against s3-metadata.avsc).
        Schema s3Schema = S3_METADATA_SCHEMA.getField("s3").schema().getTypes().get(1);
        GenericData.Record s3Rec = new GenericData.Record(s3Schema);

        Schema bucketSchema = s3Schema.getField("bucket").schema().getTypes().get(1);
        GenericData.Record bucketRec = new GenericData.Record(bucketSchema);
        bucketRec.put("name", bucketName);

        Schema objectSchema = s3Schema.getField("object").schema().getTypes().get(1);
        GenericData.Record objectRec = new GenericData.Record(objectSchema);
        objectRec.put("key", objectKey);
        objectRec.put("size", objectSize);

        s3Rec.put("bucket", bucketRec);
        s3Rec.put("object", objectRec);
        eventRec.put("s3", s3Rec);
        eventRec.put("_hoodie_commit_time", commitTime);

        // No (Object) cast: Option.of must infer the Avro record type expected by the payload.
        HoodieAvroPayload payload = new HoodieAvroPayload(Option.of(eventRec));
        return new HoodieAvroRecord<>(new HoodieKey(objectKey, bucketName), payload);
    }

    /**
     * Creates the base source properties used by the tests: the incremental source path
     * (the harness base path), the missing-checkpoint strategy name and the {@code json} file format.
     *
     * @param missingCheckpointStrategy strategy whose enum name is written to the properties
     * @return a new {@link TypedProperties} built from those three settings
     */
    private TypedProperties setProps(IncrSourceHelper.MissingCheckpointStrategy missingCheckpointStrategy) {
        Properties sourceProps = new Properties();
        sourceProps.setProperty("hoodie.streamer.source.hoodieincr.path", this.basePath());
        sourceProps.setProperty(
            "hoodie.streamer.source.hoodieincr.missing.checkpoint.strategy",
            missingCheckpointStrategy.name());
        sourceProps.setProperty("hoodie.streamer.source.hoodieincr.file.format", "json");
        TypedProperties typed = new TypedProperties(sourceProps);
        return typed;
    }

    /**
     * Starts a write-config builder for the given table: S3 metadata schema, parallelism of 2
     * for every operation, and the current timeline layout version.
     *
     * @param basePath   base path of the table to write to
     * @param metaClient meta client supplying the table name
     * @return the partially configured builder
     */
    private HoodieWriteConfig.Builder getConfigBuilder(String basePath, HoodieTableMetaClient metaClient) {
        return HoodieWriteConfig.newBuilder()
            .withPath(basePath)
            .withSchema(S3_METADATA_SCHEMA.toString())
            .withParallelism(2, 2)
            .withBulkInsertParallelism(2)
            .withFinalizeWriteParallelism(2)
            .withDeleteParallelism(2)
            .withTimelineLayoutVersion(TimelineLayoutVersion.CURR_VERSION.intValue())
            .forTable(metaClient.getTableConfig().getTableName());
    }

    /**
     * Builds the write config used when seeding the table: the base builder for the harness
     * path plus archival (2, 3), cleaning (retain 1 commit) and metadata
     * (max 1 delta commit before compaction) settings.
     *
     * @return the finished write config
     */
    private HoodieWriteConfig getWriteConfig() {
        HoodieWriteConfig.Builder builder = this.getConfigBuilder(this.basePath(), this.metaClient);
        HoodieArchivalConfig archival = HoodieArchivalConfig.newBuilder().archiveCommitsWith(2, 3).build();
        builder = builder.withArchivalConfig(archival);
        HoodieCleanConfig cleaning = HoodieCleanConfig.newBuilder().retainCommits(1).build();
        builder = builder.withCleanConfig(cleaning);
        HoodieMetadataConfig metadata = HoodieMetadataConfig.newBuilder().withMaxNumDeltaCommitsBeforeCompaction(1).build();
        builder = builder.withMetadataConfig(metadata);
        return builder.build();
    }

    /**
     * Upserts a single S3 metadata record (bucket {@code bucket-1}, key {@code data-file-1.json},
     * size 1) at the given commit time and asserts the write reported no errors.
     *
     * @param commitTime instant time to start the commit with and to stamp on the record
     * @return the commit time paired with the records that were written
     * @throws IOException declared for the harness/write-client calls
     */
    private Pair<String, List<HoodieRecord>> writeS3MetadataRecords(String commitTime) throws IOException {
        HoodieWriteConfig cfg = this.getWriteConfig();
        try (SparkRDDWriteClient client = this.getHoodieWriteClient(cfg)) {
            client.startCommitWithTime(commitTime);
            HoodieRecord seedRecord = this.generateS3EventMetadata(commitTime, "bucket-1", "data-file-1.json", 1L);
            List<HoodieRecord> s3MetadataRecords = Arrays.asList(seedRecord);
            List writeStatuses = client.upsert(this.jsc().parallelize(s3MetadataRecords, 1), commitTime).collect();
            Assertions.assertNoWriteErrors(writeStatuses);
            return Pair.of(commitTime, s3MetadataRecords);
        }
    }

    /**
     * Writes one commit ("1") and reads with that same commit as the checkpoint and a source
     * limit of 0; the returned checkpoint must equal the written commit time.
     *
     * NOTE(review): despite the test name, a non-empty checkpoint is passed -- confirm this is intended.
     */
    @Test
    public void testEmptyCheckpoint() throws IOException {
        String commitTimeForWrites = "1";
        String commitTimeForReads = commitTimeForWrites;
        Pair<String, List<HoodieRecord>> inserts = this.writeS3MetadataRecords(commitTimeForWrites);
        // Option.of(...) is passed uncast so it is typed Option<String>, matching readAndAssert.
        this.readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT, Option.of(commitTimeForReads), 0L, inserts.getKey());
    }

    /**
     * Three objects (100, 150 and 200 bytes) all belong to commit "1". Successive reads with
     * source limits 100, 200 and 200 are expected to advance the checkpoint one object at a
     * time: file1, then file2, then file3.
     */
    @Test
    public void testOneFileInCommit() throws IOException {
        String commitTimeForWrites1 = "2";
        String commitTimeForReads = "1";
        // Seed two commits; the returned pairs are not needed by this test.
        this.writeS3MetadataRecords(commitTimeForReads);
        this.writeS3MetadataRecords(commitTimeForWrites1);

        List<Triple<String, Long, String>> filePathSizeAndCommitTime = new ArrayList<>();
        filePathSizeAndCommitTime.add(Triple.of("path/to/file1.json", 100L, "1"));
        filePathSizeAndCommitTime.add(Triple.of("path/to/file2.json", 150L, "1"));
        filePathSizeAndCommitTime.add(Triple.of("path/to/file3.json", 200L, "1"));
        Dataset<Row> inputDs = this.generateDataset(filePathSizeAndCommitTime);
        this.setMockQueryRunner(inputDs);

        Mockito.when(this.mockCloudObjectsSelectorCommon.loadAsDataset(Mockito.any(), Mockito.any(), Mockito.any(), Mockito.eq(this.schemaProvider), Mockito.anyInt()))
            .thenReturn(Option.empty());
        Mockito.when(this.sourceProfileSupplier.getSourceProfile()).thenReturn(null);

        this.readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT, Option.of(commitTimeForReads), 100L, "1#path/to/file1.json");
        this.readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT, Option.of("1#path/to/file1.json"), 200L, "1#path/to/file2.json");
        this.readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT, Option.of("1#path/to/file2.json"), 200L, "1#path/to/file3.json");
    }

    /**
     * Three objects (100, 150 and 200 bytes) all belong to commit "1". With a source limit of
     * 250 the first read is expected to stop at file2, and a second read from that checkpoint
     * is expected to reach file3.
     */
    @Test
    public void testTwoFilesAndContinueInSameCommit() throws IOException {
        String commitTimeForWrites = "2";
        String commitTimeForReads = "1";
        // Seed two commits; the returned pairs are not needed by this test.
        this.writeS3MetadataRecords(commitTimeForReads);
        this.writeS3MetadataRecords(commitTimeForWrites);

        List<Triple<String, Long, String>> filePathSizeAndCommitTime = new ArrayList<>();
        filePathSizeAndCommitTime.add(Triple.of("path/to/file1.json", 100L, "1"));
        filePathSizeAndCommitTime.add(Triple.of("path/to/file2.json", 150L, "1"));
        filePathSizeAndCommitTime.add(Triple.of("path/to/file3.json", 200L, "1"));
        Dataset<Row> inputDs = this.generateDataset(filePathSizeAndCommitTime);
        this.setMockQueryRunner(inputDs);

        Mockito.when(this.mockCloudObjectsSelectorCommon.loadAsDataset(Mockito.any(), Mockito.any(), Mockito.any(), Mockito.eq(this.schemaProvider), Mockito.anyInt()))
            .thenReturn(Option.empty());
        Mockito.when(this.sourceProfileSupplier.getSourceProfile()).thenReturn(null);

        this.readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT, Option.of(commitTimeForReads), 250L, "1#path/to/file2.json");
        this.readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT, Option.of("1#path/to/file2.json"), 250L, "1#path/to/file3.json");
    }

    /**
     * Mixes data files carrying the parameterized extension with {@code .ignore} files across
     * commits "1" and "2", then reads three times and checks each resulting checkpoint.
     * For a non-json extension the cloud data-file extension property is set explicitly.
     *
     * @param extension data-file extension under test ({@code .json} or {@code .gz})
     */
    @ParameterizedTest
    @ValueSource(strings={".json", ".gz"})
    public void testTwoFilesAndContinueAcrossCommits(String extension) throws IOException {
        String commitTimeForWrites = "2";
        String commitTimeForReads = "1";
        // Seed two commits; the returned pairs are not needed by this test.
        this.writeS3MetadataRecords(commitTimeForReads);
        this.writeS3MetadataRecords(commitTimeForWrites);

        TypedProperties typedProperties = this.setProps(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT);
        if (!extension.endsWith("json")) {
            typedProperties.setProperty(CloudSourceConfig.CLOUD_DATAFILE_EXTENSION.key(), extension);
        }

        List<Triple<String, Long, String>> filePathSizeAndCommitTime = new ArrayList<>();
        filePathSizeAndCommitTime.add(Triple.of(String.format("path/to/file1%s", extension), 100L, "1"));
        filePathSizeAndCommitTime.add(Triple.of(String.format("path/to/file2%s", IGNORE_FILE_EXTENSION), 800L, "1"));
        filePathSizeAndCommitTime.add(Triple.of(String.format("path/to/file3%s", extension), 200L, "1"));
        filePathSizeAndCommitTime.add(Triple.of(String.format("path/to/file2%s", extension), 150L, "1"));
        filePathSizeAndCommitTime.add(Triple.of(String.format("path/to/file4%s", extension), 50L, "2"));
        filePathSizeAndCommitTime.add(Triple.of(String.format("path/to/file4%s", IGNORE_FILE_EXTENSION), 200L, "2"));
        filePathSizeAndCommitTime.add(Triple.of(String.format("path/to/file5%s", extension), 150L, "2"));
        Dataset<Row> inputDs = this.generateDataset(filePathSizeAndCommitTime);
        this.setMockQueryRunner(inputDs);

        Mockito.when(this.mockCloudObjectsSelectorCommon.loadAsDataset(Mockito.any(), Mockito.any(), Mockito.any(), Mockito.eq(this.schemaProvider), Mockito.anyInt()))
            .thenReturn(Option.empty());
        Mockito.when(this.sourceProfileSupplier.getSourceProfile()).thenReturn(null);

        this.readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT, Option.of("1"), 100L, "1#path/to/file1" + extension, typedProperties);
        this.readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT, Option.of("1#path/to/file1" + extension), 100L, "1#path/to/file2" + extension, typedProperties);
        this.readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT, Option.of("1#path/to/file2" + extension), 1000L, "2#path/to/file5" + extension, typedProperties);
    }

    /**
     * Every generated object key starts with {@code path/to/skip}, which is configured as the
     * ignored key prefix. The test reads from several starting checkpoints with a source limit
     * of 1000 and checks the checkpoint returned for each.
     */
    @Test
    public void testEmptyDataAfterFilter() throws IOException {
        String commitTimeForWrites = "2";
        String commitTimeForReads = "1";
        // Seed two commits; the returned pairs are not needed by this test.
        this.writeS3MetadataRecords(commitTimeForReads);
        this.writeS3MetadataRecords(commitTimeForWrites);

        List<Triple<String, Long, String>> filePathSizeAndCommitTime = new ArrayList<>();
        filePathSizeAndCommitTime.add(Triple.of("path/to/skip1.json", 100L, "1"));
        filePathSizeAndCommitTime.add(Triple.of("path/to/skip3.json", 200L, "1"));
        filePathSizeAndCommitTime.add(Triple.of("path/to/skip2.json", 150L, "1"));
        filePathSizeAndCommitTime.add(Triple.of("path/to/skip5.json", 50L, "2"));
        filePathSizeAndCommitTime.add(Triple.of("path/to/skip4.json", 150L, "2"));
        Dataset<Row> inputDs = this.generateDataset(filePathSizeAndCommitTime);
        this.setMockQueryRunner(inputDs);

        TypedProperties typedProperties = this.setProps(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT);
        typedProperties.setProperty("hoodie.streamer.source.s3incr.ignore.key.prefix", "path/to/skip");

        this.readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT, Option.of("1"), 1000L, "2", typedProperties);
        this.readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT, Option.of("1#path/to/file3.json"), 1000L, "2", typedProperties);
        this.readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT, Option.of("2#path/to/skip4.json"), 1000L, "2#path/to/skip4.json", typedProperties);
        this.readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT, Option.of("2#path/to/skip5.json"), 1000L, "2#path/to/skip5.json", typedProperties);
        this.readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT, Option.of("2"), 1000L, "2", typedProperties);
    }

    /**
     * All objects of commit "1" carry the ignored prefix {@code path/to/skip}; commit "2"
     * holds two regular files. Reading from checkpoint "1" with a source limit of 50 is
     * expected to land on {@code 2#path/to/file4.json}.
     *
     * @param useSourceProfile when true the supplier returns a {@code TestSourceProfile(50, 10)},
     *                         otherwise it returns null
     */
    @ParameterizedTest
    @ValueSource(booleans={true, false})
    public void testFilterAnEntireCommit(boolean useSourceProfile) throws IOException {
        String commitTimeForWrites1 = "2";
        String commitTimeForReads = "1";
        // Seed two commits; the returned pairs are not needed by this test.
        this.writeS3MetadataRecords(commitTimeForReads);
        this.writeS3MetadataRecords(commitTimeForWrites1);

        List<Triple<String, Long, String>> filePathSizeAndCommitTime = new ArrayList<>();
        filePathSizeAndCommitTime.add(Triple.of("path/to/skip1.json", 100L, "1"));
        filePathSizeAndCommitTime.add(Triple.of("path/to/skip2.json", 200L, "1"));
        filePathSizeAndCommitTime.add(Triple.of("path/to/skip3.json", 150L, "1"));
        filePathSizeAndCommitTime.add(Triple.of("path/to/skip4.json", 50L, "1"));
        filePathSizeAndCommitTime.add(Triple.of("path/to/skip5.json", 150L, "1"));
        filePathSizeAndCommitTime.add(Triple.of("path/to/file5.json", 150L, "2"));
        filePathSizeAndCommitTime.add(Triple.of("path/to/file4.json", 150L, "2"));
        Dataset<Row> inputDs = this.generateDataset(filePathSizeAndCommitTime);
        this.setMockQueryRunner(inputDs);

        TestSourceProfile sourceProfile = new TestSourceProfile(50L, 10L);
        Mockito.when(this.mockCloudObjectsSelectorCommon.loadAsDataset(Mockito.any(), Mockito.any(), Mockito.any(), Mockito.eq(this.schemaProvider), Mockito.anyInt()))
            .thenReturn(Option.empty());
        if (useSourceProfile) {
            Mockito.when(this.sourceProfileSupplier.getSourceProfile()).thenReturn(sourceProfile);
        } else {
            Mockito.when(this.sourceProfileSupplier.getSourceProfile()).thenReturn(null);
        }

        TypedProperties typedProperties = this.setProps(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT);
        typedProperties.setProperty("hoodie.streamer.source.s3incr.ignore.key.prefix", "path/to/skip");
        this.readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT, Option.of("1"), 50L, "2#path/to/file4.json", typedProperties);
    }

    /**
     * Commit "2" consists only of objects with the ignored prefix {@code path/to/skip}, sitting
     * between regular files in commits "1" and "3". Reading from {@code 1#path/to/file3.json}
     * with a source limit of 50 is expected to land on {@code 3#path/to/file4.json}. The same
     * read is then repeated with the schema provider cleared and a null source profile.
     *
     * @param useSourceProfile when true the first read uses a {@code TestSourceProfile(50, 10)},
     *                         otherwise the supplier returns null
     */
    @ParameterizedTest
    @ValueSource(booleans={true, false})
    public void testFilterAnEntireMiddleCommit(boolean useSourceProfile) throws IOException {
        String commitTimeForWrites1 = "2";
        String commitTimeForWrites2 = "3";
        String commitTimeForReads = "1";
        // Seed three commits; the returned pairs are not needed by this test.
        this.writeS3MetadataRecords(commitTimeForReads);
        this.writeS3MetadataRecords(commitTimeForWrites1);
        this.writeS3MetadataRecords(commitTimeForWrites2);

        List<Triple<String, Long, String>> filePathSizeAndCommitTime = new ArrayList<>();
        filePathSizeAndCommitTime.add(Triple.of("path/to/file1.json", 100L, "1"));
        filePathSizeAndCommitTime.add(Triple.of("path/to/file3.json", 200L, "1"));
        filePathSizeAndCommitTime.add(Triple.of("path/to/file2.json", 150L, "1"));
        filePathSizeAndCommitTime.add(Triple.of("path/to/skip1.json", 50L, "2"));
        filePathSizeAndCommitTime.add(Triple.of("path/to/skip2.json", 150L, "2"));
        filePathSizeAndCommitTime.add(Triple.of("path/to/file5.json", 150L, "3"));
        filePathSizeAndCommitTime.add(Triple.of("path/to/file4.json", 150L, "3"));
        Dataset<Row> inputDs = this.generateDataset(filePathSizeAndCommitTime);
        this.setMockQueryRunner(inputDs);

        Mockito.when(this.mockCloudObjectsSelectorCommon.loadAsDataset(Mockito.any(), Mockito.any(), Mockito.any(), Mockito.eq(this.schemaProvider), Mockito.anyInt()))
            .thenReturn(Option.empty());
        TestSourceProfile sourceProfile = new TestSourceProfile(50L, 10L);
        if (useSourceProfile) {
            Mockito.when(this.sourceProfileSupplier.getSourceProfile()).thenReturn(sourceProfile);
        } else {
            Mockito.when(this.sourceProfileSupplier.getSourceProfile()).thenReturn(null);
        }

        TypedProperties typedProperties = this.setProps(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT);
        typedProperties.setProperty("hoodie.streamer.source.s3incr.ignore.key.prefix", "path/to/skip");
        this.readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT, Option.of("1#path/to/file3.json"), 50L, "3#path/to/file4.json", typedProperties);

        // Second pass: no schema provider and no source profile; the checkpoint must not change.
        this.schemaProvider = Option.empty();
        Mockito.when(this.sourceProfileSupplier.getSourceProfile()).thenReturn(null);
        this.readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT, Option.of("1#path/to/file3.json"), 50L, "3#path/to/file4.json", typedProperties);
    }

    /**
     * Drives four reads against a query runner whose end instant is overridden by
     * {@code snapshotCheckPoint}, each with a different {@code TestSourceProfile}
     * (max source bytes / bytes per partition), and checks the checkpoint returned by each.
     * Finally verifies that the partition counts passed to {@code loadAsDataset} and reported
     * to the metrics are {@code [12, 3, 1]} for snapshot checkpoints "1" and "2", and
     * {@code [23, 1]} otherwise.
     *
     * @param snapshotCheckPoint end instant the mocked query runner substitutes into the query
     * @param exptected1 checkpoint expected from the first read (no starting checkpoint)
     * @param exptected2 checkpoint expected from the second read (starting at {@code exptected1})
     * @param exptected3 checkpoint expected from the third read (no starting checkpoint)
     * @param exptected4 checkpoint expected from the fourth read, after widening the ignored prefix to {@code path/to}
     */
    @ParameterizedTest
    @CsvSource(value={"1,1#path/to/file2.json,3#path/to/file4.json,1#path/to/file1.json,1", "2,1#path/to/file2.json,3#path/to/file4.json,1#path/to/file1.json,2", "3,3#path/to/file5.json,3#path/to/file5.json,1#path/to/file1.json,3"})
    public void testSplitSnapshotLoad(String snapshotCheckPoint, String exptected1, String exptected2, String exptected3, String exptected4) throws IOException {
        this.writeS3MetadataRecords("1");
        this.writeS3MetadataRecords("2");
        this.writeS3MetadataRecords("3");

        List<Triple<String, Long, String>> filePathSizeAndCommitTime = new ArrayList<>();
        filePathSizeAndCommitTime.add(Triple.of("path/to/file1.json", 50L, "1"));
        filePathSizeAndCommitTime.add(Triple.of("path/to/file2.json", 50L, "1"));
        filePathSizeAndCommitTime.add(Triple.of("path/to/skip1.json", 50L, "2"));
        filePathSizeAndCommitTime.add(Triple.of("path/to/skip2.json", 50L, "2"));
        filePathSizeAndCommitTime.add(Triple.of("path/to/file5.json", 50L, "3"));
        filePathSizeAndCommitTime.add(Triple.of("path/to/file4.json", 50L, "3"));
        Dataset<Row> inputDs = this.generateDataset(filePathSizeAndCommitTime);
        this.setMockQueryRunner(inputDs, Option.of(snapshotCheckPoint));

        Mockito.when(this.mockCloudObjectsSelectorCommon.loadAsDataset(Mockito.any(), Mockito.any(), Mockito.any(), Mockito.eq(this.schemaProvider), Mockito.anyInt()))
            .thenReturn(Option.empty());
        TypedProperties typedProperties = this.setProps(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT);
        typedProperties.setProperty("hoodie.streamer.source.s3incr.ignore.key.prefix", "path/to/skip");
        List<Long> bytesPerPartition = Arrays.asList(10L, 20L, -1L, 1000000000L);

        Mockito.when(this.sourceProfileSupplier.getSourceProfile()).thenReturn(new TestSourceProfile(50000L, bytesPerPartition.get(0)));
        this.readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT, Option.empty(), 50000L, exptected1, typedProperties);

        Mockito.when(this.sourceProfileSupplier.getSourceProfile()).thenReturn(new TestSourceProfile(10L, bytesPerPartition.get(1)));
        this.readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT, Option.of(exptected1), 10L, exptected2, typedProperties);

        Mockito.when(this.sourceProfileSupplier.getSourceProfile()).thenReturn(new TestSourceProfile(50L, bytesPerPartition.get(2)));
        this.readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT, Option.empty(), 50L, exptected3, typedProperties);

        // Widen the ignored prefix so that every generated key is filtered out.
        typedProperties.setProperty("hoodie.streamer.source.s3incr.ignore.key.prefix", "path/to");
        Mockito.when(this.sourceProfileSupplier.getSourceProfile()).thenReturn(new TestSourceProfile(50L, bytesPerPartition.get(3)));
        this.readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT, Option.empty(), 50L, exptected4, typedProperties);

        // Typed captors replace the raw ones, so capture() needs no manual cast or unboxing call.
        ArgumentCaptor<Integer> argumentCaptor = ArgumentCaptor.forClass(Integer.class);
        ArgumentCaptor<Integer> argumentCaptorForMetrics = ArgumentCaptor.forClass(Integer.class);
        Mockito.verify(this.mockCloudObjectsSelectorCommon, Mockito.atLeastOnce())
            .loadAsDataset(Mockito.any(), Mockito.any(), Mockito.any(), Mockito.eq(this.schemaProvider), argumentCaptor.capture());
        Mockito.verify(this.metrics, Mockito.atLeastOnce())
            .updateStreamerSourceParallelism(argumentCaptorForMetrics.capture());

        List<Integer> numPartitions = snapshotCheckPoint.equals("1") || snapshotCheckPoint.equals("2") ? Arrays.asList(12, 3, 1) : Arrays.asList(23, 1);
        org.junit.jupiter.api.Assertions.assertEquals(numPartitions, argumentCaptor.getAllValues());
        org.junit.jupiter.api.Assertions.assertEquals(numPartitions, argumentCaptorForMetrics.getAllValues());
    }

    /**
     * Creates the S3 events incremental source reflectively through {@code UtilHelpers.createSource}
     * and checks that it reports the {@code ROW} source type.
     */
    @Test
    public void testCreateSource() throws IOException {
        TypedProperties typedProperties = this.setProps(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT);
        // Option.of(...) is passed uncast so it keeps the supplier's type for the stream context.
        StreamContext streamContext = new DefaultStreamContext(this.schemaProvider.orElse(null), Option.of(this.sourceProfileSupplier));
        Source s3Source = UtilHelpers.createSource(S3EventsHoodieIncrSource.class.getName(), typedProperties, this.jsc(), this.spark(), this.metrics, streamContext);
        org.junit.jupiter.api.Assertions.assertEquals(Source.SourceType.ROW, s3Source.getSourceType());
    }

    /**
     * Builds an {@code S3EventsHoodieIncrSource} from the mocked collaborators, fetches the next
     * batch and asserts that the returned checkpoint is non-null and equals the expected one.
     * The fetched dataset itself is not inspected.
     *
     * @param missingCheckpointStrategy not read here; callers are expected to have encoded it in {@code typedProperties}
     * @param checkpointToPull          checkpoint to resume from, or empty
     * @param sourceLimit               source limit passed to {@code fetchNextBatch}
     * @param expectedCheckpoint        checkpoint the batch is expected to return
     * @param typedProperties           properties used to configure the source and the data fetcher
     */
    private void readAndAssert(IncrSourceHelper.MissingCheckpointStrategy missingCheckpointStrategy, Option<String> checkpointToPull, long sourceLimit, String expectedCheckpoint, TypedProperties typedProperties) {
        CloudDataFetcher dataFetcher = new CloudDataFetcher(typedProperties, this.jsc(), this.spark(), this.metrics, this.mockCloudObjectsSelectorCommon);
        // Option.of(...) is passed uncast so it keeps the supplier's type for the stream context.
        StreamContext streamContext = new DefaultStreamContext(this.schemaProvider.orElse(null), Option.of(this.sourceProfileSupplier));
        S3EventsHoodieIncrSource incrSource = new S3EventsHoodieIncrSource(typedProperties, this.jsc(), this.spark(), this.mockQueryRunner, dataFetcher, streamContext);

        String nextCheckPoint = (String) incrSource.fetchNextBatch(checkpointToPull, sourceLimit).getRight();
        org.junit.jupiter.api.Assertions.assertNotNull(nextCheckPoint);
        org.junit.jupiter.api.Assertions.assertEquals(expectedCheckpoint, nextCheckPoint);
    }

    /**
     * Stubs the mocked query runner to return {@code inputDs} without overriding the query's end instant.
     *
     * @param inputDs dataset the query runner should hand back
     */
    private void setMockQueryRunner(Dataset<Row> inputDs) {
        // Option.empty() is inferred as Option<String> from the parameter type; an explicit
        // cast would force Option<Object> and fail to convert.
        this.setMockQueryRunner(inputDs, Option.empty());
    }

    /**
     * Stubs the mocked query runner. The answer takes the {@code QueryInfo} it was called with,
     * replaces its end instant when {@code nextCheckPointOpt} is present, and returns it paired
     * with {@code inputDs}. For snapshot queries the dataset is first restricted to rows whose
     * commit time lies between the query's start and end instants (inclusive).
     *
     * @param inputDs           dataset the query runner should hand back
     * @param nextCheckPointOpt optional end instant to substitute into the query info
     */
    private void setMockQueryRunner(Dataset<Row> inputDs, Option<String> nextCheckPointOpt) {
        Mockito.when(this.mockQueryRunner.run(Mockito.any(QueryInfo.class), Mockito.any())).thenAnswer(invocation -> {
            QueryInfo requested = invocation.getArgument(0);
            // No (Object) cast on orElse: the fallback must stay typed as QueryInfo.
            QueryInfo updatedQueryInfo = nextCheckPointOpt
                .map(nextCheckPoint -> requested.withUpdatedEndInstant(nextCheckPoint))
                .orElse(requested);
            if (updatedQueryInfo.isSnapshot()) {
                Dataset<Row> withinRange = inputDs
                    .filter(String.format("%s >= '%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, updatedQueryInfo.getStartInstant()))
                    .filter(String.format("%s <= '%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, updatedQueryInfo.getEndInstant()));
                return Pair.of(updatedQueryInfo, withinRange);
            }
            return Pair.of(updatedQueryInfo, inputDs);
        });
    }

    /**
     * Convenience overload that derives the source properties from the given strategy via
     * {@code setProps} and delegates to the five-argument {@code readAndAssert}.
     *
     * @param missingCheckpointStrategy strategy written into the generated properties
     * @param checkpointToPull          checkpoint to resume from, or empty
     * @param sourceLimit               source limit passed to the source
     * @param expectedCheckpoint        checkpoint the batch is expected to return
     */
    private void readAndAssert(IncrSourceHelper.MissingCheckpointStrategy missingCheckpointStrategy, Option<String> checkpointToPull, long sourceLimit, String expectedCheckpoint) {
        this.readAndAssert(
            missingCheckpointStrategy,
            checkpointToPull,
            sourceLimit,
            expectedCheckpoint,
            this.setProps(missingCheckpointStrategy));
    }

    static class TestSourceProfile
    implements SourceProfile<Long> {
        private final long maxSourceBytes;
        private final long bytesPerPartition;

        public TestSourceProfile(long maxSourceBytes, long bytesPerPartition) {
            this.maxSourceBytes = maxSourceBytes;
            this.bytesPerPartition = bytesPerPartition;
        }

        public long getMaxSourceBytes() {
            return this.maxSourceBytes;
        }

        public int getSourcePartitions() {
            throw new UnsupportedOperationException("getSourcePartitions is not required for S3 source profile");
        }

        public Long getSourceSpecificContext() {
            return this.bytesPerPartition;
        }
    }
}

