/*
 * Decompiled with CFR 0.152.
 */
package org.apache.hudi.common.util;

import java.io.IOException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors;
import org.apache.avro.JsonProperties;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.avro.AvroSchemaUtils;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.avro.HoodieAvroWriteSupport;
import org.apache.hudi.common.bloom.BloomFilter;
import org.apache.hudi.common.bloom.BloomFilterFactory;
import org.apache.hudi.common.bloom.BloomFilterTypeCode;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.testutils.HoodieCommonTestHarness;
import org.apache.hudi.common.testutils.HoodieTestUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.ParquetUtils;
import org.apache.hudi.common.util.collection.ClosableIterator;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.keygen.BaseKeyGenerator;
import org.apache.hudi.metadata.HoodieIndexVersion;
import org.apache.hudi.stats.HoodieColumnRangeMetadata;
import org.apache.hudi.storage.HoodieStorage;
import org.apache.hudi.storage.StoragePath;
import org.apache.parquet.avro.AvroSchemaConverter;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.api.WriteSupport;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.parquet.schema.MessageType;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

public class TestParquetUtils
extends HoodieCommonTestHarness {
    private ParquetUtils parquetUtils;

    /**
     * Argument source for the parameterized tests in this class.
     *
     * @return one {@link Arguments} entry holding the name of {@code BloomFilterTypeCode.SIMPLE}
     *         and one holding the name of {@code BloomFilterTypeCode.DYNAMIC_V0}, in that order
     */
    public static List<Arguments> bloomFilterTypeCodes() {
        Arguments simple = Arguments.of(BloomFilterTypeCode.SIMPLE.name());
        Arguments dynamicV0 = Arguments.of(BloomFilterTypeCode.DYNAMIC_V0.name());
        return Arrays.asList(simple, dynamicV0);
    }

    /**
     * Per-test fixture: runs the harness's {@code initPath()} and creates a fresh
     * {@link ParquetUtils} instance for the test to exercise.
     */
    @BeforeEach
    public void setup() {
        initPath();
        parquetUtils = new ParquetUtils();
    }

    /**
     * Writes 1000 random row keys to a parquet file and checks that the keys read back via
     * {@code readRowKeys} equal the written keys (compared after sorting both lists), and that the
     * bloom filter read from the file metadata reports every written key as possibly present.
     *
     * @param typeCode bloom filter type code name supplied by {@link #bloomFilterTypeCodes()}
     * @throws Exception if writing or reading the parquet file fails
     */
    @ParameterizedTest
    @MethodSource("bloomFilterTypeCodes")
    public void testHoodieWriteSupport(String typeCode) throws Exception {
        List<String> rowKeys = new ArrayList<>();
        for (int i = 0; i < 1000; ++i) {
            rowKeys.add(UUID.randomUUID().toString());
        }
        String filePath = Paths.get(basePath, "test.parquet").toUri().toString();
        writeParquetFile(typeCode, filePath, rowKeys);

        // Typed copy (instead of a raw ArrayList) so that sorting and comparison are checked by the compiler.
        List<String> rowKeysInFile = new ArrayList<>(
            parquetUtils.readRowKeys(HoodieTestUtils.getStorage(filePath), new StoragePath(filePath)));
        // Sort both sides so the comparison does not depend on the order keys are returned in.
        Collections.sort(rowKeysInFile);
        Collections.sort(rowKeys);
        Assertions.assertEquals(rowKeys, rowKeysInFile, "Did not read back the expected list of keys");

        BloomFilter filterInFile =
            parquetUtils.readBloomFilterFromMetadata(HoodieTestUtils.getStorage(filePath), new StoragePath(filePath));
        for (String rowKey : rowKeys) {
            Assertions.assertTrue(filterInFile.mightContain(rowKey), "key should be found in bloom filter");
        }
    }

    /**
     * Writes 1000 random row keys, selects every 100th key as the filter set, and checks that
     * {@code filterRowKeys} returns exactly as many entries as the filter holds and that the key
     * (left element) of every returned pair is a member of the filter.
     *
     * @param typeCode bloom filter type code name supplied by {@link #bloomFilterTypeCodes()}
     * @throws Exception if writing or reading the parquet file fails
     */
    @ParameterizedTest
    @MethodSource("bloomFilterTypeCodes")
    public void testFilterParquetRowKeys(String typeCode) throws Exception {
        List<String> rowKeys = new ArrayList<>();
        Set<String> filter = new HashSet<>();
        for (int i = 0; i < 1000; ++i) {
            String rowKey = UUID.randomUUID().toString();
            rowKeys.add(rowKey);
            // Keep every 100th key (i = 0, 100, ..., 900) as the filter input.
            if (i % 100 == 0) {
                filter.add(rowKey);
            }
        }
        String filePath = Paths.get(basePath, "test.parquet").toUri().toString();
        writeParquetFile(typeCode, filePath, rowKeys);

        // Parameterized result type: iterating a raw Set as Pair does not compile.
        Set<Pair<String, Long>> filtered =
            parquetUtils.filterRowKeys(HoodieTestUtils.getStorage(filePath), new StoragePath(filePath), filter);
        Assertions.assertEquals(filter.size(), filtered.size(), "Filtered count does not match");
        for (Pair<String, Long> rowKeyAndPosition : filtered) {
            Assertions.assertTrue(filter.contains(rowKeyAndPosition.getLeft()), "filtered key must be in the given filter");
        }
    }

    /**
     * Writes 1000 records (record key + partition path {@code "path1"}) using the
     * record-key/partition-path schema and checks that {@code fetchRecordKeysWithPositions} yields
     * one entry per written record, each carrying one of the expected {@link HoodieKey}s.
     *
     * @param typeCode bloom filter type code name supplied by {@link #bloomFilterTypeCodes()}
     * @throws Exception if writing or reading the parquet file fails
     */
    @ParameterizedTest
    @MethodSource("bloomFilterTypeCodes")
    public void testFetchRecordKeyPartitionPathFromParquet(String typeCode) throws Exception {
        List<String> rowKeys = new ArrayList<>();
        List<HoodieKey> expected = new ArrayList<>();
        String partitionPath = "path1";
        for (int i = 0; i < 1000; ++i) {
            String rowKey = UUID.randomUUID().toString();
            rowKeys.add(rowKey);
            expected.add(new HoodieKey(rowKey, partitionPath));
        }
        String filePath = Paths.get(basePath, "test.parquet").toUri().toString();
        Schema schema = HoodieAvroUtils.getRecordKeyPartitionPathSchema();
        writeParquetFile(typeCode, filePath, rowKeys, schema, true, partitionPath);

        // Typed collection: a List<Object> cannot be iterated as Pair below.
        List<Pair<HoodieKey, Long>> fetchedRows = new ArrayList<>();
        // try-with-resources closes the iterator; no explicit catch/rethrow is needed.
        try (ClosableIterator<Pair<HoodieKey, Long>> iter =
                 parquetUtils.fetchRecordKeysWithPositions(HoodieTestUtils.getStorage(filePath), new StoragePath(filePath))) {
            while (iter.hasNext()) {
                fetchedRows.add(iter.next());
            }
        }
        Assertions.assertEquals(rowKeys.size(), fetchedRows.size(), "Total count does not match");
        for (Pair<HoodieKey, Long> keyAndPosition : fetchedRows) {
            Assertions.assertTrue(expected.contains(keyAndPosition.getLeft()), "Record key must be in the given filter");
        }
    }

    /**
     * Same check as the meta-field variant, but the file is written without Hudi meta fields: the
     * record key lives in column {@code "abc"} and the partition path in column {@code "def"}, and a
     * {@link TestBaseKeyGen} over those two columns is handed to {@code fetchRecordKeysWithPositions}.
     *
     * @throws Exception if writing or reading the parquet file fails
     */
    @Test
    public void testFetchRecordKeyPartitionPathVirtualKeysFromParquet() throws Exception {
        List<String> rowKeys = new ArrayList<>();
        List<HoodieKey> expected = new ArrayList<>();
        String partitionPath = "path1";
        for (int i = 0; i < 1000; ++i) {
            String rowKey = UUID.randomUUID().toString();
            rowKeys.add(rowKey);
            expected.add(new HoodieKey(rowKey, partitionPath));
        }
        String filePath = Paths.get(basePath, "test.parquet").toUri().toString();
        Schema schema = TestParquetUtils.getSchemaWithFields(Arrays.asList("abc", "def"));
        writeParquetFile(BloomFilterTypeCode.SIMPLE.name(), filePath, rowKeys, schema, true, partitionPath, false, "abc", "def");

        // Typed collection: a List<Object> cannot be iterated as Pair below.
        List<Pair<HoodieKey, Long>> fetchedRows = new ArrayList<>();
        // No (Object) cast on the key generator: the cast would stop Option.of from inferring the generator type.
        try (ClosableIterator<Pair<HoodieKey, Long>> iter = parquetUtils.fetchRecordKeysWithPositions(
                 HoodieTestUtils.getStorage(filePath), new StoragePath(filePath),
                 Option.of(new TestBaseKeyGen("abc", "def")), Option.empty())) {
            while (iter.hasNext()) {
                fetchedRows.add(iter.next());
            }
        }
        Assertions.assertEquals(rowKeys.size(), fetchedRows.size(), "Total count does not match");
        for (Pair<HoodieKey, Long> keyAndPosition : fetchedRows) {
            Assertions.assertTrue(expected.contains(keyAndPosition.getLeft()), "Record key must be in the given filter");
        }
    }

    /**
     * Writes 123 random row keys to a parquet file and checks that {@code getRowCount} reports 123.
     *
     * @throws Exception if writing or reading the parquet file fails
     */
    @Test
    public void testReadCounts() throws Exception {
        final long expectedRowCount = 123L;
        List<String> rowKeys = new ArrayList<>();
        while (rowKeys.size() < expectedRowCount) {
            rowKeys.add(UUID.randomUUID().toString());
        }
        String filePath = Paths.get(basePath, "test.parquet").toUri().toString();
        writeParquetFile(BloomFilterTypeCode.SIMPLE.name(), filePath, rowKeys);

        long actualRowCount = parquetUtils.getRowCount(HoodieTestUtils.getStorage(filePath), new StoragePath(filePath));
        Assertions.assertEquals(expectedRowCount, actualRowCount);
    }

    /**
     * Writes 1000 records with columns {@code id}, {@code partition} and {@code data}, where the
     * {@code data} value of every third record is null, and checks that
     * {@code readColumnStatsFromMetadata} reports the expected min, max, null count and value count
     * for each of the three columns.
     *
     * <p>Expected min/max values are tracked with {@link String#compareTo}, i.e. lexicographically,
     * while the records are generated.
     *
     * @throws Exception if writing or reading the parquet file fails
     */
    @Test
    public void testReadColumnStatsFromMetadata() throws Exception {
        // Each entry is ((rowKey, value), nullifyData).
        List<Pair<Pair<String, String>, Boolean>> valueList = new ArrayList<>();
        // Seeds chosen so that the first generated key/value replaces them.
        String minKey = "z";
        String maxKey = "0";
        String minValue = "z";
        String maxValue = "0";
        int nullValueCount = 0;
        int totalCount = 1000;
        String partitionPath = "path1";
        for (int i = 0; i < totalCount; ++i) {
            boolean nullifyData = i % 3 == 0;
            String rowKey = UUID.randomUUID().toString();
            String value = String.valueOf(i);
            valueList.add(Pair.of(Pair.of(rowKey, value), nullifyData));
            minKey = minKey.compareTo(rowKey) > 0 ? rowKey : minKey;
            maxKey = maxKey.compareTo(rowKey) < 0 ? rowKey : maxKey;
            if (nullifyData) {
                // Nulled values count towards the null count and are excluded from min/max tracking.
                ++nullValueCount;
                continue;
            }
            minValue = minValue.compareTo(value) > 0 ? value : minValue;
            maxValue = maxValue.compareTo(value) < 0 ? value : maxValue;
        }

        String fileName = "test.parquet";
        String filePath = new StoragePath(basePath, fileName).toString();
        String recordKeyField = "id";
        String partitionPathField = "partition";
        String dataField = "data";
        Schema schema = getSchema(recordKeyField, partitionPathField, dataField);
        BloomFilter filter = BloomFilterFactory.createBloomFilter(1000, 1.0E-4, 10000, BloomFilterTypeCode.SIMPLE.name());
        HoodieAvroWriteSupport writeSupport =
            new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(schema), schema, Option.of(filter), new Properties());
        // 120 MB block size, 1 MB page size.
        try (ParquetWriter writer = new ParquetWriter(new Path(filePath), writeSupport, CompressionCodecName.GZIP,
                 120 * 1024 * 1024, 1024 * 1024)) {
            valueList.forEach(entry -> {
                GenericData.Record rec = new GenericData.Record(schema);
                rec.put(recordKeyField, entry.getLeft().getLeft());
                rec.put(partitionPathField, partitionPath);
                rec.put(dataField, entry.getRight() ? null : entry.getLeft().getRight());
                try {
                    writer.write(rec);
                } catch (IOException e) {
                    // forEach's lambda cannot throw checked exceptions; rethrow unchecked with the cause kept.
                    throw new RuntimeException(e);
                }
                writeSupport.add(entry.getLeft().getLeft());
            });
        }

        List<String> columnList = new ArrayList<>();
        columnList.add(recordKeyField);
        columnList.add(partitionPathField);
        columnList.add(dataField);
        // Sorted by column name so the indices below are stable: "data" < "id" < "partition".
        List<? extends HoodieColumnRangeMetadata<?>> columnRangeMetadataList = parquetUtils
            .readColumnStatsFromMetadata(HoodieTestUtils.getStorage(filePath), new StoragePath(filePath), columnList, HoodieIndexVersion.V1)
            .stream()
            .sorted(Comparator.comparing(HoodieColumnRangeMetadata::getColumnName))
            .collect(Collectors.toList());
        Assertions.assertEquals(3, columnRangeMetadataList.size(), "Should return column stats of 3 columns");
        validateColumnRangeMetadata(columnRangeMetadataList.get(0), fileName, dataField, minValue, maxValue, nullValueCount, totalCount);
        validateColumnRangeMetadata(columnRangeMetadataList.get(1), fileName, recordKeyField, minKey, maxKey, 0L, totalCount);
        validateColumnRangeMetadata(columnRangeMetadataList.get(2), fileName, partitionPathField, partitionPath, partitionPath, 0L, totalCount);
    }

    /**
     * Builds a record schema named {@code "HoodieRecord"} with three nullable string fields, each
     * with an empty doc string and a null default value.
     *
     * @param recordKeyField     name of the first field
     * @param partitionPathField name of the second field
     * @param dataField          name of the third field
     * @return the record schema with the three fields in the order given
     */
    private Schema getSchema(String recordKeyField, String partitionPathField, String dataField) {
        List<Schema.Field> nullableStringFields = new ArrayList<>();
        for (String fieldName : Arrays.asList(recordKeyField, partitionPathField, dataField)) {
            Schema nullableString = AvroSchemaUtils.createNullableSchema(Schema.Type.STRING);
            nullableStringFields.add(new Schema.Field(fieldName, nullableString, "", JsonProperties.NULL_VALUE));
        }
        Schema recordSchema = Schema.createRecord("HoodieRecord", "", "", false);
        recordSchema.setFields(nullableStringFields);
        return recordSchema;
    }

    /**
     * Asserts that a column range metadata entry carries the expected file path, column name,
     * min/max values, null count and value count.
     *
     * @param metadata   the entry under test
     * @param filePath   expected value of {@code metadata.getFilePath()}
     * @param columnName expected column name
     * @param minValue   expected minimum value
     * @param maxValue   expected maximum value
     * @param nullCount  expected null count
     * @param valueCount expected value count
     */
    private void validateColumnRangeMetadata(HoodieColumnRangeMetadata metadata, String filePath, String columnName, String minValue, String maxValue, long nullCount, long valueCount) {
        // Identity of the entry.
        Assertions.assertEquals(filePath, metadata.getFilePath(), "File path does not match");
        Assertions.assertEquals(columnName, metadata.getColumnName(), "Column name does not match");
        // Value range.
        Assertions.assertEquals(minValue, metadata.getMinValue(), "Min value does not match");
        Assertions.assertEquals(maxValue, metadata.getMaxValue(), "Max value does not match");
        // Counts.
        Assertions.assertEquals(nullCount, metadata.getNullCount(), "Null count does not match");
        Assertions.assertEquals(valueCount, metadata.getValueCount(), "Value count does not match");
    }

    /**
     * Writes the given row keys to a parquet file using the schema returned by
     * {@code HoodieAvroUtils.getRecordKeySchema()}, without a partition path field.
     *
     * @param typeCode bloom filter type code name
     * @param filePath destination file
     * @param rowKeys  record keys to write, one record per key
     * @throws Exception if the file cannot be written
     */
    private void writeParquetFile(String typeCode, String filePath, List<String> rowKeys) throws Exception {
        Schema recordKeySchema = HoodieAvroUtils.getRecordKeySchema();
        writeParquetFile(typeCode, filePath, rowKeys, recordKeySchema, false, "");
    }

    /**
     * Writes the given row keys to a parquet file with the supplied schema, storing keys (and
     * optionally the partition path) in the Hudi meta-field columns.
     *
     * @param typeCode              bloom filter type code name
     * @param filePath              destination file
     * @param rowKeys               record keys to write, one record per key
     * @param schema                Avro schema of the records
     * @param addPartitionPathField whether each record also gets the partition path value
     * @param partitionPath         partition path value written when {@code addPartitionPathField} is true
     * @throws Exception if the file cannot be written
     */
    private void writeParquetFile(String typeCode, String filePath, List<String> rowKeys, Schema schema, boolean addPartitionPathField, String partitionPath) throws Exception {
        final boolean useMetaFields = true;
        // Custom field names are not needed when meta fields are used.
        writeParquetFile(typeCode, filePath, rowKeys, schema, addPartitionPathField, partitionPath, useMetaFields, null, null);
    }

    /**
     * Writes one record per row key to a GZIP-compressed parquet file and adds every key to the
     * write support (which was created with a bloom filter of the requested type).
     *
     * <p>The writer is opened in a try-with-resources block so it is closed even when a write fails.
     *
     * @param typeCode              bloom filter type code name
     * @param filePath              destination file
     * @param rowKeys               record keys to write, one record per key
     * @param schema                Avro schema of the records
     * @param addPartitionPathField whether each record also gets the partition path value
     * @param partitionPathValue    partition path value written when {@code addPartitionPathField} is true
     * @param useMetaFields         when true, values go into the Hudi record-key / partition-path
     *                              meta-field columns; otherwise into the two custom columns below
     * @param recordFieldName       column receiving the record key when {@code useMetaFields} is false
     * @param partitionFieldName    column receiving the partition path when {@code useMetaFields} is false
     * @throws Exception if the file cannot be written
     */
    private void writeParquetFile(String typeCode, String filePath, List<String> rowKeys, Schema schema, boolean addPartitionPathField, String partitionPathValue, boolean useMetaFields, String recordFieldName, String partitionFieldName) throws Exception {
        BloomFilter filter = BloomFilterFactory.createBloomFilter(1000, 1.0E-4, 10000, typeCode);
        HoodieAvroWriteSupport writeSupport =
            new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(schema), schema, Option.of(filter), new Properties());

        // Target columns do not change per record, so resolve them once.
        String keyColumn = useMetaFields ? HoodieRecord.RECORD_KEY_METADATA_FIELD : recordFieldName;
        String partitionColumn = useMetaFields ? HoodieRecord.PARTITION_PATH_METADATA_FIELD : partitionFieldName;

        // 120 MB block size, 1 MB page size.
        try (ParquetWriter writer = new ParquetWriter(new Path(filePath), writeSupport, CompressionCodecName.GZIP,
                 120 * 1024 * 1024, 1024 * 1024)) {
            for (String rowKey : rowKeys) {
                GenericData.Record rec = new GenericData.Record(schema);
                rec.put(keyColumn, rowKey);
                if (addPartitionPathField) {
                    rec.put(partitionColumn, partitionPathValue);
                }
                writer.write(rec);
                writeSupport.add(rowKey);
            }
        }
    }

    /**
     * Builds a record schema named {@code "HoodieRecordKey"} containing one field per given name.
     * Every field uses {@code HoodieAvroUtils.METADATA_FIELD_SCHEMA}, an empty doc string and a
     * null default value.
     *
     * @param fields field names, in the order the fields should appear in the schema
     * @return the assembled record schema
     */
    private static Schema getSchemaWithFields(List<String> fields) {
        List<Schema.Field> schemaFields = fields.stream()
            .map(name -> new Schema.Field(name, HoodieAvroUtils.METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE))
            .collect(Collectors.toList());
        Schema recordSchema = Schema.createRecord("HoodieRecordKey", "", "", false);
        recordSchema.setFields(schemaFields);
        return recordSchema;
    }

    /**
     * Checks that {@code ParquetUtils.readSchemaHash} returns a non-null, non-zero hash for a
     * freshly written file and that a second read of the same file yields the same hash.
     *
     * @throws Exception if writing or reading the parquet file fails
     */
    @Test
    public void testReadSchemaHash() throws Exception {
        String filePath = Paths.get(basePath, "test_schema_hash.parquet").toUri().toString();
        writeParquetFile(BloomFilterTypeCode.SIMPLE.name(), filePath, Arrays.asList("row1", "row2", "row3"));
        StoragePath storagePath = new StoragePath(filePath);

        Integer firstRead = ParquetUtils.readSchemaHash(HoodieTestUtils.getStorage(filePath), storagePath);
        Assertions.assertTrue(firstRead != null, "Schema hash should not be null");
        Assertions.assertTrue(firstRead != 0, "Schema hash should not be zero (default error value)");

        Integer secondRead = ParquetUtils.readSchemaHash(HoodieTestUtils.getStorage(filePath), storagePath);
        Assertions.assertEquals(firstRead, secondRead, "Schema hash should be consistent across reads");
    }

    /**
     * Writes two files with different schemas (the record-key schema and the extended schema) and
     * checks that {@code ParquetUtils.readSchemaHash} returns two non-null, unequal hashes.
     *
     * @throws Exception if writing or reading the parquet files fails
     */
    @Test
    public void testReadSchemaHash_DifferentSchemas() throws Exception {
        List<String> rowKeys = Arrays.asList("row1", "row2");

        String basicSchemaFile = Paths.get(basePath, "test_schema1.parquet").toUri().toString();
        writeParquetFile(BloomFilterTypeCode.SIMPLE.name(), basicSchemaFile, rowKeys);
        String extendedSchemaFile = Paths.get(basePath, "test_schema2.parquet").toUri().toString();
        writeParquetFileWithExtendedSchema(extendedSchemaFile, rowKeys);

        Integer basicHash = ParquetUtils.readSchemaHash(HoodieTestUtils.getStorage(basicSchemaFile), new StoragePath(basicSchemaFile));
        Integer extendedHash = ParquetUtils.readSchemaHash(HoodieTestUtils.getStorage(extendedSchemaFile), new StoragePath(extendedSchemaFile));

        boolean bothPresent = basicHash != null && extendedHash != null;
        Assertions.assertTrue(bothPresent, "Both schema hashes should be valid");
        boolean hashesDiffer = !basicHash.equals(extendedHash);
        Assertions.assertTrue(hashesDiffer, "Different schemas should have different hash codes");
    }

    /**
     * Checks that {@code ParquetUtils.readSchemaHash} returns 0 for a path that does not exist.
     *
     * @throws Exception if obtaining the storage fails
     */
    @Test
    public void testReadSchemaHash_NonExistentFile() throws Exception {
        StoragePath missingFile = new StoragePath("/non/existent/file.parquet");
        Integer expectedDefault = Integer.valueOf(0);

        Integer schemaHash = ParquetUtils.readSchemaHash(HoodieTestUtils.getStorage(basePath), missingFile);

        Assertions.assertEquals(expectedDefault, schemaHash, "Non-existent file should return default error value 0");
    }

    /**
     * Checks that the hash returned by {@code ParquetUtils.readSchemaHash} equals the
     * {@code hashCode()} of the {@link MessageType} returned by {@code readSchema} for the same file.
     *
     * @throws Exception if writing or reading the parquet file fails
     */
    @Test
    public void testReadSchemaHash_MatchesDirectSchemaRead() throws Exception {
        String filePath = Paths.get(basePath, "test_direct_schema.parquet").toUri().toString();
        writeParquetFile(BloomFilterTypeCode.SIMPLE.name(), filePath, Arrays.asList("row1", "row2", "row3"));
        StoragePath storagePath = new StoragePath(filePath);

        Integer hashFromUtils = ParquetUtils.readSchemaHash(HoodieTestUtils.getStorage(filePath), storagePath);
        MessageType schemaReadDirectly = parquetUtils.readSchema(HoodieTestUtils.getStorage(filePath), storagePath);
        Integer hashOfDirectSchema = schemaReadDirectly.hashCode();

        Assertions.assertEquals(hashOfDirectSchema, hashFromUtils, "Schema hash from utility should match direct schema.hashCode()");
    }

    /**
     * Writes one record per row key to a GZIP-compressed parquet file using a four-column schema:
     * {@code _row_key} (string), {@code time} (long), {@code number} (long) and {@code extra_field}
     * (nullable string). All columns other than {@code _row_key} receive fixed values.
     *
     * <p>The writer is opened in a try-with-resources block so it is closed even when a write fails.
     *
     * @param filePath destination file
     * @param rowKeys  record keys to write, one record per key
     * @throws Exception if the file cannot be written
     */
    private void writeParquetFileWithExtendedSchema(String filePath, List<String> rowKeys) throws Exception {
        Schema extendedSchema = Schema.createRecord("record", "", "", false);
        List<Schema.Field> fields = new ArrayList<>();
        fields.add(new Schema.Field("_row_key", Schema.create(Schema.Type.STRING), "", null));
        fields.add(new Schema.Field("time", Schema.create(Schema.Type.LONG), "", null));
        fields.add(new Schema.Field("number", Schema.create(Schema.Type.LONG), "", null));
        fields.add(new Schema.Field("extra_field", AvroSchemaUtils.createNullableSchema(Schema.Type.STRING), "", JsonProperties.NULL_VALUE));
        extendedSchema.setFields(fields);

        BloomFilter filter = BloomFilterFactory.createBloomFilter(1000, 1.0E-4, -1, BloomFilterTypeCode.SIMPLE.name());
        HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport(
            new AvroSchemaConverter().convert(extendedSchema), extendedSchema, Option.of(filter), new Properties());

        // 120 MB block size, 1 MB page size.
        try (ParquetWriter writer = new ParquetWriter(new Path(filePath), writeSupport, CompressionCodecName.GZIP,
                 120 * 1024 * 1024, 1024 * 1024)) {
            for (String rowKey : rowKeys) {
                GenericData.Record rec = new GenericData.Record(extendedSchema);
                rec.put("_row_key", rowKey);
                rec.put("time", 1234567L);
                rec.put("number", 12345L);
                rec.put("extra_field", "extra_value");
                writer.write(rec);
                writeSupport.add(rowKey);
            }
        }
    }

    class TestBaseKeyGen
    extends BaseKeyGenerator {
        private String recordKeyField;
        private String partitionField;

        public TestBaseKeyGen(String recordKeyField, String partitionField) {
            super(new TypedProperties());
            this.recordKeyField = recordKeyField;
            this.partitionField = partitionField;
        }

        public String getRecordKey(GenericRecord record) {
            return record.get(this.recordKeyField).toString();
        }

        public String getPartitionPath(GenericRecord record) {
            return record.get(this.partitionField).toString();
        }

        public List<String> getRecordKeyFieldNames() {
            return Arrays.asList(this.recordKeyField);
        }

        public List<String> getPartitionPathFields() {
            return Arrays.asList(this.partitionField);
        }
    }
}

