/*
 * Decompiled with CFR 0.152.
 */
package org.apache.paimon.format.parquet;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.paimon.data.InternalRow;
import org.apache.paimon.data.columnar.writable.WritableColumnVector;
import org.apache.paimon.format.FormatReaderFactory;
import org.apache.paimon.format.parquet.ParquetInputFile;
import org.apache.paimon.format.parquet.ParquetSchemaConverter;
import org.apache.paimon.format.parquet.VariantUtils;
import org.apache.paimon.format.parquet.reader.ParquetReaderUtil;
import org.apache.paimon.format.parquet.reader.VectorizedParquetRecordReader;
import org.apache.paimon.format.parquet.type.ParquetField;
import org.apache.paimon.options.Options;
import org.apache.paimon.reader.FileRecordReader;
import org.apache.paimon.shade.org.apache.parquet.ParquetReadOptions;
import org.apache.paimon.shade.org.apache.parquet.filter2.compat.FilterCompat;
import org.apache.paimon.shade.org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.paimon.shade.org.apache.parquet.io.ColumnIOFactory;
import org.apache.paimon.shade.org.apache.parquet.io.MessageColumnIO;
import org.apache.paimon.shade.org.apache.parquet.schema.ConversionPatterns;
import org.apache.paimon.shade.org.apache.parquet.schema.GroupType;
import org.apache.paimon.shade.org.apache.parquet.schema.MessageType;
import org.apache.paimon.shade.org.apache.parquet.schema.OriginalType;
import org.apache.paimon.shade.org.apache.parquet.schema.Type;
import org.apache.paimon.shade.org.apache.parquet.schema.Types;
import org.apache.paimon.types.ArrayType;
import org.apache.paimon.types.DataField;
import org.apache.paimon.types.DataType;
import org.apache.paimon.types.MapType;
import org.apache.paimon.types.RowType;
import org.apache.paimon.utils.Pair;
import org.apache.paimon.utils.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link FormatReaderFactory} that opens a Parquet file and wraps it in a
 * {@link VectorizedParquetRecordReader} producing {@link InternalRow}s.
 *
 * <p>The factory is configured once with the fields to read, the batch size and a Parquet filter;
 * each call to {@link #createReader(FormatReaderFactory.Context)} opens a new file reader and
 * projects the file schema onto the configured read fields.
 */
public class ParquetReaderFactory implements FormatReaderFactory {
    private static final Logger LOG = LoggerFactory.getLogger(ParquetReaderFactory.class);

    /** Option key for the maximum allocation size handed to the Parquet read options. */
    private static final String ALLOCATION_SIZE = "parquet.read.allocation.size";

    /** Default for {@link #ALLOCATION_SIZE}: 8 MiB. */
    private static final int DEFAULT_ALLOCATION_SIZE = 8 * 1024 * 1024;

    private final Options conf;
    private final DataField[] readFields;
    private final int batchSize;
    private final FilterCompat.Filter filter;

    /**
     * @param conf options consulted for the {@code parquet.*} read settings
     * @param readType row type whose fields are read, in this order
     * @param batchSize batch size passed to the column vectors and the record reader
     * @param filter record filter installed on the Parquet read options
     */
    public ParquetReaderFactory(
            Options conf, RowType readType, int batchSize, FilterCompat.Filter filter) {
        this.conf = conf;
        this.readFields = readType.getFields().toArray(new DataField[0]);
        this.batchSize = batchSize;
        this.filter = filter;
    }

    /**
     * Opens the file described by {@code context} and returns a vectorized reader over it.
     *
     * <p>If anything fails after the underlying {@link ParquetFileReader} has been opened (for
     * example schema clipping rejecting a missing nested field), the file reader is closed before
     * the failure is propagated, so no open file handle is left behind.
     *
     * @param context supplies the file IO, path, size and row selection of the file to read
     * @return a record reader that takes ownership of the opened Parquet file reader
     * @throws IOException if opening the file or preparing the reader fails with an I/O error
     */
    @Override
    public FileRecordReader<InternalRow> createReader(FormatReaderFactory.Context context)
            throws IOException {
        ParquetReadOptions.Builder builder =
                ParquetReadOptions.builder().withRange(0L, context.fileSize());
        setReadOptions(builder);

        ParquetFileReader reader =
                new ParquetFileReader(
                        ParquetInputFile.fromPath(
                                context.fileIO(), context.filePath(), context.fileSize()),
                        builder.build(),
                        context.selection());
        try {
            MessageType fileSchema = reader.getFileMetaData().getSchema();
            MessageType requestedSchema = clipParquetSchema(fileSchema);
            LOG.debug(
                    "Create reader of the parquet file {}, the fileSchema is {}, the requestedSchema is {}.",
                    context.filePath(),
                    fileSchema,
                    requestedSchema);
            reader.setRequestedSchema(requestedSchema);

            RowType[] shreddingSchemas =
                    VariantUtils.extractShreddingSchemasFromParquetSchema(readFields, fileSchema);
            WritableColumnVector[] writableVectors = createWritableVectors();
            MessageColumnIO columnIO = new ColumnIOFactory().getColumnIO(requestedSchema);
            List<ParquetField> fields =
                    ParquetReaderUtil.buildFieldsList(readFields, columnIO, shreddingSchemas);

            return new VectorizedParquetRecordReader(
                    context.filePath(), reader, fileSchema, fields, writableVectors, batchSize);
        } catch (IOException | RuntimeException e) {
            // Ownership of the file reader was never handed over; release it here and keep the
            // original failure as the primary exception.
            try {
                reader.close();
            } catch (IOException | RuntimeException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Copies the {@code parquet.*} read settings from {@link #conf} onto {@code builder} and
     * installs the configured record filter.
     */
    private void setReadOptions(ParquetReadOptions.Builder builder) {
        builder.useSignedStringMinMax(
                conf.getBoolean("parquet.strings.signed-min-max.enabled", false));
        builder.useDictionaryFilter(conf.getBoolean("parquet.filter.dictionary.enabled", true));
        builder.useStatsFilter(conf.getBoolean("parquet.filter.stats.enabled", true));
        builder.useRecordFilter(conf.getBoolean("parquet.filter.record-level.enabled", true));
        builder.useColumnIndexFilter(conf.getBoolean("parquet.filter.columnindex.enabled", true));
        builder.usePageChecksumVerification(
                conf.getBoolean("parquet.page.verify-checksum.enabled", false));
        builder.useBloomFilter(conf.getBoolean("parquet.filter.bloom.enabled", true));
        builder.withMaxAllocationInBytes(conf.getInteger(ALLOCATION_SIZE, DEFAULT_ALLOCATION_SIZE));

        // Only forwarded when explicitly configured; otherwise the builder keeps its own default.
        String badRecordThresh = conf.getString("parquet.read.bad.record.threshold", null);
        if (badRecordThresh != null) {
            builder.set("parquet.read.bad.record.threshold", badRecordThresh);
        }
        builder.withRecordFilter(filter);
    }

    /**
     * Builds the requested schema: one Parquet type per read field, in read-field order.
     *
     * <p>A read field that is absent from {@code parquetSchema} is logged and represented by the
     * Parquet type derived from the Paimon field itself; a present field is clipped against the
     * file's type via {@link #clipParquetType(DataType, Type)}.
     */
    private MessageType clipParquetSchema(GroupType parquetSchema) {
        Type[] types = new Type[readFields.length];
        for (int i = 0; i < readFields.length; ++i) {
            DataField readField = readFields[i];
            String fieldName = readField.name();
            if (parquetSchema.containsField(fieldName)) {
                types[i] = clipParquetType(readField.type(), parquetSchema.getType(fieldName));
            } else {
                LOG.warn(
                        "{} does not exist in {}, will fill the field with null.",
                        fieldName,
                        parquetSchema);
                types[i] = ParquetSchemaConverter.convertToParquetType(readField);
            }
        }
        return Types.buildMessage().addFields(types).named("paimon_schema");
    }

    /**
     * Recursively restricts {@code parquetType} to the parts named by {@code readType}.
     *
     * <ul>
     *   <li>ROW: keeps only the sub-fields listed in the read row type, in that order.
     *   <li>MAP: rebuilds the map group with clipped key and value types.
     *   <li>ARRAY: rebuilds the list group with a clipped element type, preserving whether the
     *       file uses a repeated middle group.
     *   <li>Any other type root: returned unchanged.
     * </ul>
     *
     * @throws RuntimeException if a nested ROW field of the read type is missing from the file
     * @throws IllegalArgumentException presumably, via {@code Preconditions.checkArgument}, if a
     *     map or list group does not have exactly one child
     */
    private Type clipParquetType(DataType readType, Type parquetType) {
        switch (readType.getTypeRoot()) {
            case ROW: {
                RowType rowType = (RowType) readType;
                GroupType rowGroup = (GroupType) parquetType;
                List<Type> rowGroupFields = new ArrayList<>();
                for (DataField field : rowType.getFields()) {
                    String fieldName = field.name();
                    if (!rowGroup.containsField(fieldName)) {
                        // Unlike top-level fields, a missing nested field is not tolerated.
                        throw new RuntimeException("field " + fieldName + " is missing");
                    }
                    rowGroupFields.add(clipParquetType(field.type(), rowGroup.getType(fieldName)));
                }
                return rowGroup.withNewFields(rowGroupFields);
            }
            case MAP: {
                MapType mapType = (MapType) readType;
                GroupType mapGroup = (GroupType) parquetType;
                Preconditions.checkArgument(
                        mapGroup.getFieldCount() == 1,
                        "Parquet map group type should only have one middle level REPEATED field.");
                Pair<Type, Type> keyValueType =
                        ParquetSchemaConverter.parquetMapKeyValueType(mapGroup);
                return ConversionPatterns.mapType(
                        mapGroup.getRepetition(),
                        mapGroup.getName(),
                        mapGroup.getType(0).getName(),
                        clipParquetType(mapType.getKeyType(), keyValueType.getLeft()),
                        clipParquetType(mapType.getValueType(), keyValueType.getRight()));
            }
            case ARRAY: {
                ArrayType arrayType = (ArrayType) readType;
                GroupType arrayGroup = (GroupType) parquetType;
                Preconditions.checkArgument(
                        arrayGroup.getFieldCount() == 1,
                        "Parquet list group type should only have one middle level REPEATED field.");
                // A group-typed child is treated as the repeated middle level (3 levels);
                // otherwise the child itself is the element (2 levels).
                int level = arrayGroup.getType(0) instanceof GroupType ? 3 : 2;
                Type elementType =
                        clipParquetType(
                                arrayType.getElementType(),
                                ParquetSchemaConverter.parquetListElementType(arrayGroup, level));
                if (level == 3) {
                    GroupType groupMiddle =
                            new GroupType(
                                    Type.Repetition.REPEATED,
                                    arrayGroup.getType(0).getName(),
                                    elementType);
                    return new GroupType(
                            arrayGroup.getRepetition(),
                            arrayGroup.getName(),
                            OriginalType.LIST,
                            groupMiddle);
                }
                return new GroupType(
                        arrayGroup.getRepetition(),
                        arrayGroup.getName(),
                        OriginalType.LIST,
                        elementType);
            }
            default:
                return parquetType;
        }
    }

    /** Allocates one writable column vector of {@link #batchSize} capacity per read field. */
    private WritableColumnVector[] createWritableVectors() {
        WritableColumnVector[] columns = new WritableColumnVector[readFields.length];
        for (int i = 0; i < readFields.length; ++i) {
            columns[i] =
                    ParquetReaderUtil.createWritableColumnVector(batchSize, readFields[i].type());
        }
        return columns;
    }
}

