/*
 * Decompiled with CFR 0.152.
 */
package org.apache.paimon.format.orc;

import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.Properties;
import java.util.function.BiConsumer;
import java.util.stream.Collectors;
import javax.annotation.Nullable;
import javax.annotation.concurrent.ThreadSafe;
import org.apache.hadoop.conf.Configuration;
import org.apache.paimon.annotation.VisibleForTesting;
import org.apache.paimon.format.FileFormat;
import org.apache.paimon.format.FileFormatFactory;
import org.apache.paimon.format.FormatReaderFactory;
import org.apache.paimon.format.FormatWriterFactory;
import org.apache.paimon.format.SimpleStatsExtractor;
import org.apache.paimon.format.orc.OrcReaderFactory;
import org.apache.paimon.format.orc.OrcWriterFactory;
import org.apache.paimon.format.orc.filter.OrcFilters;
import org.apache.paimon.format.orc.filter.OrcPredicateFunctionVisitor;
import org.apache.paimon.format.orc.filter.OrcSimpleStatsExtractor;
import org.apache.paimon.format.orc.reader.OrcSplitReaderUtil;
import org.apache.paimon.format.orc.writer.RowDataVectorizer;
import org.apache.paimon.options.MemorySize;
import org.apache.paimon.options.Options;
import org.apache.paimon.predicate.Predicate;
import org.apache.paimon.shade.org.apache.orc.OrcConf;
import org.apache.paimon.shade.org.apache.orc.TypeDescription;
import org.apache.paimon.statistics.SimpleColStatsCollector;
import org.apache.paimon.types.ArrayType;
import org.apache.paimon.types.DataField;
import org.apache.paimon.types.DataType;
import org.apache.paimon.types.DataTypeChecks;
import org.apache.paimon.types.DataTypes;
import org.apache.paimon.types.IntType;
import org.apache.paimon.types.MapType;
import org.apache.paimon.types.MultisetType;
import org.apache.paimon.types.RowType;

/**
 * ORC flavour of {@link FileFormat}.
 *
 * <p>At construction time the format options are turned into a set of ORC properties (every key
 * prefixed with {@code "orc."}), and those properties are copied into two separate Hadoop
 * {@link Configuration} objects: one handed to reader factories and one handed to writer
 * factories.
 */
@ThreadSafe
public class OrcFileFormat extends FileFormat {

    /** Identifier passed to the {@link FileFormat} base class. */
    public static final String IDENTIFIER = "orc";

    private final Properties orcProperties;
    private final Configuration readerConf;
    private final Configuration writerConf;
    private final int readBatchSize;

    /**
     * Builds the format from the supplied context.
     *
     * @param formatContext source of the format options, zstd level, block size and read batch
     *     size
     */
    public OrcFileFormat(FileFormatFactory.FormatContext formatContext) {
        super(IDENTIFIER);
        this.orcProperties = getOrcProperties(formatContext.formatOptions(), formatContext);
        // Reader and writer each get their own Configuration instance built from the same
        // properties.
        this.readerConf = hadoopConfOf(this.orcProperties);
        this.writerConf = hadoopConfOf(this.orcProperties);
        this.readBatchSize = formatContext.readBatchSize();
    }

    /** Returns the ORC properties computed in the constructor (the live instance, not a copy). */
    @VisibleForTesting
    public Properties orcProperties() {
        return this.orcProperties;
    }

    /** Returns the read batch size taken from the format context. */
    @VisibleForTesting
    public int readBatchSize() {
        return this.readBatchSize;
    }

    /**
     * Creates a statistics extractor for the given row type.
     *
     * @param type row type whose columns are analysed
     * @param statsCollectors collector factories forwarded to the extractor
     * @return an always-present {@link Optional} holding an {@link OrcSimpleStatsExtractor}
     */
    @Override
    public Optional<SimpleStatsExtractor> createStatsExtractor(
            RowType type, SimpleColStatsCollector.Factory[] statsCollectors) {
        SimpleStatsExtractor extractor = new OrcSimpleStatsExtractor(type, statsCollectors);
        return Optional.of(extractor);
    }

    /**
     * Creates a reader factory for the projected row type.
     *
     * <p>Each filter is run through {@link OrcPredicateFunctionVisitor#VISITOR}; filters for
     * which the visitor yields an empty {@link Optional} are left out of the list given to the
     * reader factory.
     *
     * @param projectedRowType row type to read; it is passed through {@link #refineDataType}
     *     first
     * @param filters predicates to convert, or {@code null} for none
     * @return a new {@link OrcReaderFactory}
     */
    @Override
    public FormatReaderFactory createReaderFactory(
            RowType projectedRowType, @Nullable List<Predicate> filters) {
        List<OrcFilters.Predicate> convertedFilters = new ArrayList<>();
        if (filters != null) {
            for (Predicate filter : filters) {
                filter.visit(OrcPredicateFunctionVisitor.VISITOR)
                        .ifPresent(convertedFilters::add);
            }
        }
        RowType refinedRowType = (RowType) refineDataType(projectedRowType);
        return new OrcReaderFactory(
                this.readerConf, refinedRowType, convertedFilters, this.readBatchSize);
    }

    /**
     * Validates the row type by refining it and converting the result to an ORC type; the
     * converted type itself is discarded.
     *
     * <p>NOTE(review): presumably {@code OrcSplitReaderUtil.toOrcType} throws for unsupported
     * types, which is what makes this a validation - confirm against that helper.
     *
     * @param rowType row type to validate
     */
    @Override
    public void validateDataFields(RowType rowType) {
        OrcSplitReaderUtil.toOrcType(refineDataType(rowType));
    }

    /**
     * Creates a writer factory for the given row type.
     *
     * @param type row type to write; it is passed through {@link #refineDataType} first
     * @return a new {@link OrcWriterFactory} using the ORC properties and the writer
     *     configuration of this format
     */
    @Override
    public FormatWriterFactory createWriterFactory(RowType type) {
        DataType refined = refineDataType(type);
        DataType[] fieldTypes = DataTypeChecks.getFieldTypes(refined).toArray(new DataType[0]);
        TypeDescription orcSchema = OrcSplitReaderUtil.toOrcType(refined);
        // The vectorizer receives the ORC schema in its string form.
        RowDataVectorizer vectorizer = new RowDataVectorizer(orcSchema.toString(), fieldTypes);
        return new OrcWriterFactory(vectorizer, this.orcProperties, this.writerConf);
    }

    /**
     * Derives the ORC properties from the format options and the format context.
     *
     * <ul>
     *   <li>Every option is copied with {@code "orc."} prepended to its key.
     *   <li>The zstd compression level is taken from the context only when the options did not
     *       already provide that key.
     *   <li>When the context has a non-null block size, the stripe size is set to its byte count,
     *       replacing any value already present under that key.
     * </ul>
     *
     * @param options format options to copy
     * @param formatContext source of the zstd level and block size
     * @return a freshly created {@link Properties} instance
     */
    private static Properties getOrcProperties(
            Options options, FileFormatFactory.FormatContext formatContext) {
        Properties rawOptions = new Properties();
        options.addAllToProperties(rawOptions);

        Properties result = new Properties();
        rawOptions.forEach((key, value) -> result.put("orc." + key, value));

        String zstdLevelKey = OrcConf.COMPRESSION_ZSTD_LEVEL.getAttribute();
        if (!result.containsKey(zstdLevelKey)) {
            result.setProperty(zstdLevelKey, String.valueOf(formatContext.zstdLevel()));
        }

        MemorySize blockSize = formatContext.blockSize();
        if (blockSize != null) {
            result.setProperty(
                    OrcConf.STRIPE_SIZE.getAttribute(), String.valueOf(blockSize.getBytes()));
        }
        return result;
    }

    /**
     * Recursively rewrites a data type into the form used by the ORC reader and writer.
     *
     * <ul>
     *   <li>BINARY and VARBINARY become {@code DataTypes.BYTES()}.
     *   <li>ARRAY keeps its nullability and gets a refined element type.
     *   <li>MAP becomes a map of the refined key and value types.
     *   <li>MULTISET becomes a map from the refined element type to {@code new IntType(false)}.
     *   <li>ROW keeps its nullability; each field keeps id, name and description and gets a
     *       refined type.
     *   <li>Any other type is returned unchanged.
     * </ul>
     *
     * <p>Note that in the binary, map and multiset cases the nullability of the source type is
     * not passed to the replacement type, unlike the array and row cases.
     *
     * @param type type to refine
     * @return the refined type, or {@code type} itself when no rewrite applies
     */
    public static DataType refineDataType(DataType type) {
        switch (type.getTypeRoot()) {
            case BINARY:
            case VARBINARY:
                return DataTypes.BYTES();
            case ARRAY: {
                ArrayType array = (ArrayType) type;
                boolean nullable = array.isNullable();
                DataType element = refineDataType(array.getElementType());
                return new ArrayType(nullable, element);
            }
            case MAP: {
                MapType map = (MapType) type;
                DataType key = refineDataType(map.getKeyType());
                DataType value = refineDataType(map.getValueType());
                return new MapType(key, value);
            }
            case MULTISET: {
                MultisetType multiset = (MultisetType) type;
                DataType element = refineDataType(multiset.getElementType());
                DataType count = refineDataType(new IntType(false));
                return new MapType(element, count);
            }
            case ROW: {
                RowType row = (RowType) type;
                boolean nullable = row.isNullable();
                List<DataField> refinedFields =
                        row.getFields().stream()
                                .map(OrcFileFormat::refineField)
                                .collect(Collectors.toList());
                return new RowType(nullable, refinedFields);
            }
            default:
                return type;
        }
    }

    /** Creates a new Hadoop configuration and copies every property into it as strings. */
    private static Configuration hadoopConfOf(Properties properties) {
        Configuration conf = new Configuration();
        properties.forEach((key, value) -> conf.set(key.toString(), value.toString()));
        return conf;
    }

    /** Copies a field, replacing only its type with the refined type. */
    private static DataField refineField(DataField field) {
        return new DataField(
                field.id(), field.name(), refineDataType(field.type()), field.description());
    }
}

