/*
 * Decompiled with CFR 0.152.
 */
package water.parser.parquet;

import java.io.IOException;
import java.util.Collections;
import org.apache.parquet.format.converter.ParquetMetadataConverter;
import org.apache.parquet.hadoop.ParquetFileWriter;
import org.apache.parquet.hadoop.VecParquetReader;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.Type;
import water.Job;
import water.Key;
import water.fvec.ByteVec;
import water.fvec.Chunk;
import water.fvec.Vec;
import water.parser.FVecParseReader;
import water.parser.ParseReader;
import water.parser.ParseSetup;
import water.parser.ParseWriter;
import water.parser.Parser;
import water.parser.PreviewParseWriter;
import water.parser.parquet.ParquetParserProvider;
import water.util.Log;

public class ParquetParser
extends Parser {
    /** Number of records sampled from a file when building a parse preview. */
    private static final int MAX_PREVIEW_RECORDS = 1000;

    /** Serialized Parquet footer taken from the parse setup; re-read (with a range filter) for every chunk. */
    private final byte[] _metadata;

    /**
     * Creates a parser bound to the given setup and job.
     *
     * @param setup  parse setup; must be a {@link ParquetParseSetup}, otherwise the cast below
     *               fails with a {@link ClassCastException}
     * @param jobKey key of the job this parser belongs to (forwarded to the superclass)
     */
    ParquetParser(ParseSetup setup, Key<Job> jobKey) {
        super(setup, jobKey);
        ParquetParseSetup parquetSetup = (ParquetParseSetup) setup;
        _metadata = parquetSetup.parquetMetadata;
    }

    /**
     * Parses all Parquet blocks selected for the given chunk and pushes the records into {@code dout}.
     *
     * <p>The stored footer is re-read with a range filter spanning the chunk's byte range
     * ({@code [start, start + len)}); if the filtered footer lists no blocks, nothing is parsed.
     *
     * @param cidx index of the chunk being parsed (used for logging only)
     * @param din  reader for the chunk; must be an {@link FVecParseReader}
     * @param dout writer receiving the parsed records
     * @return {@code dout}
     * @throws IllegalStateException if {@code din} is not backed by a Vec
     * @throws RuntimeException      wrapping any {@link IOException} raised while reading records
     */
    protected final ParseWriter parseChunk(int cidx, ParseReader din, ParseWriter dout) {
        if (!(din instanceof FVecParseReader)) {
            throw new IllegalStateException("We only accept parser readers backed by a Vec (no streaming support!).");
        }
        Chunk chunk = ((FVecParseReader) din).getChunk();

        // Restrict the footer to the byte range covered by this chunk.
        long rangeStart = chunk.start();
        long rangeEnd = rangeStart + chunk.len();
        ParquetMetadata chunkMetadata =
                VecParquetReader.readFooter(this._metadata, ParquetMetadataConverter.range(rangeStart, rangeEnd));

        if (chunkMetadata.getBlocks().isEmpty()) {
            Log.trace(new Object[]{"Chunk #", cidx, " doesn't contain any Parquet block center."});
            return dout;
        }

        Log.info(new Object[]{"Processing ", chunkMetadata.getBlocks().size(), " blocks of chunk #", cidx});
        VecParquetReader reader =
                new VecParquetReader(chunk.vec(), chunkMetadata, dout, this._setup.getColumnTypes());
        try {
            // Drain the reader; every read() call hands one record to dout, null marks the end.
            while (reader.read() != null) {
                // nothing to do per record
            }
        } catch (IOException ioe) {
            throw new RuntimeException("Failed to parse records", ioe);
        }
        return dout;
    }

    /**
     * Tries to recognise the data as Parquet and, if so, builds a parse setup from a preview.
     *
     * <p>The leading bytes must match {@code ParquetFileWriter.MAGIC}. On a match the footer is read
     * from {@code vec}, checked for compatibility and up to {@link #MAX_PREVIEW_RECORDS} records of
     * the first block are sampled.
     *
     * @param vec  vector holding the raw file bytes
     * @param bits leading bytes of the file, used for the magic-number check
     * @return a setup describing the file, or {@code null} if {@code bits} does not start with the
     *         Parquet magic sequence
     */
    public static ParseSetup guessSetup(ByteVec vec, byte[] bits) {
        byte[] magic = ParquetFileWriter.MAGIC;
        if (bits.length < magic.length) {
            return null;
        }
        for (int pos = 0; pos < magic.length; pos++) {
            if (bits[pos] != magic[pos]) {
                return null;
            }
        }

        byte[] footerBytes = VecParquetReader.readFooterAsBytes((Vec) vec);
        ParquetMetadata footer = VecParquetReader.readFooter(footerBytes, ParquetMetadataConverter.NO_FILTER);
        ParquetParser.checkCompatibility(footer);
        ParquetPreviewParseWriter preview = ParquetParser.readFirstRecords(footer, vec, MAX_PREVIEW_RECORDS);
        return preview.toParseSetup(footerBytes);
    }

    /**
     * Reconciles user-requested column types with the types implied by the file's Parquet schema.
     *
     * @param vec            vector holding the raw file bytes; its footer supplies the schema
     * @param requestedTypes requested type code per column
     * @return the type codes that will actually be used, one per column
     * @throws IllegalArgumentException if the number of requested types differs from the number of
     *                                  schema columns
     */
    public static byte[] correctTypeConversions(ByteVec vec, byte[] requestedTypes) {
        byte[] footerBytes = VecParquetReader.readFooterAsBytes((Vec) vec);
        MessageType schema = VecParquetReader
                .readFooter(footerBytes, ParquetMetadataConverter.NO_FILTER)
                .getFileMetaData()
                .getSchema();
        return ParquetParser.correctTypeConversions(ParquetParser.roughGuessTypes(schema), requestedTypes);
    }

    /**
     * Merges schema-derived type codes with requested type codes, column by column.
     *
     * <p>Rules, applied per column:
     * <ul>
     *   <li>schema code 3 or 5: the schema code wins, the request is ignored;</li>
     *   <li>schema code 0 with request 3: the result is 2;</li>
     *   <li>otherwise: the requested code is used.</li>
     * </ul>
     * NOTE(review): the codes look like inlined Vec type constants (0 = unknown, 2 = string,
     * 3 = numeric, 5 = time) - confirm against the Vec class.
     *
     * @param roughTypes     type code per column as guessed from the schema
     * @param requestedTypes type code per column as requested by the caller
     * @return a new array with the resolved type code per column
     * @throws IllegalArgumentException if the two arrays differ in length
     */
    private static byte[] correctTypeConversions(byte[] roughTypes, byte[] requestedTypes) {
        if (requestedTypes.length != roughTypes.length) {
            throw new IllegalArgumentException("Invalid column type specification: number of columns and number of types differ!");
        }
        byte[] resolved = new byte[requestedTypes.length];
        for (int col = 0; col < resolved.length; col++) {
            byte rough = roughTypes[col];
            byte requested = requestedTypes[col];
            if (rough == 3 || rough == 5) {
                // The schema already fixes this column's type; keep it.
                resolved[col] = rough;
            } else if (rough == 0 && requested == 3) {
                // Schema gives no numeric type, so a request for 3 is replaced by 2.
                resolved[col] = 2;
            } else {
                resolved[col] = requested;
            }
        }
        return resolved;
    }

    /**
     * Verifies that every block of the file can be handled by this parser.
     *
     * @param metadata footer metadata of the file
     * @throws RuntimeException if any block reports more than {@link Integer#MAX_VALUE} rows
     */
    private static void checkCompatibility(ParquetMetadata metadata) {
        for (BlockMetaData blockMeta : metadata.getBlocks()) {
            long rows = blockMeta.getRowCount();
            if (rows > Integer.MAX_VALUE) {
                throw new RuntimeException("Current implementation doesn't support Parquet files with blocks larger than 2147483647 rows.");
            }
        }
    }

    /**
     * Reads a sample of records from the block chosen by {@code findFirstBlock} into a preview writer.
     *
     * <p>Reading stops when the reader signals the end ({@code null}) or once {@code cnt} records
     * have been read. At least one read is always attempted, even for {@code cnt <= 1}.
     *
     * @param metadata footer metadata of the whole file
     * @param vec      vector holding the raw file bytes
     * @param cnt      maximum number of records to sample
     * @return the preview writer populated with the sampled records
     * @throws RuntimeException wrapping any {@link IOException} raised while reading
     */
    private static ParquetPreviewParseWriter readFirstRecords(ParquetMetadata metadata, ByteVec vec, int cnt) {
        BlockMetaData firstBlock = ParquetParser.findFirstBlock(metadata);
        ParquetMetadata firstBlockOnly =
                new ParquetMetadata(metadata.getFileMetaData(), Collections.singletonList(firstBlock));
        ParquetPreviewParseWriter preview = new ParquetPreviewParseWriter(metadata.getFileMetaData().getSchema());
        VecParquetReader reader =
                new VecParquetReader((Vec) vec, firstBlockOnly, (ParseWriter) preview, preview._roughTypes);
        try {
            int seen = 0;
            Integer record;
            do {
                record = reader.read();
                // The counter only advances for records that were actually returned.
            } while (record != null && ++seen < cnt);
        } catch (IOException ioe) {
            throw new RuntimeException("Failed to read the first few records", ioe);
        }
        return preview;
    }

    /**
     * Derives a coarse type code for every column directly from the Parquet schema.
     *
     * <p>Mapping: INT32, BOOLEAN, FLOAT and DOUBLE give 3; INT64 gives 5 when annotated as
     * {@code TIMESTAMP_MILLIS} and 3 otherwise; every other primitive type gives 0.
     * NOTE(review): 0/3/5 look like inlined Vec type constants (unknown/numeric/time) - confirm.
     *
     * @param messageType Parquet schema; every field is expected to be primitive (asserted)
     * @return one type code per schema path
     */
    private static byte[] roughGuessTypes(MessageType messageType) {
        int columnCount = messageType.getPaths().size();
        byte[] guessed = new byte[columnCount];
        for (int col = 0; col < columnCount; col++) {
            Type field = messageType.getType(col);
            assert field.isPrimitive();
            byte code;
            switch (field.asPrimitiveType().getPrimitiveTypeName()) {
                case INT32:
                case BOOLEAN:
                case FLOAT:
                case DOUBLE:
                    code = 3;
                    break;
                case INT64:
                    if (OriginalType.TIMESTAMP_MILLIS.equals((Object) field.getOriginalType())) {
                        code = 5;
                    } else {
                        code = 3;
                    }
                    break;
                default:
                    code = 0;
                    break;
            }
            guessed[col] = code;
        }
        return guessed;
    }

    /**
     * Extracts the column names from a flat Parquet schema.
     *
     * @param messageType Parquet schema; every path is expected to have exactly one element (asserted)
     * @return the first path element of every schema path, in schema order
     */
    private static String[] columnNames(MessageType messageType) {
        String[] names = new String[messageType.getPaths().size()];
        int next = 0;
        for (String[] columnPath : messageType.getPaths()) {
            assert columnPath.length == 1;
            names[next] = columnPath[0];
            next++;
        }
        return names;
    }

    /**
     * Finds the block with the smallest starting position in the file.
     *
     * <p>Each block is compared against the current best candidate. The previous code compared
     * the candidate's starting position with itself, so the candidate was never replaced and the
     * first list element was always returned regardless of its position.
     *
     * @param metadata footer metadata; must list at least one block
     * @return the block whose {@code getStartingPos()} is lowest (the earliest listed one on ties)
     * @throws IndexOutOfBoundsException if {@code metadata} contains no blocks
     */
    private static BlockMetaData findFirstBlock(ParquetMetadata metadata) {
        BlockMetaData firstBlockMeta = (BlockMetaData)metadata.getBlocks().get(0);
        for (BlockMetaData meta : metadata.getBlocks()) {
            // Strict '<' keeps the earliest listed block when starting positions are equal.
            if (meta.getStartingPos() < firstBlockMeta.getStartingPos()) {
                firstBlockMeta = meta;
            }
        }
        return firstBlockMeta;
    }

    /**
     * {@link ParseSetup} variant for Parquet input that additionally carries the serialized
     * Parquet footer, so the parser can re-read it without going back to the file.
     */
    public static class ParquetParseSetup
    extends ParseSetup {
        // Serialized footer bytes; read by ParquetParser's constructor. Declared transient.
        transient byte[] parquetMetadata;

        /** No-argument constructor; {@code parquetMetadata} stays {@code null}. */
        public ParquetParseSetup() {
        }

        /**
         * Builds a setup for a Parquet file.
         *
         * @param columnNames     column names; their count is also passed on as the column count
         * @param ctypes          type code per column
         * @param data            preview data rows
         * @param parquetMetadata serialized Parquet footer to keep alongside the setup
         */
        public ParquetParseSetup(String[] columnNames, byte[] ctypes, String[][] data, byte[] parquetMetadata) {
            // NOTE(review): (byte)124 is the '|' character, presumably the separator argument; the
            // meaning of the remaining fixed arguments (true, 1, null) is defined by ParseSetup - confirm there.
            super(ParquetParserProvider.PARQUET_INFO, (byte)124, true, 1, columnNames.length, columnNames, ctypes, (String[][])new String[columnNames.length][], (String[][])null, data);
            this.parquetMetadata = parquetMetadata;
        }
    }

    /**
     * Preview writer that remembers the column names and schema-derived type codes of a Parquet
     * file, and uses them to constrain the types guessed from the sampled data.
     */
    private static class ParquetPreviewParseWriter
    extends PreviewParseWriter {
        /** Column names taken from the Parquet schema. */
        private String[] _colNames;
        /** Type code per column as derived from the Parquet schema. */
        private byte[] _roughTypes;

        /** No-argument constructor; leaves both fields unset. */
        public ParquetPreviewParseWriter() {
        }

        /**
         * Prepares a preview writer for the given schema.
         *
         * @param parquetSchema schema of the file being previewed
         */
        ParquetPreviewParseWriter(MessageType parquetSchema) {
            super(parquetSchema.getPaths().size());
            String[] names = ParquetParser.columnNames(parquetSchema);
            _colNames = names;
            _roughTypes = ParquetParser.roughGuessTypes(parquetSchema);
            setColumnNames(names);
            _nlines = 0;
            // Slot 0 of the preview data gets an empty row with one cell per column.
            _data[0] = new String[names.length];
        }

        /**
         * Guesses column types from the sampled data and then corrects them against the
         * schema-derived codes.
         *
         * @return the resolved type code per column
         */
        public byte[] guessTypes() {
            byte[] fromSchema = _roughTypes;
            byte[] fromData = super.guessTypes();
            return ParquetParser.correctTypeConversions(fromSchema, fromData);
        }

        /**
         * Converts the collected preview into a Parquet parse setup.
         *
         * @param parquetMetadata serialized Parquet footer to attach to the setup
         * @return a new {@link ParquetParseSetup} built from the names, resolved types and preview data
         */
        ParseSetup toParseSetup(byte[] parquetMetadata) {
            byte[] resolvedTypes = guessTypes();
            return new ParquetParseSetup(_colNames, resolvedTypes, _data, parquetMetadata);
        }
    }
}

