/*
 * Decompiled with CFR 0.152.
 */
package water.parser.parquet;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.parquet.format.converter.ParquetMetadataConverter;
import org.apache.parquet.hadoop.ParquetFileWriter;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Type;
import water.Job;
import water.Key;
import water.exceptions.H2OUnsupportedDataFileException;
import water.fvec.ByteVec;
import water.fvec.Chunk;
import water.fvec.Vec;
import water.parser.FVecParseReader;
import water.parser.ParseReader;
import water.parser.ParseSetup;
import water.parser.ParseWriter;
import water.parser.Parser;
import water.parser.PreviewParseWriter;
import water.parser.StreamParseWriter;
import water.parser.parquet.ParquetParserProvider;
import water.parser.parquet.VecParquetReader;
import water.parser.parquet.WriterDelegate;
import water.util.IcedHashMapGeneric;
import water.util.Log;

public class ParquetParser
extends Parser {
    /**
     * Presumably the number of records sampled for a preview; {@code guessDataSetup}
     * passes the same value (1000) as a literal. NOTE(review): not referenced elsewhere in this class.
     */
    private static final int MAX_PREVIEW_RECORDS = 1000;

    /** Serialized Parquet footer taken from the parse setup; decoded with {@code VecParquetReader.readFooter}. */
    private final byte[] _metadata;

    /**
     * Creates a parser bound to the given setup.
     *
     * @param setup  parse setup; it is cast to {@link ParquetParseSetup} unconditionally, so any
     *               other setup type fails with a {@link ClassCastException}
     * @param jobKey key handed to the {@code Parser} superclass
     */
    ParquetParser(ParseSetup setup, Key<Job> jobKey) {
        super(setup, jobKey);
        final ParquetParseSetup parquetSetup = (ParquetParseSetup) setup;
        this._metadata = parquetSetup.parquetMetadata;
    }

    /**
     * Reads every record of the file in one pass and distributes them over
     * {@code vec.nChunks()} output chunks, each receiving at most
     * {@code ceil(totalRecords / nChunks)} records.
     *
     * @param vec  vector backing the Parquet file
     * @param dout first chunk writer; later chunk writers are obtained through {@code nextChunk()}
     *             and reduced into {@code dout}
     * @return {@code dout}
     * @throws IllegalStateException if the per-chunk record count does not fit in an {@code int}
     * @throws RuntimeException      wrapping any {@link IOException} raised while reading
     */
    protected final StreamParseWriter sequentialParse(Vec vec, StreamParseWriter dout) {
        final ParquetMetadata footer = VecParquetReader.readFooter(this._metadata);
        final int chunkCount = vec.nChunks();
        final long totalRecs = totalRecords(footer);

        // Ceiling division: round up when the records do not split evenly.
        long recsPerChunk = totalRecs / chunkCount;
        if (totalRecs % chunkCount > 0L) {
            recsPerChunk++;
        }
        if ((int) recsPerChunk != recsPerChunk) {
            throw new IllegalStateException("Unsupported Parquet file. Too many records (#" + totalRecs + ", nChunks=" + chunkCount + ").");
        }

        final byte[] columnTypes = this._setup.getColumnTypes();
        final WriterDelegate delegate = new WriterDelegate(dout, columnTypes.length);
        final VecParquetReader reader = new VecParquetReader(vec, footer, delegate, columnTypes, this._keepColumns);

        StreamParseWriter current = dout;
        try {
            long parsedRecs = 0L;
            for (int chunkIdx = 0; chunkIdx < chunkCount; chunkIdx++) {
                // Fill the current chunk until it holds its share or the reader is exhausted.
                boolean gotRecord;
                do {
                    gotRecord = reader.read() != null;
                    if (gotRecord) {
                        parsedRecs++;
                    }
                } while (gotRecord && delegate.lineNum() < recsPerChunk);

                if (this._jobKey != null) {
                    Job.update(vec.length() / chunkCount, this._jobKey);
                }
                // Finish this chunk, merge it into the result, and point the delegate at the next one.
                current.close();
                dout.reduce(current);
                current = current.nextChunk();
                delegate.setWriter(current);
            }
            assert parsedRecs == totalRecs;
        } catch (IOException e) {
            throw new RuntimeException("Failed to parse records", e);
        }
        return dout;
    }

    /**
     * Sums the row counts of all blocks listed in the footer.
     *
     * @param metadata decoded Parquet footer
     * @return total number of rows across {@code metadata.getBlocks()}
     */
    private long totalRecords(ParquetMetadata metadata) {
        final List<BlockMetaData> blocks = metadata.getBlocks();
        long rowTotal = 0L;
        for (int b = 0; b < blocks.size(); b++) {
            rowTotal += blocks.get(b).getRowCount();
        }
        return rowTotal;
    }

    /**
     * Parses the Parquet blocks that the footer filter assigns to one chunk of the backing Vec.
     * The filter is built from the chunk's byte range {@code [start, start + len)}; when it
     * selects no block the writer is returned untouched.
     *
     * @param cidx chunk index, used only for logging
     * @param din  reader for the chunk; must be an {@link FVecParseReader}
     * @param dout writer receiving the parsed records
     * @return {@code dout}
     * @throws IllegalStateException if {@code din} is not backed by a Vec
     * @throws RuntimeException      wrapping any {@link IOException} raised while reading
     */
    protected final ParseWriter parseChunk(int cidx, ParseReader din, ParseWriter dout) {
        if (!(din instanceof FVecParseReader)) {
            throw new IllegalStateException("We only accept parser readers backed by a Vec (no streaming support!).");
        }
        final Chunk chunk = ((FVecParseReader) din).getChunk();
        final Vec vec = chunk.vec();

        final long rangeStart = chunk.start();
        final long rangeEnd = chunk.start() + (long) chunk.len();
        final ParquetMetadataConverter.MetadataFilter chunkFilter = ParquetMetadataConverter.range(rangeStart, rangeEnd);
        final ParquetMetadata chunkFooter = VecParquetReader.readFooter(this._metadata, chunkFilter);

        final List<BlockMetaData> blocks = chunkFooter.getBlocks();
        if (blocks.isEmpty()) {
            Log.trace("Chunk #", cidx, " doesn't contain any Parquet block center.");
            return dout;
        }
        Log.info("Processing ", blocks.size(), " blocks of chunk #", cidx);

        final VecParquetReader reader = new VecParquetReader(vec, chunkFooter, dout, this._setup.getColumnTypes(),
                this._keepColumns, this._setup.get_parse_columns_indices().length);
        try {
            // Drain the reader; each read() pushes one record into dout.
            while (reader.read() != null) {
                // nothing to do per record
            }
        } catch (IOException e) {
            throw new RuntimeException("Failed to parse records", e);
        }
        return dout;
    }

    /**
     * Decides whether the data looks like a Parquet file and, if so, builds an initial setup
     * from its footer.
     *
     * @param vec  vector holding the file; its footer is read once the magic bytes match
     * @param bits leading bytes of the file, compared against {@code ParquetFileWriter.MAGIC}
     * @return an initial setup, or {@code null} when {@code bits} is shorter than the magic
     *         sequence or does not start with it
     */
    public static ParquetParseSetup guessFormatSetup(ByteVec vec, byte[] bits) {
        final byte[] magic = ParquetFileWriter.MAGIC;
        if (bits.length < magic.length) {
            return null;
        }
        for (int pos = 0; pos < magic.length; pos++) {
            if (bits[pos] != magic[pos]) {
                return null;
            }
        }
        // Magic matched: decode the footer and make sure this parser can handle the file.
        final byte[] footerBytes = VecParquetReader.readFooterAsBytes(vec);
        final ParquetMetadata footer = VecParquetReader.readFooter(footerBytes);
        checkCompatibility(footer);
        return toInitialSetup(footer.getFileMetaData().getSchema(), footerBytes);
    }

    /**
     * Lists the Parquet primitive type name of every top-level field in the schema.
     *
     * @param metadata decoded Parquet footer
     * @return one entry per field, in schema order, holding the {@code name()} of the field's
     *         primitive type; every field is converted with {@code asPrimitiveType()}, so the
     *         schema is presumably expected to be flat
     */
    public static String[] extractColumnTypes(ParquetMetadata metadata) {
        final MessageType schema = metadata.getFileMetaData().getSchema();
        final int fieldCount = schema.getFieldCount();
        final String[] typeNames = new String[fieldCount];
        int field = 0;
        while (field < fieldCount) {
            final PrimitiveType primitive = schema.getType(field).asPrimitiveType();
            typeNames[field] = primitive.getPrimitiveTypeName().name();
            field++;
        }
        return typeNames;
    }

    /**
     * Builds the first-pass setup from the schema alone: rough column types, column names,
     * no preview data.
     *
     * @param parquetSchema schema taken from the footer
     * @param metadataBytes serialized footer stored in the resulting setup
     * @return setup with {@code null} preview data
     */
    private static ParquetParseSetup toInitialSetup(MessageType parquetSchema, byte[] metadataBytes) {
        final byte[] initialTypes = roughGuessTypes(parquetSchema);
        final String[] names = columnNames(parquetSchema);
        return new ParquetParseSetup(names, initialTypes, null, metadataBytes);
    }

    /**
     * Refines an initial setup by reading up to 1000 records from the start of the file and
     * letting the preview writer guess the column types.
     *
     * @param vec         vector holding the file
     * @param ps          initial setup (names, rough types, serialized footer)
     * @param keepcolumns column selection passed through to the reader
     * @return a new setup carrying the guessed types, the preview data and the same footer bytes
     */
    public static ParquetParseSetup guessDataSetup(ByteVec vec, ParquetParseSetup ps, boolean[] keepcolumns) {
        // 1000 is the same value as the MAX_PREVIEW_RECORDS constant of this class.
        final ParquetPreviewParseWriter preview = readFirstRecords(ps, vec, 1000, keepcolumns);
        final byte[] footerBytes = ps.parquetMetadata;
        return preview.toParseSetup(footerBytes);
    }

    /**
     * Reconciles user-requested column types with what the file's schema allows.
     * The rough types are derived from the schema in the file's footer and then combined
     * with {@code requestedTypes} by the array-based overload.
     *
     * @param vec            vector holding the Parquet file
     * @param requestedTypes requested type code per column
     * @return the type codes that will actually be used
     * @throws IllegalArgumentException if the number of requested types differs from the number of columns
     */
    public static byte[] correctTypeConversions(ByteVec vec, byte[] requestedTypes) {
        final byte[] footerBytes = VecParquetReader.readFooterAsBytes(vec);
        final ParquetMetadata footer = VecParquetReader.readFooter(footerBytes, ParquetMetadataConverter.NO_FILTER);
        final MessageType schema = footer.getFileMetaData().getSchema();
        return correctTypeConversions(roughGuessTypes(schema), requestedTypes);
    }

    /**
     * Combines schema-derived type codes with requested type codes, column by column.
     * <ul>
     *   <li>rough code 3 or 5 (the codes {@code convertType} assigns to numeric/decimal and to
     *       timestamp/date columns): the rough code wins;</li>
     *   <li>rough code 0 with requested code 3: the result is 2
     *       (NOTE(review): presumably "string" — confirm against the Vec type constants);</li>
     *   <li>otherwise the requested code is used.</li>
     * </ul>
     *
     * @param roughTypes     type codes derived from the Parquet schema
     * @param requestedTypes type codes asked for by the caller
     * @return a new array with the resolved code for each column
     * @throws IllegalArgumentException if the two arrays differ in length
     */
    private static byte[] correctTypeConversions(byte[] roughTypes, byte[] requestedTypes) {
        final int columnCount = requestedTypes.length;
        if (columnCount != roughTypes.length) {
            throw new IllegalArgumentException("Invalid column type specification: number of columns and number of types differ!");
        }
        final byte[] resolved = new byte[columnCount];
        for (int col = 0; col < columnCount; col++) {
            final byte rough = roughTypes[col];
            final byte requested = requestedTypes[col];
            if (rough == 3 || rough == 5) {
                // The schema type is authoritative for these codes.
                resolved[col] = rough;
            } else if (rough == 0 && requested == 3) {
                // A column with rough code 0 is never turned into code 3.
                resolved[col] = 2;
            } else {
                resolved[col] = requested;
            }
        }
        return resolved;
    }

    /**
     * Rejects files this parser cannot handle: any block with more than
     * {@code Integer.MAX_VALUE} rows, and any schema path longer than one element
     * (i.e. a nested column).
     *
     * @param metadata decoded Parquet footer
     * @throws H2OUnsupportedDataFileException on the first violation found
     */
    private static void checkCompatibility(ParquetMetadata metadata) {
        for (BlockMetaData block : metadata.getBlocks()) {
            if (block.getRowCount() > Integer.MAX_VALUE) {
                // Attach the offending block's position and size to the exception for diagnostics.
                final IcedHashMapGeneric.IcedHashMapStringObject details = new IcedHashMapGeneric.IcedHashMapStringObject();
                details.put("startingPos", block.getStartingPos());
                details.put("rowCount", block.getRowCount());
                throw new H2OUnsupportedDataFileException("Unsupported Parquet file (technical limitation).", "Current implementation doesn't support Parquet files with blocks larger than 2147483647 rows.", details);
            }
        }
        final MessageType schema = metadata.getFileMetaData().getSchema();
        for (String[] path : schema.getPaths()) {
            if (path.length != 1) {
                throw new H2OUnsupportedDataFileException("Parquet files with nested structures are not supported.", "Detected a column with a nested structure " + Arrays.asList(path));
            }
        }
    }

    /**
     * Reads a sample of records from the block that starts earliest in the file and feeds
     * them to a preview writer.
     *
     * @param initSetup   initial setup supplying the serialized footer, column names and rough types
     * @param vec         vector holding the file
     * @param cnt         maximum number of records to read; the loop stops once {@code cnt}
     *                    records were read or the reader is exhausted (at least one read is
     *                    always attempted)
     * @param keepcolumns column selection passed through to the reader
     * @return the preview writer holding the sampled records
     * @throws RuntimeException wrapping any {@link IOException} raised while reading
     */
    private static ParquetPreviewParseWriter readFirstRecords(ParquetParseSetup initSetup, ByteVec vec, int cnt, boolean[] keepcolumns) {
        final ParquetMetadata metadata = VecParquetReader.readFooter(initSetup.parquetMetadata);

        // Restrict the footer to a single block (or none) so only the beginning of the file is read.
        // The list is typed List<BlockMetaData> because that is what the ParquetMetadata constructor accepts.
        final List<BlockMetaData> previewBlocks;
        if (metadata.getBlocks().isEmpty()) {
            previewBlocks = Collections.emptyList();
        } else {
            previewBlocks = Collections.singletonList(ParquetParser.findFirstBlock(metadata));
        }
        final ParquetMetadata startMetadata = new ParquetMetadata(metadata.getFileMetaData(), previewBlocks);

        final ParquetPreviewParseWriter ppWriter = new ParquetPreviewParseWriter(initSetup);
        final VecParquetReader reader = new VecParquetReader(vec, startMetadata, ppWriter, ppWriter._roughTypes,
                keepcolumns, initSetup.get_parse_columns_indices().length);
        try {
            int recordCnt = 0;
            // Each read() pushes one record into ppWriter; stop at end of data or after cnt records.
            while (reader.read() != null && ++recordCnt < cnt) {
                // nothing to do per record
            }
            return ppWriter;
        } catch (IOException e) {
            throw new RuntimeException("Failed to read the first few records", e);
        }
    }

    /**
     * Derives a type code for every column directly from the Parquet schema.
     *
     * @param messageType file schema; every field is asserted to be primitive
     * @return one code per schema path, as produced by {@code convertType}
     */
    private static byte[] roughGuessTypes(MessageType messageType) {
        final int columnCount = messageType.getPaths().size();
        final byte[] codes = new byte[columnCount];
        int col = 0;
        while (col < columnCount) {
            final Type field = messageType.getType(col);
            assert field.isPrimitive();
            codes[col] = convertType(field.getOriginalType(), field.asPrimitiveType());
            col++;
        }
        return codes;
    }

    /**
     * Maps a Parquet column type to a numeric type code.
     * The logical ({@code OriginalType}) annotation is checked first; the physical type
     * is consulted only when the annotation is none of TIMESTAMP_MILLIS, DATE or DECIMAL.
     * NOTE(review): the codes presumably correspond to the Vec type constants — confirm.
     *
     * @param ot logical type annotation, may be {@code null}
     * @param pt primitive (physical) type of the column
     * @return 5 for TIMESTAMP_MILLIS, DATE and INT96; 3 for DECIMAL, INT32, INT64, FLOAT and
     *         DOUBLE; 4 for BOOLEAN; 0 for everything else
     */
    private static byte convertType(OriginalType ot, PrimitiveType pt) {
        if (ot == OriginalType.TIMESTAMP_MILLIS || ot == OriginalType.DATE) {
            return 5;
        }
        if (ot == OriginalType.DECIMAL) {
            return 3;
        }
        final byte code;
        switch (pt.getPrimitiveTypeName()) {
            case BOOLEAN:
                code = 4;
                break;
            case INT32:
            case INT64:
            case FLOAT:
            case DOUBLE:
                code = 3;
                break;
            case INT96:
                code = 5;
                break;
            default:
                code = 0;
                break;
        }
        return code;
    }

    /**
     * Extracts the column names from the schema paths.
     *
     * @param messageType file schema; every path is asserted to have exactly one element
     * @return the first element of each path, in schema order
     */
    private static String[] columnNames(MessageType messageType) {
        final List<String[]> paths = messageType.getPaths();
        final String[] names = new String[paths.size()];
        for (int idx = 0; idx < names.length; idx++) {
            final String[] path = paths.get(idx);
            assert path.length == 1;
            names[idx] = path[0];
        }
        return names;
    }

    /**
     * Finds the block with the smallest starting position.
     * When several blocks share that position, the one listed first wins.
     *
     * @param metadata decoded footer; must contain at least one block
     * @return the block that starts earliest in the file
     */
    private static BlockMetaData findFirstBlock(ParquetMetadata metadata) {
        final List<BlockMetaData> blocks = metadata.getBlocks();
        BlockMetaData earliest = blocks.get(0);
        for (int b = 1; b < blocks.size(); b++) {
            final BlockMetaData candidate = blocks.get(b);
            if (candidate.getStartingPos() < earliest.getStartingPos()) {
                earliest = candidate;
            }
        }
        return earliest;
    }

    /**
     * Parse setup for Parquet files: a regular {@link ParseSetup} plus the serialized footer
     * of the file being parsed.
     */
    public static class ParquetParseSetup extends ParseSetup {
        /** Serialized Parquet footer; declared {@code transient}. */
        transient byte[] parquetMetadata;

        /** Creates an empty setup. */
        public ParquetParseSetup() {
        }

        /**
         * Creates a setup for the given columns.
         *
         * @param columnNames     column names; their count is also passed on as the number of columns
         * @param ctypes          type code per column
         * @param data            preview data, may be {@code null}
         * @param parquetMetadata serialized footer, may be {@code null}; when it is present and
         *                        {@code getForceColTypes()} is true, the Parquet primitive type
         *                        names are extracted into {@code parquetColumnTypes}
         */
        public ParquetParseSetup(String[] columnNames, byte[] ctypes, String[][] data, byte[] parquetMetadata) {
            super(ParquetParserProvider.PARQUET_INFO, (byte) '|', true, 1, columnNames.length, columnNames, ctypes,
                    new String[columnNames.length][], (String[][]) null, data);
            this.parquetMetadata = parquetMetadata;
            if (!this.getForceColTypes() || parquetMetadata == null) {
                return;
            }
            final ParquetMetadata footer = VecParquetReader.readFooter(parquetMetadata);
            this.parquetColumnTypes = ParquetParser.extractColumnTypes(footer);
        }
    }

    /**
     * Preview writer that remembers the schema-derived column names and rough types so the
     * types guessed from sampled data can be reconciled with what the Parquet schema allows.
     */
    private static class ParquetPreviewParseWriter extends PreviewParseWriter {
        /** Column names copied from the initial setup. */
        private String[] _colNames;
        /** Schema-derived type codes copied from the initial setup. */
        private byte[] _roughTypes;

        /** Creates an uninitialized writer. */
        public ParquetPreviewParseWriter() {
        }

        /**
         * Creates a writer sized for the columns of the given setup.
         *
         * @param setup initial setup providing column names and rough types
         */
        ParquetPreviewParseWriter(ParquetParseSetup setup) {
            super(setup.getColumnNames().length);
            final String[] names = setup.getColumnNames();
            this._colNames = names;
            this._roughTypes = setup.getColumnTypes();
            this.setColumnNames(names);
            // Reset the line counter and give the first data row one slot per column.
            this._nlines = 0;
            this._data[0] = new String[names.length];
        }

        /**
         * Guesses the column types from the sampled data, then corrects them against the
         * rough types derived from the Parquet schema.
         *
         * @return resolved type code per column
         */
        public byte[] guessTypes() {
            final byte[] rough = this._roughTypes;
            final byte[] guessed = super.guessTypes();
            return ParquetParser.correctTypeConversions(rough, guessed);
        }

        /**
         * Packages the guessed types and the collected preview data into a new setup.
         *
         * @param parquetMetadata serialized footer to store in the setup
         * @return setup built from the column names, guessed types and preview data
         */
        ParquetParseSetup toParseSetup(byte[] parquetMetadata) {
            final byte[] resolvedTypes = this.guessTypes();
            return new ParquetParseSetup(this._colNames, resolvedTypes, this._data, parquetMetadata);
        }
    }
}

