public class VectorizedParquetRecordReader extends ParquetRecordReaderBase implements org.apache.hadoop.mapred.RecordReader<org.apache.hadoop.io.NullWritable,VectorizedRowBatch>, RowPositionAwareVectorizedRecordReader
| Modifier and Type | Field and Description |
|---|---|
protected org.apache.parquet.schema.MessageType |
fileSchema |
static org.slf4j.Logger |
LOG |
protected org.apache.parquet.schema.MessageType |
requestedSchema |
protected long |
totalRowCount
The total number of rows this RecordReader will eventually read.
|
Fields inherited from class ParquetRecordReaderBase:
filePath, fileSplit, filteredBlocks, jobConf, legacyConversionEnabled, parquetInputSplit, parquetMetadata, projectionPusher, reader, schemaSize, serDeStats, skipProlepticConversion, skipTimestampConversion

| Constructor and Description |
|---|
VectorizedParquetRecordReader(org.apache.hadoop.mapred.InputSplit oldInputSplit,
org.apache.hadoop.mapred.JobConf conf) |
VectorizedParquetRecordReader(org.apache.hadoop.mapred.InputSplit oldInputSplit,
org.apache.hadoop.mapred.JobConf conf,
FileMetadataCache metadataCache,
DataCache dataCache,
org.apache.hadoop.conf.Configuration cacheConf) |
VectorizedParquetRecordReader(org.apache.hadoop.mapred.InputSplit oldInputSplit,
org.apache.hadoop.mapred.JobConf conf,
FileMetadataCache metadataCache,
DataCache dataCache,
org.apache.hadoop.conf.Configuration cacheConf,
org.apache.parquet.hadoop.metadata.ParquetMetadata parquetMetadata) |
| Modifier and Type | Method and Description |
|---|---|
static CacheTag |
cacheTagOfParquetFile(org.apache.hadoop.fs.Path path,
org.apache.hadoop.conf.Configuration cacheConf,
org.apache.hadoop.mapred.JobConf jobConf) |
void |
close() |
org.apache.hadoop.io.NullWritable |
createKey() |
VectorizedRowBatch |
createValue() |
protected org.apache.parquet.hadoop.metadata.ParquetMetadata |
getParquetMetadata(org.apache.hadoop.fs.Path path,
org.apache.hadoop.mapred.JobConf conf) |
long |
getPos() |
float |
getProgress() |
long |
getRowNumber()
Returns the row position (in the file) of the first row in the last returned batch.
|
void |
initialize(org.apache.parquet.hadoop.ParquetInputSplit split,
org.apache.hadoop.mapred.JobConf configuration) |
boolean |
next(org.apache.hadoop.io.NullWritable nullWritable,
VectorizedRowBatch vectorizedRowBatch) |
Methods inherited from class ParquetRecordReaderBase:
getFilteredBlocks, getSplit, getStats, setFilter, setupMetadataAndParquetSplit

public static final org.slf4j.Logger LOG
protected org.apache.parquet.schema.MessageType fileSchema
protected org.apache.parquet.schema.MessageType requestedSchema
protected long totalRowCount
public VectorizedParquetRecordReader(org.apache.hadoop.mapred.InputSplit oldInputSplit,
org.apache.hadoop.mapred.JobConf conf)
throws IOException
Throws:
IOException

public VectorizedParquetRecordReader(org.apache.hadoop.mapred.InputSplit oldInputSplit,
org.apache.hadoop.mapred.JobConf conf,
FileMetadataCache metadataCache,
DataCache dataCache,
org.apache.hadoop.conf.Configuration cacheConf,
org.apache.parquet.hadoop.metadata.ParquetMetadata parquetMetadata)
throws IOException
Throws:
IOException

public VectorizedParquetRecordReader(org.apache.hadoop.mapred.InputSplit oldInputSplit,
org.apache.hadoop.mapred.JobConf conf,
FileMetadataCache metadataCache,
DataCache dataCache,
org.apache.hadoop.conf.Configuration cacheConf)
throws IOException
Throws:
IOException

protected org.apache.parquet.hadoop.metadata.ParquetMetadata getParquetMetadata(org.apache.hadoop.fs.Path path,
org.apache.hadoop.mapred.JobConf conf)
throws IOException
Overrides:
getParquetMetadata in class ParquetRecordReaderBase
Throws:
IOException

public void initialize(org.apache.parquet.hadoop.ParquetInputSplit split,
org.apache.hadoop.mapred.JobConf configuration)
throws IOException,
InterruptedException,
HiveException
public static CacheTag cacheTagOfParquetFile(org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration cacheConf, org.apache.hadoop.mapred.JobConf jobConf)
public boolean next(org.apache.hadoop.io.NullWritable nullWritable,
VectorizedRowBatch vectorizedRowBatch)
throws IOException
Specified by:
next in interface org.apache.hadoop.mapred.RecordReader<org.apache.hadoop.io.NullWritable,VectorizedRowBatch>
Throws:
IOException

public org.apache.hadoop.io.NullWritable createKey()
Specified by:
createKey in interface org.apache.hadoop.mapred.RecordReader<org.apache.hadoop.io.NullWritable,VectorizedRowBatch>

public VectorizedRowBatch createValue()
Specified by:
createValue in interface org.apache.hadoop.mapred.RecordReader<org.apache.hadoop.io.NullWritable,VectorizedRowBatch>

public long getPos()
throws IOException
Specified by:
getPos in interface org.apache.hadoop.mapred.RecordReader<org.apache.hadoop.io.NullWritable,VectorizedRowBatch>
Throws:
IOException

public void close()
throws IOException
Specified by:
close in interface Closeable
close in interface AutoCloseable
close in interface org.apache.hadoop.mapred.RecordReader<org.apache.hadoop.io.NullWritable,VectorizedRowBatch>
Throws:
IOException

public float getProgress()
throws IOException
Specified by:
getProgress in interface org.apache.hadoop.mapred.RecordReader<org.apache.hadoop.io.NullWritable,VectorizedRowBatch>
Throws:
IOException

public long getRowNumber()
throws IOException
Description copied from interface: RowPositionAwareVectorizedRecordReader
Specified by:
getRowNumber in interface RowPositionAwareVectorizedRecordReader
Throws:
IOException

Copyright © 2024 The Apache Software Foundation. All rights reserved.