public class ParquetFileReader extends Object implements Closeable
| Modifier and Type | Field and Description |
|---|---|
static String |
PARQUET_READ_PARALLELISM |
| Constructor and Description |
|---|
ParquetFileReader(org.apache.hadoop.conf.Configuration configuration,
FileMetaData fileMetaData,
org.apache.hadoop.fs.Path filePath,
List<BlockMetaData> blocks,
List<ColumnDescriptor> columns)
Deprecated.
will be removed in 2.0.0.
|
ParquetFileReader(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.Path filePath,
List<BlockMetaData> blocks,
List<ColumnDescriptor> columns)
Deprecated.
will be removed in 2.0.0.
|
ParquetFileReader(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path file,
ParquetMetadata footer)
Deprecated.
will be removed in 2.0.0.
|
ParquetFileReader(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path file,
ParquetMetadataConverter.MetadataFilter filter)
Deprecated.
will be removed in 2.0.0.
|
ParquetFileReader(InputFile file,
ParquetReadOptions options) |
| Modifier and Type | Method and Description |
|---|---|
void |
appendTo(ParquetFileWriter writer) |
void |
close() |
org.apache.parquet.hadoop.DictionaryPageReader |
getDictionaryReader(BlockMetaData block) |
String |
getFile() |
FileMetaData |
getFileMetaData() |
long |
getFilteredRecordCount() |
ParquetMetadata |
getFooter() |
DictionaryPageReadStore |
getNextDictionaryReader()
Returns a
DictionaryPageReadStore for the row group that would be
returned by calling readNextRowGroup() or skipped by calling
skipNextRowGroup(). |
org.apache.hadoop.fs.Path |
getPath()
Deprecated.
will be removed in 2.0.0; use
getFile() instead |
long |
getRecordCount() |
List<BlockMetaData> |
getRowGroups() |
static ParquetFileReader |
open(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path file)
Deprecated.
will be removed in 2.0.0; use
open(InputFile) |
static ParquetFileReader |
open(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path file,
ParquetMetadata footer)
Deprecated.
will be removed in 2.0.0
|
static ParquetFileReader |
open(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path file,
ParquetMetadataConverter.MetadataFilter filter)
Deprecated.
will be removed in 2.0.0; use
open(InputFile,ParquetReadOptions) |
static ParquetFileReader |
open(InputFile file)
Open a file. |
static ParquetFileReader |
open(InputFile file,
ParquetReadOptions options)
|
static List<Footer> |
readAllFootersInParallel(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.FileStatus fileStatus)
Deprecated.
will be removed in 2.0.0;
use
open(InputFile, ParquetReadOptions) |
static List<Footer> |
readAllFootersInParallel(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.FileStatus fileStatus,
boolean skipRowGroups)
Deprecated.
will be removed in 2.0.0;
use
open(InputFile, ParquetReadOptions) |
static List<Footer> |
readAllFootersInParallel(org.apache.hadoop.conf.Configuration configuration,
List<org.apache.hadoop.fs.FileStatus> partFiles)
Deprecated.
metadata files are not recommended and will be removed in 2.0.0
|
static List<Footer> |
readAllFootersInParallel(org.apache.hadoop.conf.Configuration configuration,
List<org.apache.hadoop.fs.FileStatus> partFiles,
boolean skipRowGroups)
Deprecated.
will be removed in 2.0.0;
use
open(InputFile, ParquetReadOptions) |
static List<Footer> |
readAllFootersInParallelUsingSummaryFiles(org.apache.hadoop.conf.Configuration configuration,
Collection<org.apache.hadoop.fs.FileStatus> partFiles,
boolean skipRowGroups)
Deprecated.
metadata files are not recommended and will be removed in 2.0.0
|
static List<Footer> |
readAllFootersInParallelUsingSummaryFiles(org.apache.hadoop.conf.Configuration configuration,
List<org.apache.hadoop.fs.FileStatus> partFiles)
Deprecated.
metadata files are not recommended and will be removed in 2.0.0
|
ColumnIndex |
readColumnIndex(ColumnChunkMetaData column) |
static ParquetMetadata |
readFooter(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.FileStatus file)
Deprecated.
will be removed in 2.0.0;
use
open(InputFile, ParquetReadOptions) |
static ParquetMetadata |
readFooter(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.FileStatus file,
ParquetMetadataConverter.MetadataFilter filter)
Deprecated.
will be removed in 2.0.0;
use
open(InputFile, ParquetReadOptions) |
static ParquetMetadata |
readFooter(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.Path file)
Deprecated.
will be removed in 2.0.0;
use
open(InputFile, ParquetReadOptions) |
static ParquetMetadata |
readFooter(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.Path file,
ParquetMetadataConverter.MetadataFilter filter)
Deprecated.
will be removed in 2.0.0;
use
open(InputFile, ParquetReadOptions) |
static ParquetMetadata |
readFooter(InputFile file,
ParquetMetadataConverter.MetadataFilter filter)
Deprecated.
will be removed in 2.0.0;
use
open(InputFile, ParquetReadOptions) |
static List<Footer> |
readFooters(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.FileStatus pathStatus)
Deprecated.
will be removed in 2.0.0;
use
open(InputFile, ParquetReadOptions) |
static List<Footer> |
readFooters(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.FileStatus pathStatus,
boolean skipRowGroups)
Deprecated.
will be removed in 2.0.0;
use
open(InputFile, ParquetReadOptions) |
static List<Footer> |
readFooters(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.Path path)
Deprecated.
will be removed in 2.0.0;
use
open(InputFile, ParquetReadOptions) |
PageReadStore |
readNextFilteredRowGroup()
Reads all the columns requested from the row group at the current file position.
|
PageReadStore |
readNextRowGroup()
Reads all the columns requested from the row group at the current file position.
|
OffsetIndex |
readOffsetIndex(ColumnChunkMetaData column) |
static List<Footer> |
readSummaryFile(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.FileStatus summaryStatus)
Deprecated.
metadata files are not recommended and will be removed in 2.0.0
|
void |
setRequestedSchema(MessageType projection) |
boolean |
skipNextRowGroup() |
public static String PARQUET_READ_PARALLELISM
@Deprecated public ParquetFileReader(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.Path filePath, List<BlockMetaData> blocks, List<ColumnDescriptor> columns) throws IOException
Parameters:
configuration - the Hadoop conf
filePath - Path for the parquet file
blocks - the blocks to read
columns - the columns to read (their path)
Throws:
IOException - if the file can not be opened
@Deprecated public ParquetFileReader(org.apache.hadoop.conf.Configuration configuration, FileMetaData fileMetaData, org.apache.hadoop.fs.Path filePath, List<BlockMetaData> blocks, List<ColumnDescriptor> columns) throws IOException
Parameters:
configuration - the Hadoop conf
fileMetaData - fileMetaData for parquet file
filePath - Path for the parquet file
blocks - the blocks to read
columns - the columns to read (their path)
Throws:
IOException - if the file can not be opened
@Deprecated public ParquetFileReader(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path file, ParquetMetadataConverter.MetadataFilter filter) throws IOException
Parameters:
conf - the Hadoop Configuration
file - Path to a parquet file
filter - a ParquetMetadataConverter.MetadataFilter for selecting row groups
Throws:
IOException - if the file can not be opened
@Deprecated public ParquetFileReader(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path file, ParquetMetadata footer) throws IOException
Parameters:
conf - the Hadoop Configuration
file - Path to a parquet file
footer - a ParquetMetadata footer already read from the file
Throws:
IOException - if the file can not be opened
public ParquetFileReader(InputFile file, ParquetReadOptions options) throws IOException
Throws:
IOException
@Deprecated public static List<Footer> readAllFootersInParallelUsingSummaryFiles(org.apache.hadoop.conf.Configuration configuration, List<org.apache.hadoop.fs.FileStatus> partFiles) throws IOException
Parameters:
configuration - the hadoop conf to connect to the file system
partFiles - the part files to read
Throws:
IOException - if there is an exception while reading footers
@Deprecated public static List<Footer> readAllFootersInParallelUsingSummaryFiles(org.apache.hadoop.conf.Configuration configuration, Collection<org.apache.hadoop.fs.FileStatus> partFiles, boolean skipRowGroups) throws IOException
Parameters:
configuration - the hadoop conf to connect to the file system
partFiles - the part files to read
skipRowGroups - to skipRowGroups in the footers
Throws:
IOException - if there is an exception while reading footers
@Deprecated public static List<Footer> readAllFootersInParallel(org.apache.hadoop.conf.Configuration configuration, List<org.apache.hadoop.fs.FileStatus> partFiles) throws IOException
Parameters:
configuration - the conf to access the File System
partFiles - the files to read
Throws:
IOException - if an exception was raised while reading footers
@Deprecated public static List<Footer> readAllFootersInParallel(org.apache.hadoop.conf.Configuration configuration, List<org.apache.hadoop.fs.FileStatus> partFiles, boolean skipRowGroups) throws IOException
Deprecated. will be removed in 2.0.0; use open(InputFile, ParquetReadOptions)
Parameters:
configuration - the conf to access the File System
partFiles - the files to read
skipRowGroups - to skip the rowGroup info
Throws:
IOException - if there is an exception while reading footers
@Deprecated public static List<Footer> readAllFootersInParallel(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.FileStatus fileStatus, boolean skipRowGroups) throws IOException
Deprecated. will be removed in 2.0.0; use open(InputFile, ParquetReadOptions)
Parameters:
configuration - a configuration
fileStatus - a file status to recursively list
skipRowGroups - whether to skip reading row group metadata
Throws:
IOException - if an exception is thrown while reading the footers
@Deprecated public static List<Footer> readAllFootersInParallel(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.FileStatus fileStatus) throws IOException
Deprecated. will be removed in 2.0.0; use open(InputFile, ParquetReadOptions)
Parameters:
configuration - the configuration to access the FS
fileStatus - the root dir
Throws:
IOException - if an exception is thrown while reading the footers
@Deprecated public static List<Footer> readFooters(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.Path path) throws IOException
Deprecated. will be removed in 2.0.0; use open(InputFile, ParquetReadOptions)
Parameters:
configuration - a configuration
path - a file path
Throws:
IOException - if an exception is thrown while reading the footers
@Deprecated public static List<Footer> readFooters(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.FileStatus pathStatus) throws IOException
Deprecated. will be removed in 2.0.0; use open(InputFile, ParquetReadOptions)
Parameters:
configuration - a configuration
pathStatus - a file status to read footers from
Throws:
IOException - if an exception is thrown while reading the footers
@Deprecated public static List<Footer> readFooters(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.FileStatus pathStatus, boolean skipRowGroups) throws IOException
Deprecated. will be removed in 2.0.0; use open(InputFile, ParquetReadOptions)
Parameters:
configuration - the configuration to access the FS
pathStatus - the root dir
skipRowGroups - whether to skip reading row group metadata
Throws:
IOException - if an exception is thrown while reading the footers
@Deprecated public static List<Footer> readSummaryFile(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.FileStatus summaryStatus) throws IOException
Parameters:
configuration - a configuration
summaryStatus - file status for a summary file
Throws:
IOException - if an exception is thrown while reading the summary file
@Deprecated public static final ParquetMetadata readFooter(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.Path file) throws IOException
Deprecated. will be removed in 2.0.0; use open(InputFile, ParquetReadOptions)
Parameters:
configuration - a configuration
file - the parquet File
Throws:
IOException - if an error occurs while reading the file
public static ParquetMetadata readFooter(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.Path file, ParquetMetadataConverter.MetadataFilter filter) throws IOException
Deprecated. will be removed in 2.0.0; use open(InputFile, ParquetReadOptions)
Parameters:
configuration - a configuration
file - the Parquet File
filter - the filter to apply to row groups
Throws:
IOException - if an error occurs while reading the file
@Deprecated public static final ParquetMetadata readFooter(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.FileStatus file) throws IOException
Deprecated. will be removed in 2.0.0; use open(InputFile, ParquetReadOptions)
Parameters:
configuration - a configuration
file - the Parquet File
Throws:
IOException - if an error occurs while reading the file
@Deprecated public static final ParquetMetadata readFooter(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.FileStatus file, ParquetMetadataConverter.MetadataFilter filter) throws IOException
Deprecated. will be removed in 2.0.0; use open(InputFile, ParquetReadOptions)
Parameters:
configuration - a configuration
file - the parquet File
filter - the filter to apply to row groups
Throws:
IOException - if an error occurs while reading the file
@Deprecated public static final ParquetMetadata readFooter(InputFile file, ParquetMetadataConverter.MetadataFilter filter) throws IOException
Deprecated. will be removed in 2.0.0; use open(InputFile, ParquetReadOptions)
Parameters:
file - an InputFile to read
filter - the filter to apply to row groups
Throws:
IOException - if an error occurs while reading the file
@Deprecated public static ParquetFileReader open(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path file) throws IOException
Deprecated. will be removed in 2.0.0; use open(InputFile)
Parameters:
conf - a configuration
file - a file path to open
Throws:
IOException - if there is an error while opening the file
@Deprecated public static ParquetFileReader open(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path file, ParquetMetadataConverter.MetadataFilter filter) throws IOException
Deprecated. will be removed in 2.0.0; use open(InputFile,ParquetReadOptions)
Parameters:
conf - a configuration
file - a file path to open
filter - a metadata filter
Throws:
IOException - if there is an error while opening the file
@Deprecated public static ParquetFileReader open(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path file, ParquetMetadata footer) throws IOException
Parameters:
conf - a configuration
file - a file path to open
footer - a footer for the file if already loaded
Throws:
IOException - if there is an error while opening the file
public static ParquetFileReader open(InputFile file) throws IOException
Open a file.
Parameters:
file - an input file
Throws:
IOException - if there is an error while opening the file
public static ParquetFileReader open(InputFile file, ParquetReadOptions options) throws IOException
Parameters:
file - an input file
options - parquet read options
Throws:
IOException - if there is an error while opening the file
public ParquetMetadata getFooter()
public FileMetaData getFileMetaData()
public long getRecordCount()
public long getFilteredRecordCount()
@Deprecated public org.apache.hadoop.fs.Path getPath()
Deprecated. will be removed in 2.0.0; use getFile() instead
public String getFile()
public List<BlockMetaData> getRowGroups()
public void setRequestedSchema(MessageType projection)
public void appendTo(ParquetFileWriter writer) throws IOException
Throws:
IOException
public PageReadStore readNextRowGroup() throws IOException
Throws:
IOException - if an error occurs while reading
public PageReadStore readNextFilteredRowGroup() throws IOException
Throws:
IOException - if any I/O error occurs while reading
public boolean skipNextRowGroup()
public DictionaryPageReadStore getNextDictionaryReader()
Returns a DictionaryPageReadStore for the row group that would be
returned by calling readNextRowGroup() or skipped by calling
skipNextRowGroup().
public org.apache.parquet.hadoop.DictionaryPageReader getDictionaryReader(BlockMetaData block)
@InterfaceAudience.Private public ColumnIndex readColumnIndex(ColumnChunkMetaData column) throws IOException
Parameters:
column - the column chunk which the column index is to be returned for
Returns:
the column index, or null if there is no index
Throws:
IOException - if any I/O error occurs during reading the file
@InterfaceAudience.Private public OffsetIndex readOffsetIndex(ColumnChunkMetaData column) throws IOException
Parameters:
column - the column chunk which the offset index is to be returned for
Returns:
the offset index, or null if there is no index
Throws:
IOException - if any I/O error occurs during reading the file
public void close() throws IOException
Specified by:
close in interface Closeable
close in interface AutoCloseable
Throws:
IOException
Copyright © 2020 The Apache Software Foundation. All rights reserved.