public class ParquetFileReader extends Object implements Closeable
| Modifier and Type | Field and Description |
|---|---|
static String |
PARQUET_READ_PARALLELISM |
| Constructor and Description |
|---|
ParquetFileReader(org.apache.hadoop.conf.Configuration configuration,
FileMetaData fileMetaData,
org.apache.hadoop.fs.Path filePath,
List<BlockMetaData> blocks,
List<ColumnDescriptor> columns) |
ParquetFileReader(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.Path filePath,
List<BlockMetaData> blocks,
List<ColumnDescriptor> columns)
Deprecated.
use ParquetFileReader(Configuration configuration, FileMetaData fileMetaData,
Path filePath, List<BlockMetaData> blocks, List<ColumnDescriptor> columns) instead
|
| Modifier and Type | Method and Description |
|---|---|
void |
close() |
static List<Footer> |
readAllFootersInParallel(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.FileStatus fileStatus)
Read the footers of all the files under that path (recursively)
not using summary files.
|
static List<Footer> |
readAllFootersInParallel(org.apache.hadoop.conf.Configuration configuration,
List<org.apache.hadoop.fs.FileStatus> partFiles)
Deprecated.
|
static List<Footer> |
readAllFootersInParallel(org.apache.hadoop.conf.Configuration configuration,
List<org.apache.hadoop.fs.FileStatus> partFiles,
boolean skipRowGroups)
read all the footers of the files provided
(not using summary files)
|
static List<Footer> |
readAllFootersInParallelUsingSummaryFiles(org.apache.hadoop.conf.Configuration configuration,
Collection<org.apache.hadoop.fs.FileStatus> partFiles,
boolean skipRowGroups)
for files provided, check if there's a summary file.
|
static List<Footer> |
readAllFootersInParallelUsingSummaryFiles(org.apache.hadoop.conf.Configuration configuration,
List<org.apache.hadoop.fs.FileStatus> partFiles)
Deprecated.
|
static ParquetMetadata |
readFooter(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.FileStatus file)
Deprecated.
use
ParquetFileReader#readFooter(Configuration, FileStatus, MetadataFilter) |
static ParquetMetadata |
readFooter(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.FileStatus file,
ParquetMetadataConverter.MetadataFilter filter)
Reads the meta data block in the footer of the file
|
static ParquetMetadata |
readFooter(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.Path file)
Deprecated.
|
static ParquetMetadata |
readFooter(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.Path file,
ParquetMetadataConverter.MetadataFilter filter)
Reads the meta data in the footer of the file.
|
static List<Footer> |
readFooters(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.FileStatus pathStatus)
Deprecated.
|
static List<Footer> |
readFooters(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.FileStatus pathStatus,
boolean skipRowGroups)
Read the footers of all the files under that path (recursively)
using summary files if possible
|
static List<Footer> |
readFooters(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.Path path)
Deprecated.
|
PageReadStore |
readNextRowGroup()
Reads all the columns requested from the row group at the current file position.
|
static List<Footer> |
readSummaryFile(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.FileStatus summaryStatus)
Specifically reads a given summary file
|
public static String PARQUET_READ_PARALLELISM
public ParquetFileReader(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.Path filePath,
List<BlockMetaData> blocks,
List<ColumnDescriptor> columns)
throws IOException
IOException
public ParquetFileReader(org.apache.hadoop.conf.Configuration configuration,
FileMetaData fileMetaData,
org.apache.hadoop.fs.Path filePath,
List<BlockMetaData> blocks,
List<ColumnDescriptor> columns)
throws IOException
configuration - the Hadoop conf
fileMetaData - fileMetaData for parquet file
blocks - the blocks to read
columns - the columns to read (their path)
IOException - if the file cannot be opened
@Deprecated public static List<Footer> readAllFootersInParallelUsingSummaryFiles(org.apache.hadoop.conf.Configuration configuration, List<org.apache.hadoop.fs.FileStatus> partFiles) throws IOException
configuration - the hadoop conf to connect to the file system
partFiles - the part files to read
IOException
public static List<Footer> readAllFootersInParallelUsingSummaryFiles(org.apache.hadoop.conf.Configuration configuration, Collection<org.apache.hadoop.fs.FileStatus> partFiles, boolean skipRowGroups) throws IOException
configuration - the hadoop conf to connect to the file system
partFiles - the part files to read
skipRowGroups - whether to skip the row groups in the footers
IOException
@Deprecated public static List<Footer> readAllFootersInParallel(org.apache.hadoop.conf.Configuration configuration, List<org.apache.hadoop.fs.FileStatus> partFiles) throws IOException
IOException
public static List<Footer> readAllFootersInParallel(org.apache.hadoop.conf.Configuration configuration, List<org.apache.hadoop.fs.FileStatus> partFiles, boolean skipRowGroups) throws IOException
configuration - the conf to access the File System
partFiles - the files to read
skipRowGroups - whether to skip the row group info
IOException
public static List<Footer> readAllFootersInParallel(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.FileStatus fileStatus) throws IOException
configuration - the configuration to access the FS
fileStatus - the root dir
IOException
@Deprecated public static List<Footer> readFooters(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.Path path) throws IOException
IOException
@Deprecated public static List<Footer> readFooters(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.FileStatus pathStatus) throws IOException
configuration -
pathStatus -
IOException
public static List<Footer> readFooters(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.FileStatus pathStatus, boolean skipRowGroups) throws IOException
configuration - the configuration to access the FS
pathStatus - the root dir
IOException
public static List<Footer> readSummaryFile(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.FileStatus summaryStatus) throws IOException
configuration -
summaryStatus -
IOException
@Deprecated public static final ParquetMetadata readFooter(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.Path file) throws IOException
configuration -
file - the Parquet file
IOException - if an error occurs while reading the file
public static ParquetMetadata readFooter(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.Path file, ParquetMetadataConverter.MetadataFilter filter) throws IOException
configuration -
file - the Parquet file
filter - the filter to apply to row groups
IOException - if an error occurs while reading the file
@Deprecated public static final ParquetMetadata readFooter(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.FileStatus file) throws IOException
ParquetFileReader#readFooter(Configuration, FileStatus, MetadataFilter)
IOException
public static final ParquetMetadata readFooter(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.FileStatus file, ParquetMetadataConverter.MetadataFilter filter) throws IOException
configuration -
file - the Parquet file
filter - the filter to apply to row groups
IOException - if an error occurs while reading the file
public PageReadStore readNextRowGroup() throws IOException
IOException - if an error occurs while reading
public void close()
throws IOException
close in interface Closeable
close in interface AutoCloseable
IOException
Copyright © 2015 The Apache Software Foundation. All rights reserved.