public class HoodieCopyOnWriteTableInputFormat extends HoodieTableInputFormat
An implementation of FileInputFormat that allows reading of Hudi's
Copy-on-Write (COW) tables in various configurations.
Inherited fields: conf

| Constructor and Description |
|---|
| HoodieCopyOnWriteTableInputFormat() |
| Modifier and Type | Method and Description |
|---|---|
| protected boolean | checkIfValidFileSlice(FileSlice fileSlice) |
| protected org.apache.hadoop.fs.FileStatus | createFileStatusUnchecked(FileSlice fileSlice, Option<HoodieInstant> instantOpt, String basePath, Option<HoodieVirtualKeyInfo> virtualKeyInfoOpt) |
| protected org.apache.hadoop.fs.FileStatus[] | doListStatus(org.apache.hadoop.mapred.JobConf job) — Abstracts and exposes the FileInputFormat.listStatus(JobConf) operation to subclasses; it lists files (returning an array of FileStatus) corresponding to the input paths specified as part of the provided JobConf |
| protected static org.apache.hadoop.fs.FileStatus | getFileStatusUnchecked(HoodieBaseFile baseFile) |
| protected static Option<HoodieVirtualKeyInfo> | getHoodieVirtualKeyInfo(HoodieTableMetaClient metaClient) |
| org.apache.hadoop.mapred.RecordReader<org.apache.hadoop.io.NullWritable,org.apache.hadoop.io.ArrayWritable> | getRecordReader(org.apache.hadoop.mapred.InputSplit split, org.apache.hadoop.mapred.JobConf job, org.apache.hadoop.mapred.Reporter reporter) |
| protected boolean | isSplitable(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path filename) |
| org.apache.hadoop.fs.FileStatus[] | listStatus(org.apache.hadoop.mapred.JobConf job) |
| protected List<org.apache.hadoop.fs.FileStatus> | listStatusForIncrementalMode(org.apache.hadoop.mapred.JobConf job, HoodieTableMetaClient tableMetaClient, List<org.apache.hadoop.fs.Path> inputPaths, String incrementalTable) — Achieves listStatus functionality for an incrementally queried table. |
| protected org.apache.hadoop.mapred.FileSplit | makeSplit(org.apache.hadoop.fs.Path file, long start, long length, String[] hosts) |
| protected org.apache.hadoop.mapred.FileSplit | makeSplit(org.apache.hadoop.fs.Path file, long start, long length, String[] hosts, String[] inMemoryHosts) |
Inherited methods: getConf, setConf

protected boolean isSplitable(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path filename)
Overrides: isSplitable in class HoodieTableInputFormat

protected org.apache.hadoop.mapred.FileSplit makeSplit(org.apache.hadoop.fs.Path file,
long start,
long length,
String[] hosts)
Overrides: makeSplit in class HoodieTableInputFormat

protected org.apache.hadoop.mapred.FileSplit makeSplit(org.apache.hadoop.fs.Path file,
long start,
long length,
String[] hosts,
String[] inMemoryHosts)
Overrides: makeSplit in class HoodieTableInputFormat

public org.apache.hadoop.fs.FileStatus[] listStatus(org.apache.hadoop.mapred.JobConf job)
throws IOException
Overrides: listStatus in class HoodieTableInputFormat
Throws: IOException

public org.apache.hadoop.mapred.RecordReader<org.apache.hadoop.io.NullWritable,org.apache.hadoop.io.ArrayWritable> getRecordReader(org.apache.hadoop.mapred.InputSplit split,
org.apache.hadoop.mapred.JobConf job,
org.apache.hadoop.mapred.Reporter reporter)
throws IOException
Specified by: getRecordReader in interface org.apache.hadoop.mapred.InputFormat<org.apache.hadoop.io.NullWritable,org.apache.hadoop.io.ArrayWritable>
Specified by: getRecordReader in class org.apache.hadoop.mapred.FileInputFormat<org.apache.hadoop.io.NullWritable,org.apache.hadoop.io.ArrayWritable>
Throws: IOException

protected final org.apache.hadoop.fs.FileStatus[] doListStatus(org.apache.hadoop.mapred.JobConf job)
throws IOException
Abstracts and exposes the FileInputFormat.listStatus(JobConf) operation to subclasses; it
lists files (returning an array of FileStatus) corresponding to the input paths specified
as part of the provided JobConf.
Throws: IOException

protected List<org.apache.hadoop.fs.FileStatus> listStatusForIncrementalMode(org.apache.hadoop.mapred.JobConf job, HoodieTableMetaClient tableMetaClient, List<org.apache.hadoop.fs.Path> inputPaths, String incrementalTable) throws IOException
Throws: IOException

protected org.apache.hadoop.fs.FileStatus createFileStatusUnchecked(FileSlice fileSlice, Option<HoodieInstant> instantOpt, String basePath, Option<HoodieVirtualKeyInfo> virtualKeyInfoOpt)
protected boolean checkIfValidFileSlice(FileSlice fileSlice)
@Nonnull protected static org.apache.hadoop.fs.FileStatus getFileStatusUnchecked(HoodieBaseFile baseFile)
protected static Option<HoodieVirtualKeyInfo> getHoodieVirtualKeyInfo(HoodieTableMetaClient metaClient)
Copyright © 2022 The Apache Software Foundation. All rights reserved.