public abstract class AbstractTableFileSystemView extends Object implements SyncableFileSystemView, Serializable
TableFileSystemView.BaseFileOnlyView, TableFileSystemView.BaseFileOnlyViewWithLatestSlice, TableFileSystemView.SliceView, TableFileSystemView.SliceViewWithLatestSlice

| Modifier and Type | Field and Description |
|---|---|
protected HoodieTableMetaClient |
metaClient |
| Constructor and Description |
|---|
AbstractTableFileSystemView() |
| Modifier and Type | Method and Description |
|---|---|
protected FileSlice |
addBootstrapBaseFileIfPresent(FileSlice fileSlice) |
protected HoodieFileGroup |
addBootstrapBaseFileIfPresent(HoodieFileGroup fileGroup) |
protected HoodieBaseFile |
addBootstrapBaseFileIfPresent(HoodieFileGroupId fileGroupId,
HoodieBaseFile baseFile) |
List<HoodieFileGroup> |
addFilesToView(org.apache.hadoop.fs.FileStatus[] statuses)
Adds the provided statuses into the file system view, and also caches it inside this object.
|
protected abstract void |
addReplacedFileGroups(Map<HoodieFileGroupId,HoodieInstant> replacedFileGroups)
Track instant time for new file groups replaced.
|
protected List<HoodieFileGroup> |
buildFileGroups(org.apache.hadoop.fs.FileStatus[] statuses,
HoodieTimeline timeline,
boolean addPendingCompactionFileSlice)
Build FileGroups from passed in file-status.
|
protected List<HoodieFileGroup> |
buildFileGroups(Stream<HoodieBaseFile> baseFileStream,
Stream<HoodieLogFile> logFileStream,
HoodieTimeline timeline,
boolean addPendingCompactionFileSlice) |
void |
close()
Allow View to release resources and close.
|
protected abstract Stream<Pair<HoodieFileGroupId,HoodieInstant>> |
fetchFileGroupsInPendingClustering()
Fetch all file groups in pending clustering.
|
protected Option<HoodieBaseFile> |
fetchLatestBaseFile(String partitionPath,
String fileId)
Default implementation for fetching latest base-file.
|
Stream<HoodieBaseFile> |
fetchLatestBaseFiles(String partitionPath)
Default implementation for fetching latest base-files for the partition-path.
|
protected Option<FileSlice> |
fetchLatestFileSlice(String partitionPath,
String fileId)
Default implementation for fetching file-slice.
|
protected Stream<FileSlice> |
filterBaseFileAfterPendingCompaction(FileSlice fileSlice,
boolean includeEmptyFileSlice)
With async compaction, it is possible to see partial/complete base-files due to inflight compactions. Ignore those
base-files.
|
Stream<HoodieBaseFile> |
getAllBaseFiles(String partitionStr)
Stream all the data file versions grouped by FileId for a given partition.
|
Stream<HoodieFileGroup> |
getAllFileGroups(String partitionStr)
Stream all the file groups for a given partition.
|
Stream<FileSlice> |
getAllFileSlices(String partitionStr)
Stream all the file slices for a given partition, latest or not.
|
Stream<HoodieFileGroup> |
getAllReplacedFileGroups(String partitionPath)
Stream all the replaced file groups for given partition.
|
Option<HoodieBaseFile> |
getBaseFileOn(String partitionStr,
String instantTime,
String fileId)
Get the version of data file matching the instant time in the given partition.
|
protected abstract Option<BootstrapBaseFileMapping> |
getBootstrapBaseFile(HoodieFileGroupId fileGroupId)
Return the bootstrap base file mapping for a file-group, if one is present.
|
Stream<Pair<HoodieFileGroupId,HoodieInstant>> |
getFileGroupsInPendingClustering()
Filegroups that are in pending clustering.
|
Option<HoodieInstant> |
getLastInstant()
Last Known Instant on which the view is built.
|
protected Option<HoodieBaseFile> |
getLatestBaseFile(HoodieFileGroup fileGroup) |
Option<HoodieBaseFile> |
getLatestBaseFile(String partitionStr,
String fileId)
Get Latest base file for a partition and file-Id.
|
Stream<HoodieBaseFile> |
getLatestBaseFiles()
Stream all the latest data files, in the file system view.
|
Stream<HoodieBaseFile> |
getLatestBaseFiles(String partitionStr)
Stream all the latest data files in the given partition.
|
Stream<HoodieBaseFile> |
getLatestBaseFilesBeforeOrOn(String partitionStr,
String maxCommitTime)
Stream all the latest version data files in the given partition, with the precondition that commitTime(file) is
before or on maxCommitTime.
|
Stream<HoodieBaseFile> |
getLatestBaseFilesInRange(List<String> commitsToReturn)
Stream all the latest data files, in the given range.
|
Option<FileSlice> |
getLatestFileSlice(String partitionStr,
String fileId)
Get Latest File Slice for a given fileId in a given partition.
|
Stream<FileSlice> |
getLatestFileSliceInRange(List<String> commitsToReturn)
Stream all the latest file slices, in the given range.
|
Stream<FileSlice> |
getLatestFileSlices(String partitionStr)
Stream all the latest file slices in the given partition.
|
Stream<FileSlice> |
getLatestFileSlicesBeforeOrOn(String partitionStr,
String maxCommitTime,
boolean includeFileSlicesInPendingCompaction)
Stream all latest file slices in given partition with precondition that commitTime(file) before maxCommitTime.
|
Stream<FileSlice> |
getLatestMergedFileSlicesBeforeOrOn(String partitionStr,
String maxInstantTime)
Stream all "merged" file-slices before or on an instant time. If a file-group has a pending compaction request, the
file-slice before and after the compaction request instant is merged and returned.
|
Stream<FileSlice> |
getLatestUnCompactedFileSlices(String partitionStr)
Stream all the latest uncompacted file slices in the given partition.
|
List<org.apache.hadoop.fs.Path> |
getPartitionPaths() |
protected abstract Option<HoodieInstant> |
getPendingClusteringInstant(HoodieFileGroupId fileGroupId)
Get pending clustering instant time for specified file group.
|
Stream<Pair<String,CompactionOperation>> |
getPendingCompactionOperations()
Return Pending Compaction Operations.
|
protected abstract Option<Pair<String,CompactionOperation>> |
getPendingCompactionOperationWithInstant(HoodieFileGroupId fileGroupId)
Return pending compaction operation for a file-group.
|
Stream<HoodieFileGroup> |
getReplacedFileGroupsBefore(String maxCommitTime,
String partitionPath)
Stream all the replaced file groups before maxCommitTime for given partition.
|
Stream<HoodieFileGroup> |
getReplacedFileGroupsBeforeOrOn(String maxCommitTime,
String partitionPath)
Stream all the replaced file groups before or on maxCommitTime for given partition.
|
protected abstract Option<HoodieInstant> |
getReplaceInstant(HoodieFileGroupId fileGroupId)
Get the instant time at which the given file group was replaced, if it has been replaced.
|
HoodieTimeline |
getTimeline()
Timeline corresponding to the view.
|
HoodieTimeline |
getVisibleCommitsAndCompactionTimeline()
Return Only Commits and Compaction timeline for building file-groups.
|
protected void |
init(HoodieTableMetaClient metaClient,
HoodieTimeline visibleActiveTimeline)
Initialize the view.
|
protected boolean |
isBaseFileDueToPendingClustering(HoodieBaseFile baseFile)
With async clustering, it is possible to see partial/complete base-files due to inflight clustering. Ignore those
base-files.
|
protected boolean |
isBaseFileDueToPendingCompaction(HoodieBaseFile baseFile)
With async compaction, it is possible to see partial/complete base-files due to inflight compactions. Ignore those
base-files.
|
protected abstract boolean |
isBootstrapBaseFilePresentForFileId(HoodieFileGroupId fgId)
Check if there is a bootstrap base file present for this file.
|
protected boolean |
isFileSliceAfterPendingCompaction(FileSlice fileSlice)
Returns true if the file-group is under pending compaction and the file-slice's base instant matches the compaction
instant.
|
protected abstract boolean |
isPendingClusteringScheduledForFileId(HoodieFileGroupId fgId)
Check if there is an outstanding clustering operation (requested/inflight) scheduled for this file.
|
protected abstract boolean |
isPendingCompactionScheduledForFileId(HoodieFileGroupId fgId)
Check if there is an outstanding compaction scheduled for this file.
|
protected org.apache.hadoop.fs.FileStatus[] |
listPartition(org.apache.hadoop.fs.Path partitionPath)
Return all the files from the partition.
|
protected void |
refreshTimeline(HoodieTimeline visibleActiveTimeline)
Refresh commits timeline.
|
protected abstract void |
removeReplacedFileIdsAtInstants(Set<String> instants)
Remove file groups that are replaced in any of the specified instants.
|
void |
reset()
Clears the partition Map and reset view states.
|
protected abstract void |
resetReplacedFileGroups(Map<HoodieFileGroupId,HoodieInstant> replacedFileGroups)
Track instant time for file groups replaced.
|
protected abstract void |
resetViewState()
Allows all view metadata in file system view storage to be reset by subclasses.
|
protected void |
runSync(HoodieTimeline oldTimeline,
HoodieTimeline newTimeline)
Performs complete reset of file-system view.
|
void |
sync()
Read the latest timeline and refresh the file-system view to match the current state of the file-system.
|
protected HoodieTableMetaClient metaClient
protected void init(HoodieTableMetaClient metaClient, HoodieTimeline visibleActiveTimeline)
protected void refreshTimeline(HoodieTimeline visibleActiveTimeline)
visibleActiveTimeline - Visible Active Timeline
public List<HoodieFileGroup> addFilesToView(org.apache.hadoop.fs.FileStatus[] statuses)
protected List<HoodieFileGroup> buildFileGroups(org.apache.hadoop.fs.FileStatus[] statuses, HoodieTimeline timeline, boolean addPendingCompactionFileSlice)
protected List<HoodieFileGroup> buildFileGroups(Stream<HoodieBaseFile> baseFileStream, Stream<HoodieLogFile> logFileStream, HoodieTimeline timeline, boolean addPendingCompactionFileSlice)
public void close()
SyncableFileSystemView
close in interface SyncableFileSystemView
public void reset()
reset in interface SyncableFileSystemView
protected abstract void resetViewState()
protected org.apache.hadoop.fs.FileStatus[] listPartition(org.apache.hadoop.fs.Path partitionPath)
throws IOException
partitionPath - The absolute path of the partition
IOException
protected boolean isBaseFileDueToPendingCompaction(HoodieBaseFile baseFile)
baseFile - base File
protected boolean isBaseFileDueToPendingClustering(HoodieBaseFile baseFile)
baseFile - base File
protected boolean isFileSliceAfterPendingCompaction(FileSlice fileSlice)
fileSlice - File Slice
protected Stream<FileSlice> filterBaseFileAfterPendingCompaction(FileSlice fileSlice, boolean includeEmptyFileSlice)
fileSlice - File Slice
includeEmptyFileSlice - include empty file-slice
protected HoodieFileGroup addBootstrapBaseFileIfPresent(HoodieFileGroup fileGroup)
protected FileSlice addBootstrapBaseFileIfPresent(FileSlice fileSlice)
protected HoodieBaseFile addBootstrapBaseFileIfPresent(HoodieFileGroupId fileGroupId, HoodieBaseFile baseFile)
public final Stream<Pair<String,CompactionOperation>> getPendingCompactionOperations()
TableFileSystemView
getPendingCompactionOperations in interface TableFileSystemView
public final List<org.apache.hadoop.fs.Path> getPartitionPaths()
public final Stream<HoodieBaseFile> getLatestBaseFiles(String partitionStr)
TableFileSystemView.BaseFileOnlyViewWithLatestSlice
getLatestBaseFiles in interface TableFileSystemView.BaseFileOnlyViewWithLatestSlice
public final Stream<HoodieBaseFile> getLatestBaseFiles()
TableFileSystemView.BaseFileOnlyViewWithLatestSlice
getLatestBaseFiles in interface TableFileSystemView.BaseFileOnlyViewWithLatestSlice
public final Stream<HoodieBaseFile> getLatestBaseFilesBeforeOrOn(String partitionStr, String maxCommitTime)
TableFileSystemView.BaseFileOnlyViewWithLatestSlice
getLatestBaseFilesBeforeOrOn in interface TableFileSystemView.BaseFileOnlyViewWithLatestSlice
public final Option<HoodieBaseFile> getBaseFileOn(String partitionStr, String instantTime, String fileId)
TableFileSystemView.BaseFileOnlyView
getBaseFileOn in interface TableFileSystemView.BaseFileOnlyView
public final Option<HoodieBaseFile> getLatestBaseFile(String partitionStr, String fileId)
getLatestBaseFile in interface TableFileSystemView.BaseFileOnlyViewWithLatestSlice
public final Stream<HoodieBaseFile> getLatestBaseFilesInRange(List<String> commitsToReturn)
TableFileSystemView.BaseFileOnlyViewWithLatestSlice
getLatestBaseFilesInRange in interface TableFileSystemView.BaseFileOnlyViewWithLatestSlice
public final Stream<HoodieBaseFile> getAllBaseFiles(String partitionStr)
TableFileSystemView.BaseFileOnlyView
getAllBaseFiles in interface TableFileSystemView.BaseFileOnlyView
public final Stream<FileSlice> getLatestFileSlices(String partitionStr)
TableFileSystemView.SliceViewWithLatestSlice
getLatestFileSlices in interface TableFileSystemView.SliceViewWithLatestSlice
public final Option<FileSlice> getLatestFileSlice(String partitionStr, String fileId)
getLatestFileSlice in interface TableFileSystemView.SliceViewWithLatestSlice
public final Stream<FileSlice> getLatestUnCompactedFileSlices(String partitionStr)
TableFileSystemView.SliceViewWithLatestSlice
getLatestUnCompactedFileSlices in interface TableFileSystemView.SliceViewWithLatestSlice
public final Stream<FileSlice> getLatestFileSlicesBeforeOrOn(String partitionStr, String maxCommitTime, boolean includeFileSlicesInPendingCompaction)
TableFileSystemView.SliceViewWithLatestSlice
getLatestFileSlicesBeforeOrOn in interface TableFileSystemView.SliceViewWithLatestSlice
partitionStr - Partition path
maxCommitTime - Max Instant Time
includeFileSlicesInPendingCompaction - include file-slices that are in pending compaction
public final Stream<FileSlice> getLatestMergedFileSlicesBeforeOrOn(String partitionStr, String maxInstantTime)
TableFileSystemView.SliceViewWithLatestSlice
getLatestMergedFileSlicesBeforeOrOn in interface TableFileSystemView.SliceViewWithLatestSlice
partitionStr - Partition Path
maxInstantTime - Max Instant Time
public final Stream<FileSlice> getLatestFileSliceInRange(List<String> commitsToReturn)
TableFileSystemView.SliceViewWithLatestSlice
getLatestFileSliceInRange in interface TableFileSystemView.SliceViewWithLatestSlice
public final Stream<FileSlice> getAllFileSlices(String partitionStr)
TableFileSystemView.SliceView
getAllFileSlices in interface TableFileSystemView.SliceView
public final Stream<HoodieFileGroup> getAllFileGroups(String partitionStr)
TableFileSystemView
getAllFileGroups in interface TableFileSystemView
public Stream<HoodieFileGroup> getReplacedFileGroupsBeforeOrOn(String maxCommitTime, String partitionPath)
TableFileSystemView
getReplacedFileGroupsBeforeOrOn in interface TableFileSystemView
public Stream<HoodieFileGroup> getReplacedFileGroupsBefore(String maxCommitTime, String partitionPath)
TableFileSystemView
getReplacedFileGroupsBefore in interface TableFileSystemView
public Stream<HoodieFileGroup> getAllReplacedFileGroups(String partitionPath)
TableFileSystemView
getAllReplacedFileGroups in interface TableFileSystemView
public final Stream<Pair<HoodieFileGroupId,HoodieInstant>> getFileGroupsInPendingClustering()
TableFileSystemView
getFileGroupsInPendingClustering in interface TableFileSystemView
protected abstract boolean isPendingCompactionScheduledForFileId(HoodieFileGroupId fgId)
fgId - File-Group Id
protected abstract boolean isPendingClusteringScheduledForFileId(HoodieFileGroupId fgId)
fgId - File-Group Id
protected abstract Option<HoodieInstant> getPendingClusteringInstant(HoodieFileGroupId fileGroupId)
protected abstract Stream<Pair<HoodieFileGroupId,HoodieInstant>> fetchFileGroupsInPendingClustering()
protected abstract Option<Pair<String,CompactionOperation>> getPendingCompactionOperationWithInstant(HoodieFileGroupId fileGroupId)
fileGroupId - File-Group Id
protected abstract boolean isBootstrapBaseFilePresentForFileId(HoodieFileGroupId fgId)
fgId - File-Group Id
protected abstract Option<BootstrapBaseFileMapping> getBootstrapBaseFile(HoodieFileGroupId fileGroupId)
fileGroupId - File-Group Id
protected abstract void resetReplacedFileGroups(Map<HoodieFileGroupId,HoodieInstant> replacedFileGroups)
protected abstract void addReplacedFileGroups(Map<HoodieFileGroupId,HoodieInstant> replacedFileGroups)
protected abstract void removeReplacedFileIdsAtInstants(Set<String> instants)
protected abstract Option<HoodieInstant> getReplaceInstant(HoodieFileGroupId fileGroupId)
public Stream<HoodieBaseFile> fetchLatestBaseFiles(String partitionPath)
protected Option<HoodieBaseFile> getLatestBaseFile(HoodieFileGroup fileGroup)
protected Option<HoodieBaseFile> fetchLatestBaseFile(String partitionPath, String fileId)
partitionPath - Partition path
fileId - File Id
protected Option<FileSlice> fetchLatestFileSlice(String partitionPath, String fileId)
partitionPath - Partition path
fileId - File Id
public Option<HoodieInstant> getLastInstant()
TableFileSystemView
getLastInstant in interface TableFileSystemView
public HoodieTimeline getTimeline()
TableFileSystemView
getTimeline in interface TableFileSystemView
public void sync()
SyncableFileSystemView
sync in interface SyncableFileSystemView
protected void runSync(HoodieTimeline oldTimeline, HoodieTimeline newTimeline)
oldTimeline - Old Hoodie Timeline
newTimeline - New Hoodie Timeline
public HoodieTimeline getVisibleCommitsAndCompactionTimeline()
HoodieTimeline

Copyright © 2022 The Apache Software Foundation. All rights reserved.