public class FSUtils extends Object
| Modifier and Type | Class and Description |
|---|---|
| static interface | FSUtils.SerializableFunction<T,R><br>Serializable function interface. |
| Modifier and Type | Field and Description |
|---|---|
| static Pattern | LOG_FILE_PATTERN |
| static String | PATH_SEPARATOR |
| static Pattern | PREFIX_BY_FILE_ID_PATTERN |
| Constructor and Description |
|---|
| FSUtils() |
| Modifier and Type | Method and Description |
|---|---|
static int |
computeNextLogVersion(HoodieStorage storage,
StoragePath partitionPath,
String fileId,
String logFileExtension,
String baseCommitTime)
Computes the next log version for the specified fileId in the partition path.
|
static StoragePath |
constructAbsolutePath(StoragePath basePath,
String relativePartitionPath) |
static StoragePath |
constructAbsolutePath(String basePath,
String relativePartitionPath) |
static String |
createNewFileId(String idPfx,
int id) |
static String |
createNewFileIdPfx()
Returns a new unique prefix for creating a file group.
|
static void |
createPathIfNotExists(HoodieStorage storage,
StoragePath partitionPath) |
static boolean |
deleteDir(HoodieEngineContext hoodieEngineContext,
HoodieStorage storage,
StoragePath dirPath,
int parallelism)
Deletes a directory by deleting sub-paths in parallel on the file system.
|
static boolean |
deleteSubPath(String subPathStr,
StorageConfiguration<?> conf,
boolean recursive)
Deletes a sub-path.
|
static List<StoragePathInfo> |
getAllDataFilesInPartition(HoodieStorage storage,
StoragePath partitionPath) |
static List<StoragePathInfo> |
getAllDataPathInfo(HoodieStorage storage,
StoragePath path) |
static Stream<HoodieLogFile> |
getAllLogFiles(HoodieStorage storage,
StoragePath partitionPath,
String fileId,
String logFileExtension,
String baseCommitTime)
Get all the log files for the passed in file-id in the partition path.
|
static List<String> |
getAllPartitionFoldersThreeLevelsDown(HoodieStorage storage,
String basePath)
Gets all partition paths assuming date partitioning (year, month, day) three levels down.
|
static List<String> |
getAllPartitionPaths(HoodieEngineContext engineContext,
HoodieStorage storage,
HoodieMetadataConfig metadataConfig,
String basePathStr) |
static List<String> |
getAllPartitionPaths(HoodieEngineContext engineContext,
HoodieStorage storage,
String basePathStr,
boolean useFileListingFromMetadata,
boolean assumeDatePartitioning) |
static String |
getBaseCommitTimeFromLogPath(StoragePath path)
Get the first part of the file name in the log file.
|
static String |
getCommitFromCommitFile(String commitFileName) |
static String |
getCommitTime(String fullFileName) |
static String |
getFileExtension(String fullName) |
static String |
getFileExtensionFromLog(StoragePath logPath)
Get the file extension from the log file.
|
static String |
getFileId(String fullFileName) |
static String |
getFileIdFromFilePath(StoragePath filePath) |
static String |
getFileIdFromLogPath(StoragePath path) |
static String |
getFileIdPfxFromFileId(String fileId)
Returns prefix for a file group from fileId.
|
static String |
getFileName(String filePathWithPartition,
String partition)
Extracts the file name from the relative path based on the table base path.
|
static Map<String,List<StoragePathInfo>> |
getFilesInPartitions(HoodieEngineContext engineContext,
HoodieStorage storage,
HoodieMetadataConfig metadataConfig,
String basePathStr,
String[] partitionPaths) |
static long |
getFileSize(HoodieStorage storage,
StoragePath path) |
static int |
getFileVersionFromLog(StoragePath logPath)
Get the last part of the file name in the log file and convert to int.
|
static int |
getFileVersionFromLog(String logFileName) |
static List<StoragePathInfo> |
getGlobStatusExcludingMetaFolder(HoodieStorage storage,
StoragePath globPath)
Helper to filter out paths under metadata folder when running fs.globStatus.
|
static Option<HoodieLogFile> |
getLatestLogFile(HoodieStorage storage,
StoragePath partitionPath,
String fileId,
String logFileExtension,
String baseCommitTime)
Get the latest log file for the passed in file-id in the partition path.
|
static Option<Pair<Integer,String>> |
getLatestLogVersion(HoodieStorage storage,
StoragePath partitionPath,
String fileId,
String logFileExtension,
String baseCommitTime)
Get the latest log version for the fileId in the partition path.
|
static List<Option<StoragePathInfo>> |
getPathInfoUnderPartition(HoodieStorage storage,
StoragePath partitionPathIncludeBasePath,
Set<String> filesNamesUnderThisPartition,
boolean ignoreMissingFiles)
Get all the files in the given partition path.
|
static StoragePath |
getPathWithoutSchemeAndAuthority(StoragePath path) |
static String |
getRelativePartitionPath(StoragePath basePath,
StoragePath fullPartitionPath) |
static Long |
getSizeInMB(long sizeInBytes) |
static Integer |
getStageIdFromLogPath(StoragePath path)
Get StageId used in log-path.
|
static Integer |
getTaskAttemptIdFromLogPath(StoragePath path)
Get Task Attempt Id used in log-path.
|
static Integer |
getTaskPartitionIdFromLogPath(StoragePath path)
Get TaskPartitionId used in log-path.
|
static String |
getWriteTokenFromLogPath(StoragePath path)
Get Write-Token used in log-path.
|
static boolean |
isBaseFile(StoragePath path) |
static boolean |
isDataFile(StoragePath path) |
static boolean |
isLogFile(StoragePath logPath) |
static boolean |
isLogFile(String fileName) |
static boolean |
isTableExists(String path,
HoodieStorage storage)
Check if table already exists in the given path.
|
static String |
makeBaseFileName(String instantTime,
String writeToken,
String fileId,
String fileExtension) |
static String |
makeBootstrapIndexFileName(String instantTime,
String fileId,
String ext) |
static String |
makeLogFileName(String fileId,
String logFileExtension,
String baseCommitTime,
int version,
String writeToken) |
static StoragePath |
makeQualified(HoodieStorage storage,
StoragePath path)
Makes path qualified with HoodieStorage's URI. |
static String |
makeWriteToken(int taskPartitionId,
int stageId,
long taskAttemptId)
A write token uniquely identifies an attempt at one of the IOHandle operations (Merge/Create/Append).
|
static String |
maskWithoutFileId(String instantTime,
int taskPartitionId) |
static <T> Map<String,T> |
parallelizeFilesProcess(HoodieEngineContext hoodieEngineContext,
HoodieStorage storage,
int parallelism,
FSUtils.SerializableFunction<Pair<String,StorageConfiguration<?>>,T> pairFunction,
List<String> subPaths) |
static <T> Map<String,T> |
parallelizeSubPathProcess(HoodieEngineContext hoodieEngineContext,
HoodieStorage storage,
StoragePath dirPath,
int parallelism,
Predicate<StoragePathInfo> subPathPredicate,
FSUtils.SerializableFunction<Pair<String,StorageConfiguration<?>>,T> pairFunction)
Processes sub-path in parallel.
|
static void |
processFiles(HoodieStorage storage,
String basePathStr,
Function<StoragePathInfo,Boolean> consumer,
boolean excludeMetaFolder)
Recursively processes all files in the base-path.
|
public static final String PATH_SEPARATOR
public static final Pattern LOG_FILE_PATTERN
public static final Pattern PREFIX_BY_FILE_ID_PATTERN
public static boolean isTableExists(String path, HoodieStorage storage) throws IOException
path - base path of the table.
storage - instance of HoodieStorage.
Returns: true if table exists; false otherwise.
Throws: IOException
public static StoragePath makeQualified(HoodieStorage storage, StoragePath path)
Makes path qualified with HoodieStorage's URI.
storage - instance of HoodieStorage.
path - to be qualified.
public static String makeWriteToken(int taskPartitionId, int stageId, long taskAttemptId)
public static String makeBaseFileName(String instantTime, String writeToken, String fileId, String fileExtension)
public static String makeBootstrapIndexFileName(String instantTime, String fileId, String ext)
public static long getFileSize(HoodieStorage storage, StoragePath path) throws IOException
Throws: IOException
public static List<String> getAllPartitionFoldersThreeLevelsDown(HoodieStorage storage, String basePath) throws IOException
Throws: IOException
public static String getRelativePartitionPath(StoragePath basePath, StoragePath fullPartitionPath)
public static StoragePath getPathWithoutSchemeAndAuthority(StoragePath path)
public static void processFiles(HoodieStorage storage, String basePathStr, Function<StoragePathInfo,Boolean> consumer, boolean excludeMetaFolder) throws IOException
storage - File System
basePathStr - Base-Path
consumer - Callback for processing
excludeMetaFolder - Exclude .hoodie folder
Throws: IOException
public static List<String> getAllPartitionPaths(HoodieEngineContext engineContext, HoodieStorage storage, String basePathStr, boolean useFileListingFromMetadata, boolean assumeDatePartitioning)
public static List<String> getAllPartitionPaths(HoodieEngineContext engineContext, HoodieStorage storage, HoodieMetadataConfig metadataConfig, String basePathStr)
public static Map<String,List<StoragePathInfo>> getFilesInPartitions(HoodieEngineContext engineContext, HoodieStorage storage, HoodieMetadataConfig metadataConfig, String basePathStr, String[] partitionPaths)
public static List<Option<StoragePathInfo>> getPathInfoUnderPartition(HoodieStorage storage, StoragePath partitionPathIncludeBasePath, Set<String> filesNamesUnderThisPartition, boolean ignoreMissingFiles)
storage - HoodieStorage instance.
partitionPathIncludeBasePath - The full partition path including the base path.
filesNamesUnderThisPartition - The names of the files under this partition for which file status is needed.
ignoreMissingFiles - If true, missing files will be ignored and an empty Option will be added to the result list.
public static String createNewFileIdPfx()
public static String getFileIdPfxFromFileId(String fileId)
public static String getFileExtensionFromLog(StoragePath logPath)
public static String getFileIdFromLogPath(StoragePath path)
public static String getFileIdFromFilePath(StoragePath filePath)
public static String getBaseCommitTimeFromLogPath(StoragePath path)
public static Integer getTaskPartitionIdFromLogPath(StoragePath path)
public static String getWriteTokenFromLogPath(StoragePath path)
public static Integer getStageIdFromLogPath(StoragePath path)
public static Integer getTaskAttemptIdFromLogPath(StoragePath path)
public static int getFileVersionFromLog(StoragePath logPath)
public static int getFileVersionFromLog(String logFileName)
public static String makeLogFileName(String fileId, String logFileExtension, String baseCommitTime, int version, String writeToken)
public static boolean isBaseFile(StoragePath path)
public static boolean isLogFile(StoragePath logPath)
public static boolean isLogFile(String fileName)
public static boolean isDataFile(StoragePath path)
public static List<StoragePathInfo> getAllDataFilesInPartition(HoodieStorage storage, StoragePath partitionPath) throws IOException
Throws: IOException
public static Option<HoodieLogFile> getLatestLogFile(HoodieStorage storage, StoragePath partitionPath, String fileId, String logFileExtension, String baseCommitTime) throws IOException
Throws: IOException
public static Stream<HoodieLogFile> getAllLogFiles(HoodieStorage storage, StoragePath partitionPath, String fileId, String logFileExtension, String baseCommitTime) throws IOException
Throws: IOException
public static Option<Pair<Integer,String>> getLatestLogVersion(HoodieStorage storage, StoragePath partitionPath, String fileId, String logFileExtension, String baseCommitTime) throws IOException
Throws: IOException
public static int computeNextLogVersion(HoodieStorage storage, StoragePath partitionPath, String fileId, String logFileExtension, String baseCommitTime) throws IOException
Throws: IOException
public static void createPathIfNotExists(HoodieStorage storage, StoragePath partitionPath) throws IOException
Throws: IOException
public static Long getSizeInMB(long sizeInBytes)
public static StoragePath constructAbsolutePath(String basePath, String relativePartitionPath)
public static StoragePath constructAbsolutePath(StoragePath basePath, String relativePartitionPath)
public static String getFileName(String filePathWithPartition, String partition)
filePathWithPartition - the relative file path based on the table base path.
partition - the relative partition path. For a partitioned table, `partition` contains the relative partition path; for a non-partitioned table, `partition` is empty.
public static List<StoragePathInfo> getGlobStatusExcludingMetaFolder(HoodieStorage storage, StoragePath globPath) throws IOException
storage - HoodieStorage instance.
globPath - Glob Path
Throws: IOException - when having trouble listing the path
public static boolean deleteDir(HoodieEngineContext hoodieEngineContext, HoodieStorage storage, StoragePath dirPath, int parallelism)
hoodieEngineContext - HoodieEngineContext instance
storage - HoodieStorage instance.
dirPath - directory path.
parallelism - parallelism to use for sub-paths
Returns: true if the directory is deleted; false otherwise.
public static <T> Map<String,T> parallelizeSubPathProcess(HoodieEngineContext hoodieEngineContext, HoodieStorage storage, StoragePath dirPath, int parallelism, Predicate<StoragePathInfo> subPathPredicate, FSUtils.SerializableFunction<Pair<String,StorageConfiguration<?>>,T> pairFunction)
T - type of result to return for each sub-path
hoodieEngineContext - HoodieEngineContext instance
storage - HoodieStorage instance
dirPath - directory path
parallelism - parallelism to use for sub-paths
subPathPredicate - predicate to use to filter sub-paths for processing
pairFunction - actual processing logic for each sub-path
public static <T> Map<String,T> parallelizeFilesProcess(HoodieEngineContext hoodieEngineContext, HoodieStorage storage, int parallelism, FSUtils.SerializableFunction<Pair<String,StorageConfiguration<?>>,T> pairFunction, List<String> subPaths)
public static boolean deleteSubPath(String subPathStr, StorageConfiguration<?> conf, boolean recursive)
subPathStr - sub-path String
conf - storage config
recursive - is recursive or not
Returns: true if the sub-path is deleted; false otherwise.
public static List<StoragePathInfo> getAllDataPathInfo(HoodieStorage storage, StoragePath path) throws IOException
Throws: IOException
Copyright © 2024 The Apache Software Foundation. All rights reserved.