public class HoodieTableMetadataUtil extends Object
| Modifier and Type | Field and Description |
|---|---|
static String |
PARTITION_NAME_BLOOM_FILTERS |
static String |
PARTITION_NAME_COLUMN_STATS |
static String |
PARTITION_NAME_FILES |
static String |
PARTITION_NAME_RECORD_INDEX |
| Modifier and Type | Method and Description |
|---|---|
static Map<String,HoodieColumnRangeMetadata<Comparable>> |
collectColumnRangeMetadata(List<org.apache.avro.generic.IndexedRecord> records,
List<org.apache.avro.Schema.Field> targetFields,
String filePath)
Collects
HoodieColumnRangeMetadata for the provided collection of records, treating them
as if they had been persisted within the given filePath |
static HoodieColumnRangeMetadata<Comparable> |
convertColumnStatsRecordToColumnRangeMetadata(HoodieMetadataColumnStats columnStats)
Converts instance of
HoodieMetadataColumnStats to HoodieColumnRangeMetadata |
static HoodieData<HoodieRecord> |
convertFilesToBloomFilterRecords(HoodieEngineContext engineContext,
Map<String,List<String>> partitionToDeletedFiles,
Map<String,Map<String,Long>> partitionToAppendedFiles,
String instantTime,
HoodieTableMetaClient dataMetaClient,
int bloomIndexParallelism,
String bloomFilterType)
Convert added and deleted files metadata to bloom filter index records.
|
static HoodieData<HoodieRecord> |
convertFilesToColumnStatsRecords(HoodieEngineContext engineContext,
Map<String,List<String>> partitionToDeletedFiles,
Map<String,Map<String,Long>> partitionToAppendedFiles,
HoodieTableMetaClient dataMetaClient,
boolean isColumnStatsIndexEnabled,
int columnStatsIndexParallelism,
List<String> targetColumnsForColumnStatsIndex)
Convert added and deleted files metadata to column stats index records.
|
protected static List<HoodieRecord> |
convertFilesToFilesPartitionRecords(Map<String,List<String>> partitionToDeletedFiles,
Map<String,Map<String,Long>> partitionToAppendedFiles,
String instantTime,
String operation)
Convert rollback action metadata to files partition records.
|
static HoodieData<HoodieRecord> |
convertMetadataToBloomFilterRecords(HoodieCleanMetadata cleanMetadata,
HoodieEngineContext engineContext,
String instantTime,
int bloomIndexParallelism)
Convert clean metadata to bloom filter index records.
|
static HoodieData<HoodieRecord> |
convertMetadataToBloomFilterRecords(HoodieEngineContext context,
HoodieConfig hoodieConfig,
HoodieCommitMetadata commitMetadata,
String instantTime,
HoodieTableMetaClient dataMetaClient,
String bloomFilterType,
int bloomIndexParallelism)
Convert commit action metadata to bloom filter records.
|
static HoodieData<HoodieRecord> |
convertMetadataToColumnStatsRecords(HoodieCleanMetadata cleanMetadata,
HoodieEngineContext engineContext,
HoodieTableMetaClient dataMetaClient,
boolean isColumnStatsIndexEnabled,
int columnStatsIndexParallelism,
List<String> targetColumnsForColumnStatsIndex)
Convert clean metadata to column stats index records.
|
static HoodieData<HoodieRecord> |
convertMetadataToColumnStatsRecords(HoodieCommitMetadata commitMetadata,
HoodieEngineContext engineContext,
HoodieTableMetaClient dataMetaClient,
boolean isColumnStatsIndexEnabled,
int columnStatsIndexParallelism,
List<String> targetColumnsForColumnStatsIndex) |
static List<HoodieRecord> |
convertMetadataToFilesPartitionRecords(HoodieCleanMetadata cleanMetadata,
String instantTime)
Finds all files that were deleted as part of a clean and creates metadata table records for them.
|
static List<HoodieRecord> |
convertMetadataToFilesPartitionRecords(HoodieCommitMetadata commitMetadata,
String instantTime)
Finds all new files/partitions created as part of commit and creates metadata table records for them.
|
static Map<MetadataPartitionType,HoodieData<HoodieRecord>> |
convertMetadataToRecords(HoodieEngineContext engineContext,
HoodieCleanMetadata cleanMetadata,
String instantTime,
HoodieTableMetaClient dataMetaClient,
List<MetadataPartitionType> enabledPartitionTypes,
int bloomIndexParallelism,
boolean isColumnStatsIndexEnabled,
int columnStatsIndexParallelism,
List<String> targetColumnsForColumnStatsIndex)
Convert the clean action to metadata records.
|
static Map<MetadataPartitionType,HoodieData<HoodieRecord>> |
convertMetadataToRecords(HoodieEngineContext context,
HoodieConfig hoodieConfig,
HoodieCommitMetadata commitMetadata,
String instantTime,
HoodieTableMetaClient dataMetaClient,
List<MetadataPartitionType> enabledPartitionTypes,
String bloomFilterType,
int bloomIndexParallelism,
boolean isColumnStatsIndexEnabled,
int columnStatsIndexParallelism,
List<String> targetColumnsForColumnStatsIndex)
Convert commit action to metadata records for the enabled partition types.
|
static Map<MetadataPartitionType,HoodieData<HoodieRecord>> |
convertMetadataToRecords(HoodieEngineContext engineContext,
HoodieTableMetaClient dataTableMetaClient,
HoodieRollbackMetadata rollbackMetadata,
String instantTime)
Convert rollback action metadata to metadata table records.
|
static Map<MetadataPartitionType,HoodieData<HoodieRecord>> |
convertMissingPartitionRecords(HoodieEngineContext engineContext,
List<String> deletedPartitions,
Map<String,Map<String,Long>> filesAdded,
Map<String,List<String>> filesDeleted,
String instantTime) |
static String |
createAsyncIndexerTimestamp(String timestamp) |
static String |
createCleanTimestamp(String timestamp)
Create the timestamp for a clean operation on the metadata table.
|
static String |
createCompactionTimestamp(String timestamp)
Create the timestamp for a compaction operation on the metadata table.
|
static String |
createIndexInitTimestamp(String timestamp,
int offset)
Create the timestamp for an index initialization operation on the metadata table.
|
static String |
createLogCompactionTimestamp(String timestamp)
Create the timestamp for a log compaction operation on the metadata table.
|
static String |
createRestoreTimestamp(String timestamp) |
static String |
createRollbackTimestamp(String timestamp) |
static void |
deleteMetadataPartition(String basePath,
HoodieEngineContext context,
MetadataPartitionType partitionType)
Deletes the metadata partition from the file system.
|
static String |
deleteMetadataTable(HoodieTableMetaClient dataMetaClient,
HoodieEngineContext context,
boolean backup)
Delete the metadata table for the dataset and backup if required.
|
static void |
deleteMetadataTable(String basePath,
HoodieEngineContext context)
Delete the metadata table for the dataset.
|
static String |
deleteMetadataTablePartition(HoodieTableMetaClient dataMetaClient,
HoodieEngineContext context,
MetadataPartitionType partitionType,
boolean backup)
Delete a partition within the metadata table.
|
static int |
estimateFileGroupCount(MetadataPartitionType partitionType,
long recordCount,
int averageRecordSize,
int minFileGroupCount,
int maxFileGroupCount,
float growthFactor,
int maxFileGroupSizeBytes)
Estimates the file group count to use for a MDT partition.
|
static String |
getBloomFilterIndexPartitionIdentifier(String partitionName) |
static String |
getColumnStatsIndexPartitionIdentifier(String partitionName) |
static int |
getFileGroupIndexFromFileId(String fileId)
Extract the index from the fileID of a file group in the MDT partition.
|
static String |
getFileGroupPrefix(String fileId)
Extract the fileID prefix from the fileID of a file group in the MDT partition.
|
static String |
getFileIDForFileGroup(MetadataPartitionType partitionType,
int index)
Return the complete fileID for a file group within a MDT partition.
|
static HoodieTableFileSystemView |
getFileSystemView(HoodieTableMetaClient metaClient)
Get metadata table file system view.
|
static Set<String> |
getInflightAndCompletedMetadataPartitions(HoodieTableConfig tableConfig) |
static Set<String> |
getInflightMetadataPartitions(HoodieTableConfig tableConfig) |
static HoodieRecordGlobalLocation |
getLocationFromRecordIndexInfo(HoodieRecordIndexInfo recordIndexInfo)
Gets the location from record index content.
|
static HoodieRecordGlobalLocation |
getLocationFromRecordIndexInfo(String partition,
int fileIdEncoding,
long fileIdHighBits,
long fileIdLowBits,
int fileIndex,
String originalFileId,
Long instantTime)
Gets the location from record index content.
|
static boolean |
getMetadataPartitionsNeedingWriteStatusTracking(HoodieMetadataConfig config,
HoodieTableMetaClient metaClient)
Returns true if any enabled metadata partition in the given hoodie table requires WriteStatus to track the written records.
|
static String |
getPartitionIdentifierForFilesPartition(String relativePartitionPath) |
static List<FileSlice> |
getPartitionLatestFileSlices(HoodieTableMetaClient metaClient,
Option<HoodieTableFileSystemView> fsView,
String partition)
Get the latest file slices for a Metadata Table partition.
|
static List<FileSlice> |
getPartitionLatestFileSlicesIncludingInflight(HoodieTableMetaClient metaClient,
Option<HoodieTableFileSystemView> fileSystemView,
String partition)
Get the latest file slices for a given partition including the inflight ones.
|
static List<FileSlice> |
getPartitionLatestMergedFileSlices(HoodieTableMetaClient metaClient,
HoodieTableFileSystemView fsView,
String partition)
Get the latest file slices for a Metadata Table partition.
|
static Set<String> |
getValidInstantTimestamps(HoodieTableMetaClient dataMetaClient,
HoodieTableMetaClient metadataMetaClient) |
static Set<String> |
getWritePartitionPaths(List<HoodieCommitMetadata> metadataList)
Returns all the incremental write partition paths as a set with the given commits metadata.
|
static boolean |
isFilesPartitionAvailable(HoodieTableMetaClient metaClient)
Returns whether the files partition of metadata table is ready for read.
|
static boolean |
isIndexingCommit(String instantTime)
Checks if a delta commit in metadata table is written by async indexer.
|
static boolean |
isValidInstant(HoodieInstant instant)
Checks if the Instant is a delta commit and has a valid suffix for operations on MDT.
|
static int |
mapRecordKeyToFileGroupIndex(String recordKey,
int numFileGroups)
Map a record key to a file group in partition of interest.
|
static boolean |
metadataPartitionExists(String basePath,
HoodieEngineContext context,
MetadataPartitionType partitionType)
Check if the given metadata partition exists.
|
static HoodieData<HoodieRecord> |
readRecordKeysFromBaseFiles(HoodieEngineContext engineContext,
HoodieConfig config,
List<Pair<String,HoodieBaseFile>> partitionBaseFilePairs,
boolean forDelete,
int recordIndexMaxParallelism,
String basePath,
StorageConfiguration<?> configuration,
String activeModule)
Deprecated.
|
static HoodieData<HoodieRecord> |
readRecordKeysFromFileSlices(HoodieEngineContext engineContext,
List<Pair<String,FileSlice>> partitionFileSlicePairs,
boolean forDelete,
int recordIndexMaxParallelism,
String activeModule,
HoodieTableMetaClient metaClient,
EngineType engineType)
Reads the record keys from the given file slices and returns a
HoodieData of HoodieRecord to be updated in the metadata table. |
static BigDecimal |
tryUpcastDecimal(BigDecimal value,
org.apache.avro.LogicalTypes.Decimal decimal)
Does an upcast for
BigDecimal instance to align it with scale/precision expected by
the LogicalTypes.Decimal Avro logical type |
public static final String PARTITION_NAME_FILES
public static final String PARTITION_NAME_COLUMN_STATS
public static final String PARTITION_NAME_BLOOM_FILTERS
public static final String PARTITION_NAME_RECORD_INDEX
public static boolean isFilesPartitionAvailable(HoodieTableMetaClient metaClient)
metaClient - HoodieTableMetaClient instance.public static Map<String,HoodieColumnRangeMetadata<Comparable>> collectColumnRangeMetadata(List<org.apache.avro.generic.IndexedRecord> records, List<org.apache.avro.Schema.Field> targetFields, String filePath)
HoodieColumnRangeMetadata for the provided collection of records, pretending
as if provided records have been persisted w/in given filePathrecords - target records to compute column range metadata fortargetFields - columns (fields) to be collectedfilePath - file path value required for HoodieColumnRangeMetadataHoodieColumnRangeMetadata for each of the provided target fields for
the collection of provided recordspublic static HoodieColumnRangeMetadata<Comparable> convertColumnStatsRecordToColumnRangeMetadata(HoodieMetadataColumnStats columnStats)
HoodieMetadataColumnStats to HoodieColumnRangeMetadatapublic static void deleteMetadataTable(String basePath, HoodieEngineContext context)
basePath - base path of the datasetcontext - instance of HoodieEngineContext.public static void deleteMetadataPartition(String basePath, HoodieEngineContext context, MetadataPartitionType partitionType)
basePath - - base path of the datasetcontext - - instance of HoodieEngineContextpartitionType - - MetadataPartitionType of the partition to deletepublic static boolean metadataPartitionExists(String basePath, HoodieEngineContext context, MetadataPartitionType partitionType)
basePath - base path of the datasetcontext - instance of HoodieEngineContext.public static Map<MetadataPartitionType,HoodieData<HoodieRecord>> convertMetadataToRecords(HoodieEngineContext context, HoodieConfig hoodieConfig, HoodieCommitMetadata commitMetadata, String instantTime, HoodieTableMetaClient dataMetaClient, List<MetadataPartitionType> enabledPartitionTypes, String bloomFilterType, int bloomIndexParallelism, boolean isColumnStatsIndexEnabled, int columnStatsIndexParallelism, List<String> targetColumnsForColumnStatsIndex)
context - - Engine context to usehoodieConfig - - Hudi configscommitMetadata - - Commit action metadatainstantTime - - Action instant timedataMetaClient - - HoodieTableMetaClient for dataenabledPartitionTypes - - List of enabled MDT partitionsbloomFilterType - - Type of generated bloom filter recordsbloomIndexParallelism - - Parallelism for bloom filter record generationisColumnStatsIndexEnabled - - Is column stats index enabledcolumnStatsIndexParallelism - - Parallelism for column stats index records generationtargetColumnsForColumnStatsIndex - - List of columns for column stats indexpublic static List<HoodieRecord> convertMetadataToFilesPartitionRecords(HoodieCommitMetadata commitMetadata, String instantTime)
commitMetadata - - Commit action metadatainstantTime - - Commit action instant timepublic static Set<String> getWritePartitionPaths(List<HoodieCommitMetadata> metadataList)
metadataList - The commits metadatapublic static HoodieData<HoodieRecord> convertMetadataToBloomFilterRecords(HoodieEngineContext context, HoodieConfig hoodieConfig, HoodieCommitMetadata commitMetadata, String instantTime, HoodieTableMetaClient dataMetaClient, String bloomFilterType, int bloomIndexParallelism)
context - - Engine context to usehoodieConfig - - Hudi configscommitMetadata - - Commit action metadatainstantTime - - Action instant timedataMetaClient - - HoodieTableMetaClient for databloomFilterType - - Type of generated bloom filter recordsbloomIndexParallelism - - Parallelism for bloom filter record generationpublic static Map<MetadataPartitionType,HoodieData<HoodieRecord>> convertMetadataToRecords(HoodieEngineContext engineContext, HoodieCleanMetadata cleanMetadata, String instantTime, HoodieTableMetaClient dataMetaClient, List<MetadataPartitionType> enabledPartitionTypes, int bloomIndexParallelism, boolean isColumnStatsIndexEnabled, int columnStatsIndexParallelism, List<String> targetColumnsForColumnStatsIndex)
public static List<HoodieRecord> convertMetadataToFilesPartitionRecords(HoodieCleanMetadata cleanMetadata, String instantTime)
cleanMetadata - instantTime - public static Map<MetadataPartitionType,HoodieData<HoodieRecord>> convertMissingPartitionRecords(HoodieEngineContext engineContext, List<String> deletedPartitions, Map<String,Map<String,Long>> filesAdded, Map<String,List<String>> filesDeleted, String instantTime)
public static HoodieData<HoodieRecord> convertMetadataToBloomFilterRecords(HoodieCleanMetadata cleanMetadata, HoodieEngineContext engineContext, String instantTime, int bloomIndexParallelism)
cleanMetadata - - Clean action metadataengineContext - - Engine contextinstantTime - - Clean action instant timebloomIndexParallelism - - Parallelism for bloom filter record generationpublic static HoodieData<HoodieRecord> convertMetadataToColumnStatsRecords(HoodieCleanMetadata cleanMetadata, HoodieEngineContext engineContext, HoodieTableMetaClient dataMetaClient, boolean isColumnStatsIndexEnabled, int columnStatsIndexParallelism, List<String> targetColumnsForColumnStatsIndex)
cleanMetadata - - Clean action metadataengineContext - - Engine contextdataMetaClient - - HoodieTableMetaClient for dataisColumnStatsIndexEnabled - - Is column stats index enabledcolumnStatsIndexParallelism - - Parallelism for column stats index records generationtargetColumnsForColumnStatsIndex - - List of columns for column stats indexpublic static Map<MetadataPartitionType,HoodieData<HoodieRecord>> convertMetadataToRecords(HoodieEngineContext engineContext, HoodieTableMetaClient dataTableMetaClient, HoodieRollbackMetadata rollbackMetadata, String instantTime)
We only need to handle FILES partition here as HUDI rollbacks on MOR table may end up adding a new log file. All other partitions are handled by actual rollback of the deltacommit which added records to those partitions.
protected static List<HoodieRecord> convertFilesToFilesPartitionRecords(Map<String,List<String>> partitionToDeletedFiles, Map<String,Map<String,Long>> partitionToAppendedFiles, String instantTime, String operation)
public static String getColumnStatsIndexPartitionIdentifier(String partitionName)
public static String getBloomFilterIndexPartitionIdentifier(String partitionName)
public static String getPartitionIdentifierForFilesPartition(String relativePartitionPath)
public static HoodieData<HoodieRecord> convertFilesToBloomFilterRecords(HoodieEngineContext engineContext, Map<String,List<String>> partitionToDeletedFiles, Map<String,Map<String,Long>> partitionToAppendedFiles, String instantTime, HoodieTableMetaClient dataMetaClient, int bloomIndexParallelism, String bloomFilterType)
public static HoodieData<HoodieRecord> convertFilesToColumnStatsRecords(HoodieEngineContext engineContext, Map<String,List<String>> partitionToDeletedFiles, Map<String,Map<String,Long>> partitionToAppendedFiles, HoodieTableMetaClient dataMetaClient, boolean isColumnStatsIndexEnabled, int columnStatsIndexParallelism, List<String> targetColumnsForColumnStatsIndex)
public static int mapRecordKeyToFileGroupIndex(String recordKey, int numFileGroups)
Note: For hashing, the algorithm is same as String.hashCode() but is being defined here as hashCode() implementation is not guaranteed by the JVM to be consistent across JVM versions and implementations.
recordKey - record key for which the file group index is looked up.
public static List<FileSlice> getPartitionLatestMergedFileSlices(HoodieTableMetaClient metaClient, HoodieTableFileSystemView fsView, String partition)
metaClient - Instance of HoodieTableMetaClient.fsView - Metadata table filesystem view.partition - The name of the partition whose file groups are to be loaded.public static List<FileSlice> getPartitionLatestFileSlices(HoodieTableMetaClient metaClient, Option<HoodieTableFileSystemView> fsView, String partition)
metaClient - - Instance of HoodieTableMetaClient.fsView - - Metadata table filesystem viewpartition - - The name of the partition whose file groups are to be loaded.public static HoodieTableFileSystemView getFileSystemView(HoodieTableMetaClient metaClient)
metaClient - - Metadata table meta clientpublic static List<FileSlice> getPartitionLatestFileSlicesIncludingInflight(HoodieTableMetaClient metaClient, Option<HoodieTableFileSystemView> fileSystemView, String partition)
metaClient - - instance of HoodieTableMetaClientfileSystemView - - hoodie table file system view, which will be fetched from meta client if not already presentpartition - - name of the partition whose file groups are to be loadedpublic static HoodieData<HoodieRecord> convertMetadataToColumnStatsRecords(HoodieCommitMetadata commitMetadata, HoodieEngineContext engineContext, HoodieTableMetaClient dataMetaClient, boolean isColumnStatsIndexEnabled, int columnStatsIndexParallelism, List<String> targetColumnsForColumnStatsIndex)
public static BigDecimal tryUpcastDecimal(BigDecimal value, org.apache.avro.LogicalTypes.Decimal decimal)
BigDecimal instance to align it with scale/precision expected by
the LogicalTypes.Decimal Avro logical typepublic static Set<String> getInflightMetadataPartitions(HoodieTableConfig tableConfig)
public static Set<String> getInflightAndCompletedMetadataPartitions(HoodieTableConfig tableConfig)
public static Set<String> getValidInstantTimestamps(HoodieTableMetaClient dataMetaClient, HoodieTableMetaClient metadataMetaClient)
public static boolean isValidInstant(HoodieInstant instant)
instant - HoodieInstant to check.
Returns: true if the instant is valid.
public static boolean isIndexingCommit(String instantTime)
TODO(HUDI-5733): This should be cleaned up once the proper fix of rollbacks in the metadata table is landed.
instantTime - Instant time to check.
Returns: true if from async indexer; false otherwise.
public static String deleteMetadataTable(HoodieTableMetaClient dataMetaClient, HoodieEngineContext context, boolean backup)
dataMetaClient - HoodieTableMetaClient of the dataset for which metadata table is to be deleted
context - instance of HoodieEngineContext.
backup - Whether metadata table should be backed up before deletion. If true, the table is backed up to the
directory with name metadata_<current_timestamp>.
public static String deleteMetadataTablePartition(HoodieTableMetaClient dataMetaClient, HoodieEngineContext context, MetadataPartitionType partitionType, boolean backup)
This can be used to delete a partition so that it can be re-bootstrapped.
dataMetaClient - HoodieTableMetaClient of the dataset for which metadata table is to be deleted
context - instance of HoodieEngineContext.
backup - Whether metadata table should be backed up before deletion. If true, the table is backed up to the
directory with name metadata_<current_timestamp>.
partitionType - The partition to delete
public static String getFileIDForFileGroup(MetadataPartitionType partitionType, int index)
MDT fileGroups have the format
partitionType - The type of the MDT partitionindex - Index of the file group within the partitionpublic static int getFileGroupIndexFromFileId(String fileId)
getFileIDForFileGroup for the format of the fileID.fileId - fileID of a file group.public static String getFileGroupPrefix(String fileId)
getFileIDForFileGroup for the format of the fileID.fileId - fileID of a file group.public static String createCleanTimestamp(String timestamp)
public static String createCompactionTimestamp(String timestamp)
public static String createIndexInitTimestamp(String timestamp, int offset)
Since many MDT partitions can be initialized one after other the offset parameter controls generating a unique timestamp.
public static String createLogCompactionTimestamp(String timestamp)
public static int estimateFileGroupCount(MetadataPartitionType partitionType, long recordCount, int averageRecordSize, int minFileGroupCount, int maxFileGroupCount, float growthFactor, int maxFileGroupSizeBytes)
partitionType - Type of the partition for which the file group count is to be estimated.recordCount - The number of records expected to be written.averageRecordSize - Average size of each record to be written.minFileGroupCount - Minimum number of file groups to use.maxFileGroupCount - Maximum number of file groups to use.growthFactor - By what factor are the records (recordCount) expected to grow?maxFileGroupSizeBytes - Maximum size of the file group.public static boolean getMetadataPartitionsNeedingWriteStatusTracking(HoodieMetadataConfig config, HoodieTableMetaClient metaClient)
config - MDT configmetaClient - HoodieTableMetaClient of the data tablepublic static HoodieRecordGlobalLocation getLocationFromRecordIndexInfo(HoodieRecordIndexInfo recordIndexInfo)
recordIndexInfo - HoodieRecordIndexInfo instance.HoodieRecordGlobalLocation containing the location.public static HoodieRecordGlobalLocation getLocationFromRecordIndexInfo(String partition, int fileIdEncoding, long fileIdHighBits, long fileIdLowBits, int fileIndex, String originalFileId, Long instantTime)
HoodieRecordIndexInfo contains.
partition - The partition name the record belongs to.
fileIdEncoding - FileId encoding. Possible values are 0 and 1. 0 represents UUID based
fileID, and 1 represents raw string format of the fileId.fileIdHighBits - High 64 bits if the fileId is based on UUID format.fileIdLowBits - Low 64 bits if the fileId is based on UUID format.fileIndex - Index representing file index which is used to re-construct UUID based fileID.originalFileId - FileId of the location where record belongs to.
When the encoding is 1, fileID is stored in raw string format.instantTime - Epoch time in millisecond representing the commit time at which record was added.HoodieRecordGlobalLocation containing the location.@Deprecated public static HoodieData<HoodieRecord> readRecordKeysFromBaseFiles(HoodieEngineContext engineContext, HoodieConfig config, List<Pair<String,HoodieBaseFile>> partitionBaseFilePairs, boolean forDelete, int recordIndexMaxParallelism, String basePath, StorageConfiguration<?> configuration, String activeModule)
HoodieData of HoodieRecord to be updated in the metadata table.
Use readRecordKeysFromFileSlices(HoodieEngineContext, List, boolean, int, String, HoodieTableMetaClient, EngineType) instead.public static HoodieData<HoodieRecord> readRecordKeysFromFileSlices(HoodieEngineContext engineContext, List<Pair<String,FileSlice>> partitionFileSlicePairs, boolean forDelete, int recordIndexMaxParallelism, String activeModule, HoodieTableMetaClient metaClient, EngineType engineType)
HoodieData of HoodieRecord to be updated in the metadata table.
If file slice does not have any base file, then iterates over the log files to get the record keys.
Copyright © 2024 The Apache Software Foundation. All rights reserved.