public class HoodieTableMetadataUtil extends Object
| Modifier and Type | Field and Description |
|---|---|
static String |
PARTITION_NAME_BLOOM_FILTERS |
static String |
PARTITION_NAME_COLUMN_STATS |
static String |
PARTITION_NAME_FILES |
| Constructor and Description |
|---|
HoodieTableMetadataUtil() |
| Modifier and Type | Method and Description |
|---|---|
static Map<String,HoodieColumnRangeMetadata<Comparable>> |
collectColumnRangeMetadata(List<org.apache.avro.generic.IndexedRecord> records,
List<org.apache.avro.Schema.Field> targetFields,
String filePath)
Collects
HoodieColumnRangeMetadata for the provided collection of records, as if the
provided records had been persisted within the given filePath |
static HoodieColumnRangeMetadata<Comparable> |
convertColumnStatsRecordToColumnRangeMetadata(HoodieMetadataColumnStats columnStats)
Converts instance of
HoodieMetadataColumnStats to HoodieColumnRangeMetadata |
static HoodieData<HoodieRecord> |
convertFilesToBloomFilterRecords(HoodieEngineContext engineContext,
Map<String,List<String>> partitionToDeletedFiles,
Map<String,Map<String,Long>> partitionToAppendedFiles,
MetadataRecordsGenerationParams recordsGenerationParams,
String instantTime)
Convert added and deleted files metadata to bloom filter index records.
|
static HoodieData<HoodieRecord> |
convertFilesToColumnStatsRecords(HoodieEngineContext engineContext,
Map<String,List<String>> partitionToDeletedFiles,
Map<String,Map<String,Long>> partitionToAppendedFiles,
MetadataRecordsGenerationParams recordsGenerationParams)
Convert added and deleted files metadata to column stats index records.
|
static HoodieData<HoodieRecord> |
convertMetadataToBloomFilterRecords(HoodieCleanMetadata cleanMetadata,
HoodieEngineContext engineContext,
String instantTime,
MetadataRecordsGenerationParams recordsGenerationParams)
Convert clean metadata to bloom filter index records.
|
static HoodieData<HoodieRecord> |
convertMetadataToBloomFilterRecords(HoodieEngineContext context,
HoodieCommitMetadata commitMetadata,
String instantTime,
MetadataRecordsGenerationParams recordsGenerationParams)
Convert commit action metadata to bloom filter records.
|
static HoodieData<HoodieRecord> |
convertMetadataToColumnStatsRecords(HoodieCleanMetadata cleanMetadata,
HoodieEngineContext engineContext,
MetadataRecordsGenerationParams recordsGenerationParams)
Convert clean metadata to column stats index records.
|
static HoodieData<HoodieRecord> |
convertMetadataToColumnStatsRecords(HoodieCommitMetadata commitMetadata,
HoodieEngineContext engineContext,
MetadataRecordsGenerationParams recordsGenerationParams) |
static List<HoodieRecord> |
convertMetadataToFilesPartitionRecords(HoodieCleanMetadata cleanMetadata,
String instantTime)
Finds all files that were deleted as part of a clean and creates metadata table records for them.
|
static List<HoodieRecord> |
convertMetadataToFilesPartitionRecords(HoodieCommitMetadata commitMetadata,
String instantTime)
Finds all new files/partitions created as part of commit and creates metadata table records for them.
|
static Map<MetadataPartitionType,HoodieData<HoodieRecord>> |
convertMetadataToRecords(HoodieEngineContext engineContext,
HoodieActiveTimeline metadataTableTimeline,
HoodieRestoreMetadata restoreMetadata,
MetadataRecordsGenerationParams recordsGenerationParams,
String instantTime,
Option<String> lastSyncTs)
Convert restore action metadata to metadata table records.
|
static Map<MetadataPartitionType,HoodieData<HoodieRecord>> |
convertMetadataToRecords(HoodieEngineContext engineContext,
HoodieActiveTimeline metadataTableTimeline,
HoodieRollbackMetadata rollbackMetadata,
MetadataRecordsGenerationParams recordsGenerationParams,
String instantTime,
Option<String> lastSyncTs,
boolean wasSynced)
Convert rollback action metadata to metadata table records.
|
static Map<MetadataPartitionType,HoodieData<HoodieRecord>> |
convertMetadataToRecords(HoodieEngineContext engineContext,
HoodieCleanMetadata cleanMetadata,
MetadataRecordsGenerationParams recordsGenerationParams,
String instantTime)
Convert the clean action to metadata records.
|
static Map<MetadataPartitionType,HoodieData<HoodieRecord>> |
convertMetadataToRecords(HoodieEngineContext context,
HoodieCommitMetadata commitMetadata,
String instantTime,
MetadataRecordsGenerationParams recordsGenerationParams)
Convert commit action to metadata records for the enabled partition types.
|
static void |
deleteMetadataPartition(String basePath,
HoodieEngineContext context,
MetadataPartitionType partitionType)
Deletes the metadata partition from the file system.
|
static void |
deleteMetadataTable(String basePath,
HoodieEngineContext context)
Delete the metadata table for the dataset.
|
static HoodieTableFileSystemView |
getFileSystemView(HoodieTableMetaClient metaClient)
Get metadata table file system view.
|
static Set<String> |
getInflightAndCompletedMetadataPartitions(HoodieTableConfig tableConfig) |
static Set<String> |
getInflightMetadataPartitions(HoodieTableConfig tableConfig) |
static int |
getPartitionFileGroupCount(MetadataPartitionType partitionType,
Option<HoodieTableMetaClient> metaClient,
Option<HoodieTableFileSystemView> fsView,
HoodieMetadataConfig metadataConfig,
boolean isBootstrapCompleted)
Get file group count for a metadata table partition.
|
static String |
getPartitionIdentifier(String relativePartitionPath)
Returns partition name for the given path.
|
static List<FileSlice> |
getPartitionLatestFileSlices(HoodieTableMetaClient metaClient,
Option<HoodieTableFileSystemView> fsView,
String partition)
Get the latest file slices for a Metadata Table partition.
|
static List<FileSlice> |
getPartitionLatestFileSlicesIncludingInflight(HoodieTableMetaClient metaClient,
Option<HoodieTableFileSystemView> fileSystemView,
String partition)
Get the latest file slices for a given partition including the inflight ones.
|
static List<FileSlice> |
getPartitionLatestMergedFileSlices(HoodieTableMetaClient metaClient,
HoodieTableFileSystemView fsView,
String partition)
Get the latest file slices for a Metadata Table partition.
|
static boolean |
isFilesPartitionAvailable(HoodieTableMetaClient metaClient)
Returns whether the files partition of metadata table is ready for read.
|
static int |
mapRecordKeyToFileGroupIndex(String recordKey,
int numFileGroups)
Map a record key to a file group in partition of interest.
|
static boolean |
metadataPartitionExists(String basePath,
HoodieEngineContext context,
MetadataPartitionType partitionType)
Check if the given metadata partition exists.
|
static BigDecimal |
tryUpcastDecimal(BigDecimal value,
org.apache.avro.LogicalTypes.Decimal decimal)
Does an upcast for
BigDecimal instance to align it with scale/precision expected by
the LogicalTypes.Decimal Avro logical type |
public static final String PARTITION_NAME_FILES
public static final String PARTITION_NAME_COLUMN_STATS
public static final String PARTITION_NAME_BLOOM_FILTERS
public static boolean isFilesPartitionAvailable(HoodieTableMetaClient metaClient)
metaClient - HoodieTableMetaClient instance.
public static Map<String,HoodieColumnRangeMetadata<Comparable>> collectColumnRangeMetadata(List<org.apache.avro.generic.IndexedRecord> records, List<org.apache.avro.Schema.Field> targetFields, String filePath)
Collects HoodieColumnRangeMetadata for the provided collection of records, as if the provided records had been persisted within the given filePath.
records - target records to compute column range metadata for
targetFields - columns (fields) to be collected
filePath - file path value required for HoodieColumnRangeMetadata
Returns: HoodieColumnRangeMetadata for each of the provided target fields for the collection of provided records
public static HoodieColumnRangeMetadata<Comparable> convertColumnStatsRecordToColumnRangeMetadata(HoodieMetadataColumnStats columnStats)
Converts instance of HoodieMetadataColumnStats to HoodieColumnRangeMetadata.
public static void deleteMetadataTable(String basePath, HoodieEngineContext context)
basePath - base path of the dataset
context - instance of HoodieEngineContext.
public static void deleteMetadataPartition(String basePath, HoodieEngineContext context, MetadataPartitionType partitionType)
basePath - base path of the dataset
context - instance of HoodieEngineContext
partitionType - MetadataPartitionType of the partition to delete
public static boolean metadataPartitionExists(String basePath, HoodieEngineContext context, MetadataPartitionType partitionType)
basePath - base path of the dataset
context - instance of HoodieEngineContext.
public static Map<MetadataPartitionType,HoodieData<HoodieRecord>> convertMetadataToRecords(HoodieEngineContext context, HoodieCommitMetadata commitMetadata, String instantTime, MetadataRecordsGenerationParams recordsGenerationParams)
commitMetadata - Commit action metadata
instantTime - Action instant time
recordsGenerationParams - Parameters for the record generation
public static List<HoodieRecord> convertMetadataToFilesPartitionRecords(HoodieCommitMetadata commitMetadata, String instantTime)
commitMetadata - Commit action metadata
instantTime - Commit action instant time
public static HoodieData<HoodieRecord> convertMetadataToBloomFilterRecords(HoodieEngineContext context, HoodieCommitMetadata commitMetadata, String instantTime, MetadataRecordsGenerationParams recordsGenerationParams)
context - Engine context to use
commitMetadata - Commit action metadata
instantTime - Action instant time
recordsGenerationParams - Parameters for bloom filter record generation
public static Map<MetadataPartitionType,HoodieData<HoodieRecord>> convertMetadataToRecords(HoodieEngineContext engineContext, HoodieCleanMetadata cleanMetadata, MetadataRecordsGenerationParams recordsGenerationParams, String instantTime)
public static List<HoodieRecord> convertMetadataToFilesPartitionRecords(HoodieCleanMetadata cleanMetadata, String instantTime)
cleanMetadata - Clean action metadata
instantTime - Clean action instant time
public static HoodieData<HoodieRecord> convertMetadataToBloomFilterRecords(HoodieCleanMetadata cleanMetadata, HoodieEngineContext engineContext, String instantTime, MetadataRecordsGenerationParams recordsGenerationParams)
cleanMetadata - Clean action metadata
engineContext - Engine context
instantTime - Clean action instant time
recordsGenerationParams - Parameters for bloom filter record generation
public static HoodieData<HoodieRecord> convertMetadataToColumnStatsRecords(HoodieCleanMetadata cleanMetadata, HoodieEngineContext engineContext, MetadataRecordsGenerationParams recordsGenerationParams)
cleanMetadata - Clean action metadata
engineContext - Engine context
recordsGenerationParams - Parameters for column stats record generation
public static Map<MetadataPartitionType,HoodieData<HoodieRecord>> convertMetadataToRecords(HoodieEngineContext engineContext, HoodieActiveTimeline metadataTableTimeline, HoodieRestoreMetadata restoreMetadata, MetadataRecordsGenerationParams recordsGenerationParams, String instantTime, Option<String> lastSyncTs)
public static Map<MetadataPartitionType,HoodieData<HoodieRecord>> convertMetadataToRecords(HoodieEngineContext engineContext, HoodieActiveTimeline metadataTableTimeline, HoodieRollbackMetadata rollbackMetadata, MetadataRecordsGenerationParams recordsGenerationParams, String instantTime, Option<String> lastSyncTs, boolean wasSynced)
public static String getPartitionIdentifier(@Nonnull String relativePartitionPath)
public static HoodieData<HoodieRecord> convertFilesToBloomFilterRecords(HoodieEngineContext engineContext, Map<String,List<String>> partitionToDeletedFiles, Map<String,Map<String,Long>> partitionToAppendedFiles, MetadataRecordsGenerationParams recordsGenerationParams, String instantTime)
public static HoodieData<HoodieRecord> convertFilesToColumnStatsRecords(HoodieEngineContext engineContext, Map<String,List<String>> partitionToDeletedFiles, Map<String,Map<String,Long>> partitionToAppendedFiles, MetadataRecordsGenerationParams recordsGenerationParams)
public static int mapRecordKeyToFileGroupIndex(String recordKey, int numFileGroups)
Note: For hashing, the algorithm is the same as String.hashCode(), but it is defined here because the hashCode() implementation is not guaranteed by the JVM to be consistent across JVM versions and implementations.
recordKey - record key for which the file group index is looked up.
public static List<FileSlice> getPartitionLatestMergedFileSlices(HoodieTableMetaClient metaClient, HoodieTableFileSystemView fsView, String partition)
metaClient - Instance of HoodieTableMetaClient.
fsView - Metadata table filesystem view.
partition - The name of the partition whose file groups are to be loaded.
public static List<FileSlice> getPartitionLatestFileSlices(HoodieTableMetaClient metaClient, Option<HoodieTableFileSystemView> fsView, String partition)
metaClient - Instance of HoodieTableMetaClient.
fsView - Metadata table filesystem view
partition - The name of the partition whose file groups are to be loaded.
public static HoodieTableFileSystemView getFileSystemView(HoodieTableMetaClient metaClient)
metaClient - Metadata table meta client
public static List<FileSlice> getPartitionLatestFileSlicesIncludingInflight(HoodieTableMetaClient metaClient, Option<HoodieTableFileSystemView> fileSystemView, String partition)
metaClient - instance of HoodieTableMetaClient
fileSystemView - hoodie table file system view, which will be fetched from meta client if not already present
partition - name of the partition whose file groups are to be loaded
public static HoodieData<HoodieRecord> convertMetadataToColumnStatsRecords(HoodieCommitMetadata commitMetadata, HoodieEngineContext engineContext, MetadataRecordsGenerationParams recordsGenerationParams)
public static int getPartitionFileGroupCount(MetadataPartitionType partitionType, Option<HoodieTableMetaClient> metaClient, Option<HoodieTableFileSystemView> fsView, HoodieMetadataConfig metadataConfig, boolean isBootstrapCompleted)
partitionType - Metadata table partition type
metaClient - Metadata table meta client
fsView - Filesystem view
metadataConfig - Metadata config
isBootstrapCompleted - Is bootstrap completed for the metadata table
public static BigDecimal tryUpcastDecimal(BigDecimal value, org.apache.avro.LogicalTypes.Decimal decimal)
Does an upcast for BigDecimal instance to align it with scale/precision expected by the LogicalTypes.Decimal Avro logical type.
public static Set<String> getInflightMetadataPartitions(HoodieTableConfig tableConfig)
public static Set<String> getInflightAndCompletedMetadataPartitions(HoodieTableConfig tableConfig)
Copyright © 2022 The Apache Software Foundation. All rights reserved.