public abstract class FileFormatUtils extends Object
| Constructor and Description |
|---|
FileFormatUtils() |
| Modifier and Type | Method and Description |
|---|---|
abstract List<HoodieKey> |
fetchHoodieKeys(HoodieStorage storage,
StoragePath filePath)
Fetch HoodieKeys from the given data file. |
abstract List<HoodieKey> |
fetchHoodieKeys(HoodieStorage storage,
StoragePath filePath,
Option<BaseKeyGenerator> keyGeneratorOpt)
Fetch HoodieKeys from the given data file. |
abstract Set<String> |
filterRowKeys(HoodieStorage storage,
StoragePath filePath,
Set<String> filter)
Read the rowKey list matching the given filter, from the given data file.
|
abstract HoodieFileFormat |
getFormat() |
abstract ClosableIterator<HoodieKey> |
getHoodieKeyIterator(HoodieStorage storage,
StoragePath filePath)
Provides a closable iterator for reading the given data file.
|
abstract ClosableIterator<HoodieKey> |
getHoodieKeyIterator(HoodieStorage storage,
StoragePath filePath,
Option<BaseKeyGenerator> keyGeneratorOpt)
Provides a closable iterator for reading the given data file.
|
abstract long |
getRowCount(HoodieStorage storage,
StoragePath filePath)
Returns the number of records in the data file.
|
abstract List<org.apache.avro.generic.GenericRecord> |
readAvroRecords(HoodieStorage storage,
StoragePath filePath)
Read the data file.
NOTE: This literally reads the entire file contents, thus should be used with caution.
|
abstract List<org.apache.avro.generic.GenericRecord> |
readAvroRecords(HoodieStorage storage,
StoragePath filePath,
org.apache.avro.Schema schema)
Read the data file using the given schema.
NOTE: This literally reads the entire file contents, thus should be used with caution.
|
abstract org.apache.avro.Schema |
readAvroSchema(HoodieStorage storage,
StoragePath filePath)
Read the Avro schema of the data file.
|
BloomFilter |
readBloomFilterFromMetadata(HoodieStorage storage,
StoragePath filePath)
Read the bloom filter from the metadata of the given data file.
|
abstract List<HoodieColumnRangeMetadata<Comparable>> |
readColumnStatsFromMetadata(HoodieStorage storage,
StoragePath filePath,
List<String> columnList)
Reads column statistics stored in the metadata.
|
abstract Map<String,String> |
readFooter(HoodieStorage storage,
boolean required,
StoragePath filePath,
String... footerNames)
Read the footer data of the given data file.
|
String[] |
readMinMaxRecordKeys(HoodieStorage storage,
StoragePath filePath)
Read the min and max record key from the metadata of the given data file.
|
Set<String> |
readRowKeys(HoodieStorage storage,
StoragePath filePath)
Read the rowKey list from the given data file.
|
abstract byte[] |
serializeRecordsToLogBlock(HoodieStorage storage,
List<HoodieRecord> records,
org.apache.avro.Schema writerSchema,
org.apache.avro.Schema readerSchema,
String keyFieldName,
Map<String,String> paramsMap)
Serializes Hudi records to the log block.
|
abstract void |
writeMetaFile(HoodieStorage storage,
StoragePath filePath,
Properties props)
Writes properties to the meta file.
|
public Set<String> readRowKeys(HoodieStorage storage, StoragePath filePath)
Parameters:
storage - HoodieStorage instance.
filePath - the data file path.

public BloomFilter readBloomFilterFromMetadata(HoodieStorage storage, StoragePath filePath)
Parameters:
storage - HoodieStorage instance.
filePath - the data file path.

public String[] readMinMaxRecordKeys(HoodieStorage storage, StoragePath filePath)
Parameters:
storage - HoodieStorage instance.
filePath - the data file path.

public abstract List<org.apache.avro.generic.GenericRecord> readAvroRecords(HoodieStorage storage, StoragePath filePath)
Parameters:
storage - HoodieStorage instance.
filePath - the data file path.

public abstract List<org.apache.avro.generic.GenericRecord> readAvroRecords(HoodieStorage storage, StoragePath filePath, org.apache.avro.Schema schema)
Parameters:
storage - HoodieStorage instance.
filePath - the data file path.

public abstract Map<String,String> readFooter(HoodieStorage storage, boolean required, StoragePath filePath, String... footerNames)
Parameters:
storage - HoodieStorage instance.
required - require the footer data to be in data file.
filePath - the data file path.
footerNames - the footer names to read.

public abstract long getRowCount(HoodieStorage storage, StoragePath filePath)
Parameters:
storage - HoodieStorage instance.
filePath - the data file path.

public abstract Set<String> filterRowKeys(HoodieStorage storage, StoragePath filePath, Set<String> filter)
Parameters:
storage - HoodieStorage instance.
filePath - the data file path.
filter - record keys filter.

public abstract List<HoodieKey> fetchHoodieKeys(HoodieStorage storage, StoragePath filePath)
Fetch HoodieKeys from the given data file.
Parameters:
storage - HoodieStorage instance.
filePath - the data file path.
Returns:
List of HoodieKeys fetched from the data file.

public abstract ClosableIterator<HoodieKey> getHoodieKeyIterator(HoodieStorage storage, StoragePath filePath, Option<BaseKeyGenerator> keyGeneratorOpt)
Parameters:
storage - HoodieStorage instance.
filePath - the data file path.
keyGeneratorOpt - instance of KeyGenerator.
Returns:
ClosableIterator of HoodieKeys for reading the file.

public abstract ClosableIterator<HoodieKey> getHoodieKeyIterator(HoodieStorage storage, StoragePath filePath)
Parameters:
storage - HoodieStorage instance.
filePath - the data file path.
Returns:
ClosableIterator of HoodieKeys for reading the file.

public abstract List<HoodieKey> fetchHoodieKeys(HoodieStorage storage, StoragePath filePath, Option<BaseKeyGenerator> keyGeneratorOpt)
Fetch HoodieKeys from the given data file.
Parameters:
storage - HoodieStorage instance.
filePath - the data file path.
keyGeneratorOpt - instance of KeyGenerator.
Returns:
List of HoodieKeys fetched from the data file.

public abstract org.apache.avro.Schema readAvroSchema(HoodieStorage storage, StoragePath filePath)
Parameters:
storage - HoodieStorage instance.
filePath - the data file path.

public abstract List<HoodieColumnRangeMetadata<Comparable>> readColumnStatsFromMetadata(HoodieStorage storage, StoragePath filePath, List<String> columnList)
Parameters:
storage - HoodieStorage instance.
filePath - the data file path.
columnList - List of columns to get column statistics.
Returns:
List of HoodieColumnRangeMetadata.

public abstract HoodieFileFormat getFormat()
Returns:
HoodieFileFormat.

public abstract void writeMetaFile(HoodieStorage storage, StoragePath filePath, Properties props) throws IOException
Parameters:
storage - HoodieStorage instance.
filePath - file path to write to.
props - properties to write.
Throws:
IOException - upon write error.

public abstract byte[] serializeRecordsToLogBlock(HoodieStorage storage, List<HoodieRecord> records, org.apache.avro.Schema writerSchema, org.apache.avro.Schema readerSchema, String keyFieldName, Map<String,String> paramsMap) throws IOException
Parameters:
storage - HoodieStorage instance.
records - a list of HoodieRecord.
writerSchema - writer schema string from the log block header.
readerSchema -
keyFieldName -
paramsMap - additional params for serialization.
Throws:
IOException - upon serialization error.

Copyright © 2024 The Apache Software Foundation. All rights reserved.