public abstract class HoodieWriteHandle<T extends HoodieRecordPayload,I,K,O> extends HoodieIOHandle<T,I,K,O>
| Modifier and Type | Field and Description |
|---|---|
protected String |
fileId |
static org.apache.hudi.io.HoodieWriteHandle.IgnoreRecord |
IGNORE_RECORD
A special record returned by
HoodieRecordPayload, which means
HoodieWriteHandle should just skip this record. |
protected String |
partitionPath |
protected boolean |
schemaOnReadEnabled |
protected org.apache.avro.Schema |
tableSchema
The specified schema of the table.
|
protected org.apache.avro.Schema |
tableSchemaWithMetaFields |
protected TaskContextSupplier |
taskContextSupplier |
protected HoodieTimer |
timer |
protected org.apache.avro.Schema |
writeSchema
The write schema.
|
protected org.apache.avro.Schema |
writeSchemaWithMetaFields |
protected WriteStatus |
writeStatus |
protected String |
writeToken |
config, fs, hoodieTable, instantTime
| Modifier | Constructor and Description |
|---|---|
protected |
HoodieWriteHandle(HoodieWriteConfig config,
String instantTime,
String partitionPath,
String fileId,
HoodieTable<T,I,K,O> hoodieTable,
Option<org.apache.avro.Schema> overriddenSchema,
TaskContextSupplier taskContextSupplier) |
|
HoodieWriteHandle(HoodieWriteConfig config,
String instantTime,
String partitionPath,
String fileId,
HoodieTable<T,I,K,O> hoodieTable,
TaskContextSupplier taskContextSupplier) |
| Modifier and Type | Method and Description |
|---|---|
boolean |
canWrite(HoodieRecord record)
Determines whether we can accept the incoming records, into the current file.
|
abstract List<WriteStatus> |
close() |
protected void |
createMarkerFile(String partitionPath,
String dataFileName)
Creates an empty marker file corresponding to storage writer path.
|
protected HoodieFileWriter |
createNewFileWriter(String instantTime,
org.apache.hadoop.fs.Path path,
HoodieTable<T,I,K,O> hoodieTable,
HoodieWriteConfig config,
org.apache.avro.Schema schema,
TaskContextSupplier taskContextSupplier) |
protected long |
getAttemptId() |
protected org.apache.hadoop.fs.FileSystem |
getFileSystem() |
abstract IOType |
getIOType() |
protected int |
getPartitionId() |
String |
getPartitionPath() |
protected int |
getStageId() |
org.apache.avro.Schema |
getWriterSchemaWithMetaFields() |
protected org.apache.hadoop.fs.Path |
makeNewFilePath(String partitionPath,
String fileName)
Make new file path with given file name.
|
org.apache.hadoop.fs.Path |
makeNewPath(String partitionPath) |
protected org.apache.avro.generic.GenericRecord |
rewriteRecord(org.apache.avro.generic.GenericRecord record)
Rewrite the GenericRecord with the Schema containing the Hoodie Metadata fields.
|
protected org.apache.avro.generic.GenericRecord |
rewriteRecordWithMetadata(org.apache.avro.generic.GenericRecord record,
String fileName) |
void |
write(HoodieRecord record,
Option<org.apache.avro.generic.IndexedRecord> insertValue)
Perform the actual writing of the given record into the backing file.
|
void |
write(HoodieRecord record,
Option<org.apache.avro.generic.IndexedRecord> avroRecord,
Option<Exception> exception)
Perform the actual writing of the given record into the backing file.
|
List<WriteStatus> |
writeStatuses() |
public static org.apache.hudi.io.HoodieWriteHandle.IgnoreRecord IGNORE_RECORD
A special record returned by
HoodieRecordPayload, which means
HoodieWriteHandle should just skip this record.
This record is only used for HoodieRecordPayload currently, so it should not
shuffle through the network; we can compare the record locally by the equals method.
The HoodieRecordPayload#combineAndGetUpdateValue and HoodieRecordPayload#getInsertValue
have 3 kinds of return:
1. Option.empty
This means we should delete this record.
2. IGNORE_RECORD
This means we should not process this record, just skip it.
3. Other non-empty record
This means we should process this record.
We can see the usage of IGNORE_RECORD in
org.apache.spark.sql.hudi.command.payload.ExpressionPayload

protected final org.apache.avro.Schema tableSchema
protected final org.apache.avro.Schema tableSchemaWithMetaFields
protected final org.apache.avro.Schema writeSchema
protected final org.apache.avro.Schema writeSchemaWithMetaFields
protected HoodieTimer timer
protected WriteStatus writeStatus
protected final String partitionPath
protected final String fileId
protected final String writeToken
protected final TaskContextSupplier taskContextSupplier
protected final boolean schemaOnReadEnabled
public HoodieWriteHandle(HoodieWriteConfig config, String instantTime, String partitionPath, String fileId, HoodieTable<T,I,K,O> hoodieTable, TaskContextSupplier taskContextSupplier)
protected HoodieWriteHandle(HoodieWriteConfig config, String instantTime, String partitionPath, String fileId, HoodieTable<T,I,K,O> hoodieTable, Option<org.apache.avro.Schema> overriddenSchema, TaskContextSupplier taskContextSupplier)
public org.apache.hadoop.fs.Path makeNewPath(String partitionPath)
protected org.apache.hadoop.fs.Path makeNewFilePath(String partitionPath, String fileName)
protected void createMarkerFile(String partitionPath, String dataFileName)
partitionPath - Partition path
public org.apache.avro.Schema getWriterSchemaWithMetaFields()
public boolean canWrite(HoodieRecord record)
- Whether it belongs to the same partitionPath as existing records - Whether the current file written bytes are less than the max file size
public void write(HoodieRecord record, Option<org.apache.avro.generic.IndexedRecord> insertValue)
public void write(HoodieRecord record, Option<org.apache.avro.generic.IndexedRecord> avroRecord, Option<Exception> exception)
protected org.apache.avro.generic.GenericRecord rewriteRecord(org.apache.avro.generic.GenericRecord record)
protected org.apache.avro.generic.GenericRecord rewriteRecordWithMetadata(org.apache.avro.generic.GenericRecord record,
String fileName)
public abstract List<WriteStatus> close()
public List<WriteStatus> writeStatuses()
public String getPartitionPath()
public abstract IOType getIOType()
protected org.apache.hadoop.fs.FileSystem getFileSystem()
getFileSystem in class HoodieIOHandle<T extends HoodieRecordPayload,I,K,O>
protected int getPartitionId()
protected int getStageId()
protected long getAttemptId()
protected HoodieFileWriter createNewFileWriter(String instantTime, org.apache.hadoop.fs.Path path, HoodieTable<T,I,K,O> hoodieTable, HoodieWriteConfig config, org.apache.avro.Schema schema, TaskContextSupplier taskContextSupplier) throws IOException
IOException
Copyright © 2022 The Apache Software Foundation. All rights reserved.