public class HoodieAvroUtils extends Object
| Modifier and Type | Field and Description |
|---|---|
static String |
AVRO_VERSION |
static org.apache.avro.Conversions.DecimalConversion |
DECIMAL_CONVERSION |
static org.apache.avro.Schema |
METADATA_FIELD_SCHEMA |
static org.apache.avro.Schema |
RECORD_KEY_SCHEMA |
| Constructor and Description |
|---|
HoodieAvroUtils() |
| Modifier and Type | Method and Description |
|---|---|
static org.apache.avro.generic.GenericRecord |
addCommitMetadataToRecord(org.apache.avro.generic.GenericRecord record,
String instantTime,
String commitSeqno)
Adds the Hoodie commit metadata into the provided Generic Record.
|
static org.apache.avro.generic.GenericRecord |
addHoodieKeyToRecord(org.apache.avro.generic.GenericRecord record,
String recordKey,
String partitionPath,
String fileName) |
static String |
addMetadataColumnTypes(String hiveColumnTypes) |
static org.apache.avro.Schema |
addMetadataFields(org.apache.avro.Schema schema)
Adds the Hoodie metadata fields to the given schema.
|
static org.apache.avro.Schema |
addMetadataFields(org.apache.avro.Schema schema,
boolean withOperationField)
Adds the Hoodie metadata fields to the given schema.
|
static org.apache.avro.generic.GenericRecord |
addOperationToRecord(org.apache.avro.generic.GenericRecord record,
HoodieOperation operation) |
static byte[] |
avroToBytes(org.apache.avro.generic.GenericRecord record)
Convert a given avro record to bytes.
|
static byte[] |
avroToJson(org.apache.avro.generic.GenericRecord record,
boolean pretty)
Convert a given avro record to json and return the encoded bytes.
|
static org.apache.avro.generic.GenericRecord |
bytesToAvro(byte[] bytes,
org.apache.avro.Schema schema)
Convert serialized bytes back into avro record.
|
static org.apache.avro.generic.GenericRecord |
bytesToAvro(byte[] bytes,
org.apache.avro.Schema writerSchema,
org.apache.avro.Schema readerSchema)
Convert serialized bytes back into avro record.
|
static Object |
convertValueForSpecificDataTypes(org.apache.avro.Schema fieldSchema,
Object fieldValue,
boolean consistentLogicalTimestampEnabled)
This method converts values for fields with certain Avro/Parquet data types that require special handling.
|
static org.apache.avro.Schema |
createHoodieWriteSchema(org.apache.avro.Schema originalSchema) |
static org.apache.avro.Schema |
createHoodieWriteSchema(String originalSchema) |
static org.apache.avro.Schema |
createHoodieWriteSchema(String originalSchema,
boolean withOperationField) |
static int |
fromJavaDate(Date date)
Converts a Date to days.
NOTE: This method should only be used in tests.
|
static org.apache.avro.Schema |
generateProjectionSchema(org.apache.avro.Schema originalSchema,
List<String> fieldNames)
Generate a reader schema off the provided writeSchema, to just project out the provided columns.
|
static Object |
getFieldVal(org.apache.avro.generic.GenericRecord record,
String key)
Obtains the value of the provided key; the behavior is consistent with Avro versions before 1.10.
|
static Object |
getFieldVal(org.apache.avro.generic.GenericRecord record,
String key,
boolean returnNullIfNotFound)
Obtains the value of the provided key. When returnNullIfNotFound is set to false,
the behavior is consistent with Avro versions after 1.10.
|
static org.apache.avro.Schema |
getNestedFieldSchemaFromRecord(org.apache.avro.generic.GenericRecord record,
String fieldName)
Get schema for the given field and record.
|
static org.apache.avro.Schema |
getNestedFieldSchemaFromWriteSchema(org.apache.avro.Schema writeSchema,
String fieldName)
Get schema for the given field and write schema.
|
static Object |
getNestedFieldVal(org.apache.avro.generic.GenericRecord record,
String fieldName,
boolean returnNullIfNotFound,
boolean consistentLogicalTimestampEnabled)
Obtain value of the provided field, denoted by dot notation.
|
static String |
getNestedFieldValAsString(org.apache.avro.generic.GenericRecord record,
String fieldName,
boolean returnNullIfNotFound,
boolean consistentLogicalTimestampEnabled)
Obtain value of the provided field as string, denoted by dot notation.
|
static Option<String> |
getNullableValAsString(org.apache.avro.generic.GenericRecord rec,
String fieldName)
Returns the string value of the given record
rec and field fieldName. |
static org.apache.avro.Schema |
getNullSchema() |
static Object |
getRecordColumnValues(HoodieRecord<? extends HoodieRecordPayload> record,
String[] columns,
org.apache.avro.Schema schema,
boolean consistentLogicalTimestampEnabled)
Gets record column values into one object.
|
static Object |
getRecordColumnValues(HoodieRecord<? extends HoodieRecordPayload> record,
String[] columns,
SerializableSchema schema,
boolean consistentLogicalTimestampEnabled)
Gets record column values into one object.
|
static org.apache.avro.Schema |
getRecordKeyPartitionPathSchema()
Fetch schema for record key and partition path.
|
static org.apache.avro.Schema |
getRecordKeySchema() |
static String |
getRootLevelFieldName(String fieldName)
Obtain the root-level field name of a full field name, possibly a nested field.
|
static org.apache.avro.Schema |
getSchemaForFields(org.apache.avro.Schema fileSchema,
List<String> fields)
Fetch schema for the given fields.
|
static boolean |
gteqAvro1_10() |
static boolean |
gteqAvro1_9() |
static <T extends org.apache.avro.generic.IndexedRecord> |
indexedRecordToBytes(T record) |
static boolean |
isMetadataField(String fieldName) |
static org.apache.avro.generic.GenericRecord |
jsonBytesToAvro(byte[] bytes,
org.apache.avro.Schema schema)
Convert json bytes back into avro record.
|
static org.apache.avro.generic.GenericRecord |
removeFields(org.apache.avro.generic.GenericRecord record,
Set<String> fieldsToRemove)
Given an Avro record and a list of columns to remove, this method removes those columns from
the given Avro record using the rewriteRecord method.
|
static org.apache.avro.Schema |
removeFields(org.apache.avro.Schema schema,
Set<String> fieldsToRemove) |
static org.apache.avro.Schema |
removeMetadataFields(org.apache.avro.Schema schema) |
static org.apache.avro.generic.GenericRecord |
rewriteEvolutionRecordWithMetadata(org.apache.avro.generic.GenericRecord genericRecord,
org.apache.avro.Schema newSchema,
String fileName) |
static org.apache.avro.generic.GenericRecord |
rewriteRecord(org.apache.avro.generic.GenericRecord oldRecord,
org.apache.avro.Schema newSchema)
Given an Avro record with a given schema, rewrites it into the new schema while setting fields only from the new
schema.
|
static org.apache.avro.generic.GenericRecord |
rewriteRecordDeep(org.apache.avro.generic.GenericRecord oldRecord,
org.apache.avro.Schema newSchema) |
static List<org.apache.avro.generic.GenericRecord> |
rewriteRecords(List<org.apache.avro.generic.GenericRecord> records,
org.apache.avro.Schema newSchema)
Converts list of
GenericRecord provided into the GenericRecord adhering to the
provided newSchema. |
static org.apache.avro.generic.GenericRecord |
rewriteRecordWithMetadata(org.apache.avro.generic.GenericRecord genericRecord,
org.apache.avro.Schema newSchema,
String fileName) |
static org.apache.avro.generic.GenericRecord |
rewriteRecordWithNewSchema(org.apache.avro.generic.IndexedRecord oldRecord,
org.apache.avro.Schema newSchema,
Map<String,String> renameCols)
Given an Avro record with a given schema, rewrites it into the new schema while setting fields only from the new schema.
|
static Iterator<org.apache.avro.generic.GenericRecord> |
rewriteRecordWithNewSchema(Iterator<org.apache.avro.generic.GenericRecord> oldRecords,
org.apache.avro.Schema newSchema,
Map<String,String> renameCols)
Given avro records, rewrites them with new schema.
|
static String |
sanitizeName(String name)
Sanitizes a name according to the Avro rules for names.
|
static org.apache.avro.generic.GenericRecord |
stitchRecords(org.apache.avro.generic.GenericRecord left,
org.apache.avro.generic.GenericRecord right,
org.apache.avro.Schema stitchedSchema) |
static Date |
toJavaDate(int days)
Converts days to a Date.
NOTE: This method should only be used in tests.
|
public static final String AVRO_VERSION
public static final org.apache.avro.Conversions.DecimalConversion DECIMAL_CONVERSION
public static final org.apache.avro.Schema METADATA_FIELD_SCHEMA
public static final org.apache.avro.Schema RECORD_KEY_SCHEMA
public static byte[] avroToBytes(org.apache.avro.generic.GenericRecord record)
public static <T extends org.apache.avro.generic.IndexedRecord> byte[] indexedRecordToBytes(T record)
public static byte[] avroToJson(org.apache.avro.generic.GenericRecord record,
boolean pretty)
throws IOException
record - The GenericRecord to convert
pretty - Whether to pretty-print the JSON output
Throws: IOException
public static org.apache.avro.generic.GenericRecord bytesToAvro(byte[] bytes,
org.apache.avro.Schema schema)
throws IOException
Throws: IOException
public static org.apache.avro.generic.GenericRecord bytesToAvro(byte[] bytes,
org.apache.avro.Schema writerSchema,
org.apache.avro.Schema readerSchema)
throws IOException
Throws: IOException
public static org.apache.avro.generic.GenericRecord jsonBytesToAvro(byte[] bytes,
org.apache.avro.Schema schema)
throws IOException
Throws: IOException
public static boolean isMetadataField(String fieldName)
public static org.apache.avro.Schema createHoodieWriteSchema(org.apache.avro.Schema originalSchema)
public static org.apache.avro.Schema createHoodieWriteSchema(String originalSchema)
public static org.apache.avro.Schema createHoodieWriteSchema(String originalSchema, boolean withOperationField)
public static org.apache.avro.Schema addMetadataFields(org.apache.avro.Schema schema)
schema - The schema
public static org.apache.avro.Schema addMetadataFields(org.apache.avro.Schema schema,
boolean withOperationField)
schema - The schema
withOperationField - Whether to include the '_hoodie_operation' field
public static org.apache.avro.Schema removeMetadataFields(org.apache.avro.Schema schema)
public static org.apache.avro.Schema removeFields(org.apache.avro.Schema schema,
Set<String> fieldsToRemove)
public static org.apache.avro.Schema getRecordKeySchema()
public static org.apache.avro.Schema getRecordKeyPartitionPathSchema()
public static org.apache.avro.Schema getSchemaForFields(org.apache.avro.Schema fileSchema,
List<String> fields)
public static org.apache.avro.generic.GenericRecord addHoodieKeyToRecord(org.apache.avro.generic.GenericRecord record,
String recordKey,
String partitionPath,
String fileName)
public static org.apache.avro.generic.GenericRecord addOperationToRecord(org.apache.avro.generic.GenericRecord record,
HoodieOperation operation)
public static org.apache.avro.generic.GenericRecord addCommitMetadataToRecord(org.apache.avro.generic.GenericRecord record,
String instantTime,
String commitSeqno)
public static org.apache.avro.generic.GenericRecord stitchRecords(org.apache.avro.generic.GenericRecord left,
org.apache.avro.generic.GenericRecord right,
org.apache.avro.Schema stitchedSchema)
public static org.apache.avro.generic.GenericRecord rewriteRecord(org.apache.avro.generic.GenericRecord oldRecord,
org.apache.avro.Schema newSchema)
public static org.apache.avro.generic.GenericRecord rewriteRecordWithMetadata(org.apache.avro.generic.GenericRecord genericRecord,
org.apache.avro.Schema newSchema,
String fileName)
public static org.apache.avro.generic.GenericRecord rewriteEvolutionRecordWithMetadata(org.apache.avro.generic.GenericRecord genericRecord,
org.apache.avro.Schema newSchema,
String fileName)
public static List<org.apache.avro.generic.GenericRecord> rewriteRecords(List<org.apache.avro.generic.GenericRecord> records, org.apache.avro.Schema newSchema)
Converts the provided list of GenericRecord into GenericRecords adhering to the
provided newSchema.
To better understand the conversion rules, please check rewriteRecord(GenericRecord, Schema).
public static org.apache.avro.generic.GenericRecord removeFields(org.apache.avro.generic.GenericRecord record,
Set<String> fieldsToRemove)
To better understand how the fields are removed, please check rewriteRecord(GenericRecord, Schema).
public static org.apache.avro.Schema generateProjectionSchema(org.apache.avro.Schema originalSchema,
List<String> fieldNames)
public static String getRootLevelFieldName(String fieldName)
fieldName - The field name.
public static Object getFieldVal(org.apache.avro.generic.GenericRecord record, String key)
public static Object getFieldVal(org.apache.avro.generic.GenericRecord record, String key, boolean returnNullIfNotFound)
public static String getNestedFieldValAsString(org.apache.avro.generic.GenericRecord record, String fieldName, boolean returnNullIfNotFound, boolean consistentLogicalTimestampEnabled)
public static Object getNestedFieldVal(org.apache.avro.generic.GenericRecord record, String fieldName, boolean returnNullIfNotFound, boolean consistentLogicalTimestampEnabled)
public static org.apache.avro.Schema getNestedFieldSchemaFromRecord(org.apache.avro.generic.GenericRecord record,
String fieldName)
record - record containing the value of the given field
fieldName - name of the field
public static org.apache.avro.Schema getNestedFieldSchemaFromWriteSchema(org.apache.avro.Schema writeSchema,
String fieldName)
getNestedFieldSchemaFromRecord(GenericRecord, String)
writeSchema - write schema of the record
fieldName - name of the field
public static Option<String> getNullableValAsString(org.apache.avro.generic.GenericRecord rec, String fieldName)
Returns the string value of the given record rec and field fieldName.
The field and value both could be missing.
rec - The record
fieldName - The field name
public static Object convertValueForSpecificDataTypes(org.apache.avro.Schema fieldSchema, Object fieldValue, boolean consistentLogicalTimestampEnabled)
fieldSchema - avro field schema
fieldValue - avro field value
public static org.apache.avro.Schema getNullSchema()
public static String sanitizeName(String name)
name - input name
public static Object getRecordColumnValues(HoodieRecord<? extends HoodieRecordPayload> record, String[] columns, org.apache.avro.Schema schema, boolean consistentLogicalTimestampEnabled)
record - Hoodie record.
columns - Names of the columns to get values.
schema - Schema instance.
public static Object getRecordColumnValues(HoodieRecord<? extends HoodieRecordPayload> record, String[] columns, SerializableSchema schema, boolean consistentLogicalTimestampEnabled)
record - Hoodie record.
columns - Names of the columns to get values.
schema - SerializableSchema instance.
public static org.apache.avro.generic.GenericRecord rewriteRecordWithNewSchema(org.apache.avro.generic.IndexedRecord oldRecord,
org.apache.avro.Schema newSchema,
Map<String,String> renameCols)
oldRecord - oldRecord to be rewritten
newSchema - newSchema used to rewrite oldRecord
renameCols - a map storing all renamed columns, (k, v) -> (colNameFromNewSchema, colNameFromOldSchema)
public static Date toJavaDate(int days)
public static int fromJavaDate(Date date)
public static Iterator<org.apache.avro.generic.GenericRecord> rewriteRecordWithNewSchema(Iterator<org.apache.avro.generic.GenericRecord> oldRecords, org.apache.avro.Schema newSchema, Map<String,String> renameCols)
oldRecords - oldRecords to be rewritten
newSchema - newSchema used to rewrite oldRecords
renameCols - a map storing all renamed columns, (k, v) -> (colNameFromNewSchema, colNameFromOldSchema)
public static org.apache.avro.generic.GenericRecord rewriteRecordDeep(org.apache.avro.generic.GenericRecord oldRecord,
org.apache.avro.Schema newSchema)
public static boolean gteqAvro1_9()
public static boolean gteqAvro1_10()
Copyright © 2022 The Apache Software Foundation. All rights reserved.