public class DeltaGenerator extends Object implements Serializable
| Constructor and Description |
|---|
| `DeltaGenerator(DFSDeltaConfig deltaOutputConfig, org.apache.spark.api.java.JavaSparkContext jsc, org.apache.spark.sql.SparkSession sparkSession, String schemaStr, org.apache.hudi.keygen.BuiltinKeyGenerator keyGenerator)` |
| Modifier and Type | Method and Description |
|---|---|
| `org.apache.spark.api.java.JavaRDD<org.apache.avro.generic.GenericRecord>` | `adjustRDDToGenerateExactNumUpdates(org.apache.spark.api.java.JavaRDD<org.apache.avro.generic.GenericRecord> updates, org.apache.spark.api.java.JavaSparkContext jsc, long totalRecordsRequired)` |
| `org.apache.spark.api.java.JavaRDD<org.apache.avro.generic.GenericRecord>` | `generateDeletes(DeltaConfig.Config config)` |
| `org.apache.spark.api.java.JavaRDD<org.apache.avro.generic.GenericRecord>` | `generateInserts(DeltaConfig.Config operation)` |
| `org.apache.spark.api.java.JavaRDD<org.apache.avro.generic.GenericRecord>` | `generateUpdates(DeltaConfig.Config config)` |
| `Map<Integer,Long>` | `getAdjustedPartitionsCount(Map<Integer,Long> partitionCountMap, long recordsToRemove)` |
| `int` | `getBatchId()` |
| `Map<Integer,Long>` | `getPartitionToCountMap(org.apache.spark.api.java.JavaRDD<org.apache.avro.generic.GenericRecord> records)` |
| `org.apache.hudi.common.util.collection.Pair<Integer,org.apache.spark.api.java.JavaRDD<DeltaWriteStats>>` | `writeRecords(org.apache.spark.api.java.JavaRDD<org.apache.avro.generic.GenericRecord> records)` |
public DeltaGenerator(DFSDeltaConfig deltaOutputConfig, org.apache.spark.api.java.JavaSparkContext jsc, org.apache.spark.sql.SparkSession sparkSession, String schemaStr, org.apache.hudi.keygen.BuiltinKeyGenerator keyGenerator)
public org.apache.hudi.common.util.collection.Pair<Integer,org.apache.spark.api.java.JavaRDD<DeltaWriteStats>> writeRecords(org.apache.spark.api.java.JavaRDD<org.apache.avro.generic.GenericRecord> records)
public int getBatchId()
public org.apache.spark.api.java.JavaRDD<org.apache.avro.generic.GenericRecord> generateInserts(DeltaConfig.Config operation)
public org.apache.spark.api.java.JavaRDD<org.apache.avro.generic.GenericRecord> generateUpdates(DeltaConfig.Config config) throws IOException
Throws: IOException
public org.apache.spark.api.java.JavaRDD<org.apache.avro.generic.GenericRecord> generateDeletes(DeltaConfig.Config config) throws IOException
Throws: IOException
public Map<Integer,Long> getPartitionToCountMap(org.apache.spark.api.java.JavaRDD<org.apache.avro.generic.GenericRecord> records)
public Map<Integer,Long> getAdjustedPartitionsCount(Map<Integer,Long> partitionCountMap, long recordsToRemove)
public org.apache.spark.api.java.JavaRDD<org.apache.avro.generic.GenericRecord> adjustRDDToGenerateExactNumUpdates(org.apache.spark.api.java.JavaRDD<org.apache.avro.generic.GenericRecord> updates,
org.apache.spark.api.java.JavaSparkContext jsc,
long totalRecordsRequired)
Copyright © 2023 The Apache Software Foundation. All rights reserved.