public class SparkDataSourceContinuousIngestTool
extends Object
Sample command
./bin/spark-submit --packages org.apache.spark:spark-avro_2.11:2.4.4 --driver-memory 4g --executor-memory 4g \
--conf spark.serializer=org.apache.spark.serializer.KryoSerializer --conf spark.sql.catalogImplementation=hive \
--class org.apache.hudi.integ.testsuite.SparkDataSourceContinuousIngestTool \
${HUDI_ROOT_DIR}/packaging/hudi-integ-test-bundle/target/hudi-integ-test-bundle-0.11.0-SNAPSHOT.jar \
--source-path file:${SOURCE_DIR}/spark_ds_continuous --checkpoint-file-path /tmp/hudi/checkpoint \
--base-path file:///tmp/hudi/tbl_path/ --props /tmp/hudi_props.out
Contents of the properties file passed via --props (/tmp/hudi_props.out in the sample command):
hoodie.insert.shuffle.parallelism=4
hoodie.upsert.shuffle.parallelism=4
hoodie.bulkinsert.shuffle.parallelism=4
hoodie.delete.shuffle.parallelism=4
hoodie.datasource.write.recordkey.field=VendorID
hoodie.datasource.write.partitionpath.field=date_col
hoodie.datasource.write.operation=upsert
hoodie.datasource.write.precombine.field=tpep_pickup_datetime
hoodie.metadata.enable=false
hoodie.table.name=hudi_tbl