public class HDFSParquetImporterUtils extends Object implements Serializable
| Constructor and Description |
|---|
| HDFSParquetImporterUtils(String command, String srcPath, String targetPath, String tableName, String tableType, String rowKey, String partitionKey, int parallelism, String schemaFile, int retry, String propsFilePath) |
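For orientation, a minimal construction sketch. All argument values below are hypothetical placeholders, and the import path assumes the class ships in the hudi-utilities module:

```java
import org.apache.hudi.utilities.HDFSParquetImporterUtils; // package path assumed

// All values below are placeholders; adjust to your dataset and cluster.
HDFSParquetImporterUtils importerUtils = new HDFSParquetImporterUtils(
    "insert",                      // command (isUpsert() suggests "upsert" is also accepted)
    "/data/input/parquet",         // srcPath: directory of source parquet files
    "/data/hoodie/table",          // targetPath: base path of the target Hoodie table
    "my_table",                    // tableName
    "COPY_ON_WRITE",               // tableType
    "_row_key",                    // rowKey: record key field
    "timestamp",                   // partitionKey: partition path field
    2,                             // parallelism
    "/config/schema.avsc",         // schemaFile: Avro schema file location
    1,                             // retry
    "/config/importer.properties"  // propsFilePath
);
```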
| Modifier and Type | Method and Description |
|---|---|
| org.apache.spark.api.java.JavaRDD<HoodieRecord<HoodieRecordPayload>> | buildHoodieRecordsForImport(org.apache.spark.api.java.JavaSparkContext jsc, String schemaStr) |
| static TypedProperties | buildProperties(List<String> props) |
| static SparkRDDWriteClient<HoodieRecordPayload> | createHoodieClient(org.apache.spark.api.java.JavaSparkContext jsc, String basePath, String schemaStr, int parallelism, Option<String> compactionStrategyClass, TypedProperties properties) Build Hoodie write client. |
| int | dataImport(org.apache.spark.api.java.JavaSparkContext jsc) |
| int | dataImport(org.apache.spark.api.java.JavaSparkContext jsc, org.apache.hadoop.fs.FileSystem fs) |
| static int | handleErrors(org.apache.spark.api.java.JavaSparkContext jsc, String instantTime, org.apache.spark.api.java.JavaRDD<WriteStatus> writeResponse) |
| boolean | isUpsert() |
| <T extends HoodieRecordPayload> org.apache.spark.api.java.JavaRDD<WriteStatus> | load(SparkRDDWriteClient<T> client, String instantTime, org.apache.spark.api.java.JavaRDD<HoodieRecord<T>> hoodieRecords) Imports records to Hoodie table. |
| static String | parseSchema(org.apache.hadoop.fs.FileSystem fs, String schemaFile) Parse Schema from file. |
| static DFSPropertiesConfiguration | readConfig(org.apache.hadoop.conf.Configuration hadoopConfig, org.apache.hadoop.fs.Path cfgPath, List<String> overriddenProps) |
public boolean isUpsert()
public int dataImport(org.apache.spark.api.java.JavaSparkContext jsc)
public int dataImport(org.apache.spark.api.java.JavaSparkContext jsc,
org.apache.hadoop.fs.FileSystem fs)
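A sketch of invoking the import, assuming an `importerUtils` instance constructed as above and an active `JavaSparkContext`:

```java
import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;

// Assumes `jsc` (JavaSparkContext) and `importerUtils` are in scope.
int exitCode = importerUtils.dataImport(jsc);

// The two-argument overload lets the caller supply the FileSystem explicitly,
// e.g. one derived from the Spark Hadoop configuration:
FileSystem fs = FileSystem.get(jsc.hadoopConfiguration()); // throws IOException
int exitCodeWithFs = importerUtils.dataImport(jsc, fs);
```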
public org.apache.spark.api.java.JavaRDD<HoodieRecord<HoodieRecordPayload>> buildHoodieRecordsForImport(org.apache.spark.api.java.JavaSparkContext jsc,
                                                                                                        String schemaStr)
                                                                                                 throws IOException
Throws:
IOException

public <T extends HoodieRecordPayload> org.apache.spark.api.java.JavaRDD<WriteStatus> load(SparkRDDWriteClient<T> client,
                                                                                           String instantTime,
                                                                                           org.apache.spark.api.java.JavaRDD<HoodieRecord<T>> hoodieRecords)
Imports records to Hoodie table.
Type Parameters:
T - Type
Parameters:
client - Hoodie Client
instantTime - Instant Time
hoodieRecords - Hoodie Records
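A sketch chaining record building and the load step; it assumes `jsc`, `importerUtils`, a schema string from parseSchema, and a write client from createHoodieClient (both shown below) are in scope.

```java
import org.apache.hudi.client.SparkRDDWriteClient;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.spark.api.java.JavaRDD;

// Convert the source parquet files into HoodieRecords.
JavaRDD<HoodieRecord<HoodieRecordPayload>> hoodieRecords =
    importerUtils.buildHoodieRecordsForImport(jsc, schemaStr); // throws IOException

// Start a commit on the write client to obtain the instant time, then import.
String instantTime = client.startCommit();
JavaRDD<WriteStatus> writeResponse = importerUtils.load(client, instantTime, hoodieRecords);
```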
public static TypedProperties buildProperties(List<String> props)
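A buildProperties sketch; the key=value format of each entry is an assumption based on how Hudi utilities commonly pass property overrides.

```java
import java.util.Arrays;
import java.util.List;
import org.apache.hudi.common.config.TypedProperties;

// Each entry is assumed to be a key=value pair.
List<String> overrides = Arrays.asList(
    "hoodie.insert.shuffle.parallelism=2",
    "hoodie.upsert.shuffle.parallelism=2");
TypedProperties props = HDFSParquetImporterUtils.buildProperties(overrides);
```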
public static DFSPropertiesConfiguration readConfig(org.apache.hadoop.conf.Configuration hadoopConfig,
                                                    org.apache.hadoop.fs.Path cfgPath,
                                                    List<String> overriddenProps)
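A readConfig sketch; the config path and the override entry are placeholders.

```java
import java.util.Collections;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.config.DFSPropertiesConfiguration;

// Load a properties file from DFS, with one inline override (placeholder values);
// assumes `jsc` is in scope.
DFSPropertiesConfiguration config = HDFSParquetImporterUtils.readConfig(
    jsc.hadoopConfiguration(),
    new Path("/config/importer.properties"),
    Collections.singletonList("hoodie.datasource.write.operation=insert"));
```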
public static SparkRDDWriteClient<HoodieRecordPayload> createHoodieClient(org.apache.spark.api.java.JavaSparkContext jsc,
                                                                          String basePath,
                                                                          String schemaStr,
                                                                          int parallelism,
                                                                          Option<String> compactionStrategyClass,
                                                                          TypedProperties properties)
Build Hoodie write client.
Parameters:
jsc - Java Spark Context
basePath - Base Path
schemaStr - Schema
parallelism - Parallelism
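A createHoodieClient sketch; Option here is Hudi's org.apache.hudi.common.util.Option, and the base path and parallelism values are placeholders.

```java
import org.apache.hudi.client.SparkRDDWriteClient;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.Option;

// Assumes `jsc` and `schemaStr` are in scope; see parseSchema below for the schema.
SparkRDDWriteClient<HoodieRecordPayload> client = HDFSParquetImporterUtils.createHoodieClient(
    jsc,
    "/data/hoodie/table",  // basePath (placeholder)
    schemaStr,             // Avro schema string
    2,                     // parallelism
    Option.empty(),        // no compaction strategy class
    new TypedProperties());
```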
public static String parseSchema(org.apache.hadoop.fs.FileSystem fs,
                                 String schemaFile)
                          throws Exception
Parse Schema from file.
Parameters:
fs - File System
schemaFile - Schema File
Throws:
Exception
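A parseSchema sketch; the schema file path is a placeholder, and the method declares a broad throws Exception that the caller must handle.

```java
import org.apache.hadoop.fs.FileSystem;

// Assumes `jsc` is in scope; the schema path is a placeholder.
FileSystem fs = FileSystem.get(jsc.hadoopConfiguration());
String schemaStr = HDFSParquetImporterUtils.parseSchema(fs, "/config/schema.avsc"); // throws Exception
```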
public static int handleErrors(org.apache.spark.api.java.JavaSparkContext jsc,
                               String instantTime,
                               org.apache.spark.api.java.JavaRDD<WriteStatus> writeResponse)
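A handleErrors sketch; interpreting a non-zero return as failure is an assumption mirrored from dataImport's int return convention.

```java
// Assumes `jsc`, `instantTime`, and `writeResponse` are in scope (see load above).
int status = HDFSParquetImporterUtils.handleErrors(jsc, instantTime, writeResponse);
if (status != 0) {
  System.err.println("Write errors detected for instant " + instantTime);
}
```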
Copyright © 2023 The Apache Software Foundation. All rights reserved.