trait IngestionJob extends SparkJob
- Alphabetic
- By Inheritance
- IngestionJob
- SparkJob
- JobBase
- StrictLogging
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Abstract Value Members
- abstract def domain: Domain
-
abstract
def
ingest(dataset: DataFrame): (RDD[_], RDD[_])
ingestion algorithm
ingestion algorithm
- Attributes
- protected
-
abstract
def
loadDataSet(): Try[DataFrame]
Dataset loading strategy (JSON / CSV / ...)
Dataset loading strategy (JSON / CSV / ...)
- returns
Spark DataFrame loaded using metadata options
- Attributes
- protected
-
abstract
def
name: String
- Definition Classes
- JobBase
- abstract def options: Map[String, String]
- abstract def path: List[Path]
- abstract def schema: Schema
- abstract def schemaHandler: SchemaHandler
-
implicit abstract
def
settings: Settings
- Definition Classes
- JobBase
- abstract def storageHandler: StorageHandler
- abstract def types: List[Type]
Concrete Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
analyze(fullTableName: String): Any
- Attributes
- protected
- Definition Classes
- SparkJob
-
def
applyIgnore(dfIn: DataFrame): Dataset[Row]
- Attributes
- protected
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
def
createSparkViews(views: Views, sqlParameters: Map[String, String]): Unit
- Attributes
- protected
- Definition Classes
- SparkJob
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
- lazy val extension: String
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
val
flatRowValidator: GenericRowValidator
- Attributes
- protected
- lazy val format: String
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
- def getWriteMode(): WriteMode
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
val
logger: Logger
- Attributes
- protected
- Definition Classes
- StrictLogging
-
lazy val
metadata: Metadata
Merged metadata
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
- val now: Timestamp
-
def
parseViewDefinition(valueWithEnv: String): (SinkType, Option[JdbcConfigName], String)
- valueWithEnv
in the form [SinkType:[configName:]]viewName
- returns
(SinkType, configName, viewName)
- Attributes
- protected
- Definition Classes
- JobBase
-
def
partitionDataset(dataset: DataFrame, partition: List[String]): DataFrame
- Attributes
- protected
- Definition Classes
- SparkJob
-
def
partitionedDatasetWriter(dataset: DataFrame, partition: List[String]): DataFrameWriter[Row]
Partition a dataset using dataset columns.
Partition a dataset using dataset columns. To partition the dataset using the ingestion time, use the reserved column names :
- comet_date
- comet_year
- comet_month
- comet_day
- comet_hour
- comet_minute These columns are renamed to "date", "year", "month", "day", "hour", "minute" in the dataset and their values are set to the current date/time.
- dataset
: Input dataset
- partition
: list of columns to use for partitioning.
- returns
A DataFrameWriter[Row] partitioned using the specified columns
- Attributes
- protected
- Definition Classes
- SparkJob
-
def
registerUdf(udf: String): Unit
- Attributes
- protected
- Definition Classes
- SparkJob
- def reorderAttributes(dataFrame: DataFrame): List[Attribute]
-
def
run(): Try[JobResult]
Main entry point as required by the Spark Job interface
Main entry point as required by the Spark Job interface
- returns
The result of the job execution, wrapped in a Try[JobResult]
- Definition Classes
- IngestionJob → JobBase
-
def
saveAccepted(dataframe: DataFrame): (DataFrame, Path)
Merge new and existing datasets if required. Save using Overwrite / Append mode.
Merge new and existing datasets if required. Save using Overwrite / Append mode.
- Attributes
- protected
-
def
saveRejected(rejectedRDD: RDD[String]): Try[Path]
- Attributes
- protected
-
lazy val
session: SparkSession
- Definition Classes
- SparkJob
-
lazy val
sparkEnv: SparkEnv
- Definition Classes
- SparkJob
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
def
toString(): String
- Definition Classes
- AnyRef → Any
-
val
treeRowValidator: GenericRowValidator
- Attributes
- protected
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()