trait

com.ebiznext.comet.job.ingest

IngestionJob

trait IngestionJob extends SparkJob

Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. IngestionJob
  2. SparkJob
  3. JobBase
  4. StrictLogging
  5. AnyRef
  6. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Type Members

  1. type JdbcConfigName = String
    Definition Classes
    JobBase

Abstract Value Members

  1. abstract def domain: Domain
  2. abstract def ingest(dataset: DataFrame): (RDD[_], RDD[_])

    ingestion algorithm

    ingestion algorithm

    Attributes
    protected
  3. abstract def loadDataSet(): Try[DataFrame]

    Dataset loading strategy (JSON / CSV / ...)

    Dataset loading strategy (JSON / CSV / ...)

    returns

    Spark Dataframe loaded using metadata options

    Attributes
    protected
  4. abstract def name: String
    Definition Classes
    JobBase
  5. abstract def options: Map[String, String]
  6. abstract def path: List[Path]
  7. abstract def schema: Schema
  8. abstract def schemaHandler: SchemaHandler
  9. implicit abstract def settings: Settings
    Definition Classes
    JobBase
  10. abstract def storageHandler: StorageHandler
  11. abstract def types: List[Type]

Concrete Value Members

  1. final def !=(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  4. def analyze(fullTableName: String): Any
    Attributes
    protected
    Definition Classes
    SparkJob
  5. def applyIgnore(dfIn: DataFrame): Dataset[Row]
    Attributes
    protected
  6. final def asInstanceOf[T0]: T0
    Definition Classes
    Any
  7. def clone(): AnyRef
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  8. def createSparkViews(views: Views, sqlParameters: Map[String, String]): Unit
    Attributes
    protected
    Definition Classes
    SparkJob
  9. final def eq(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  10. def equals(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  11. lazy val extension: String
  12. def finalize(): Unit
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  13. val flatRowValidator: GenericRowValidator
    Attributes
    protected
  14. lazy val format: String
  15. final def getClass(): Class[_]
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  16. def getWriteMode(): WriteMode
  17. def hashCode(): Int
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  18. final def isInstanceOf[T0]: Boolean
    Definition Classes
    Any
  19. val logger: Logger
    Attributes
    protected
    Definition Classes
    StrictLogging
  20. lazy val metadata: Metadata

    Merged metadata

  21. final def ne(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  22. final def notify(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  23. final def notifyAll(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  24. val now: Timestamp
  25. def parseViewDefinition(valueWithEnv: String): (SinkType, Option[JdbcConfigName], String)

    valueWithEnv

    in the form [SinkType:[configName:]]viewName

    returns

    (SinkType, configName, viewName)

    Attributes
    protected
    Definition Classes
    JobBase
  26. def partitionDataset(dataset: DataFrame, partition: List[String]): DataFrame
    Attributes
    protected
    Definition Classes
    SparkJob
  27. def partitionedDatasetWriter(dataset: DataFrame, partition: List[String]): DataFrameWriter[Row]

    Partition a dataset using dataset columns.

    Partition a dataset using dataset columns. To partition the dataset using the ingestion time, use the reserved column names :

    • comet_date
    • comet_year
    • comet_month
    • comet_day
    • comet_hour
    • comet_minute These columns are renamed to "date", "year", "month", "day", "hour", "minute" in the dataset and their values are set to the current date/time.
    dataset

    : Input dataset

    partition

    : list of columns to use for partitioning.

    returns

    A DataFrameWriter[Row] configured to partition the dataset by the given columns

    Attributes
    protected
    Definition Classes
    SparkJob
  28. def registerUdf(udf: String): Unit
    Attributes
    protected
    Definition Classes
    SparkJob
  29. def reorderAttributes(dataFrame: DataFrame): List[Attribute]
  30. def run(): Try[JobResult]

    Main entry point as required by the Spark Job interface

    Main entry point as required by the Spark Job interface

    returns

    : A Try wrapping the JobResult of the ingestion run

    Definition Classes
    IngestionJob → JobBase
  31. def saveAccepted(dataframe: DataFrame): (DataFrame, Path)

    Merge new and existing dataset if required Save using overwrite / Append mode

    Merge new and existing dataset if required Save using overwrite / Append mode

    Attributes
    protected
  32. def saveRejected(rejectedRDD: RDD[String]): Try[Path]
    Attributes
    protected
  33. lazy val session: SparkSession
    Definition Classes
    SparkJob
  34. lazy val sparkEnv: SparkEnv
    Definition Classes
    SparkJob
  35. final def synchronized[T0](arg0: ⇒ T0): T0
    Definition Classes
    AnyRef
  36. def toString(): String
    Definition Classes
    AnyRef → Any
  37. val treeRowValidator: GenericRowValidator
    Attributes
    protected
  38. final def wait(): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  39. final def wait(arg0: Long, arg1: Int): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  40. final def wait(arg0: Long): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()

Inherited from SparkJob

Inherited from JobBase

Inherited from StrictLogging

Inherited from AnyRef

Inherited from Any

Ungrouped