c

com.ebiznext.comet.job.ingest

KafkaIngestionJob

class KafkaIngestionJob extends JsonIngestionJob

Main class to ingest JSON messages from Kafka

Linear Supertypes
JsonIngestionJob, IngestionJob, SparkJob, JobBase, StrictLogging, AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. KafkaIngestionJob
  2. JsonIngestionJob
  3. IngestionJob
  4. SparkJob
  5. JobBase
  6. StrictLogging
  7. AnyRef
  8. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Instance Constructors

  1. new KafkaIngestionJob(domain: Domain, schema: Schema, types: List[Type], path: List[Path], storageHandler: StorageHandler, schemaHandler: SchemaHandler, options: Map[String, String], mode: Mode)(implicit settings: Settings)

    domain

    : Output Dataset Domain

    schema

    : Topic Name

    types

    : List of globally defined types

    path

    : Unused

    storageHandler

    : Storage Handler

Type Members

  1. type JdbcConfigName = String
    Definition Classes
    JobBase

Value Members

  1. final def !=(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  4. def analyze(fullTableName: String): Any
    Attributes
    protected
    Definition Classes
    SparkJob
  5. def applyIgnore(dfIn: DataFrame): Dataset[Row]
    Attributes
    protected
    Definition Classes
    IngestionJob
  6. final def asInstanceOf[T0]: T0
    Definition Classes
    Any
  7. def clone(): AnyRef
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  8. def createSparkViews(views: Views, sqlParameters: Map[String, String]): Unit
    Attributes
    protected
    Definition Classes
    SparkJob
  9. val domain: Domain
    Definition Classes
    JsonIngestionJob → IngestionJob
  10. final def eq(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  11. def equals(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  12. lazy val extension: String
    Definition Classes
    IngestionJob
  13. def finalize(): Unit
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  14. val flatRowValidator: GenericRowValidator
    Attributes
    protected
    Definition Classes
    IngestionJob
  15. lazy val format: String
    Definition Classes
    IngestionJob
  16. final def getClass(): Class[_]
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  17. def getWriteMode(): WriteMode
    Definition Classes
    IngestionJob
  18. def hashCode(): Int
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  19. def ingest(dataset: DataFrame): (RDD[_], RDD[_])

    Where the magic happens

    Where the magic happens

    dataset

    input dataset as an RDD of strings

    Attributes
    protected
    Definition Classes
    JsonIngestionJob → IngestionJob
  20. final def isInstanceOf[T0]: Boolean
    Definition Classes
    Any
  21. def loadDataSet(): Try[DataFrame]

    Load the JSON as an RDD of String

    Load the JSON as an RDD of String

    returns

    Spark Dataframe loaded using metadata options

    Attributes
    protected
    Definition Classes
    JsonIngestionJob → IngestionJob
  22. def loadJsonData(): Dataset[String]

    Load dataset using spark csv reader and all metadata.

    Load dataset using spark csv reader and all metadata. Does not infer schema. Columns not defined in the schema are dropped from the dataset (requires datasets with a header)

    returns

    Spark DataFrame where each row holds a single string

    Attributes
    protected
    Definition Classes
    KafkaIngestionJob → JsonIngestionJob
  23. val logger: Logger
    Attributes
    protected
    Definition Classes
    StrictLogging
  24. lazy val metadata: Metadata

    Merged metadata

    Merged metadata

    Definition Classes
    IngestionJob
  25. def name: String
    Definition Classes
    JsonIngestionJob → JobBase
  26. final def ne(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  27. final def notify(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  28. final def notifyAll(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  29. val now: Timestamp
    Definition Classes
    IngestionJob
  30. var offsets: List[(Int, Long)]
  31. val options: Map[String, String]
    Definition Classes
    JsonIngestionJob → IngestionJob
  32. def parseViewDefinition(valueWithEnv: String): (SinkType, Option[JdbcConfigName], String)

    valueWithEnv

    in the form [SinkType:[configName:]]viewName

    returns

    (SinkType, configName, viewName)

    Attributes
    protected
    Definition Classes
    JobBase
  33. def partitionDataset(dataset: DataFrame, partition: List[String]): DataFrame
    Attributes
    protected
    Definition Classes
    SparkJob
  34. def partitionedDatasetWriter(dataset: DataFrame, partition: List[String]): DataFrameWriter[Row]

    Partition a dataset using dataset columns.

    Partition a dataset using dataset columns. To partition the dataset using the ingestion time, use the reserved column names :

    • comet_date
    • comet_year
    • comet_month
    • comet_day
    • comet_hour
    • comet_minute These columns are renamed to "date", "year", "month", "day", "hour", "minute" in the dataset and their values are set to the current date/time.
    dataset

    : Input dataset

    partition

    : list of columns to use for partitioning.

    returns

    A DataFrameWriter for the dataset, partitioned by the given columns

    Attributes
    protected
    Definition Classes
    SparkJob
  35. val path: List[Path]
    Definition Classes
    JsonIngestionJob → IngestionJob
  36. def registerUdf(udf: String): Unit
    Attributes
    protected
    Definition Classes
    SparkJob
  37. def reorderAttributes(dataFrame: DataFrame): List[Attribute]
    Definition Classes
    IngestionJob
  38. def run(): Try[JobResult]

    Main entry point as required by the Spark Job interface

    Main entry point as required by the Spark Job interface

    returns

    : The job result, wrapped in a Try

    Definition Classes
    KafkaIngestionJob → IngestionJob → JobBase
  39. def saveAccepted(dataframe: DataFrame): (DataFrame, Path)

    Merge new and existing dataset if required Save using overwrite / Append mode

    Merge new and existing dataset if required Save using overwrite / Append mode

    Attributes
    protected
    Definition Classes
    IngestionJob
  40. def saveRejected(rejectedRDD: RDD[String]): Try[Path]
    Attributes
    protected
    Definition Classes
    IngestionJob
  41. val schema: Schema
    Definition Classes
    JsonIngestionJob → IngestionJob
  42. val schemaHandler: SchemaHandler
    Definition Classes
    JsonIngestionJob → IngestionJob
  43. lazy val schemaSparkType: StructType
    Definition Classes
    JsonIngestionJob
  44. lazy val session: SparkSession
    Definition Classes
    SparkJob
  45. implicit val settings: Settings
    Definition Classes
    JsonIngestionJob → JobBase
  46. lazy val sparkEnv: SparkEnv
    Definition Classes
    SparkJob
  47. val storageHandler: StorageHandler
    Definition Classes
    JsonIngestionJob → IngestionJob
  48. final def synchronized[T0](arg0: ⇒ T0): T0
    Definition Classes
    AnyRef
  49. def toString(): String
    Definition Classes
    AnyRef → Any
  50. val treeRowValidator: GenericRowValidator
    Attributes
    protected
    Definition Classes
    IngestionJob
  51. val types: List[Type]
    Definition Classes
    JsonIngestionJob → IngestionJob
  52. final def wait(): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  53. final def wait(arg0: Long, arg1: Int): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  54. final def wait(arg0: Long): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()

Inherited from JsonIngestionJob

Inherited from IngestionJob

Inherited from SparkJob

Inherited from JobBase

Inherited from StrictLogging

Inherited from AnyRef

Inherited from Any

Ungrouped