case class Schema(name: String, pattern: Pattern, attributes: List[Attribute], metadata: Option[Metadata], merge: Option[MergeOptions], comment: Option[String], presql: Option[List[String]], postsql: Option[List[String]] = None, tags: Option[Set[String]] = None, rls: Option[List[RowLevelSecurity]] = None, assertions: Option[Map[String, String]] = None, primaryKey: Option[List[String]] = None) extends Product with Serializable

Dataset Schema

name

: Schema name, must be unique among all the schemas belonging to the same domain. Will become the hive table name On Premise or BigQuery Table name on GCP.

pattern

: filename pattern to which this schema must be applied. This instructs the framework to use this schema to parse any file with a filename that matches this pattern.

attributes

: Attributes parsing rules. See :ref:attribute_concept

metadata

: Dataset metadata See :ref:metadata_concept

comment

: free text

presql

: Reserved for future use.

postsql

: We use this attribute to execute sql queries after ingestion, before writing the final dataFrame

tags

: Set of strings to attach to this Schema

rls

: Experimental. Row level security applied to this schema. See :ref:rowlevelsecurity_concept

Linear Supertypes
Serializable, Serializable, Product, Equals, AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. Schema
  2. Serializable
  3. Serializable
  4. Product
  5. Equals
  6. AnyRef
  7. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Instance Constructors

  1. new Schema(name: String, pattern: Pattern, attributes: List[Attribute], metadata: Option[Metadata], merge: Option[MergeOptions], comment: Option[String], presql: Option[List[String]], postsql: Option[List[String]] = None, tags: Option[Set[String]] = None, rls: Option[List[RowLevelSecurity]] = None, assertions: Option[Map[String, String]] = None, primaryKey: Option[List[String]] = None)

    name

    : Schema name, must be unique among all the schemas belonging to the same domain. Will become the hive table name On Premise or BigQuery Table name on GCP.

    pattern

    : filename pattern to which this schema must be applied. This instructs the framework to use this schema to parse any file with a filename that matches this pattern.

    attributes

    : Attributes parsing rules. See :ref:attribute_concept

    metadata

    : Dataset metadata See :ref:metadata_concept

    comment

    : free text

    presql

    : Reserved for future use.

    postsql

    : We use this attribute to execute sql queries after ingestion, before writing the final dataFrame

    tags

    : Set of strings to attach to this Schema

    rls

    : Experimental. Row level security applied to this schema. See :ref:rowlevelsecurity_concept

Value Members

  1. final def !=(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  4. def asDot(domain: String, includeAllAttrs: Boolean): String
  5. final def asInstanceOf[T0]: T0
    Definition Classes
    Any
  6. val assertions: Option[Map[String, String]]
  7. val attributes: List[Attribute]
  8. lazy val attributesWithoutScript: List[Attribute]
    Annotations
    @JsonIgnore()
  9. def attributesWithoutScriptedFields(): List[Attribute]
  10. def bqSchema(schemaHandler: SchemaHandler): google.cloud.bigquery.Schema
  11. def checkValidity(domainMetaData: Option[Metadata], schemaHandler: SchemaHandler): Either[List[String], Boolean]

    Check attribute definition correctness :

    Check attribute definition correctness :

    • schema name should be a valid table identifier
    • attribute name should be a valid Hive column identifier
    • attribute name can occur only once in the schema
    returns

    error list or true

  12. def clone(): AnyRef
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  13. val comment: Option[String]
  14. def continuousAttrs(schemaHandler: SchemaHandler): List[Attribute]
  15. def discreteAttrs(schemaHandler: SchemaHandler): List[Attribute]
  16. final def eq(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  17. def finalSparkSchema(schemaHandler: SchemaHandler): StructType

    This Schema as a Spark Catalyst Schema, with renamed attributes

    This Schema as a Spark Catalyst Schema, with renamed attributes

    returns

    Spark Catalyst Schema

  18. def finalize(): Unit
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  19. final def getClass(): Class[_]
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  20. final def isInstanceOf[T0]: Boolean
    Definition Classes
    Any
  21. def mapping(template: Option[String], domainName: String, schemaHandler: SchemaHandler)(implicit settings: Settings): String
  22. val merge: Option[MergeOptions]
  23. def mergedMetadata(domainMetadata: Option[Metadata]): Metadata
  24. val metadata: Option[Metadata]
  25. val name: String
  26. final def ne(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  27. final def notify(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  28. final def notifyAll(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  29. val pattern: Pattern
  30. val postsql: Option[List[String]]
  31. val presql: Option[List[String]]
  32. val primaryKey: Option[List[String]]
  33. def renamedAttributes(): List[(String, String)]

    return the list of renamed attributes

    return the list of renamed attributes

    returns

    list of tuples (oldname, newname)

  34. val rls: Option[List[RowLevelSecurity]]
  35. def sparkSchema(schemaHandler: SchemaHandler): StructType

    This Schema as a Spark Catalyst Schema

    This Schema as a Spark Catalyst Schema

    returns

    Spark Catalyst Schema

  36. def sparkSchemaWithoutScriptedFields(schemaHandler: SchemaHandler): StructType

    This Schema as a Spark Catalyst Schema, without scripted fields

    This Schema as a Spark Catalyst Schema, without scripted fields

    returns

    Spark Catalyst Schema

  37. final def synchronized[T0](arg0: ⇒ T0): T0
    Definition Classes
    AnyRef
  38. val tags: Option[Set[String]]
  39. def validatePartitionColumns(): Boolean

    returns

    Are the partition columns defined in the metadata valid column names?

  40. final def wait(): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  41. final def wait(arg0: Long, arg1: Int): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  42. final def wait(arg0: Long): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()

Inherited from Serializable

Inherited from Serializable

Inherited from Product

Inherited from Equals

Inherited from AnyRef

Inherited from Any

Ungrouped