case class Metadata(mode: Option[Mode] = None, format: Option[Format] = None, encoding: Option[String] = None, multiline: Option[Boolean] = None, array: Option[Boolean] = None, withHeader: Option[Boolean] = None, separator: Option[String] = None, quote: Option[String] = None, escape: Option[String] = None, write: Option[WriteMode] = None, partition: Option[Partition] = None, sink: Option[Sink] = None, ignore: Option[String] = None, clustering: Option[Seq[String]] = None, xml: Option[Map[String, String]] = None) extends Product with Serializable
Specify Schema properties. These properties may be specified at the schema or domain level. Any property not specified at the schema level is taken from the one specified at the domain level, or else the default value is returned.
- mode
: FILE mode by default. FILE and STREAM are the two accepted values. FILE is currently the only supported mode.
- format
: DSV by default. Supported file formats are :
- DSV : Delimiter-separated values file. The delimiter value is specified in the "separator" field.
- POSITION : FIXED format file where values are located at an exact position in each line.
- SIMPLE_JSON : For optimisation purpose, we differentiate JSON with top level values from JSON with deep level fields. SIMPLE_JSON are JSON files with top level fields only.
- JSON : Deep JSON file. Use only when your json documents contain subdocuments, otherwise prefer to use SIMPLE_JSON since it is much faster.
- XML : XML files
- encoding
: UTF-8 if not specified.
- multiline
: Are JSON objects on a single line or on multiple lines? Single line by default: false means single line, and false also means faster parsing.
- array
: Is the JSON stored as a single object array? false by default, meaning that by default there is one JSON document per line.
- withHeader
: Does the dataset have a header? true by default.
- separator
: The values delimiter, ';' by default. The value may be a multi-character string starting from Spark 3.
- quote
: The String quote char, '"' by default
- escape
: escaping char '\' by default
- write
: Write mode, APPEND by default
- partition
: Partition columns, no partitioning by default
- sink
: Should the dataset be indexed in Elasticsearch after ingestion?
- ignore
: Pattern to ignore or UDF to apply to ignore some lines
- clustering
: List of attributes to use for clustering
- xml
: com.databricks.spark.xml options to use (e.g. rowTag)
- Alphabetic
- By Inheritance
- Metadata
- Serializable
- Serializable
- Product
- Equals
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Instance Constructors
-
new
Metadata(mode: Option[Mode] = None, format: Option[Format] = None, encoding: Option[String] = None, multiline: Option[Boolean] = None, array: Option[Boolean] = None, withHeader: Option[Boolean] = None, separator: Option[String] = None, quote: Option[String] = None, escape: Option[String] = None, write: Option[WriteMode] = None, partition: Option[Partition] = None, sink: Option[Sink] = None, ignore: Option[String] = None, clustering: Option[Seq[String]] = None, xml: Option[Map[String, String]] = None)
- mode
: FILE mode by default. FILE and STREAM are the two accepted values. FILE is currently the only supported mode.
- format
: DSV by default. Supported file formats are :
- DSV : Delimiter-separated values file. The delimiter value is specified in the "separator" field.
- POSITION : FIXED format file where values are located at an exact position in each line.
- SIMPLE_JSON : For optimisation purpose, we differentiate JSON with top level values from JSON with deep level fields. SIMPLE_JSON are JSON files with top level fields only.
- JSON : Deep JSON file. Use only when your json documents contain subdocuments, otherwise prefer to use SIMPLE_JSON since it is much faster.
- XML : XML files
- encoding
: UTF-8 if not specified.
- multiline
: Are JSON objects on a single line or on multiple lines? Single line by default: false means single line, and false also means faster parsing.
- array
: Is the JSON stored as a single object array? false by default, meaning that by default there is one JSON document per line.
- withHeader
: Does the dataset have a header? true by default.
- separator
: The values delimiter, ';' by default. The value may be a multi-character string starting from Spark 3.
- quote
: The String quote char, '"' by default
- escape
: escaping char '\' by default
- write
: Write mode, APPEND by default
- partition
: Partition columns, no partitioning by default
- sink
: Should the dataset be indexed in Elasticsearch after ingestion?
- ignore
: Pattern to ignore or UDF to apply to ignore some lines
- clustering
: List of attributes to use for clustering
- xml
: com.databricks.spark.xml options to use (e.g. rowTag)
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
- val array: Option[Boolean]
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
- def checkValidity(schemaHandler: SchemaHandler): Either[List[String], Boolean]
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
- val clustering: Option[Seq[String]]
- val encoding: Option[String]
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
- val escape: Option[String]
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
- val format: Option[Format]
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
- def getEncoding(): String
- def getEscape(): String
- def getFormat(): Format
- def getMode(): Mode
- def getMultiline(): Boolean
-
def
getPartitionAttributes(): List[String]
- Annotations
- @JsonIgnore()
- def getQuote(): String
-
def
getSamplingStrategy(): Double
- Annotations
- @JsonIgnore()
- def getSeparator(): String
- def getSink(): Option[Sink]
- def getWrite(): WriteMode
- val ignore: Option[String]
-
def
import(child: Metadata): Metadata
Merge this metadata with its child.
Merge this metadata with its child. Any property defined at the child level overrides the one defined at this level. This allows a schema to override the domain metadata attributes. Applied to a domain-level metadata.
- child
: Schema level metadata
- returns
the metadata resulting of the merge of the schema and the domain metadata.
- def isArray(): Boolean
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
- def isWithHeader(): Boolean
-
def
merge[T](parent: Option[T], child: Option[T]): Option[T]
Merge a single attribute
Merge a single attribute
- parent
: Domain level metadata attribute
- child
: Schema level metadata attribute
- returns
the child (schema) attribute if it is defined, the parent (domain) attribute otherwise.
- Attributes
- protected
- val mode: Option[Mode]
- val multiline: Option[Boolean]
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
- val partition: Option[Partition]
- val quote: Option[String]
- val separator: Option[String]
- val sink: Option[Sink]
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
def
toString(): String
- Definition Classes
- Metadata → AnyRef → Any
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
- val withHeader: Option[Boolean]
- val write: Option[WriteMode]
- val xml: Option[Map[String, String]]