object SchemaUtils
Utilities for working with the Spark SQL component.
Linear Supertypes
Ordering
- Alphabetic
- By Inheritance
Inherited
- SchemaUtils
- AnyRef
- Any
- Hide All
- Show All
Visibility
- Public
- All
Type Members
- case class FieldWithOrder(field: StructField, order: Iterable[Any]) extends Product with Serializable
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
- def appendFlattenedStructsToDataFrame(df: DataFrame, prefixForNewColumns: String): DataFrame
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
def
collectMaxElementSizes(rdd: RDD[Row], schema: StructType): Array[Int]
Collect max size of each element in DataFrame.
Collect the max size of each element in the DataFrame: for arrays, the max array size; for vectors, the max vector size; for simple types, 1.
- returns
array containing size of each element
- def collectVectorLikeTypes(flatSchema: StructType): Seq[Int]
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
expandedSchema(flatSchema: StructType, elemMaxSizes: Array[Int]): Seq[StructField]
Returns expanded schema
Returns the expanded schema, where:
- schema is represented as list of types
- all arrays are expanded into columns based on the longest one
- all vectors are expanded into columns based on the longest one
- flatSchema
flat schema of the Spark DataFrame
- returns
list of types with their positions
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
- def flattenDataFrame(df: DataFrame, flatSchema: StructType): DataFrame
- def flattenDataFrame(df: DataFrame): DataFrame
- def flattenSchema(df: DataFrame): StructType
- def flattenStructsInDataFrame(df: DataFrame): DataFrame
- def flattenStructsInSchema(schema: StructType, sourceColPrefix: Option[String] = None, targetColPrefix: Option[String] = None, nullable: Boolean = false): Seq[(StructField, String)]
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
- def rowsToRowSchemas(df: DataFrame): Dataset[ArrayBuffer[FieldWithOrder]]
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
def
toString(): String
- Definition Classes
- AnyRef → Any
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()