class InferSchemaJob extends AnyRef
Infers the schema of a given data path, domain name, and schema name.
- Alphabetic
- By Inheritance
- InferSchemaJob
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
def
createDataFrameWithFormat(lines: List[String], dataPath: String, header: Boolean): DataFrame
Create the dataframe with its associated format
Create the dataframe with its associated format
- lines
: list of lines read from file
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
getDomainDirectoryName(path: Path): String
Get domain directory name
Get domain directory name
- path
: file path
- returns
the domain directory name
-
def
getFormatFile(lines: List[String]): String
Get the file format by using the first and the last line of the dataset. We use mapPartitionsWithIndex to retrieve this information, to make sure that the first line really corresponds to the first line (and likewise for the last)
Get the file format by using the first and the last line of the dataset. We use mapPartitionsWithIndex to retrieve this information, to make sure that the first line really corresponds to the first line (and likewise for the last)
- lines
: list of lines read from file
-
def
getSchemaPattern(path: Path): String
Get schema pattern
Get schema pattern
- path
: file path
- returns
the schema pattern
-
def
getSeparator(lines: List[String]): String
Get the file separator by taking the character that appears most frequently in 10 lines of the dataset
Get the file separator by taking the character that appears most frequently in 10 lines of the dataset
- lines
: list of lines read from file
- returns
the file separator
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
infer(domainName: String, schemaName: String, dataPath: String, savePath: String, header: Boolean): Try[Unit]
Forces any Spark job to implement its entry point within the "run" method
Forces any Spark job to implement its entry point within the "run" method
- returns
: a Try[Unit] indicating whether the job succeeded or failed
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
- def name: String
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
def
readFile(path: Path): Dataset[String]
Read file without specifying the format
Read file without specifying the format
- path
: file path
- returns
a dataset of string that contains data file
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
def
toString(): String
- Definition Classes
- AnyRef → Any
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()