class TextClassifier extends Serializable
This example uses a pre-trained GloVe embedding to convert words to vectors, and uses them to train a text classification model on the 20 Newsgroup dataset with 20 different categories. This model can achieve around 90% accuracy after 2 epochs of training.
- Alphabetic
- By Inheritance
- TextClassifier
- Serializable
- Serializable
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Instance Constructors
- new TextClassifier(param: AbstractTextClassificationParams)
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
analyzeTexts(dataRdd: RDD[(String, Float)]): (Map[String, WordMeta], Map[Float, Array[Float]])
Go through the whole data set to gather some meta info for the tokens.
Go through the whole data set to gather some meta info for the tokens. Tokens are discarded if their frequency ranking is less than maxWordsNum.
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
def
buildModel(classNum: Int): Sequential[Float]
Return a text classification model with the specified number of classes
-
def
buildWord2Vec(word2Meta: Map[String, WordMeta]): Map[Float, Array[Float]]
Load the pre-trained word2Vec
Load the pre-trained word2Vec
- returns
A map from word to vector
-
def
buildWord2VecWithIndex(word2Meta: Map[String, Int]): Map[Float, Array[Float]]
Load the pre-trained word2Vec
Load the pre-trained word2Vec
- returns
A map from word to vector
- var classNum: Int
-
def
clone(): AnyRef
- Attributes
- protected[java.lang]
- Definition Classes
- AnyRef
- Annotations
- @native() @throws( ... )
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
finalize(): Unit
- Attributes
- protected[java.lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
getData(sc: SparkContext): (Array[RDD[(Array[Array[Float]], Float)]], Map[String, WordMeta], Map[Float, Array[Float]])
Create train and val RDDs from input
- val gloveDir: String
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
- val log: Logger
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
- val textDataDir: String
-
def
toString(): String
- Definition Classes
- AnyRef → Any
-
def
train(): Unit
Start to train the text classification model
-
def
trainFromData(sc: SparkContext, rdds: Array[RDD[(Array[Array[Float]], Float)]]): Module[Float]
Train the text classification model with train and val RDDs
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @native() @throws( ... )