class Dictionary extends Serializable
Class that helps build a dictionary, either from tokenized text or from a saved dictionary.
- Alphabetic
- By Inheritance
- Dictionary
- Serializable
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Instance Constructors
- new Dictionary(directory: String)
- new Dictionary(sentences: Stream[Array[String]], vocabSize: Int)
- new Dictionary(words: Array[String], vocabSize: Int)
- new Dictionary(sentences: Iterator[Array[String]], vocabSize: Int)
- new Dictionary(dataset: RDD[Array[String]], vocabSize: Int)
- new Dictionary()
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
def
clone(): AnyRef
- Attributes
- protected[java.lang]
- Definition Classes
- AnyRef
- Annotations
- @native() @throws( ... )
-
def
discardVocab(): Array[String]
Return the array of all discarded words.
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
finalize(): Unit
- Attributes
- protected[java.lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
getDiscardSize(): Int
Select the words with the top-k frequencies and discard the remaining words.
Select the words with the top-k frequencies and discard the remaining words. Return the number of discarded words.
-
def
getIndex(word: String): Int
Return the encoding number of a word. If the word does not exist in the dictionary, the dictionary length is returned as the default index.
-
def
getVocabSize(): Int
The length of the vocabulary
-
def
getWord(index: Int): String
Return the word for the given index. If the index is out of bounds, a word is returned at random from the discarded word list.
Return the word for the given index. If the index is out of bounds, a word is returned at random from the discarded word list. If the discarded word list is empty, a word is returned at random from the existing dictionary.
- def getWord(index: Double): String
- def getWord(index: Float): String
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
- def index2Word(): Map[Int, String]
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
def
print(): Unit
Print the word-to-index dictionary.
-
def
printDiscard(): Unit
Print the dictionary of discarded words.
-
def
save(saveFolder: String): Unit
Save the dictionary and the discarded words to the saveFolder directory.
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
def
toString(): String
- Definition Classes
- AnyRef → Any
-
def
vocabulary(): Array[String]
Return the array of all selected words.
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @native() @throws( ... )
-
def
word2Index(): Map[String, Int]
Word encoding by its index in the dictionary