public abstract class Corpus extends Object
| 限定符和类型 | 字段和说明 |
|---|---|
protected File |
cacheFile |
protected Config |
config |
protected Charset |
encoding |
protected boolean |
eoc |
protected int[] |
table |
protected int |
trainWords |
protected VocabWord[] |
vocab |
protected Map<String,Integer> |
vocabIndexMap |
protected int |
vocabMaxSize |
protected int |
vocabSize |
| 限定符和类型 | 方法和说明 |
|---|---|
protected int |
addWordToVocab(String word)
Adds a word to the vocabulary
|
void |
close()
Closes the corpus; once closed, it can no longer be read.
|
boolean |
endOfCorpus() |
int |
getTrainWords() |
VocabWord[] |
getVocab() |
Map<String,Integer> |
getVocabIndexMap() |
int |
getVocabSize() |
abstract String |
nextWord()
Read the next word from the corpus
|
int |
readWordIndex() |
void |
rewind(int numThreads,
int id)
Resets the current corpus to its initial state.
|
void |
shutdown() |
protected File cacheFile
protected Config config
protected int trainWords
protected int vocabSize
protected int vocabMaxSize
protected VocabWord[] vocab
protected boolean eoc
protected Charset encoding
protected int[] table
public Corpus(Config config) throws IOException
Throws: IOException
public Corpus(Corpus cloneSrc) throws IOException
Throws: IOException
public boolean endOfCorpus()
protected int addWordToVocab(String word)
Parameters: word - the word to add to the vocabulary
public int getTrainWords()
public int getVocabSize()
public VocabWord[] getVocab()
public void rewind(int numThreads,
int id)
throws IOException
Throws: IOException
public int readWordIndex()
throws IOException
Throws: IOException
public abstract String nextWord() throws IOException
Throws: IOException
public void close()
throws IOException
Throws: IOException
public void shutdown()
throws IOException
Throws: IOException
Copyright © 2014–2017 码农场. All rights reserved.