public class Word2VecTrainer extends Object
| 构造器和说明 |
|---|
Word2VecTrainer() |
| 限定符和类型 | 方法和说明 |
|---|---|
void |
setCallback(TrainingCallback callback)
设置训练回调
|
Word2VecTrainer |
setDownSamplingRate(float downSampleRate)
设置高频词的下采样频率(高频词频率一旦高于此频率,训练时将被随机忽略),在不使用停用词词典的情况下,停用词就符合高频词的标准
默认 1e-3, 常用取值区间为 (0, 1e-5)
|
Word2VecTrainer |
setInitialLearningRate(float initialLearningRate)
设置初始学习率
skip-gram 默认 0.025 ,CBOW 默认 0.05
|
Word2VecTrainer |
setLayerSize(int layerSize)
词向量的维度(等同于神经网络模型隐藏层的大小)
默认 100
|
Word2VecTrainer |
setMinVocabFrequency(int minFrequency)
最低词频,低于此数值将被过滤掉
默认 5
|
Word2VecTrainer |
setNumIterations(int iterations)
设置迭代次数
|
Word2VecTrainer |
setWindowSize(int windowSize)
窗口大小
默认 5
|
WordVectorModel |
train(String trainFileName,
String modelFileName)
执行训练
|
Word2VecTrainer |
type(NeuralNetworkType type)
神经网络类型
|
Word2VecTrainer |
useHierarchicalSoftmax()
启用 hierarchical softmax
默认关闭
|
Word2VecTrainer |
useNegativeSamples(int negativeSamples)
负采样样本数
一般在 5 到 10 之间
默认 0
|
Word2VecTrainer |
useNumThreads(int numThreads)
并行化训练线程数
默认
Runtime.availableProcessors() |
public void setCallback(TrainingCallback callback)
callback - 回调接口
public Word2VecTrainer setLayerSize(int layerSize)
默认 100
public Word2VecTrainer setWindowSize(int windowSize)
默认 5
public Word2VecTrainer useNumThreads(int numThreads)
public Word2VecTrainer type(NeuralNetworkType type)
参见 {@link NeuralNetworkType}
默认 {@link NeuralNetworkType#SKIP_GRAM}
public Word2VecTrainer useHierarchicalSoftmax()
默认关闭
public Word2VecTrainer useNegativeSamples(int negativeSamples)
默认 0
public Word2VecTrainer setMinVocabFrequency(int minFrequency)
默认 5
public Word2VecTrainer setInitialLearningRate(float initialLearningRate)
skip-gram 默认 0.025 ,CBOW 默认 0.05
public Word2VecTrainer setDownSamplingRate(float downSampleRate)
默认 1e-3, 常用取值区间为 (0, 1e-5)
public Word2VecTrainer setNumIterations(int iterations)
public WordVectorModel train(String trainFileName, String modelFileName)
trainFileName - 输入语料文件
modelFileName - 输出模型路径
Copyright © 2014–2018 码农场. All rights reserved.