public class TopicModel extends Object implements org.apache.hadoop.conf.Configurable, Iterable<MatrixSlice>
Matrix of counts of occurrences of (topic, term) pairs. Dividing
{@code topicTermCount.viewRow(topic).get(term)} by the sum over the values for all terms in that
row yields p(term | topic). Dividing it instead by the sum over all topics for that term yields
p(topic | term).
Multithreading is enabled for the update(Matrix) method: this method is async, and
merely submits the matrix to a work queue. When all work has been submitted,
awaitTermination() should be called, which will block until updates have been
accumulated.

| Constructor and Description |
|---|
TopicModel(org.apache.hadoop.conf.Configuration conf,
double eta,
double alpha,
String[] dictionary,
int numThreads,
double modelWeight,
org.apache.hadoop.fs.Path... modelpath) |
TopicModel(int numTopics,
int numTerms,
double eta,
double alpha,
Random random,
String[] dictionary,
int numThreads,
double modelWeight) |
TopicModel(int numTopics,
int numTerms,
double eta,
double alpha,
String[] dictionary,
double modelWeight) |
TopicModel(int numTopics,
int numTerms,
double eta,
double alpha,
String[] dictionary,
int numThreads,
double modelWeight) |
TopicModel(Matrix topicTermCounts,
double eta,
double alpha,
String[] dictionary,
int numThreads,
double modelWeight) |
TopicModel(Matrix topicTermCounts,
Vector topicSums,
double eta,
double alpha,
String[] dictionary,
double modelWeight) |
TopicModel(Matrix topicTermCounts,
Vector topicSums,
double eta,
double alpha,
String[] dictionary,
int numThreads,
double modelWeight) |
| Modifier and Type | Method and Description |
|---|---|
org.apache.hadoop.conf.Configuration |
getConf() |
int |
getNumTerms() |
int |
getNumTopics() |
Vector |
infer(Vector original,
Vector docTopics) |
Iterator<MatrixSlice> |
iterator() |
static Pair<Matrix,Vector> |
loadModel(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path... modelPaths) |
double |
perplexity(Vector document,
Vector docTopics)
\(\sum_x \sum_a \left(c_{ai} \cdot \log\left(p(x|i) \cdot p(a|x)\right)\right)\)
|
void |
persist(org.apache.hadoop.fs.Path outputDir,
boolean overwrite) |
void |
renormalize() |
void |
reset() |
int |
sampleTerm(int topic) |
int |
sampleTerm(Vector topicDistribution) |
void |
setConf(org.apache.hadoop.conf.Configuration configuration) |
void |
stop() |
Vector |
topicSums() |
String |
toString() |
void |
trainDocTopicModel(Vector original,
Vector topics,
Matrix docTopicModel) |
void |
update(int termId,
Vector topicCounts) |
void |
update(Matrix docTopicCounts) |
void |
updateTopic(int topic,
Vector docTopicCounts) |
static String |
vectorToSortedString(Vector vector,
String[] dictionary) |
Methods inherited from class Object: clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
Methods inherited from interface Iterable: forEach, spliterator
public TopicModel(int numTopics,
int numTerms,
double eta,
double alpha,
String[] dictionary,
double modelWeight)
public TopicModel(org.apache.hadoop.conf.Configuration conf,
double eta,
double alpha,
String[] dictionary,
int numThreads,
double modelWeight,
org.apache.hadoop.fs.Path... modelpath)
throws IOException
Throws: IOException
public TopicModel(int numTopics,
int numTerms,
double eta,
double alpha,
String[] dictionary,
int numThreads,
double modelWeight)
public TopicModel(int numTopics,
int numTerms,
double eta,
double alpha,
Random random,
String[] dictionary,
int numThreads,
double modelWeight)
public TopicModel(Matrix topicTermCounts, Vector topicSums, double eta, double alpha, String[] dictionary, double modelWeight)
public TopicModel(Matrix topicTermCounts, double eta, double alpha, String[] dictionary, int numThreads, double modelWeight)
public int getNumTerms()
public int getNumTopics()
public Iterator<MatrixSlice> iterator()
Specified by: iterator in interface Iterable<MatrixSlice>
public Vector topicSums()
public static Pair<Matrix,Vector> loadModel(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path... modelPaths) throws IOException
Throws: IOException
public int sampleTerm(Vector topicDistribution)
public int sampleTerm(int topic)
public void reset()
public void stop()
public void renormalize()
public void trainDocTopicModel(Vector original, Vector topics, Matrix docTopicModel)
public void update(Matrix docTopicCounts)
public void updateTopic(int topic,
Vector docTopicCounts)
public void update(int termId,
Vector topicCounts)
public void persist(org.apache.hadoop.fs.Path outputDir,
boolean overwrite)
throws IOException
Throws: IOException
public double perplexity(Vector document, Vector docTopics)
public void setConf(org.apache.hadoop.conf.Configuration configuration)
Specified by: setConf in interface org.apache.hadoop.conf.Configurable
public org.apache.hadoop.conf.Configuration getConf()
Specified by: getConf in interface org.apache.hadoop.conf.Configurable

Copyright © 2008–2017 The Apache Software Foundation. All rights reserved.