protected class REPTree.Tree extends java.lang.Object implements java.io.Serializable, RevisionHandler
| Modifier and Type | Field and Description |
|---|---|
protected int |
m_Attribute
The attribute to split on.
|
protected double[] |
m_ClassProbs
Class probabilities from the training data in the nominal case.
|
protected double[] |
m_Distribution
The (unnormalized) class distribution in the nominal case.
|
protected double[] |
m_HoldOutDist
Class distribution of hold-out set at node in the nominal case.
|
protected double |
m_HoldOutError
The hold-out error of the node.
|
protected Instances |
m_Info
The header information (for printing the tree).
|
protected double[] |
m_Prop
The proportions of training instances going down each branch.
|
protected double |
m_SplitPoint
The split point.
|
protected REPTree.Tree[] |
m_Successors
The subtrees of this tree.
|
| Modifier | Constructor and Description |
|---|---|
protected |
Tree() |
| Modifier and Type | Method and Description |
|---|---|
protected void |
backfitHoldOutSet()
Backfits data from holdout set.
|
protected void |
buildTree(int[][][] sortedIndices,
double[][][] weights,
Instances data,
double totalWeight,
double[] classProbs,
Instances header,
double minNum,
double minVariance,
int depth,
int maxDepth)
Recursively generates a tree.
|
protected double |
distribution(double[][] props,
double[][][] dists,
int att,
int[] sortedIndices,
double[] weights,
double[][] subsetWeights,
Instances data)
Computes class distribution for an attribute.
|
protected double[] |
distributionForInstance(Instance instance)
Computes class distribution of an instance using the tree.
|
protected void |
doSmoothing()
Smooths class probabilities stored at node.
|
protected double |
gain(double[][] dist,
double priorVal)
Computes value of splitting criterion after split.
|
java.lang.String |
getRevision()
Returns the revision string.
|
protected void |
insertHoldOutInstance(Instance inst,
double weight,
REPTree.Tree parent)
Inserts an instance from the hold-out set into the tree.
|
protected void |
insertHoldOutSet(Instances data)
Inserts hold-out set into tree.
|
protected java.lang.String |
leafString(REPTree.Tree parent)
Outputs description of a leaf node.
|
protected double |
numericDistribution(double[][] props,
double[][][] dists,
int att,
int[] sortedIndices,
double[] weights,
double[][] subsetWeights,
Instances data,
double[] vals)
Computes class distribution for an attribute.
|
protected int |
numNodes()
Computes size of the tree.
|
protected double |
priorVal(double[][] dist)
Computes value of splitting criterion before split.
|
protected double |
reducedErrorPrune()
Prunes the tree using the hold-out data (bottom-up).
|
protected double |
singleVariance(double s,
double sS,
double weight)
Computes the variance for a single set.
|
java.lang.String |
sourceExpression(int index)
Returns a string containing java source code equivalent to the test made at this node.
|
protected void |
splitData(int[][][][] subsetIndices,
double[][][][] subsetWeights,
int att,
double splitPoint,
int[][] sortedIndices,
double[][] weights,
Instances data)
Splits instances into subsets.
|
protected int |
toGraph(java.lang.StringBuffer text,
int num,
REPTree.Tree parent)
Outputs one node for graph.
|
java.lang.StringBuffer[] |
toSource(java.lang.String className,
REPTree.Tree parent)
Returns source code for the tree as if-then statements.
|
protected java.lang.String |
toString(int level,
REPTree.Tree parent)
Recursively outputs the tree.
|
protected double |
variance(double[] s,
double[] sS,
double[] sumOfWeights)
Computes variance for subsets.
|
protected Instances m_Info
protected REPTree.Tree[] m_Successors
protected int m_Attribute
protected double m_SplitPoint
protected double[] m_Prop
protected double[] m_ClassProbs
protected double[] m_Distribution
protected double[] m_HoldOutDist
protected double m_HoldOutError
protected double[] distributionForInstance(Instance instance) throws java.lang.Exception
instance - the instance to compute the distribution for
java.lang.Exception - if computation fails
public final java.lang.String sourceExpression(int index)
index - index of the value tested
public java.lang.StringBuffer[] toSource(java.lang.String className,
REPTree.Tree parent)
throws java.lang.Exception
className - the classname that this static classifier has
parent - parent node of the current node
java.lang.Exception - if something goes wrong
protected int toGraph(java.lang.StringBuffer text,
int num,
REPTree.Tree parent)
throws java.lang.Exception
text - the buffer to append the output to
num - the current node id
parent - the parent of the nodes
java.lang.Exception - if something goes wrong
protected java.lang.String leafString(REPTree.Tree parent) throws java.lang.Exception
parent - the parent of the node
java.lang.Exception - if generation fails
protected java.lang.String toString(int level,
REPTree.Tree parent)
level - the current level
parent - the current parent
protected void buildTree(int[][][] sortedIndices,
double[][][] weights,
Instances data,
double totalWeight,
double[] classProbs,
Instances header,
double minNum,
double minVariance,
int depth,
int maxDepth)
throws java.lang.Exception
sortedIndices - the sorted indices of the instances
weights - the weights of the instances
data - the data to work with
totalWeight -
classProbs - the class probabilities
header - the header of the data
minNum - the minimum number of instances in a leaf
minVariance -
depth - the current depth of the tree
maxDepth - the maximum allowed depth of the tree
java.lang.Exception - if generation fails
protected void doSmoothing()
protected int numNodes()
protected void splitData(int[][][][] subsetIndices,
double[][][][] subsetWeights,
int att,
double splitPoint,
int[][] sortedIndices,
double[][] weights,
Instances data)
throws java.lang.Exception
subsetIndices - the sorted indices in the subset
subsetWeights - the weights of the subset
att - the attribute index
splitPoint - the split point for numeric attributes
sortedIndices - the sorted indices of the whole set
weights - the weights of the whole set
data - the data to work with
java.lang.Exception - if something goes wrong
protected double distribution(double[][] props,
double[][][] dists,
int att,
int[] sortedIndices,
double[] weights,
double[][] subsetWeights,
Instances data)
throws java.lang.Exception
props -
dists -
att - the attribute index
sortedIndices - the sorted indices of the instances
weights - the weights of the instances
subsetWeights - the weights of the subset
data - the data to work with
java.lang.Exception - if computation fails
protected double numericDistribution(double[][] props,
double[][][] dists,
int att,
int[] sortedIndices,
double[] weights,
double[][] subsetWeights,
Instances data,
double[] vals)
throws java.lang.Exception
props -
dists -
att - the attribute index
sortedIndices - the sorted indices of the instances
weights - the weights of the instances
subsetWeights - the weights of the subset
data - the data to work with
vals -
java.lang.Exception - if computation fails
protected double variance(double[] s,
double[] sS,
double[] sumOfWeights)
s -
sS -
sumOfWeights -
protected double singleVariance(double s,
double sS,
double weight)
s -
sS -
weight - the weight
protected double priorVal(double[][] dist)
throws java.lang.InterruptedException
dist -
java.lang.InterruptedException
protected double gain(double[][] dist,
double priorVal)
dist -
priorVal - the splitting criterion
protected double reducedErrorPrune()
throws java.lang.Exception
java.lang.Exception - if pruning fails for some reason
protected void insertHoldOutSet(Instances data) throws java.lang.Exception
data - the data to insert
java.lang.Exception - if something goes wrong
protected void insertHoldOutInstance(Instance inst, double weight, REPTree.Tree parent) throws java.lang.Exception
inst - the instance to insert
weight - the weight of the instance
parent - the parent of the node
java.lang.Exception - if insertion fails
protected void backfitHoldOutSet()
throws java.lang.Exception
java.lang.Exception - if insertion fails
public java.lang.String getRevision()
getRevision in interface RevisionHandler