public class DataInfo
extends water.Keyed
| Modifier and Type | Class and Description |
|---|---|
class |
DataInfo.Row |
class |
DataInfo.Rows |
static class |
DataInfo.TransformType |
| Modifier and Type | Field and Description |
|---|---|
int[] |
_activeCols |
water.fvec.Frame |
_adaptedFrame |
int[][] |
_catLvls |
int[] |
_catMissing |
int[] |
_catModes |
int[] |
_catOffsets |
int |
_cats |
boolean |
_fold |
boolean |
_imputeMissing |
boolean |
_intercept |
double[] |
_normMul |
double[] |
_normRespMul |
double[] |
_normRespSub |
double[] |
_normSub |
double[] |
_numMeans |
int |
_nums |
boolean |
_offset |
int[] |
_permutation |
DataInfo.TransformType |
_predictor_transform |
DataInfo.TransformType |
_response_transform |
int |
_responses |
boolean |
_skipMissing |
boolean |
_useAllFactorLevels |
boolean |
_valid |
boolean |
_weights |
| Constructor and Description |
|---|
DataInfo(water.Key selfKey,
water.fvec.Frame train,
water.fvec.Frame valid,
boolean useAllFactorLevels,
DataInfo.TransformType predictor_transform,
boolean skipMissing,
boolean imputeMissing,
boolean missingBucket) |
DataInfo(water.Key selfKey,
water.fvec.Frame train,
water.fvec.Frame valid,
int nResponses,
boolean useAllFactorLevels,
DataInfo.TransformType predictor_transform,
DataInfo.TransformType response_transform,
boolean skipMissing,
boolean imputeMissing,
boolean missingBucket,
boolean weight,
boolean offset,
boolean fold) |
DataInfo(water.Key selfKey,
water.fvec.Frame train,
water.fvec.Frame valid,
int nResponses,
boolean useAllFactorLevels,
DataInfo.TransformType predictor_transform,
DataInfo.TransformType response_transform,
boolean skipMissing,
boolean imputeMissing,
boolean missingBucket,
boolean weight,
boolean offset,
boolean fold,
boolean intercept) |
| Modifier and Type | Method and Description |
|---|---|
protected long |
checksum_impl() |
java.lang.String[] |
coefNames() |
double |
computeSparseOffset(double[] coefficients) |
DataInfo |
deep_clone() |
double[] |
denormalizeBeta(double[] beta) |
void |
dropWeights() |
DataInfo.Row |
extractDenseRow(water.fvec.Chunk[] chunks,
int rid,
DataInfo.Row row) |
DataInfo.Row[] |
extractDenseRowsVertical(water.fvec.Chunk[] chunks)
Extract (dense) rows from given chunks, one Vec at a time - should be slightly faster than per-row
|
DataInfo.Row[] |
extractSparseRows(water.fvec.Chunk[] chunks,
double offset)
Extract (sparse) rows from given chunks.
|
DataInfo |
filterExpandedColumns(int[] cols) |
int |
foldChunkId() |
int |
fullN() |
int |
getCategoricalId(int cid,
int val) |
water.fvec.Vec |
getWeightsVec() |
static int |
imputeCat(water.fvec.Vec v) |
int |
largestCat() |
static DataInfo |
makeEmpty(int fullN) |
int[] |
mapNames(java.lang.String[] names) |
DataInfo.Row |
newDenseRow() |
DataInfo.Row |
newDenseRow(double[] numVals) |
int |
numStart() |
int |
offsetChunkId() |
int |
responseChunkId() |
DataInfo.Rows |
rows(water.fvec.Chunk[] chks) |
DataInfo.Rows |
rows(water.fvec.Chunk[] chks,
boolean sparse) |
void |
setPredictorTransform(DataInfo.TransformType t) |
void |
setResponseTransform(DataInfo.TransformType t) |
water.fvec.Vec |
setWeights(java.lang.String name,
water.fvec.Vec vec) |
void |
unScaleNumericals(double[] in,
double[] out)
Undo the standardization/normalization of numerical columns
|
void |
unScaleResponses(double[] in,
double[] out)
Undo the standardization/normalization of response columns
|
void |
updateWeightedSigmaAndMean(double[] sigmas,
double[] mean) |
DataInfo |
validDinfo(water.fvec.Frame valid) |
int |
weightChunkId() |
checksum, getBinarySerializer, getPublishedKeys, remove_impl, remove, remove, remove, remove
public int[] _activeCols
public water.fvec.Frame _adaptedFrame
public int _responses
public DataInfo.TransformType _predictor_transform
public DataInfo.TransformType _response_transform
public boolean _useAllFactorLevels
public int _nums
public int _cats
public int[] _catOffsets
public int[] _catMissing
public int[] _catModes
public int[] _permutation
public double[] _normMul
public double[] _normSub
public double[] _normRespMul
public double[] _normRespSub
public double[] _numMeans
public boolean _intercept
public final boolean _offset
public boolean _weights
public final boolean _fold
public final boolean _skipMissing
public final boolean _imputeMissing
public boolean _valid
public final int[][] _catLvls
public DataInfo(water.Key selfKey,
water.fvec.Frame train,
water.fvec.Frame valid,
boolean useAllFactorLevels,
DataInfo.TransformType predictor_transform,
boolean skipMissing,
boolean imputeMissing,
boolean missingBucket)
public DataInfo(water.Key selfKey,
water.fvec.Frame train,
water.fvec.Frame valid,
int nResponses,
boolean useAllFactorLevels,
DataInfo.TransformType predictor_transform,
DataInfo.TransformType response_transform,
boolean skipMissing,
boolean imputeMissing,
boolean missingBucket,
boolean weight,
boolean offset,
boolean fold)
public DataInfo(water.Key selfKey,
water.fvec.Frame train,
water.fvec.Frame valid,
int nResponses,
boolean useAllFactorLevels,
DataInfo.TransformType predictor_transform,
DataInfo.TransformType response_transform,
boolean skipMissing,
boolean imputeMissing,
boolean missingBucket,
boolean weight,
boolean offset,
boolean fold,
boolean intercept)
public water.fvec.Vec setWeights(java.lang.String name,
water.fvec.Vec vec)
public void dropWeights()
public int responseChunkId()
public int foldChunkId()
public int offsetChunkId()
public int weightChunkId()
protected long checksum_impl()
Overrides: checksum_impl in class water.Keyed
public DataInfo deep_clone()
public static DataInfo makeEmpty(int fullN)
public DataInfo validDinfo(water.fvec.Frame valid)
public double[] denormalizeBeta(double[] beta)
public static int imputeCat(water.fvec.Vec v)
public DataInfo filterExpandedColumns(int[] cols)
public void updateWeightedSigmaAndMean(double[] sigmas,
double[] mean)
public void setPredictorTransform(DataInfo.TransformType t)
public void setResponseTransform(DataInfo.TransformType t)
public final int fullN()
public final int largestCat()
public final int numStart()
public final java.lang.String[] coefNames()
public int[] mapNames(java.lang.String[] names)
public final void unScaleNumericals(double[] in,
double[] out)
Parameters:
in - input values
out - output values (can be the same as input)
public final void unScaleResponses(double[] in,
double[] out)
Parameters:
in - input values
out - output values (can be the same as input)
public final int getCategoricalId(int cid,
int val)
public final DataInfo.Row extractDenseRow(water.fvec.Chunk[] chunks, int rid, DataInfo.Row row)
public water.fvec.Vec getWeightsVec()
public DataInfo.Row newDenseRow()
public DataInfo.Row newDenseRow(double[] numVals)
public double computeSparseOffset(double[] coefficients)
public DataInfo.Rows rows(water.fvec.Chunk[] chks)
public DataInfo.Rows rows(water.fvec.Chunk[] chks, boolean sparse)
public final DataInfo.Row[] extractSparseRows(water.fvec.Chunk[] chunks, double offset)
Parameters:
chunks - chunk of dataset
offset - adjustment for 0s if running with on-the-fly standardization (i.e. zeros are not really zeros because of centering)
public final DataInfo.Row[] extractDenseRowsVertical(water.fvec.Chunk[] chunks)
Parameters:
chunks - chunk of dataset