public static final class InfogramV3.InfogramParametersV3 extends water.api.schemas3.ModelParametersSchemaV3<InfogramModel.InfogramParameters,InfogramV3.InfogramParametersV3>
| Modifier and Type | Field and Description |
|---|---|
| InfogramModel.InfogramParameters.Algorithm | algorithm |
| java.lang.String | algorithm_params |
| boolean | balance_classes: For imbalanced data, balance training data class counts via over/under-sampling. |
| float[] | class_sampling_factors: Desired over/under-sampling ratios per class (lexicographic order). |
| double | data_fraction |
| static java.lang.String[] | fields |
| float | max_after_balance_size: When classes are balanced, limit the resulting dataset size to the specified multiple of the original dataset size. |
| int | max_iterations |
| double | net_information_threshold |
| water.api.schemas3.KeyV3.FrameKeyV3 | plug_values |
| double | prior |
| java.lang.String[] | protected_columns |
| double | relevance_index_threshold |
| double | safety_index_threshold |
| long | seed |
| boolean | standardize |
| int | top_n_features |
| double | total_information_threshold |
auc_type, categorical_encoding, checkpoint, custom_distribution_func, custom_metric_func, distribution, export_checkpoints_dir, fold_assignment, fold_column, gainslift_bins, huber_alpha, ignore_const_cols, ignored_columns, keep_cross_validation_fold_assignment, keep_cross_validation_models, keep_cross_validation_predictions, max_categorical_levels, max_runtime_secs, model_id, nfolds, offset_column, parallelize_cross_validation, quantile_alpha, response_column, score_each_iteration, stopping_metric, stopping_rounds, stopping_tolerance, training_frame, tweedie_power, validation_frame, weights_column

| Constructor and Description |
|---|
| InfogramParametersV3() |
| Modifier and Type | Method and Description |
|---|---|
| InfogramModel.InfogramParameters | fillImpl(InfogramModel.InfogramParameters impl) |
| static void | generateModelParams(InfogramModel.InfogramParameters parms, java.util.Properties p, java.util.ArrayList<java.lang.String> excludeList) |
append_field_arrays, extractDeclaredApiParameters, fields, fillFromImpl, getAdditionalParameters, writeParametersJSON
createAndFillImpl, createImpl, extractVersionFromSchemaName, fillFromAny, fillFromBody, fillFromImpl, fillFromImpl, fillFromParms, fillFromParms, fillFromParms, fillImpl, getImplClass, getImplClass, getSchemaName, getSchemaType, getSchemaVersion, init_meta, markdown, markdown, newInstance, newInstance, setField, setSchemaType_doNotCall
public static final java.lang.String[] fields
@API(help="Seed for pseudo random number generator (if applicable).",
gridable=true)
public long seed
@API(help="Standardize numeric columns to have zero mean and unit variance.",
level=critical)
public boolean standardize
@API(help="Plug Values (a single row frame containing values that will be used to impute missing values of the training/validation frame, use with conjunction missing_values_handling = PlugValues).",
direction=INPUT)
public water.api.schemas3.KeyV3.FrameKeyV3 plug_values
@API(help="Maximum number of iterations.",
level=secondary)
public int max_iterations
@API(help="Prior probability for y==1. To be used only for logistic regression iff the data has been sampled and the mean of response does not reflect reality.",
level=expert)
public double prior
@API(help="Balance training data class counts via over/under-sampling (for imbalanced data).",
level=secondary,
direction=INOUT)
public boolean balance_classes
@API(help="Desired over/under-sampling ratios per class (in lexicographic order). If not specified, sampling factors will be automatically computed to obtain class balance during training. Requires balance_classes.",
level=expert,
direction=INOUT)
public float[] class_sampling_factors
@API(help="Maximum relative size of the training data after balancing class counts (can be less than 1.0). Requires balance_classes.",
level=expert,
direction=INOUT)
public float max_after_balance_size
@API(level=critical,
direction=INOUT,
valuesProvider=InfogramV3.InfogramAlrogithmProvider.class,
help="Type of machine learning algorithm used to build the infogram. Options include \'AUTO\' (gbm), \'deeplearning\' (Deep Learning with default parameters), \'drf\' (Random Forest with default parameters), \'gbm\' (GBM with default parameters), \'glm\' (GLM with default parameters), or \'xgboost\' (if available, XGBoost with default parameters).")
public InfogramModel.InfogramParameters.Algorithm algorithm
@API(help="Customized parameters for the machine learning algorithm specified in the algorithm parameter.",
level=expert,
gridable=true)
public java.lang.String algorithm_params
@API(help="Columns that contain features that are sensitive and need to be protected (legally, or otherwise), if applicable. These features (e.g. race, gender, etc) should not drive the prediction of the response.",
level=secondary,
gridable=true)
public java.lang.String[] protected_columns
@API(help="A number between 0 and 1 representing a threshold for total information, defaulting to 0.1. For a specific feature, if the total information is higher than this threshold, and the corresponding net information is also higher than the threshold ``net_information_threshold``, that feature will be considered admissible. The total information is the x-axis of the Core Infogram. Default is -1 which gets set to 0.1.",
level=secondary,
gridable=true)
public double total_information_threshold
@API(help="A number between 0 and 1 representing a threshold for net information, defaulting to 0.1. For a specific feature, if the net information is higher than this threshold, and the corresponding total information is also higher than the total_information_threshold, that feature will be considered admissible. The net information is the y-axis of the Core Infogram. Default is -1 which gets set to 0.1.",
level=secondary,
gridable=true)
public double net_information_threshold
@API(help="A number between 0 and 1 representing a threshold for the relevance index, defaulting to 0.1. This is only used when ``protected_columns`` is set by the user. For a specific feature, if the relevance index value is higher than this threshold, and the corresponding safety index is also higher than the safety_index_threshold``, that feature will be considered admissible. The relevance index is the x-axis of the Fair Infogram. Default is -1 which gets set to 0.1.",
level=secondary,
gridable=true)
public double relevance_index_threshold
@API(help="A number between 0 and 1 representing a threshold for the safety index, defaulting to 0.1. This is only used when protected_columns is set by the user. For a specific feature, if the safety index value is higher than this threshold, and the corresponding relevance index is also higher than the relevance_index_threshold, that feature will be considered admissible. The safety index is the y-axis of the Fair Infogram. Default is -1 which gets set to 0.1.",
level=secondary,
gridable=true)
public double safety_index_threshold
@API(help="The fraction of training frame to use to build the infogram model. Defaults to 1.0, and any value greater than 0 and less than or equal to 1.0 is acceptable.",
level=secondary,
gridable=true)
public double data_fraction
@API(help="An integer specifying the number of columns to evaluate in the infogram. The columns are ranked by variable importance, and the top N are evaluated. Defaults to 50.",
level=secondary,
gridable=true)
public int top_n_features
public InfogramModel.InfogramParameters fillImpl(InfogramModel.InfogramParameters impl)
Overrides: fillImpl in class water.api.schemas3.ModelParametersSchemaV3<InfogramModel.InfogramParameters,InfogramV3.InfogramParametersV3>
public static void generateModelParams(InfogramModel.InfogramParameters parms, java.util.Properties p, java.util.ArrayList<java.lang.String> excludeList)