Struct TrainingOptions

Source

pub struct TrainingOptions {
    pub activation_fn: Option<String>,
    pub adjust_step_changes: Option<bool>,
    pub approx_global_feature_contrib: Option<bool>,
    pub auto_arima: Option<bool>,
    pub auto_arima_max_order: Option<i64>,
    pub auto_arima_min_order: Option<i64>,
    pub auto_class_weights: Option<bool>,
    pub batch_size: Option<i64>,
    pub booster_type: Option<String>,
    pub budget_hours: Option<f64>,
    pub calculate_p_values: Option<bool>,
    pub category_encoding_method: Option<String>,
    pub clean_spikes_and_dips: Option<bool>,
    pub color_space: Option<String>,
    pub colsample_bylevel: Option<f64>,
    pub colsample_bynode: Option<f64>,
    pub colsample_bytree: Option<f64>,
    pub contribution_metric: Option<String>,
    pub dart_normalize_type: Option<String>,
    pub data_frequency: Option<String>,
    pub data_split_column: Option<String>,
    pub data_split_eval_fraction: Option<f64>,
    pub data_split_method: Option<String>,
    pub decompose_time_series: Option<bool>,
    pub dimension_id_columns: Option<Vec<String>>,
    pub distance_type: Option<String>,
    pub dropout: Option<f64>,
    pub early_stop: Option<bool>,
    pub enable_global_explain: Option<bool>,
    pub endpoint_idle_ttl: Option<Duration>,
    pub feedback_type: Option<String>,
    pub fit_intercept: Option<bool>,
    pub forecast_limit_lower_bound: Option<f64>,
    pub forecast_limit_upper_bound: Option<f64>,
    pub hidden_units: Option<Vec<i64>>,
    pub holiday_region: Option<String>,
    pub holiday_regions: Option<Vec<String>>,
    pub horizon: Option<i64>,
    pub hparam_tuning_objectives: Option<Vec<String>>,
    pub hugging_face_model_id: Option<String>,
    pub include_drift: Option<bool>,
    pub initial_learn_rate: Option<f64>,
    pub input_label_columns: Option<Vec<String>>,
    pub instance_weight_column: Option<String>,
    pub integrated_gradients_num_steps: Option<i64>,
    pub is_test_column: Option<String>,
    pub item_column: Option<String>,
    pub kmeans_initialization_column: Option<String>,
    pub kmeans_initialization_method: Option<String>,
    pub l1_reg_activation: Option<f64>,
    pub l1_regularization: Option<f64>,
    pub l2_regularization: Option<f64>,
    pub label_class_weights: Option<HashMap<String, f64>>,
    pub learn_rate: Option<f64>,
    pub learn_rate_strategy: Option<String>,
    pub loss_type: Option<String>,
    pub machine_type: Option<String>,
    pub max_iterations: Option<i64>,
    pub max_parallel_trials: Option<i64>,
    pub max_replica_count: Option<i64>,
    pub max_time_series_length: Option<i64>,
    pub max_tree_depth: Option<i64>,
    pub min_apriori_support: Option<f64>,
    pub min_relative_progress: Option<f64>,
    pub min_replica_count: Option<i64>,
    pub min_split_loss: Option<f64>,
    pub min_time_series_length: Option<i64>,
    pub min_tree_child_weight: Option<i64>,
    pub model_garden_model_name: Option<String>,
    pub model_registry: Option<String>,
    pub model_uri: Option<String>,
    pub non_seasonal_order: Option<ArimaOrder>,
    pub num_clusters: Option<i64>,
    pub num_factors: Option<i64>,
    pub num_parallel_tree: Option<i64>,
    pub num_principal_components: Option<i64>,
    pub num_trials: Option<i64>,
    pub optimization_strategy: Option<String>,
    pub optimizer: Option<String>,
    pub pca_explained_variance_ratio: Option<f64>,
    pub pca_solver: Option<String>,
    pub reservation_affinity_key: Option<String>,
    pub reservation_affinity_type: Option<String>,
    pub reservation_affinity_values: Option<Vec<String>>,
    pub sampled_shapley_num_paths: Option<i64>,
    pub scale_features: Option<bool>,
    pub standardize_features: Option<bool>,
    pub subsample: Option<f64>,
    pub tf_version: Option<String>,
    pub time_series_data_column: Option<String>,
    pub time_series_id_column: Option<String>,
    pub time_series_id_columns: Option<Vec<String>>,
    pub time_series_length_fraction: Option<f64>,
    pub time_series_timestamp_column: Option<String>,
    pub tree_method: Option<String>,
    pub trend_smoothing_window_size: Option<i64>,
    pub user_column: Option<String>,
    pub vertex_ai_model_version_aliases: Option<Vec<String>>,
    pub wals_alpha: Option<f64>,
    pub warm_start: Option<bool>,
    pub xgboost_version: Option<String>,
}

Expand description

Options used in model training.

This type is not used in any activity, and only used as part of another schema.

Fields§

§activation_fn: Option<String>

Activation function of the neural nets.

§adjust_step_changes: Option<bool>

If true, detect step changes and make data adjustment in the input time series.

§approx_global_feature_contrib: Option<bool>

Whether to use approximate feature contribution method in XGBoost model explanation for global explain.

§auto_arima: Option<bool>

Whether to enable auto ARIMA or not.

§auto_arima_max_order: Option<i64>

The max value of the sum of non-seasonal p and q.

§auto_arima_min_order: Option<i64>

The min value of the sum of non-seasonal p and q.

§auto_class_weights: Option<bool>

Whether to calculate class weights automatically based on the popularity of each label.

§batch_size: Option<i64>

Batch size for dnn models.

§booster_type: Option<String>

Booster type for boosted tree models.

§budget_hours: Option<f64>

Budget in hours for AutoML training.

§calculate_p_values: Option<bool>

Whether or not p-value test should be computed for this model. Only available for linear and logistic regression models.

§category_encoding_method: Option<String>

Categorical feature encoding method.

§clean_spikes_and_dips: Option<bool>

If true, clean spikes and dips in the input time series.

§color_space: Option<String>

Enums for color space, used for processing images in Object Table. See more details at https://www.tensorflow.org/io/tutorials/colorspace.

§colsample_bylevel: Option<f64>

Subsample ratio of columns for each level for boosted tree models.

§colsample_bynode: Option<f64>

Subsample ratio of columns for each node (split) for boosted tree models.

§colsample_bytree: Option<f64>

Subsample ratio of columns when constructing each tree for boosted tree models.

§contribution_metric: Option<String>

The contribution metric. Applies to contribution analysis models. Allowed formats supported are for summable and summable ratio contribution metrics. These include expressions such as SUM(x) or SUM(x)/SUM(y), where x and y are column names from the base table.

§dart_normalize_type: Option<String>

Type of normalization algorithm for boosted tree models using dart booster.

§data_frequency: Option<String>

The data frequency of a time series.

§data_split_column: Option<String>

The column to split data with. This column won’t be used as a feature. 1. When data_split_method is CUSTOM, the corresponding column should be boolean. The rows with true value tag are eval data, and the false are training data. 2. When data_split_method is SEQ, the first DATA_SPLIT_EVAL_FRACTION rows (from smallest to largest) in the corresponding column are used as training data, and the rest are eval data. It respects the order in Orderable data types: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data_type_properties

§data_split_eval_fraction: Option<f64>

The fraction of evaluation data over the whole input data. The rest of data will be used as training data. The format should be double. Accurate to two decimal places. Default value is 0.2.

§data_split_method: Option<String>

The data split type for training and evaluation, e.g. RANDOM.

§decompose_time_series: Option<bool>

If true, perform time series decomposition and save the results.

§dimension_id_columns: Option<Vec<String>>

Optional. Names of the columns to slice on. Applies to contribution analysis models.

§distance_type: Option<String>

Distance type for clustering models.

§dropout: Option<f64>

Dropout probability for dnn models.

§early_stop: Option<bool>

Whether to stop early when the loss doesn’t improve significantly any more (compared to min_relative_progress). Used only for iterative training algorithms.

§enable_global_explain: Option<bool>

If true, enable global explanation during training.

§endpoint_idle_ttl: Option<Duration>

The idle TTL of the endpoint before the resources get destroyed. The default value is 6.5 hours.

§feedback_type: Option<String>

Feedback type that specifies which algorithm to run for matrix factorization.

§fit_intercept: Option<bool>

Whether the model should include intercept during model training.

§forecast_limit_lower_bound: Option<f64>

The forecast limit lower bound that was used during ARIMA model training with limits. To see more details of the algorithm: https://otexts.com/fpp2/limits.html

§forecast_limit_upper_bound: Option<f64>

The forecast limit upper bound that was used during ARIMA model training with limits.

§hidden_units: Option<Vec<i64>>

Hidden units for dnn models.

§holiday_region: Option<String>

The geographical region based on which the holidays are considered in time series modeling. If a valid value is specified, then holiday effects modeling is enabled.

§holiday_regions: Option<Vec<String>>

A list of geographical regions that are used for time series modeling.

§horizon: Option<i64>

The number of periods ahead that need to be forecasted.

§hparam_tuning_objectives: Option<Vec<String>>

The target evaluation metrics to optimize the hyperparameters for.

§hugging_face_model_id: Option<String>

The id of a Hugging Face model. For example, google/gemma-2-2b-it.

§include_drift: Option<bool>

Include drift when fitting an ARIMA model.

§initial_learn_rate: Option<f64>

Specifies the initial learning rate for the line search learn rate strategy.

§input_label_columns: Option<Vec<String>>

Name of input label columns in training data.

§instance_weight_column: Option<String>

Name of the instance weight column for training data. This column isn’t used as a feature.

§integrated_gradients_num_steps: Option<i64>

Number of integral steps for the integrated gradients explain method.

§is_test_column: Option<String>

Name of the column used to determine the rows corresponding to control and test. Applies to contribution analysis models.

§item_column: Option<String>

Item column specified for matrix factorization models.

§kmeans_initialization_column: Option<String>

The column used to provide the initial centroids for kmeans algorithm when kmeans_initialization_method is CUSTOM.

§kmeans_initialization_method: Option<String>

The method used to initialize the centroids for kmeans algorithm.

§l1_reg_activation: Option<f64>

L1 regularization coefficient to activations.

§l1_regularization: Option<f64>

L1 regularization coefficient.

§l2_regularization: Option<f64>

L2 regularization coefficient.

§label_class_weights: Option<HashMap<String, f64>>

Weights associated with each label class, for rebalancing the training data. Only applicable for classification models.

§learn_rate: Option<f64>

Learning rate in training. Used only for iterative training algorithms.

§learn_rate_strategy: Option<String>

The strategy to determine learn rate for the current iteration.

§loss_type: Option<String>

Type of loss function used during training run.

§machine_type: Option<String>

The type of the machine used to deploy and serve the model.

§max_iterations: Option<i64>

The maximum number of iterations in training. Used only for iterative training algorithms.

§max_parallel_trials: Option<i64>

Maximum number of trials to run in parallel.

§max_replica_count: Option<i64>

The maximum number of machine replicas that will be deployed on an endpoint. The default value is equal to min_replica_count.

§max_time_series_length: Option<i64>

The maximum number of time points in a time series that can be used in modeling the trend component of the time series. Don’t use this option with the timeSeriesLengthFraction or minTimeSeriesLength options.

§max_tree_depth: Option<i64>

Maximum depth of a tree for boosted tree models.

§min_apriori_support: Option<f64>

The apriori support minimum. Applies to contribution analysis models.

§min_relative_progress: Option<f64>

When early_stop is true, stops training when accuracy improvement is less than ‘min_relative_progress’. Used only for iterative training algorithms.

§min_replica_count: Option<i64>

The minimum number of machine replicas that will always be deployed on an endpoint. This value must be greater than or equal to 1. The default value is 1.

§min_split_loss: Option<f64>

Minimum split loss for boosted tree models.

§min_time_series_length: Option<i64>

The minimum number of time points in a time series that are used in modeling the trend component of the time series. If you use this option you must also set the timeSeriesLengthFraction option. This training option ensures that enough time points are available when you use timeSeriesLengthFraction in trend modeling. This is particularly important when forecasting multiple time series in a single query using timeSeriesIdColumn. If the total number of time points is less than the minTimeSeriesLength value, then the query uses all available time points.

§min_tree_child_weight: Option<i64>

Minimum sum of instance weight needed in a child for boosted tree models.

§model_garden_model_name: Option<String>

The name of a Vertex model garden publisher model. Format is publishers/{publisher}/models/{model}@{optional_version_id}.

§model_registry: Option<String>

The model registry.

§model_uri: Option<String>

Google Cloud Storage URI from which the model was imported. Only applicable for imported models.

§non_seasonal_order: Option<ArimaOrder>

A specification of the non-seasonal part of the ARIMA model: the three components (p, d, q) are the AR order, the degree of differencing, and the MA order.

§num_clusters: Option<i64>

Number of clusters for clustering models.

§num_factors: Option<i64>

Num factors specified for matrix factorization models.

§num_parallel_tree: Option<i64>

Number of parallel trees constructed during each iteration for boosted tree models.

§num_principal_components: Option<i64>

Number of principal components to keep in the PCA model. Must be <= the number of features.

§num_trials: Option<i64>

Number of trials to run this hyperparameter tuning job.

§optimization_strategy: Option<String>

Optimization strategy for training linear regression models.

§optimizer: Option<String>

Optimizer used for training the neural nets.

§pca_explained_variance_ratio: Option<f64>

The minimum ratio of cumulative explained variance that needs to be given by the PCA model.

§pca_solver: Option<String>

The solver for PCA.

§reservation_affinity_key: Option<String>

Corresponds to the label key of a reservation resource used by Vertex AI. To target a SPECIFIC_RESERVATION by name, use compute.googleapis.com/reservation-name as the key and specify the name of your reservation as its value.

§reservation_affinity_type: Option<String>

Specifies the reservation affinity type used to configure a Vertex AI resource. The default value is NO_RESERVATION.

§reservation_affinity_values: Option<Vec<String>>

Corresponds to the label values of a reservation resource used by Vertex AI. This must be the full resource name of the reservation or reservation block.

§sampled_shapley_num_paths: Option<i64>

Number of paths for the sampled Shapley explain method.

§scale_features: Option<bool>

If true, scale the feature values by dividing the feature standard deviation. Currently only applies to PCA.

§standardize_features: Option<bool>

Whether to standardize numerical features. Defaults to true.

§subsample: Option<f64>

Subsample fraction of the training data to grow tree to prevent overfitting for boosted tree models.

§tf_version: Option<String>

Based on the selected TF version, the corresponding docker image is used to train external models.

§time_series_data_column: Option<String>

Column to be designated as time series data for ARIMA model.

§time_series_id_column: Option<String>

The time series id column that was used during ARIMA model training.

§time_series_id_columns: Option<Vec<String>>

The time series id columns that were used during ARIMA model training.

§time_series_length_fraction: Option<f64>

The fraction of the interpolated length of the time series that’s used to model the time series trend component. All of the time points of the time series are used to model the non-trend component. This training option accelerates modeling training without sacrificing much forecasting accuracy. You can use this option with minTimeSeriesLength but not with maxTimeSeriesLength.

§time_series_timestamp_column: Option<String>

Column to be designated as time series timestamp for ARIMA model.

§tree_method: Option<String>

Tree construction algorithm for boosted tree models.

§trend_smoothing_window_size: Option<i64>

Smoothing window size for the trend component. When a positive value is specified, a center moving average smoothing is applied on the history trend. When the smoothing window is out of the boundary at the beginning or the end of the trend, the first element or the last element is padded to fill the smoothing window before the average is applied.

§user_column: Option<String>

User column specified for matrix factorization models.

§vertex_ai_model_version_aliases: Option<Vec<String>>

The version aliases to apply in Vertex AI model registry. Always overwrites if the version aliases exist in an existing model.

§wals_alpha: Option<f64>

Hyperparameter for matrix factorization when implicit feedback type is specified.

§warm_start: Option<bool>

Whether to train a model from the last checkpoint.

§xgboost_version: Option<String>

User-selected XGBoost versions for training of XGBoost models.