// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.bigquery.v2;
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/cloud/bigquery/v2/encryption_config.proto";
import "google/cloud/bigquery/v2/model_reference.proto";
import "google/cloud/bigquery/v2/standard_sql.proto";
import "google/cloud/bigquery/v2/table_reference.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";
import "google/protobuf/wrappers.proto";
import "google/api/annotations.proto";
// Go import path for the generated code; the part after ';' is the Go package
// name.
option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery";
// Name of the Java outer class generated for this file.
option java_outer_classname = "ModelProto";
// Java package that receives the generated classes.
option java_package = "com.google.cloud.bigquery.v2";
// Service exposing get, list, patch, and delete operations on models. The
// default host is bigquery.googleapis.com.
service ModelService {
  option (google.api.default_host) = "bigquery.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/bigquery,"
      "https://www.googleapis.com/auth/bigquery.readonly,"
      "https://www.googleapis.com/auth/cloud-platform,"
      "https://www.googleapis.com/auth/cloud-platform.read-only";

  // Retrieves a single model resource, identified by its model ID.
  rpc GetModel(GetModelRequest) returns (Model) {
    option (google.api.http) = {
      get: "/bigquery/v2/projects/{project_id=*}/datasets/{dataset_id=*}/models/{model_id=*}"
    };
    option (google.api.method_signature) = "project_id,dataset_id,model_id";
  }

  // Lists every model in the given dataset. The caller needs the READER
  // dataset role. Once the list has been retrieved, details of a particular
  // model are available through the models.get method.
  rpc ListModels(ListModelsRequest) returns (ListModelsResponse) {
    option (google.api.http) = {
      get: "/bigquery/v2/projects/{project_id=*}/datasets/{dataset_id=*}/models"
    };
    option (google.api.method_signature) = "project_id,dataset_id,max_results";
  }

  // Patches specific fields of the given model. The HTTP body carries the
  // `model` field of the request.
  rpc PatchModel(PatchModelRequest) returns (Model) {
    option (google.api.http) = {
      patch: "/bigquery/v2/projects/{project_id=*}/datasets/{dataset_id=*}/models/{model_id=*}"
      body: "model"
    };
    option (google.api.method_signature) = "project_id,dataset_id,model_id,model";
  }

  // Removes the model identified by modelId from its dataset.
  rpc DeleteModel(DeleteModelRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/bigquery/v2/projects/{project_id=*}/datasets/{dataset_id=*}/models/{model_id=*}"
    };
    option (google.api.method_signature) = "project_id,dataset_id,model_id";
  }
}
message Model {
// Wrapper message that only scopes the SeasonalPeriodType enum; it declares no
// fields of its own.
message SeasonalPeriod {
  // Seasonal period lengths that can be reported for a time series.
  enum SeasonalPeriodType {
    // Seasonal period type not specified (zero value).
    SEASONAL_PERIOD_TYPE_UNSPECIFIED = 0;

    // No seasonality.
    NO_SEASONALITY = 1;

    // Daily period: 24 hours.
    DAILY = 2;

    // Weekly period: 7 days.
    WEEKLY = 3;

    // Monthly period: 30 days or irregular.
    MONTHLY = 4;

    // Quarterly period: 90 days or irregular.
    QUARTERLY = 5;

    // Yearly period: 365 days or irregular.
    YEARLY = 6;
  }
}
// Wrapper message that only scopes enums used by KMeans clustering; it
// declares no fields of its own.
message KmeansEnums {
  // How the centroids of the KMeans clustering algorithm are initialized.
  enum KmeansInitializationMethod {
    // Initialization method not specified (zero value).
    KMEANS_INITIALIZATION_METHOD_UNSPECIFIED = 0;

    // Centroids are initialized randomly.
    RANDOM = 1;

    // Centroids are initialized from the data given in
    // kmeans_initialization_column.
    CUSTOM = 2;

    // Centroids are initialized with kmeans++.
    KMEANS_PLUS_PLUS = 3;
  }
}
// Metrics reported when evaluating regression models and matrix factorization
// models of the explicit feedback type.
message RegressionMetrics {
  // Mean absolute error.
  google.protobuf.DoubleValue mean_absolute_error = 1;

  // Mean squared error.
  google.protobuf.DoubleValue mean_squared_error = 2;

  // Mean squared log error.
  google.protobuf.DoubleValue mean_squared_log_error = 3;

  // Median absolute error.
  google.protobuf.DoubleValue median_absolute_error = 4;

  // R^2 score; corresponds to r2_score in ML.EVALUATE.
  google.protobuf.DoubleValue r_squared = 5;
}
// Aggregate metrics for classification/classifier models.
//
// For multi-class models each metric is either macro-averaged or
// micro-averaged:
// * macro-averaged: the metric is calculated per label and an unweighted
//   average of those values is taken;
// * micro-averaged: the metric is calculated globally by counting the total
//   number of correctly predicted rows.
message AggregateClassificationMetrics {
  // Precision: the fraction of positive predictions whose actual label was
  // positive. For multiclass this is a macro-averaged metric that treats each
  // class as a binary classifier.
  google.protobuf.DoubleValue precision = 1;

  // Recall: the fraction of actual positive labels that received a positive
  // prediction. For multiclass this is a macro-averaged metric.
  google.protobuf.DoubleValue recall = 2;

  // Accuracy: the fraction of predictions that were given the correct label.
  // For multiclass this is a micro-averaged metric.
  google.protobuf.DoubleValue accuracy = 3;

  // Threshold at which the metrics are computed. For binary classification
  // models this is the positive class threshold; for multi-class
  // classification models it is the confidence threshold.
  google.protobuf.DoubleValue threshold = 4;

  // F1 score: an average of recall and precision. For multiclass this is a
  // macro-averaged metric.
  google.protobuf.DoubleValue f1_score = 5;

  // Logarithmic loss. For multiclass this is a macro-averaged metric.
  google.protobuf.DoubleValue log_loss = 6;

  // Area under a ROC curve. For multiclass this is a macro-averaged metric.
  google.protobuf.DoubleValue roc_auc = 7;
}
// Evaluation metrics for binary classification/classifier models.
message BinaryClassificationMetrics {
  // Confusion matrix for binary classification models, computed at one
  // positive-class threshold.
  message BinaryConfusionMatrix {
    // Threshold value used when computing each of the following metrics.
    google.protobuf.DoubleValue positive_class_threshold = 1;

    // Number of true samples predicted as true.
    google.protobuf.Int64Value true_positives = 2;

    // Number of false samples predicted as true.
    google.protobuf.Int64Value false_positives = 3;

    // Number of false samples predicted as false.
    google.protobuf.Int64Value true_negatives = 4;

    // Number of true samples predicted as false.
    google.protobuf.Int64Value false_negatives = 5;

    // The fraction of positive predictions whose actual label was positive.
    google.protobuf.DoubleValue precision = 6;

    // The fraction of actual positive labels that were given a positive
    // prediction.
    google.protobuf.DoubleValue recall = 7;

    // The equally weighted average of recall and precision.
    google.protobuf.DoubleValue f1_score = 8;

    // The fraction of predictions given the correct label.
    google.protobuf.DoubleValue accuracy = 9;
  }

  // Aggregate classification metrics.
  AggregateClassificationMetrics aggregate_classification_metrics = 1;

  // Binary confusion matrices, one per threshold evaluated.
  repeated BinaryConfusionMatrix binary_confusion_matrix_list = 2;

  // Label representing the positive class.
  string positive_label = 3;

  // Label representing the negative class.
  string negative_label = 4;
}
// Evaluation metrics for multi-class classification/classifier models.
message MultiClassClassificationMetrics {
  // Confusion matrix for multi-class classification models.
  message ConfusionMatrix {
    // One cell of the confusion matrix.
    message Entry {
      // The predicted label. When confidence_threshold > 0, an additional
      // entry is added that reports the number of items under the confidence
      // threshold.
      string predicted_label = 1;

      // Number of items predicted as this label.
      google.protobuf.Int64Value item_count = 2;
    }

    // One row of the confusion matrix.
    message Row {
      // The original (actual) label of this row.
      string actual_label = 1;

      // Distribution of predicted labels for this row.
      repeated Entry entries = 2;
    }

    // Confidence threshold used when computing the entries of the confusion
    // matrix.
    google.protobuf.DoubleValue confidence_threshold = 1;

    // One row per actual label.
    repeated Row rows = 2;
  }

  // Aggregate classification metrics.
  AggregateClassificationMetrics aggregate_classification_metrics = 1;

  // Confusion matrices computed at different thresholds.
  repeated ConfusionMatrix confusion_matrix_list = 2;
}
// Evaluation metrics for clustering models.
message ClusteringMetrics {
  // Describes a single cluster.
  message Cluster {
    // Representative value of one feature within the cluster.
    message FeatureValue {
      // Representative value of a categorical feature.
      message CategoricalValue {
        // Count of a single category within the cluster.
        message CategoryCount {
          // The name of the category.
          string category = 1;

          // Number of training samples in the cluster that match the
          // category.
          google.protobuf.Int64Value count = 2;
        }

        // Counts of all categories for the categorical feature. When there
        // are more than ten categories, the top ten (by count) are returned
        // plus one extra CategoryCount whose category is "_OTHER_" and whose
        // count aggregates the remaining categories.
        repeated CategoryCount category_counts = 1;
      }

      // The feature column name.
      string feature_column = 1;

      // The representative value; at most one member is set.
      oneof value {
        // The numerical feature value. This is the centroid value for this
        // feature.
        google.protobuf.DoubleValue numerical_value = 2;

        // The categorical feature value.
        CategoricalValue categorical_value = 3;
      }
    }

    // Centroid id.
    int64 centroid_id = 1;

    // Values of highly variant features for this cluster.
    repeated FeatureValue feature_values = 2;

    // Number of training data rows assigned to this cluster.
    google.protobuf.Int64Value count = 3;
  }

  // Davies-Bouldin index.
  google.protobuf.DoubleValue davies_bouldin_index = 1;

  // Mean of squared distances between each sample and its cluster centroid.
  google.protobuf.DoubleValue mean_squared_distance = 2;

  // Information for all clusters.
  repeated Cluster clusters = 3;
}
// Evaluation metrics used by weighted-ALS models, i.e. those specified by
// feedback_type=implicit.
message RankingMetrics {
  // Computes a precision per user over all items by ranking them, then
  // averages those precisions across all users.
  google.protobuf.DoubleValue mean_average_precision = 1;

  // Similar to the mean squared error computed for regression and explicit
  // recommendation models, except that instead of computing the rating
  // directly, the output from evaluate is compared against a preference that
  // is 1 or 0 depending on whether the rating exists.
  google.protobuf.DoubleValue mean_squared_error = 2;

  // Measures the goodness of a ranking calculated from the predicted
  // confidence by comparing it with an ideal rank measured by the original
  // ratings.
  google.protobuf.DoubleValue normalized_discounted_cumulative_gain = 3;

  // Measures the goodness of a ranking by computing the percentile rank from
  // the predicted confidence and dividing it by the original rank.
  google.protobuf.DoubleValue average_rank = 4;
}
// Model evaluation metrics for ARIMA forecasting models.
message ArimaForecastingMetrics {
  // Model evaluation metrics for a single ARIMA forecasting model.
  message ArimaSingleModelForecastingMetrics {
    // Non-seasonal order.
    ArimaOrder non_seasonal_order = 1;

    // Arima fitting metrics.
    ArimaFittingMetrics arima_fitting_metrics = 2;

    // Whether the arima model was fitted with drift. Always false when d is
    // not 1.
    bool has_drift = 3;

    // The time_series_id value for this time series. It is one of the unique
    // values from the time_series_id_column specified during ARIMA model
    // training. Only present when the time_series_id_column training option
    // was used.
    string time_series_id = 4;

    // The tuple of time_series_ids identifying this time series. It is one of
    // the unique tuples of values present in the time_series_id_columns
    // specified during ARIMA model training. Only present when the
    // time_series_id_columns training option was used; the values appear in
    // the same order as time_series_id_columns.
    repeated string time_series_ids = 9;

    // Seasonal periods. Repeated because multiple periods are supported for
    // one time series.
    repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 5;

    // If true, holiday_effect is part of the time series decomposition
    // result.
    google.protobuf.BoolValue has_holiday_effect = 6;

    // If true, spikes_and_dips is part of the time series decomposition
    // result.
    google.protobuf.BoolValue has_spikes_and_dips = 7;

    // If true, step_changes is part of the time series decomposition result.
    google.protobuf.BoolValue has_step_changes = 8;
  }

  // Non-seasonal order. Deprecated; ArimaSingleModelForecastingMetrics
  // carries a field of the same name per model.
  repeated ArimaOrder non_seasonal_order = 1 [deprecated = true];

  // Arima model fitting metrics. Deprecated;
  // ArimaSingleModelForecastingMetrics carries a field of the same name per
  // model.
  repeated ArimaFittingMetrics arima_fitting_metrics = 2
      [deprecated = true];

  // Seasonal periods. Repeated because multiple periods are supported for one
  // time series. Deprecated; ArimaSingleModelForecastingMetrics carries a
  // field of the same name per model.
  repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 3
      [deprecated = true];

  // Whether the Arima model was fitted with drift. Always false when d is not
  // 1. Deprecated; ArimaSingleModelForecastingMetrics carries a field of the
  // same name per model.
  repeated bool has_drift = 4 [deprecated = true];

  // Id to differentiate different time series for the large-scale case.
  // Deprecated; ArimaSingleModelForecastingMetrics carries a field of the
  // same name per model.
  repeated string time_series_id = 5 [deprecated = true];

  // Repeated because there can be many metric sets (one for each model) in
  // auto-arima and the large-scale case.
  repeated ArimaSingleModelForecastingMetrics
      arima_single_model_forecasting_metrics = 6;
}
// Evaluation metrics of a model. They are computed either on all training
// data or only on the eval data, depending on whether eval data was used
// during training. Not present for imported models.
message EvaluationMetrics {
  // The metric set that applies to the model; at most one member is set.
  oneof metrics {
    // Populated for regression models and explicit feedback type matrix
    // factorization models.
    RegressionMetrics regression_metrics = 1;

    // Populated for binary classification/classifier models.
    BinaryClassificationMetrics binary_classification_metrics = 2;

    // Populated for multi-class classification/classifier models.
    MultiClassClassificationMetrics multi_class_classification_metrics = 3;

    // Populated for clustering models.
    ClusteringMetrics clustering_metrics = 4;

    // Populated for implicit feedback type matrix factorization models.
    RankingMetrics ranking_metrics = 5;

    // Populated for ARIMA models.
    ArimaForecastingMetrics arima_forecasting_metrics = 6;
  }
}
// Result of the data split: references to the training and evaluation data
// tables that were used to train the model.
message DataSplitResult {
  // Reference to the table holding the training data after the split.
  TableReference training_table = 1;

  // Reference to the table holding the evaluation data after the split.
  TableReference evaluation_table = 2;
}
// Arima order; usable for both the non-seasonal and the seasonal parts.
message ArimaOrder {
  // Order of the autoregressive part.
  int64 p = 1;

  // Order of the differencing part.
  int64 d = 2;

  // Order of the moving-average part.
  int64 q = 3;
}
// Metrics describing how well an ARIMA model was fitted.
message ArimaFittingMetrics {
  // Log-likelihood.
  double log_likelihood = 1;

  // AIC.
  double aic = 2;

  // Variance.
  double variance = 3;
}
// Global explanations: the top most important features after training.
message GlobalExplanation {
  // Explanation for a single feature.
  message Explanation {
    // Full name of the feature. For non-numerical features it is formatted
    // like <column_name>.<encoded_feature_name>. The overall feature name is
    // always truncated to its first 120 characters.
    string feature_name = 1;

    // Attribution of the feature.
    google.protobuf.DoubleValue attribution = 2;
  }

  // The top global explanations, sorted by absolute value of attribution in
  // descending order.
  repeated Explanation explanations = 1;

  // Class label for this set of global explanations. Empty/null for binary
  // logistic and linear regression models. Sorted alphabetically in
  // descending order.
  string class_label = 2;
}
// Information about a single training query run for the model.
message TrainingRun {
  // Options used in model training.
  message TrainingOptions {
    // Maximum number of training iterations. Used only for iterative training
    // algorithms.
    int64 max_iterations = 1;

    // Type of loss function used during the training run.
    LossType loss_type = 2;

    // Learning rate in training. Used only for iterative training algorithms.
    double learn_rate = 3;

    // L1 regularization coefficient.
    google.protobuf.DoubleValue l1_regularization = 4;

    // L2 regularization coefficient.
    google.protobuf.DoubleValue l2_regularization = 5;

    // When early_stop is true, training stops once the accuracy improvement
    // is less than 'min_relative_progress'. Used only for iterative training
    // algorithms.
    google.protobuf.DoubleValue min_relative_progress = 6;

    // Whether to train a model from the last checkpoint.
    google.protobuf.BoolValue warm_start = 7;

    // Whether to stop early when the loss no longer improves significantly
    // (compared to min_relative_progress). Used only for iterative training
    // algorithms.
    google.protobuf.BoolValue early_stop = 8;

    // Names of the input label columns in the training data.
    repeated string input_label_columns = 9;

    // The data split type for training and evaluation, e.g. RANDOM.
    DataSplitMethod data_split_method = 10;

    // The fraction of the whole input data used as evaluation data; the rest
    // is used as training data. The format should be double, accurate to two
    // decimal places. Default value is 0.2.
    double data_split_eval_fraction = 11;

    // The column to split data with. This column won't be used as a feature.
    // 1. When data_split_method is CUSTOM, the corresponding column should
    // be boolean. Rows tagged true are eval data, and rows tagged false are
    // training data.
    // 2. When data_split_method is SEQ, the first DATA_SPLIT_EVAL_FRACTION
    // rows (from smallest to largest) in the corresponding column are used
    // as training data, and the rest are eval data. It respects the order
    // in Orderable data types:
    // https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties
    string data_split_column = 12;

    // The strategy used to determine the learn rate for the current
    // iteration.
    LearnRateStrategy learn_rate_strategy = 13;

    // Initial learning rate for the line search learn rate strategy.
    double initial_learn_rate = 16;

    // Weights associated with each label class, used to rebalance the
    // training data. Only applicable for classification models.
    map<string, double> label_class_weights = 17;

    // User column specified for matrix factorization models.
    string user_column = 18;

    // Item column specified for matrix factorization models.
    string item_column = 19;

    // Distance type for clustering models.
    DistanceType distance_type = 20;

    // Number of clusters for clustering models.
    int64 num_clusters = 21;

    // Google Cloud Storage URI from which the model was imported. Only
    // applicable for imported models.
    string model_uri = 22;

    // Optimization strategy for training linear regression models.
    OptimizationStrategy optimization_strategy = 23;

    // Hidden units for dnn models.
    repeated int64 hidden_units = 24;

    // Batch size for dnn models.
    int64 batch_size = 25;

    // Dropout probability for dnn models.
    google.protobuf.DoubleValue dropout = 26;

    // Maximum depth of a tree for boosted tree models.
    int64 max_tree_depth = 27;

    // Fraction of the training data subsampled to grow a tree, to prevent
    // overfitting for boosted tree models.
    double subsample = 28;

    // Minimum split loss for boosted tree models.
    google.protobuf.DoubleValue min_split_loss = 29;

    // Num factors specified for matrix factorization models.
    int64 num_factors = 30;

    // Feedback type that selects which algorithm to run for matrix
    // factorization.
    FeedbackType feedback_type = 31;

    // Hyperparameter for matrix factorization when the implicit feedback type
    // is specified.
    google.protobuf.DoubleValue wals_alpha = 32;

    // The method used to initialize the centroids for the kmeans algorithm.
    KmeansEnums.KmeansInitializationMethod kmeans_initialization_method = 33;

    // The column that provides the initial centroids for the kmeans algorithm
    // when kmeans_initialization_method is CUSTOM.
    string kmeans_initialization_column = 34;

    // Column designated as the time series timestamp for the ARIMA model.
    string time_series_timestamp_column = 35;

    // Column designated as the time series data for the ARIMA model.
    string time_series_data_column = 36;

    // Whether to enable auto ARIMA or not.
    bool auto_arima = 37;

    // Specification of the non-seasonal part of the ARIMA model: the three
    // components (p, d, q) are the AR order, the degree of differencing, and
    // the MA order.
    ArimaOrder non_seasonal_order = 38;

    // The data frequency of a time series.
    DataFrequency data_frequency = 39;

    // Include drift when fitting an ARIMA model.
    bool include_drift = 41;

    // The geographical region whose holidays are considered in time series
    // modeling. If a valid value is specified, holiday effects modeling is
    // enabled.
    HolidayRegion holiday_region = 42;

    // The time series id column that was used during ARIMA model training.
    string time_series_id_column = 43;

    // The time series id columns that were used during ARIMA model training.
    repeated string time_series_id_columns = 51;

    // The number of periods ahead that need to be forecasted.
    int64 horizon = 44;

    // Whether to preserve the input structs in output feature names.
    // Suppose there is a struct A with field b.
    // When false (default), the output feature name is A_b.
    // When true, the output feature name is A.b.
    bool preserve_input_structs = 45;

    // The max value of non-seasonal p and q.
    int64 auto_arima_max_order = 46;

    // If true, decompose the time series and save the results.
    google.protobuf.BoolValue decompose_time_series = 50;

    // If true, clean spikes and dips in the input time series.
    google.protobuf.BoolValue clean_spikes_and_dips = 52;

    // If true, detect step changes and make data adjustment in the input
    // time series.
    google.protobuf.BoolValue adjust_step_changes = 53;
  }

  // Information about a single iteration of the training run.
  message IterationResult {
    // Information about a single cluster for a clustering model.
    message ClusterInfo {
      // Centroid id.
      int64 centroid_id = 1;

      // Cluster radius: the average distance from the centroid to each point
      // assigned to the cluster.
      google.protobuf.DoubleValue cluster_radius = 2;

      // Cluster size: the total number of points assigned to the cluster.
      google.protobuf.Int64Value cluster_size = 3;
    }

    // (Auto-)arima fitting result. Everything is wrapped in ArimaResult for
    // easier refactoring if model-specific iteration results are wanted.
    message ArimaResult {
      // Arima coefficients.
      message ArimaCoefficients {
        // Auto-regressive coefficients, an array of double.
        repeated double auto_regressive_coefficients = 1;

        // Moving-average coefficients, an array of double.
        repeated double moving_average_coefficients = 2;

        // Intercept coefficient, a single double rather than an array.
        double intercept_coefficient = 3;
      }

      // Arima model information.
      message ArimaModelInfo {
        // Non-seasonal order.
        ArimaOrder non_seasonal_order = 1;

        // Arima coefficients.
        ArimaCoefficients arima_coefficients = 2;

        // Arima fitting metrics.
        ArimaFittingMetrics arima_fitting_metrics = 3;

        // Whether the Arima model was fitted with drift. Always false when d
        // is not 1.
        bool has_drift = 4;

        // The time_series_id value for this time series. It is one of the
        // unique values from the time_series_id_column specified during
        // ARIMA model training. Only present when the time_series_id_column
        // training option was used.
        string time_series_id = 5;

        // The tuple of time_series_ids identifying this time series. It is
        // one of the unique tuples of values present in the
        // time_series_id_columns specified during ARIMA model training. Only
        // present when the time_series_id_columns training option was used;
        // the values appear in the same order as time_series_id_columns.
        repeated string time_series_ids = 10;

        // Seasonal periods. Repeated because multiple periods are supported
        // for one time series.
        repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 6;

        // If true, holiday_effect is part of the time series decomposition
        // result.
        google.protobuf.BoolValue has_holiday_effect = 7;

        // If true, spikes_and_dips is part of the time series decomposition
        // result.
        google.protobuf.BoolValue has_spikes_and_dips = 8;

        // If true, step_changes is part of the time series decomposition
        // result.
        google.protobuf.BoolValue has_step_changes = 9;
      }

      // Repeated because multiple arima models are fitted in auto-arima. For
      // a non-auto-arima model, its size is one.
      repeated ArimaModelInfo arima_model_info = 1;

      // Seasonal periods. Repeated because multiple periods are supported
      // for one time series.
      repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 2;
    }

    // Index of the iteration, 0 based.
    google.protobuf.Int32Value index = 1;

    // Time taken to run the iteration, in milliseconds.
    google.protobuf.Int64Value duration_ms = 4;

    // Loss computed on the training data at the end of the iteration.
    google.protobuf.DoubleValue training_loss = 5;

    // Loss computed on the eval data at the end of the iteration.
    google.protobuf.DoubleValue eval_loss = 6;

    // Learn rate used for this iteration.
    double learn_rate = 7;

    // Information about top clusters for clustering models.
    repeated ClusterInfo cluster_infos = 8;

    // (Auto-)arima fitting result for this iteration.
    ArimaResult arima_result = 9;
  }

  // Options that were used for this training run, covering both user
  // specified options and defaults that were applied.
  TrainingOptions training_options = 1;

  // The start time of this training run.
  google.protobuf.Timestamp start_time = 8;

  // Output of each iteration run; results.size() <= max_iterations.
  repeated IterationResult results = 6;

  // The evaluation metrics over training/eval data that were computed at the
  // end of training.
  EvaluationMetrics evaluation_metrics = 7;

  // Data split result of the training run. Only set when the input data is
  // actually split.
  DataSplitResult data_split_result = 9;

  // Global explanations for important features of the model. For multi-class
  // models there is one entry per label class; for other models the list has
  // a single entry.
  repeated GlobalExplanation global_explanations = 10;
}
// The kind of model a Model resource represents.
enum ModelType {
  // Model type not specified (zero value).
  MODEL_TYPE_UNSPECIFIED = 0;

  // Linear regression model.
  LINEAR_REGRESSION = 1;

  // Logistic regression based classification model.
  LOGISTIC_REGRESSION = 2;

  // K-means clustering model.
  KMEANS = 3;

  // Matrix factorization model.
  MATRIX_FACTORIZATION = 4;

  // DNN classifier model.
  DNN_CLASSIFIER = 5;

  // An imported TensorFlow model.
  TENSORFLOW = 6;

  // DNN regressor model.
  DNN_REGRESSOR = 7;

  // Boosted tree regressor model.
  BOOSTED_TREE_REGRESSOR = 9;

  // Boosted tree classifier model.
  BOOSTED_TREE_CLASSIFIER = 10;

  // ARIMA model.
  ARIMA = 11;

  // [Beta] AutoML Tables regression model.
  AUTOML_REGRESSOR = 12;

  // [Beta] AutoML Tables classification model.
  AUTOML_CLASSIFIER = 13;

  // New name for the ARIMA model.
  ARIMA_PLUS = 19;
}
// Loss metric used to evaluate model training performance.
enum LossType {
  // Loss type not specified (zero value).
  LOSS_TYPE_UNSPECIFIED = 0;

  // Mean squared loss, used for linear regression.
  MEAN_SQUARED_LOSS = 1;

  // Mean log loss, used for logistic regression.
  MEAN_LOG_LOSS = 2;
}
// Distance metric used to compute the distance between two points.
enum DistanceType {
  // Distance type not specified (zero value).
  DISTANCE_TYPE_UNSPECIFIED = 0;

  // Euclidean distance.
  EUCLIDEAN = 1;

  // Cosine distance.
  COSINE = 2;
}
// The method used to split input data into multiple tables.
enum DataSplitMethod {
  // Data split method not specified (zero value).
  DATA_SPLIT_METHOD_UNSPECIFIED = 0;

  // Splits data randomly.
  RANDOM = 1;

  // Splits data using the user provided tags.
  CUSTOM = 2;

  // Splits data sequentially.
  SEQUENTIAL = 3;

  // Data split is skipped.
  NO_SPLIT = 4;

  // Splits data automatically: uses NO_SPLIT if the data size is small,
  // otherwise uses RANDOM.
  AUTO_SPLIT = 5;
}
// Data frequencies supported for time series forecasting models.
enum DataFrequency {
  // Data frequency not specified (zero value).
  DATA_FREQUENCY_UNSPECIFIED = 0;

  // Automatically inferred from timestamps.
  AUTO_FREQUENCY = 1;

  // Yearly data.
  YEARLY = 2;

  // Quarterly data.
  QUARTERLY = 3;

  // Monthly data.
  MONTHLY = 4;

  // Weekly data.
  WEEKLY = 5;

  // Daily data.
  DAILY = 6;

  // Hourly data.
  HOURLY = 7;

  // Per-minute data.
  PER_MINUTE = 8;
}
// Holiday regions supported for time series forecasting models. Values 1-5
// name multi-country regions; the remaining values name a single country or
// territory by a two-letter code.
enum HolidayRegion {
  // Holiday region unspecified (zero value).
  HOLIDAY_REGION_UNSPECIFIED = 0;

  // Global.
  GLOBAL = 1;

  // North America.
  NA = 2;

  // Japan and Asia Pacific: Korea, Greater China, India, Australia, and New
  // Zealand.
  JAPAC = 3;

  // Europe, the Middle East and Africa.
  EMEA = 4;

  // Latin America and the Caribbean.
  LAC = 5;

  // United Arab Emirates.
  AE = 6;

  // Argentina.
  AR = 7;

  // Austria.
  AT = 8;

  // Australia.
  AU = 9;

  // Belgium.
  BE = 10;

  // Brazil.
  BR = 11;

  // Canada.
  CA = 12;

  // Switzerland.
  CH = 13;

  // Chile.
  CL = 14;

  // China.
  CN = 15;

  // Colombia.
  CO = 16;

  // Czechoslovakia.
  CS = 17;

  // Czech Republic.
  CZ = 18;

  // Germany.
  DE = 19;

  // Denmark.
  DK = 20;

  // Algeria.
  DZ = 21;

  // Ecuador.
  EC = 22;

  // Estonia.
  EE = 23;

  // Egypt.
  EG = 24;

  // Spain.
  ES = 25;

  // Finland.
  FI = 26;

  // France.
  FR = 27;

  // Great Britain (United Kingdom).
  GB = 28;

  // Greece.
  GR = 29;

  // Hong Kong.
  HK = 30;

  // Hungary.
  HU = 31;

  // Indonesia.
  ID = 32;

  // Ireland.
  IE = 33;

  // Israel.
  IL = 34;

  // India.
  IN = 35;

  // Iran.
  IR = 36;

  // Italy.
  IT = 37;

  // Japan.
  JP = 38;

  // Korea (South).
  KR = 39;

  // Latvia.
  LV = 40;

  // Morocco.
  MA = 41;

  // Mexico.
  MX = 42;

  // Malaysia.
  MY = 43;

  // Nigeria.
  NG = 44;

  // Netherlands.
  NL = 45;

  // Norway.
  NO = 46;

  // New Zealand.
  NZ = 47;

  // Peru.
  PE = 48;

  // Philippines.
  PH = 49;

  // Pakistan.
  PK = 50;

  // Poland.
  PL = 51;

  // Portugal.
  PT = 52;

  // Romania.
  RO = 53;

  // Serbia.
  RS = 54;

  // Russian Federation.
  RU = 55;

  // Saudi Arabia.
  SA = 56;

  // Sweden.
  SE = 57;

  // Singapore.
  SG = 58;

  // Slovenia.
  SI = 59;

  // Slovakia.
  SK = 60;

  // Thailand.
  TH = 61;

  // Turkey.
  TR = 62;

  // Taiwan.
  TW = 63;

  // Ukraine.
  UA = 64;

  // United States.
  US = 65;

  // Venezuela.
  VE = 66;

  // Viet Nam.
  VN = 67;

  // South Africa.
  ZA = 68;
}
// The learning rate optimization strategy to use.
enum LearnRateStrategy {
  // Learn rate strategy not specified (zero value).
  LEARN_RATE_STRATEGY_UNSPECIFIED = 0;

  // Use line search to determine the learning rate.
  LINE_SEARCH = 1;

  // Use a constant learning rate.
  CONSTANT = 2;
}
// The optimization strategy used for training.
enum OptimizationStrategy {
  // Optimization strategy not specified (zero value).
  OPTIMIZATION_STRATEGY_UNSPECIFIED = 0;

  // Uses an iterative batch gradient descent algorithm.
  BATCH_GRADIENT_DESCENT = 1;

  // Uses a normal equation to solve the linear regression problem.
  NORMAL_EQUATION = 2;
}
// The training algorithm to use for matrix factorization models.
enum FeedbackType {
  // Feedback type not specified (zero value).
  FEEDBACK_TYPE_UNSPECIFIED = 0;

  // Use weighted-als for implicit feedback problems.
  IMPLICIT = 1;

  // Use nonweighted-als for explicit feedback problems.
  EXPLICIT = 2;
}
// Output only. Hash of this resource.
string etag = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

// Required. Unique identifier of this model.
ModelReference model_reference = 2
    [(google.api.field_behavior) = REQUIRED];

// Output only. Creation time of this model, expressed in milliseconds since
// the epoch.
int64 creation_time = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

// Output only. Time of the most recent modification of this model, expressed
// in milliseconds since the epoch.
int64 last_modified_time = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

// Optional. User-friendly description of this model.
string description = 12 [(google.api.field_behavior) = OPTIONAL];

// Optional. Descriptive name of this model.
string friendly_name = 14 [(google.api.field_behavior) = OPTIONAL];

// Labels attached to this model, usable for organizing and grouping models.
// Keys and values are limited to 63 characters and may contain only lowercase
// letters, numeric characters, underscores and dashes; international
// characters are allowed. Values are optional. Every key must start with a
// letter, and no two labels in the list may share a key.
map<string, string> labels = 15;

// Optional. Expiration time of this model, in milliseconds since the epoch.
// When not present, the model persists indefinitely. Expired models are
// deleted and their storage is reclaimed. The defaultTableExpirationMs
// property of the encapsulating dataset can be used to set a default
// expirationTime on newly created models.
int64 expiration_time = 16 [(google.api.field_behavior) = OPTIONAL];

// Output only. Geographic location in which the model resides; the value is
// inherited from the dataset.
string location = 13 [(google.api.field_behavior) = OUTPUT_ONLY];

// Custom encryption configuration (e.g., Cloud KMS keys). Shows how the model
// data is encrypted while stored in BigQuery storage. PatchModel accepts this
// field to update the encryption key of an already encrypted model.
EncryptionConfiguration encryption_configuration = 17;

// Output only. Type of the model resource.
ModelType model_type = 7 [(google.api.field_behavior) = OUTPUT_ONLY];

// Output only. Information about every training run, ordered by increasing
// start_time.
repeated TrainingRun training_runs = 9
    [(google.api.field_behavior) = OUTPUT_ONLY];

// Output only. Input feature columns used to train this model.
repeated StandardSqlField feature_columns = 10
    [(google.api.field_behavior) = OUTPUT_ONLY];

// Output only. Label columns used to train this model. The output of the
// model will have a "predicted_" prefix on these columns.
repeated StandardSqlField label_columns = 11
    [(google.api.field_behavior) = OUTPUT_ONLY];

// Deprecated. Best trial_id across all training runs.
int64 best_trial_id = 19 [deprecated = true];
}
// Request for the GetModel RPC; identifies the model to fetch by its project,
// dataset and model IDs.
message GetModelRequest {
  // Required. Project ID of the model being requested.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Dataset ID of the model being requested.
  string dataset_id = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Model ID of the model being requested.
  string model_id = 3 [(google.api.field_behavior) = REQUIRED];
}
// Identifies the model to patch and carries the patched model content.
message PatchModelRequest {
  // Required. Project ID of the model being patched.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Dataset ID of the model being patched.
  string dataset_id = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Model ID of the model being patched.
  string model_id = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The patched model.
  // RFC5789 patch semantics apply: fields that are missing are not updated.
  // A field is cleared by explicitly setting it to its default value.
  Model model = 4 [(google.api.field_behavior) = REQUIRED];
}
// Identifies the model to delete by its project, dataset and model IDs.
message DeleteModelRequest {
  // Required. Project ID of the model being deleted.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Dataset ID of the model being deleted.
  string dataset_id = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Model ID of the model being deleted.
  string model_id = 3 [(google.api.field_behavior) = REQUIRED];
}
// Selects the project and dataset whose models are listed, plus paging
// controls for the listing.
message ListModelsRequest {
  // Required. Project ID of the models to list.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Dataset ID of the models to list.
  string dataset_id = 2 [(google.api.field_behavior) = REQUIRED];

  // Upper bound on the number of results returned in a single response page.
  // Use the page tokens to iterate through the entire collection.
  google.protobuf.UInt32Value max_results = 3;

  // Page token returned by a previous call; supply it to request the next
  // page of results.
  string page_token = 4;
}
// One page of models from the requested dataset.
message ListModelsResponse {
  // Models in the requested dataset. Only these fields are populated:
  // model_reference, model_type, creation_time, last_modified_time and
  // labels.
  repeated Model models = 1;

  // Token for requesting the next page of results.
  string next_page_token = 2;
}