// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.dataproc.v1;
import "google/api/field_behavior.proto";
option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1;dataproc";
option java_multiple_files = true;
option java_outer_classname = "SharedProto";
option java_package = "com.google.cloud.dataproc.v1";
// Settings for the runtime that a workload executes in.
message RuntimeConfig {
  // Optional. The batch runtime version.
  string version = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A custom container image for the job runtime environment. When
  // this field is not specified, a default container image is used.
  string container_image = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Property names mapped to their values; these properties
  // configure workload execution.
  map<string, string> properties = 3 [(google.api.field_behavior) = OPTIONAL];
}
// Settings for the environment that a workload runs in.
message EnvironmentConfig {
  // Optional. How the workload is executed.
  ExecutionConfig execution_config = 1
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The peripherals that the workload has access to.
  PeripheralsConfig peripherals_config = 2
      [(google.api.field_behavior) = OPTIONAL];
}
// Settings that govern how a workload is executed.
message ExecutionConfig {
  // Optional. The service account used to execute the workload.
  string service_account = 2 [(google.api.field_behavior) = OPTIONAL];

  // Network settings for workload execution. As members of a oneof, at most
  // one of the fields below can be set.
  oneof network {
    // Optional. URI of the network to connect the workload to.
    string network_uri = 4 [(google.api.field_behavior) = OPTIONAL];

    // Optional. URI of the subnetwork to connect the workload to.
    string subnetwork_uri = 5 [(google.api.field_behavior) = OPTIONAL];
  }

  // Optional. Tags that are used for network traffic control.
  repeated string network_tags = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud KMS key that is used for encryption.
  string kms_key = 7 [(google.api.field_behavior) = OPTIONAL];
}
// Settings for the Spark History Server used by the workload.
message SparkHistoryServerConfig {
  // Optional. Resource name of an existing Dataproc Cluster that acts as the
  // Spark History Server for the workload.
  //
  // Example:
  //
  // * `projects/[project_id]/regions/[region]/clusters/[cluster_name]`
  string dataproc_cluster = 1 [(google.api.field_behavior) = OPTIONAL];
}
// Settings for the auxiliary services available to a workload.
message PeripheralsConfig {
  // Optional. Resource name of an existing Dataproc Metastore service.
  //
  // Example:
  //
  // * `projects/[project_id]/locations/[region]/services/[service_id]`
  string metastore_service = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Spark History Server settings for the workload.
  SparkHistoryServerConfig spark_history_server_config = 2
      [(google.api.field_behavior) = OPTIONAL];
}
// Information reported about the execution of a workload.
message RuntimeInfo {
  // Output only. Remote access endpoints (such as web interfaces and APIs)
  // mapped to their URIs.
  map<string, string> endpoints = 1
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. URI of the location that holds the workload's stdout and
  // stderr.
  string output_uri = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. URI of the location that holds the diagnostics tarball.
  string diagnostic_output_uri = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];
}
// GKE settings for the cluster.
message GkeClusterConfig {
  // Optional. The target GKE cluster to deploy to. It must be in the same
  // project and region as the Dataproc cluster (the GKE cluster can be zonal
  // or regional).
  // Format: 'projects/{project}/locations/{location}/clusters/{cluster_id}'
  string gke_cluster_target = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The GKE NodePools on which workloads will be scheduled.
  //
  // * At least one node pool must be assigned the 'default' role.
  // * Each role can be given to only a single NodePoolTarget.
  // * All NodePools must have the same location settings.
  //
  // If a nodePoolTarget is not specified, Dataproc constructs a default
  // nodePoolTarget.
  repeated GkeNodePoolTarget node_pool_target = 3
      [(google.api.field_behavior) = OPTIONAL];
}
// Settings for running the Dataproc cluster on Kubernetes.
message KubernetesClusterConfig {
  // Optional. The namespace within the Kubernetes cluster to deploy into.
  //
  // * If the namespace does not exist, it is created.
  // * If the namespace exists, Dataproc verifies that another Dataproc
  //   VirtualCluster is not installed into it.
  // * If no namespace is specified, the name of the Dataproc Cluster is used.
  string kubernetes_namespace = 1 [(google.api.field_behavior) = OPTIONAL];

  // The Kubernetes platform configuration. GKE is the only member declared
  // in this oneof.
  oneof config {
    // Required. Settings for running the Dataproc cluster on GKE.
    GkeClusterConfig gke_cluster_config = 2
        [(google.api.field_behavior) = REQUIRED];
  }

  // Optional. Software settings for this Dataproc cluster running on
  // Kubernetes.
  KubernetesSoftwareConfig kubernetes_software_config = 3
      [(google.api.field_behavior) = OPTIONAL];
}
// Software settings for this Dataproc cluster running on Kubernetes.
message KubernetesSoftwareConfig {
  // The components to install in this Dataproc cluster. Each key must be a
  // string from the KubernetesComponent enumeration, and each value is the
  // version of that software to install.
  // At least one entry must be specified.
  map<string, string> component_version = 1;

  // The properties to set on daemon config files.
  //
  // Property keys use the `prefix:property` format, for example
  // `spark:spark.kubernetes.container.image`. Supported prefixes and the
  // files they map to:
  //
  // * spark: `spark-defaults.conf`
  //
  // For more information, see [Cluster
  // properties](https://cloud.google.com/dataproc/docs/concepts/cluster-properties).
  map<string, string> properties = 2;
}
// A GKE NodePool that Dataproc workloads run on, together with the roles
// assigned to it.
message GkeNodePoolTarget {
  // `Role` specifies whose tasks will run on the NodePool. Roles can be
  // specific to workloads. Exactly one GkeNodePoolTarget within the
  // VirtualCluster must have the 'default' role, which is used to run all
  // workloads that are not associated with a NodePool.
  enum Role {
    // No role was specified.
    ROLE_UNSPECIFIED = 0;

    // Roles that are not directly assigned to a NodePool run on the NodePool
    // that has the `default` role.
    DEFAULT = 1;

    // Runs controllers and webhooks.
    CONTROLLER = 2;

    // Runs the Spark driver.
    SPARK_DRIVER = 3;

    // Runs Spark executors.
    SPARK_EXECUTOR = 4;
  }

  // Required. The target GKE NodePool.
  // Format:
  // 'projects/{project}/locations/{location}/clusters/{cluster}/nodePools/{node_pool}'
  string node_pool = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The roles assigned to the GKE NodePool.
  repeated Role roles = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. The configuration for the GKE NodePool.
  //
  // If specified, Dataproc attempts to create a NodePool with the specified
  // shape. If a NodePool with the same name already exists, it is verified
  // against all specified fields; if any field differs, creation of the
  // virtual cluster fails.
  //
  // If omitted, any NodePool with the specified name is used. If no NodePool
  // with the specified name exists, Dataproc creates a NodePool with default
  // values.
  GkeNodePoolConfig node_pool_config = 3
      [(google.api.field_behavior) = OPTIONAL];
}
// The configuration of a GKE NodePool used by a [Dataproc-on-GKE
// cluster](https://cloud.google.com/dataproc/docs/concepts/jobs/dataproc-gke#create-a-dataproc-on-gke-cluster).
//
// Field numbers in this message and its nested messages are not sequential;
// they identify the fields on the wire and must not be renumbered.
message GkeNodePoolConfig {
  // Parameters that describe cluster nodes.
  message GkeNodeConfig {
    // Optional. The name of a Compute Engine [machine
    // type](https://cloud.google.com/compute/docs/machine-types).
    string machine_type = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Whether the nodes are created as [preemptible VM
    // instances](https://cloud.google.com/compute/docs/instances/preemptible).
    bool preemptible = 10 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The number of local SSD disks to attach to the node, which is
    // limited by the maximum number of disks allowable per zone (see [Adding
    // Local SSDs](https://cloud.google.com/compute/docs/disks/local-ssd)).
    int32 local_ssd_count = 7 [(google.api.field_behavior) = OPTIONAL];

    // Optional. A list of [hardware
    // accelerators](https://cloud.google.com/compute/docs/gpus) to attach to
    // each node.
    repeated GkeNodePoolAcceleratorConfig accelerators = 11
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. [Minimum CPU
    // platform](https://cloud.google.com/compute/docs/instances/specify-min-cpu-platform)
    // to be used by this instance. The instance may be scheduled on the
    // specified or a newer CPU platform. Specify the friendly names of CPU
    // platforms, such as "Intel Haswell" or "Intel Sandy Bridge".
    string min_cpu_platform = 13 [(google.api.field_behavior) = OPTIONAL];
  }

  // A GkeNodePoolAcceleratorConfig represents a hardware accelerator request
  // for a NodePool.
  message GkeNodePoolAcceleratorConfig {
    // The number of accelerator cards exposed to an instance.
    int64 accelerator_count = 1;

    // The accelerator type resource name (see GPUs on Compute Engine).
    string accelerator_type = 2;
  }

  // GkeNodePoolAutoscalingConfig contains the information that the cluster
  // autoscaler needs to adjust the size of the node pool to the current
  // cluster usage.
  message GkeNodePoolAutoscalingConfig {
    // The minimum number of nodes in the NodePool. Must be >= 0 and <=
    // max_node_count.
    int32 min_node_count = 2;

    // The maximum number of nodes in the NodePool. Must be >= min_node_count.
    // **Note:** Quota must be sufficient to scale up the cluster.
    int32 max_node_count = 3;
  }

  // Optional. The node pool configuration.
  GkeNodeConfig config = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The list of Compute Engine
  // [zones](https://cloud.google.com/compute/docs/zones#available) where
  // the NodePool's nodes will be located.
  //
  // **Note:** Currently, only one zone may be specified.
  //
  // If a location is not specified during NodePool creation, Dataproc will
  // choose a location.
  repeated string locations = 13 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The autoscaler configuration for this NodePool. The autoscaler
  // is enabled only when a valid configuration is present.
  GkeNodePoolAutoscalingConfig autoscaling = 4
      [(google.api.field_behavior) = OPTIONAL];
}
// The cluster components that can be activated.
enum Component {
  // No component was specified. Specifying this value causes Cluster creation
  // to fail.
  COMPONENT_UNSPECIFIED = 0;

  // The Anaconda python distribution. The Anaconda component is not supported
  // in the Dataproc
  // <a
  // href="/dataproc/docs/concepts/versioning/dataproc-release-2.0">2.0
  // image</a>. The 2.0 image is pre-installed with Miniconda.
  ANACONDA = 5;

  // Docker.
  DOCKER = 13;

  // The Druid query engine. (alpha)
  DRUID = 9;

  // Flink.
  FLINK = 14;

  // HBase. (beta)
  HBASE = 11;

  // The Hive Web HCatalog (the REST service for accessing HCatalog).
  HIVE_WEBHCAT = 3;

  // The Jupyter Notebook.
  JUPYTER = 1;

  // The Presto query engine.
  PRESTO = 6;

  // The Ranger service.
  RANGER = 12;

  // The Solr service.
  SOLR = 10;

  // The Zeppelin notebook.
  ZEPPELIN = 4;

  // The Zookeeper service.
  ZOOKEEPER = 8;
}
// The actions that can be taken when a resource associated with a cluster
// fails.
enum FailureAction {
  // No failure action was specified; the failure action defaults to
  // NO_ACTION.
  FAILURE_ACTION_UNSPECIFIED = 0;

  // Take no action when creation of a cluster resource fails. NO_ACTION is
  // the default.
  NO_ACTION = 1;

  // Delete the failed cluster resource.
  DELETE = 2;
}