data-plane-api 0.1.1

Envoy xDS protobuf and gRPC definitions
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.dataflow.v1beta3;

import "google/api/field_behavior.proto";
import "google/protobuf/any.proto";
import "google/protobuf/struct.proto";

// Per-language code generation options: the namespaces/packages (and, for
// Java, the outer class name and file layout) used for generated code.
option csharp_namespace = "Google.Cloud.Dataflow.V1Beta3";
option go_package = "google.golang.org/genproto/googleapis/dataflow/v1beta3;dataflow";
option java_multiple_files = true;
option java_outer_classname = "EnvironmentProto";
option java_package = "com.google.dataflow.v1beta3";
option php_namespace = "Google\\Cloud\\Dataflow\\V1beta3";
option ruby_package = "Google::Cloud::Dataflow::V1beta3";

// Describes the environment in which a Dataflow Job runs.
//
// Fields are not declared in field-number order (for example,
// `service_options` is 16 and `service_kms_key_name` is 12). Field numbers
// identify fields on the wire and must not be renumbered.
message Environment {
  // The prefix of the resources the system should use for temporary
  // storage.  The system will append the suffix "/temp-{JOBNAME}" to
  // this resource prefix, where {JOBNAME} is the value of the
  // job_name field.  The resulting bucket and object prefix is used
  // as the prefix of the resources used to store temporary data
  // needed during the job execution.  NOTE: This will override the
  // value in taskrunner_settings.
  // The supported resource type is:
  //
  // Google Cloud Storage:
  //
  //   storage.googleapis.com/{bucket}/{object}
  //   bucket.storage.googleapis.com/{object}
  string temp_storage_prefix = 1;

  // The type of cluster manager API to use.  If unknown or
  // unspecified, the service will attempt to choose a reasonable
  // default.  This should be in the form of the API service name,
  // e.g. "compute.googleapis.com".
  string cluster_manager_api_service = 2;

  // The list of experiments to enable. This field should be used for
  // SDK-related experiments and not for service-related experiments. The
  // proper field for service-related experiments is service_options.
  repeated string experiments = 3;

  // The list of service options to enable. This field should be used for
  // service-related experiments only. These experiments, when graduating to
  // GA, should be replaced by dedicated fields or become default (i.e. always
  // on).
  repeated string service_options = 16;

  // If set, contains the Cloud KMS key identifier used to encrypt data
  // at rest, AKA a Customer Managed Encryption Key (CMEK).
  //
  // Format:
  //   projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
  string service_kms_key_name = 12;

  // The worker pools. At least one "harness" worker pool must be
  // specified in order for the job to have workers.
  repeated WorkerPool worker_pools = 4;

  // A description of the process that generated the request.
  google.protobuf.Struct user_agent = 5;

  // A structure describing which components and their versions of the service
  // are required in order to run the job.
  google.protobuf.Struct version = 6;

  // The dataset for the current project where various workflow
  // related tables are stored.
  //
  // The supported resource type is:
  //
  // Google BigQuery:
  //   bigquery.googleapis.com/{dataset}
  string dataset = 7;

  // The Cloud Dataflow SDK pipeline options specified by the user. These
  // options are passed through the service and are used to recreate the
  // SDK pipeline options on the worker in a language agnostic and platform
  // independent way.
  google.protobuf.Struct sdk_pipeline_options = 8;

  // Experimental settings.
  google.protobuf.Any internal_experiments = 9;

  // Identity to run virtual machines as. Defaults to the default account.
  string service_account_email = 10;

  // Which Flexible Resource Scheduling mode to run in.
  FlexResourceSchedulingGoal flex_resource_scheduling_goal = 11;

  // The Compute Engine region
  // (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
  // which worker processing should occur, e.g. "us-west1". Mutually exclusive
  // with worker_zone. If neither worker_region nor worker_zone is specified,
  // default to the control plane's region.
  string worker_region = 13;

  // The Compute Engine zone
  // (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in
  // which worker processing should occur, e.g. "us-west1-a". Mutually exclusive
  // with worker_region. If neither worker_region nor worker_zone is specified,
  // a zone in the control plane's region is chosen based on available capacity.
  string worker_zone = 14;

  // Output only. The shuffle mode used for the job.
  ShuffleMode shuffle_mode = 15 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Any debugging options to be supplied to the job.
  DebugOptions debug_options = 17;
}

// A package that must be installed in order for a worker to run the
// steps of the Cloud Dataflow job that will be assigned to its worker
// pool.
//
// This is the mechanism by which the Cloud Dataflow SDK causes code to
// be loaded onto the workers. For example, the Cloud Dataflow Java SDK
// might use this to install jars containing the user's code and all of the
// various dependencies (libraries, data files, etc.) required in order
// for that code to run.
//
// Referenced by the repeated `packages` field of `WorkerPool`.
message Package {
  // The name of the package.
  string name = 1;

  // The resource to read the package from. The supported resource type is:
  //
  // Google Cloud Storage:
  //
  //   storage.googleapis.com/{bucket}
  //   bucket.storage.googleapis.com/
  //
  // NOTE(review): unlike the other Cloud Storage examples in this file, these
  // forms show no `{object}` component -- confirm whether an object path is
  // expected here.
  string location = 2;
}

// Specifies the processing model used by a
// [google.dataflow.v1beta3.Job][google.dataflow.v1beta3.Job], which determines
// the way the Job is managed by the Cloud Dataflow service (how workers are
// scheduled, how inputs are sharded, etc).
enum JobType {
  // The type of the job is unspecified, or unknown. As the zero value, this
  // is what an unset field of this type reads as.
  JOB_TYPE_UNKNOWN = 0;

  // A batch job with a well-defined end point: data is read, data is
  // processed, data is written, and the job is done.
  JOB_TYPE_BATCH = 1;

  // A continuously streaming job with no end: data is read,
  // processed, and written continuously.
  JOB_TYPE_STREAMING = 2;
}

// Specifies the resource to optimize for in Flexible Resource Scheduling.
//
// Referenced by the `flex_resource_scheduling_goal` field of `Environment`.
enum FlexResourceSchedulingGoal {
  // Run in the default mode. As the zero value, this is what an unset field
  // of this type reads as.
  FLEXRS_UNSPECIFIED = 0;

  // Optimize for lower execution time.
  FLEXRS_SPEED_OPTIMIZED = 1;

  // Optimize for lower cost.
  FLEXRS_COST_OPTIMIZED = 2;
}

// Describes the data disk used by a workflow job.
//
// Referenced by the repeated `data_disks` field of `WorkerPool`.
message Disk {
  // Size of disk in GB.  If zero or unspecified, the service will
  // attempt to choose a reasonable default.
  int32 size_gb = 1;

  // Disk storage type, as defined by Google Compute Engine.  This
  // must be a disk type appropriate to the project and zone in which
  // the workers will run.  If unknown or unspecified, the service
  // will attempt to choose a reasonable default.
  //
  // For example, the standard persistent disk type is a resource name
  // typically ending in "pd-standard".  If SSD persistent disks are
  // available, the resource name typically ends with "pd-ssd".  The
  // actual valid values are defined by the Google Compute Engine API,
  // not by the Cloud Dataflow API; consult the Google Compute Engine
  // documentation for more information about determining the set of
  // available disk types for a particular project and zone.
  //
  // Google Compute Engine Disk types are local to a particular
  // project in a particular zone, and so the resource name will
  // typically look something like this:
  //
  // compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
  string disk_type = 2;

  // Directory in a VM where the disk is mounted.
  string mount_point = 3;
}

// Provides data to pass through to the worker harness.
//
// Referenced by the `parallel_worker_settings` field of `TaskRunnerSettings`.
message WorkerSettings {
  // The base URL for accessing Google Cloud APIs.
  //
  // When workers access Google Cloud APIs, they logically do so via
  // relative URLs.  If this field is specified, it supplies the base
  // URL to use for resolving these relative URLs.  The normative
  // algorithm used is defined by RFC 1808, "Relative Uniform Resource
  // Locators".
  //
  // If not specified, the default value is "http://www.googleapis.com/".
  //
  // NOTE(review): the documented default uses the `http://` scheme --
  // confirm whether `https://` is intended.
  string base_url = 1;

  // Whether to send work progress updates to the service.
  bool reporting_enabled = 2;

  // The Cloud Dataflow service path relative to the root URL, for example,
  // "dataflow/v1b3/projects".
  string service_path = 3;

  // The Shuffle service path relative to the root URL, for example,
  // "shuffle/v1beta1".
  string shuffle_service_path = 4;

  // The ID of the worker running this pipeline.
  string worker_id = 5;

  // The prefix of the resources the system should use for temporary
  // storage.
  //
  // The supported resource type is:
  //
  // Google Cloud Storage:
  //
  //   storage.googleapis.com/{bucket}/{object}
  //   bucket.storage.googleapis.com/{object}
  string temp_storage_prefix = 6;
}

// Taskrunner configuration settings.
//
// Referenced by the `taskrunner_settings` field of `WorkerPool`.
message TaskRunnerSettings {
  // The UNIX user ID on the worker VM to use for tasks launched by
  // taskrunner; e.g. "root".
  string task_user = 1;

  // The UNIX group ID on the worker VM to use for tasks launched by
  // taskrunner; e.g. "wheel".
  string task_group = 2;

  // The OAuth2 scopes to be requested by the taskrunner in order to
  // access the Cloud Dataflow API.
  repeated string oauth_scopes = 3;

  // The base URL for the taskrunner to use when accessing Google Cloud APIs.
  //
  // When workers access Google Cloud APIs, they logically do so via
  // relative URLs.  If this field is specified, it supplies the base
  // URL to use for resolving these relative URLs.  The normative
  // algorithm used is defined by RFC 1808, "Relative Uniform Resource
  // Locators".
  //
  // If not specified, the default value is "http://www.googleapis.com/".
  string base_url = 4;

  // The API version of the endpoint, e.g. "v1b3".
  string dataflow_api_version = 5;

  // The settings to pass to the parallel worker harness.
  WorkerSettings parallel_worker_settings = 6;

  // The location on the worker for task-specific subdirectories.
  string base_task_dir = 7;

  // Whether to continue taskrunner if an exception is hit.
  bool continue_on_exception = 8;

  // Whether to send taskrunner log info to Google Compute Engine VM serial
  // console.
  bool log_to_serialconsole = 9;

  // Whether to also send taskrunner log info to stderr.
  bool alsologtostderr = 10;

  // Indicates where to put logs.  If this is not specified, the logs
  // will not be uploaded.
  //
  // The supported resource type is:
  //
  // Google Cloud Storage:
  //
  //   storage.googleapis.com/{bucket}/{object}
  //   bucket.storage.googleapis.com/{object}
  string log_upload_location = 11;

  // The directory on the VM to store logs.
  string log_dir = 12;

  // The prefix of the resources the taskrunner should use for
  // temporary storage.
  //
  // The supported resource type is:
  //
  // Google Cloud Storage:
  //
  //   storage.googleapis.com/{bucket}/{object}
  //   bucket.storage.googleapis.com/{object}
  string temp_storage_prefix = 13;

  // The command to launch the worker harness.
  string harness_command = 14;

  // The file to store the workflow in.
  string workflow_file_name = 15;

  // The file to store preprocessing commands in.
  string commandlines_file_name = 16;

  // The ID string of the VM.
  string vm_id = 17;

  // The suggested backend language.
  string language_hint = 18;

  // The streaming worker main class name.
  string streaming_worker_main_class = 19;
}

// Specifies what happens to a resource when a Cloud Dataflow
// [google.dataflow.v1beta3.Job][google.dataflow.v1beta3.Job] has completed.
//
// Referenced by the `teardown_policy` field of `WorkerPool`.
enum TeardownPolicy {
  // The teardown policy isn't specified, or is unknown. As the zero value,
  // this is what an unset field of this type reads as.
  TEARDOWN_POLICY_UNKNOWN = 0;

  // Always tear down the resource.
  TEARDOWN_ALWAYS = 1;

  // Tear down the resource on success. This is useful for debugging
  // failures.
  TEARDOWN_ON_SUCCESS = 2;

  // Never tear down the resource. This is useful for debugging and
  // development.
  TEARDOWN_NEVER = 3;
}

// The default set of packages to be staged on a pool of workers.
//
// Referenced by the `default_package_set` field of `WorkerPool`.
enum DefaultPackageSet {
  // The default set of packages to stage is unknown, or unspecified. As the
  // zero value, this is what an unset field of this type reads as.
  DEFAULT_PACKAGE_SET_UNKNOWN = 0;

  // Indicates that no packages should be staged at the worker unless
  // explicitly specified by the job.
  DEFAULT_PACKAGE_SET_NONE = 1;

  // Stage packages typically useful to workers written in Java.
  DEFAULT_PACKAGE_SET_JAVA = 2;

  // Stage packages typically useful to workers written in Python.
  DEFAULT_PACKAGE_SET_PYTHON = 3;
}

// Specifies the algorithm used to determine the number of worker
// processes to run at any given point in time, based on the amount of
// data left to process, the number of workers, and how quickly
// existing workers are processing data.
//
// Referenced by the `algorithm` field of `AutoscalingSettings`.
enum AutoscalingAlgorithm {
  // The algorithm is unknown, or unspecified. As the zero value, this is
  // what an unset field of this type reads as.
  AUTOSCALING_ALGORITHM_UNKNOWN = 0;

  // Disable autoscaling.
  AUTOSCALING_ALGORITHM_NONE = 1;

  // Increase worker count over time to reduce job execution time.
  AUTOSCALING_ALGORITHM_BASIC = 2;
}

// Settings for WorkerPool autoscaling.
//
// Referenced by the `autoscaling_settings` field of `WorkerPool`.
message AutoscalingSettings {
  // The algorithm to use for autoscaling.
  AutoscalingAlgorithm algorithm = 1;

  // The maximum number of workers to cap scaling at.
  int32 max_num_workers = 2;
}

// Specifies how IP addresses should be allocated to the worker machines.
//
// Referenced by the `ip_configuration` field of `WorkerPool`.
enum WorkerIPAddressConfiguration {
  // The configuration is unknown, or unspecified. As the zero value, this is
  // what an unset field of this type reads as.
  WORKER_IP_UNSPECIFIED = 0;

  // Workers should have public IP addresses.
  WORKER_IP_PUBLIC = 1;

  // Workers should have private IP addresses.
  WORKER_IP_PRIVATE = 2;
}

// Defines an SDK harness container for executing Dataflow pipelines.
//
// Referenced by the repeated `sdk_harness_container_images` field of
// `WorkerPool`.
message SdkHarnessContainerImage {
  // A docker container image that resides in Google Container Registry.
  string container_image = 1;

  // If true, recommends that the Dataflow service use only one core per SDK
  // container instance with this image. If false (or unset), recommends using
  // more than one core per SDK container instance with this image for
  // efficiency. Note that the Dataflow service may choose to override this
  // property if needed.
  bool use_single_core_per_container = 2;

  // Environment ID for the Beam runner API proto Environment that corresponds
  // to the current SDK Harness.
  string environment_id = 3;

  // The set of capabilities enumerated in the Beam runner API Environment
  // proto identified by `environment_id`. See also
  // https://github.com/apache/beam/blob/master/model/pipeline/src/main/proto/beam_runner_api.proto
  repeated string capabilities = 4;
}

// Describes one particular pool of Cloud Dataflow workers to be
// instantiated by the Cloud Dataflow service in order to perform the
// computations required by a job.  Note that a workflow job may use
// multiple pools, in order to match the various computational
// requirements of the various stages of the job.
//
// Fields are not declared in field-number order (for example, `disk_type`
// is 16 and `subnetwork` is 19). Field numbers identify fields on the wire
// and must not be renumbered.
message WorkerPool {
  // The kind of the worker pool; currently only `harness` and `shuffle`
  // are supported.
  string kind = 1;

  // Number of Google Compute Engine workers in this pool needed to
  // execute the job.  If zero or unspecified, the service will
  // attempt to choose a reasonable default.
  int32 num_workers = 2;

  // Packages to be installed on workers.
  repeated Package packages = 3;

  // The default package set to install.  This allows the service to
  // select a default set of packages which are useful to worker
  // harnesses written in a particular language.
  DefaultPackageSet default_package_set = 4;

  // Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
  // service will attempt to choose a reasonable default.
  string machine_type = 5;

  // Sets the policy for determining when to tear down the worker pool.
  // Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
  // `TEARDOWN_NEVER`.
  // `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
  // the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
  // if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
  // down.
  //
  // If the workers are not torn down by the service, they will
  // continue to run and use Google Compute Engine VM resources in the
  // user's project until they are explicitly terminated by the user.
  // Because of this, Google recommends using the `TEARDOWN_ALWAYS`
  // policy except for small, manually supervised test jobs.
  //
  // If unknown or unspecified, the service will attempt to choose a reasonable
  // default.
  TeardownPolicy teardown_policy = 6;

  // Size of root disk for VMs, in GB.  If zero or unspecified, the service will
  // attempt to choose a reasonable default.
  int32 disk_size_gb = 7;

  // Type of root disk for VMs.  If empty or unspecified, the service will
  // attempt to choose a reasonable default.
  string disk_type = 16;

  // Fully qualified source image for disks.
  string disk_source_image = 8;

  // Zone to run the worker pools in.  If empty or unspecified, the service
  // will attempt to choose a reasonable default.
  string zone = 9;

  // Settings passed through to Google Compute Engine workers when
  // using the standard Dataflow task runner.  Users should ignore
  // this field.
  TaskRunnerSettings taskrunner_settings = 10;

  // The action to take on host maintenance, as defined by the Google
  // Compute Engine API.
  string on_host_maintenance = 11;

  // Data disks that are used by a VM in this workflow.
  repeated Disk data_disks = 12;

  // Metadata to set on the Google Compute Engine VMs.
  map<string, string> metadata = 13;

  // Settings for autoscaling of this WorkerPool.
  AutoscalingSettings autoscaling_settings = 14;

  // Extra arguments for this worker pool.
  google.protobuf.Any pool_args = 15;

  // Network to which VMs will be assigned.  If empty or unspecified,
  // the service will use the network "default".
  string network = 17;

  // Subnetwork to which VMs will be assigned, if desired.  Expected to be of
  // the form "regions/REGION/subnetworks/SUBNETWORK".
  string subnetwork = 19;

  // Required. Docker container image that executes the Cloud Dataflow worker
  // harness, residing in Google Container Registry.
  //
  // Deprecated for the Fn API path. Use sdk_harness_container_images instead.
  //
  // NOTE(review): described as "Required" but carries no
  // `google.api.field_behavior` annotation, and is also described as
  // deprecated for the Fn API path -- confirm which requirement applies.
  string worker_harness_container_image = 18;

  // The number of threads per worker harness. If zero or unspecified, the
  // service will choose a number of threads (according to the number of cores
  // on the selected machine type for batch, or 1 by convention for streaming).
  int32 num_threads_per_worker = 20;

  // Configuration for VM IPs.
  WorkerIPAddressConfiguration ip_configuration = 21;

  // Set of SDK harness containers needed to execute this pipeline. This will
  // only be set in the Fn API path. For non-cross-language pipelines this
  // should have only one entry. Cross-language pipelines will have two or more
  // entries.
  repeated SdkHarnessContainerImage sdk_harness_container_images = 22;
}

// Specifies the shuffle mode used by a
// [google.dataflow.v1beta3.Job][google.dataflow.v1beta3.Job], which determines
// how data is shuffled during processing. More details in:
// https://cloud.google.com/dataflow/docs/guides/deploying-a-pipeline#dataflow-shuffle
//
// Referenced by the output-only `shuffle_mode` field of `Environment`.
enum ShuffleMode {
  // Shuffle mode information is not available. As the zero value, this is
  // what an unset field of this type reads as.
  SHUFFLE_MODE_UNSPECIFIED = 0;

  // Shuffle is done on the worker VMs.
  VM_BASED = 1;

  // Shuffle is done on the service side.
  SERVICE_BASED = 2;
}

// Describes any options that have an effect on the debugging of pipelines.
//
// Referenced by the `debug_options` field of `Environment`.
message DebugOptions {
  // When true, enables the logging of the literal hot key to the user's Cloud
  // Logging. Reads as false when unset.
  bool enable_hot_key_logging = 1;
}