data-plane-api 0.1.1

// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.dataflow.v1beta3;

option csharp_namespace = "Google.Cloud.Dataflow.V1Beta3";
option go_package = "google.golang.org/genproto/googleapis/dataflow/v1beta3;dataflow";
option java_multiple_files = true;
option java_outer_classname = "StreamingProto";
option java_package = "com.google.dataflow.v1beta3";
option php_namespace = "Google\\Cloud\\Dataflow\\V1beta3";
option ruby_package = "Google::Cloud::Dataflow::V1beta3";

// Global topology of the streaming Dataflow job, including all
// computations and their sharded locations.
message TopologyConfig {
  // The computations associated with a streaming Dataflow job.
  repeated ComputationTopology computations = 1;

  // The disks assigned to a streaming Dataflow job.
  repeated DataDiskAssignment data_disk_assignments = 2;

  // Maps user stage names to stable computation names.
  map<string, string> user_stage_to_computation_name_map = 3;

  // The size (in bits) of keys that will be assigned to source messages.
  int32 forwarding_key_bits = 4;

  // Version number for persistent state.
  int32 persistent_state_version = 5;
}

// Identifies a pubsub location to use for transferring data into or
// out of a streaming Dataflow job.
message PubsubLocation {
  // A pubsub topic, in the form of
  // "pubsub.googleapis.com/topics/<project-id>/<topic-name>"
  string topic = 1;

  // A pubsub subscription, in the form of
  // "pubsub.googleapis.com/subscriptions/<project-id>/<subscription-name>"
  string subscription = 2;

  // If set, contains a pubsub label from which to extract record timestamps.
  // If left empty, record timestamps will be generated upon arrival.
  string timestamp_label = 3;

  // If set, contains a pubsub label from which to extract record ids.
  // If left empty, record deduplication will be strictly best effort.
  string id_label = 4;

  // Indicates whether the pipeline allows late-arriving data.
  bool drop_late_data = 5;

  // If set, specifies the pubsub subscription that will be used for tracking
  // custom time timestamps for watermark estimation.
  string tracking_subscription = 6;

  // If true, then the client has requested to get pubsub attributes.
  bool with_attributes = 7;
}

// Identifies the location of a streaming computation stage, for
// stage-to-stage communication.
message StreamingStageLocation {
  // Identifies the particular stream within the streaming Dataflow
  // job.
  string stream_id = 1;
}

// Identifies the location of a streaming side input.
message StreamingSideInputLocation {
  // Identifies the particular side input within the streaming Dataflow job.
  string tag = 1;

  // Identifies the state family where this side input is stored.
  string state_family = 2;
}

// Identifies the location of a custom souce.
message CustomSourceLocation {
  // Whether this source is stateful.
  bool stateful = 1;
}

// Describes a stream of data, either as input to be processed or as
// output of a streaming Dataflow job.
message StreamLocation {
  // A specification of a stream's location.
  oneof location {
    // The stream is part of another computation within the current
    // streaming Dataflow job.
    StreamingStageLocation streaming_stage_location = 1;

    // The stream is a pubsub stream.
    PubsubLocation pubsub_location = 2;

    // The stream is a streaming side input.
    StreamingSideInputLocation side_input_location = 3;

    // The stream is a custom source.
    CustomSourceLocation custom_source_location = 4;
  }
}

// State family configuration.
message StateFamilyConfig {
  // The state family value.
  string state_family = 1;

  // If true, this family corresponds to a read operation.
  bool is_read = 2;
}

// All configuration data for a particular Computation.
message ComputationTopology {
  // The system stage name.
  string system_stage_name = 1;

  // The ID of the computation.
  string computation_id = 5;

  // The key ranges processed by the computation.
  repeated KeyRangeLocation key_ranges = 2;

  // The inputs to the computation.
  repeated StreamLocation inputs = 3;

  // The outputs from the computation.
  repeated StreamLocation outputs = 4;

  // The state family values.
  repeated StateFamilyConfig state_families = 7;
}

// Location information for a specific key-range of a sharded computation.
// Currently we only support UTF-8 character splits to simplify encoding into
// JSON.
message KeyRangeLocation {
  // The start (inclusive) of the key range.
  string start = 1;

  // The end (exclusive) of the key range.
  string end = 2;

  // The physical location of this range assignment to be used for
  // streaming computation cross-worker message delivery.
  string delivery_endpoint = 3;

  // The name of the data disk where data for this range is stored.
  // This name is local to the Google Cloud Platform project and uniquely
  // identifies the disk within that project, for example
  // "myproject-1014-104817-4c2-harness-0-disk-1".
  string data_disk = 5;

  // DEPRECATED. The location of the persistent state for this range, as a
  // persistent directory in the worker local filesystem.
  string deprecated_persistent_directory = 4 [deprecated = true];
}

// Describes mounted data disk.
message MountedDataDisk {
  // The name of the data disk.
  // This name is local to the Google Cloud Platform project and uniquely
  // identifies the disk within that project, for example
  // "myproject-1014-104817-4c2-harness-0-disk-1".
  string data_disk = 1;
}

// Data disk assignment for a given VM instance.
message DataDiskAssignment {
  // VM instance name the data disks mounted to, for example
  // "myproject-1014-104817-4c2-harness-0".
  string vm_instance = 1;

  // Mounted data disks. The order is important a data disk's 0-based index in
  // this list defines which persistent directory the disk is mounted to, for
  // example the list of { "myproject-1014-104817-4c2-harness-0-disk-0" },
  // { "myproject-1014-104817-4c2-harness-0-disk-1" }.
  repeated string data_disks = 2;
}

// Data disk assignment information for a specific key-range of a sharded
// computation.
// Currently we only support UTF-8 character splits to simplify encoding into
// JSON.
message KeyRangeDataDiskAssignment {
  // The start (inclusive) of the key range.
  string start = 1;

  // The end (exclusive) of the key range.
  string end = 2;

  // The name of the data disk where data for this range is stored.
  // This name is local to the Google Cloud Platform project and uniquely
  // identifies the disk within that project, for example
  // "myproject-1014-104817-4c2-harness-0-disk-1".
  string data_disk = 3;
}

// Describes full or partial data disk assignment information of the computation
// ranges.
message StreamingComputationRanges {
  // The ID of the computation.
  string computation_id = 1;

  // Data disk assignments for ranges from this computation.
  repeated KeyRangeDataDiskAssignment range_assignments = 2;
}

// Streaming appliance snapshot configuration.
message StreamingApplianceSnapshotConfig {
  // If set, indicates the snapshot id for the snapshot being performed.
  string snapshot_id = 1;

  // Indicates which endpoint is used to import appliance state.
  string import_state_endpoint = 2;
}