// substrait-validator 0.1.4

// SPDX-License-Identifier: Apache-2.0
syntax = "proto3";

package substrait;

import "google/protobuf/any.proto";
import "substrait/extensions/extensions.proto";
import "substrait/type.proto";

option csharp_namespace = "Substrait.Protobuf";
option go_package = "github.com/substrait-io/substrait-go/proto";
option java_multiple_files = true;
option java_package = "io.substrait.proto";

// Common fields for all relational operators
message RelCommon {
  // Selects how the relation's fields are exposed in its output.  Being a
  // oneof, at most one of `direct` and `emit` is set.
  oneof emit_kind {
    // The underlying relation is output as is (no reordering or projection of columns)
    Direct direct = 1;
    // Allows to control for order and inclusion of fields
    Emit emit = 2;
  }

  // Performance-related hints attached to this relation (see `Hint` below).
  Hint hint = 3;
  // Extension payload; the type is declared in substrait/extensions/extensions.proto.
  substrait.extensions.AdvancedExtension advanced_extension = 4;

  // Direct indicates no change on presence and ordering of fields in the output
  message Direct {}

  // Remap which fields are output and in which order
  message Emit {
    // NOTE(review): presumably each entry is the index of a field of the
    // underlying relation, listed in output order -- confirm against the spec.
    repeated int32 output_mapping = 1;
  }

  // Changes to the operation that can influence efficiency/performance but
  // should not impact correctness.
  message Hint {
    // Statistics about the records (see `Stats` below).
    Stats stats = 1;
    // Runtime constraints (see `RuntimeConstraint` below, which currently
    // declares nothing but an extension point).
    RuntimeConstraint constraint = 2;

    // Name (alias) for this relation. Can be used for e.g. qualifying the relation (see e.g.
    // Spark's SubqueryAlias), or debugging.
    string alias = 3;
    // Assigns alternative output field names for any relation.  Equivalent to the names field
    // in RelRoot but applies to the output of the relation this RelCommon is attached to.
    repeated string output_names = 4;

    // Extension payload specific to the hint.
    substrait.extensions.AdvancedExtension advanced_extension = 10;

    // The statistics related to a hint (physical properties of records)
    message Stats {
      // NOTE(review): presumably the (estimated) number of rows -- confirm.
      double row_count = 1;
      // NOTE(review): presumably the (estimated) size of a single record; the
      // unit is not stated in this file -- confirm.
      double record_size = 2;
      // Extension payload specific to the statistics.
      substrait.extensions.AdvancedExtension advanced_extension = 10;
    }

    // Placeholder for runtime constraints; no concrete constraint fields are
    // defined yet (see the TODO below).
    message RuntimeConstraint {
      // TODO: nodes, cpu threads/%, memory, iops, etc.

      substrait.extensions.AdvancedExtension advanced_extension = 10;
    }
  }
}

// The scan operator of base data (physical or virtual), including filtering and projection.
message ReadRel {
  // Fields shared by all relational operators (emit behavior, hints, extensions).
  RelCommon common = 1;
  // Schema of the data being read.  NOTE(review): `NamedStruct` is not declared
  // in the visible part of this file; presumably it comes from
  // substrait/type.proto -- confirm.
  NamedStruct base_schema = 2;
  // Filter applied as part of the scan.
  Expression filter = 3;
  // NOTE(review): presumably a filter the consumer may apply only partially or
  // not at all (as opposed to `filter`) -- confirm against the spec.
  Expression best_effort_filter = 11;
  // Projection applied as part of the scan.  NOTE(review): presumably selects
  // which fields of `base_schema` are produced -- confirm.
  Expression.MaskExpression projection = 4;
  // Extension payload; the type is declared in substrait/extensions/extensions.proto.
  substrait.extensions.AdvancedExtension advanced_extension = 10;

  // Definition of which type of scan operation is to be performed
  oneof read_type {
    VirtualTable virtual_table = 5;
    LocalFiles local_files = 6;
    NamedTable named_table = 7;
    ExtensionTable extension_table = 8;
  }

  // A base table. The list of string is used to represent namespacing (e.g., mydb.mytable).
  // This assumes shared catalog between systems exchanging a message.
  message NamedTable {
    // Name components of the table, e.g. ["mydb", "mytable"].
    repeated string names = 1;
    substrait.extensions.AdvancedExtension advanced_extension = 10;
  }

  // A table composed of literals.
  message VirtualTable {
    // NOTE(review): presumably one struct literal per row -- confirm.
    repeated Expression.Literal.Struct values = 1;
  }

  // A stub type that can be used to extend/introduce new table types outside
  // the specification.
  message ExtensionTable {
    // Opaque, extension-defined description of the table.
    google.protobuf.Any detail = 1;
  }

  // Represents a list of files in input of a scan operation
  message LocalFiles {
    // The files (or groups of files) to read.
    repeated FileOrFiles items = 1;
    substrait.extensions.AdvancedExtension advanced_extension = 10;

    // Many files consist of indivisible chunks (e.g. parquet row groups
    // or CSV rows).  If a slice partially selects an indivisible chunk
    // then the consumer should employ some rule to decide which slice to
    // include the chunk in (e.g. include it in the slice that contains
    // the midpoint of the chunk)
    message FileOrFiles {
      // Location of the item; exactly how the URI is interpreted depends on
      // which member is set.
      oneof path_type {
        // A URI that can refer to either a single folder or a single file
        string uri_path = 1;
        // A URI where the path portion is a glob expression that can
        // identify zero or more paths.
        // Consumers should support the POSIX syntax.  The recursive
        // globstar (**) may not be supported.
        string uri_path_glob = 2;
        // A URI that refers to a single file
        string uri_file = 3;
        // A URI that refers to a single folder
        string uri_folder = 4;
      }

      // Original file format enum, superseded by the file_format oneof.
      // The number and name stay reserved so they cannot be reused.
      reserved 5;
      reserved "format";

      // The index of the partition this item belongs to
      uint64 partition_index = 6;

      // The start position in bytes to read from this item
      uint64 start = 7;

      // The length in bytes to read from this item
      uint64 length = 8;

      // Per-format read options.  The four messages below currently declare
      // no options; they only identify the format in the `file_format` oneof.
      message ParquetReadOptions {}
      message ArrowReadOptions {}
      message OrcReadOptions {}
      message DwrfReadOptions {}
      // Read options for delimiter-separated text files (e.g. CSV).
      message DelimiterSeparatedTextReadOptions {
        // Delimiter separated files may be compressed.  The reader should
        // autodetect this and decompress as needed.

        // The character(s) used to separate fields.  Common values are comma,
        // tab, and pipe.  Multiple characters are allowed.
        string field_delimiter = 1;
        // The maximum number of bytes to read from a single line.  If a line
        // exceeds this limit the resulting behavior is undefined.
        uint64 max_line_size = 2;
        // The character(s) used to quote strings.  Common values are single
        // and double quotation marks.
        string quote = 3;
        // The number of lines to skip at the beginning of the file.
        uint64 header_lines_to_skip = 4;
        // The character used to escape characters in strings.  Backslash is
        // a common value.  Note that a double quote mark can also be used as an
        // escape character but the external quotes should be removed first.
        string escape = 5;
        // If this value is encountered (including empty string), the resulting
        // value is null instead.  Leave unset to disable.  If this value is
        // provided, the effective schema of this file is comprised entirely of
        // nullable strings.  If not provided, the effective schema is instead
        // made up of non-nullable strings.
        // Declared `optional` so that an explicitly set empty string can be
        // told apart from "unset".
        optional string value_treated_as_null = 6;
      }

      // The format of the files along with options for reading those files.
      oneof file_format {
        ParquetReadOptions parquet = 9;
        ArrowReadOptions arrow = 10;
        OrcReadOptions orc = 11;
        // Opaque, extension-defined file format.
        google.protobuf.Any extension = 12;
        DwrfReadOptions dwrf = 13;
        DelimiterSeparatedTextReadOptions text = 14;
      }
    }
  }
}

// This operator allows to represent calculated expressions of fields (e.g., a+b). Direct/Emit are used to represent classical relational projections
message ProjectRel {
  // Fields shared by all relational operators; its Direct/Emit setting is what
  // expresses a classical relational projection (see the message comment).
  RelCommon common = 1;
  // The relation the expressions are evaluated against.
  Rel input = 2;
  // The calculated expressions (e.g., a+b).
  repeated Expression expressions = 3;
  // Extension payload; the type is declared in substrait/extensions/extensions.proto.
  substrait.extensions.AdvancedExtension advanced_extension = 10;
}

// The binary JOIN relational operator left-join-right, including various join types, a join condition and post_join_filter expression
message JoinRel {
  // Fields shared by all relational operators.
  RelCommon common = 1;
  // Left input of the join.
  Rel left = 2;
  // Right input of the join.
  Rel right = 3;
  // The join condition.
  Expression expression = 4;
  // NOTE(review): presumably a filter applied to the rows produced by the
  // join -- confirm against the spec.
  Expression post_join_filter = 5;

  // Which kind of join to perform.
  JoinType type = 6;

  // NOTE: the numeric values here differ from the `JoinType` enums nested in
  // HashJoinRel, MergeJoinRel and NestedLoopJoinRel (e.g. 6 is LEFT_ANTI here
  // but RIGHT_SEMI there), so values must not be converted by number.
  enum JoinType {
    // Proto3 default when `type` is not set.
    JOIN_TYPE_UNSPECIFIED = 0;
    JOIN_TYPE_INNER = 1;
    JOIN_TYPE_OUTER = 2;
    JOIN_TYPE_LEFT = 3;
    JOIN_TYPE_RIGHT = 4;
    JOIN_TYPE_LEFT_SEMI = 5;
    JOIN_TYPE_LEFT_ANTI = 6;
    JOIN_TYPE_LEFT_SINGLE = 7;
    JOIN_TYPE_RIGHT_SEMI = 8;
    JOIN_TYPE_RIGHT_ANTI = 9;
    JOIN_TYPE_RIGHT_SINGLE = 10;
    JOIN_TYPE_LEFT_MARK = 11;
    JOIN_TYPE_RIGHT_MARK = 12;
  }

  // Extension payload; the type is declared in substrait/extensions/extensions.proto.
  substrait.extensions.AdvancedExtension advanced_extension = 10;
}

// Cartesian product relational operator of two tables (left and right)
message CrossRel {
  // Fields shared by all relational operators.
  RelCommon common = 1;
  // Left input of the Cartesian product.
  Rel left = 2;
  // Right input of the Cartesian product.
  Rel right = 3;

  // Extension payload; the type is declared in substrait/extensions/extensions.proto.
  substrait.extensions.AdvancedExtension advanced_extension = 10;
}

// The relational operator representing LIMIT/OFFSET or TOP type semantics.
message FetchRel {
  // Fields shared by all relational operators.
  RelCommon common = 1;
  // The relation whose records are limited.
  Rel input = 2;
  // the offset expressed in number of records
  // (being a plain proto3 scalar, an unset offset reads as 0)
  int64 offset = 3;
  // the amount of records to return
  // use -1 to signal that ALL records should be returned
  // (being a plain proto3 scalar, an unset count reads as 0, not as -1)
  int64 count = 4;
  // Extension payload; the type is declared in substrait/extensions/extensions.proto.
  substrait.extensions.AdvancedExtension advanced_extension = 10;
}

// The relational operator representing a GROUP BY Aggregate
message AggregateRel {
  // Fields shared by all relational operators.
  RelCommon common = 1;

  // Input of the aggregation
  Rel input = 2;

  // A list of zero or more grouping sets that the aggregation measures should
  // be calculated for. There must be at least one grouping set if there are no
  // measures (but it can be the empty grouping set).
  repeated Grouping groupings = 3;

  // A list of one or more aggregate expressions along with an optional filter.
  // Required if there are no groupings.
  repeated Measure measures = 4;

  // A list of zero or more grouping expressions that grouping sets (i.e.,
  // `Grouping` messages in the `groupings` field) can reference. Each
  // expression in this list must be referred to by at least one
  // `Grouping.expression_references`.
  repeated Expression grouping_expressions = 5;

  // Extension payload; the type is declared in substrait/extensions/extensions.proto.
  substrait.extensions.AdvancedExtension advanced_extension = 10;

  // A single grouping set.
  message Grouping {
    // Deprecated in favor of `expression_references` below.
    repeated Expression grouping_expressions = 1 [deprecated = true];

    // A list of zero or more references to grouping expressions, i.e., indices
    // into the `grouping_expressions` list of the enclosing `AggregateRel`.
    repeated uint32 expression_references = 2;
  }

  // An aggregate function together with an optional filter on its input records.
  message Measure {
    // The aggregate function to compute.
    AggregateFunction measure = 1;

    // An optional boolean expression that acts to filter which records are
    // included in the measure. True means include this record for calculation
    // within the measure.
    // Helps to support SUM(<c>) FILTER(WHERE...) syntax without masking opportunities for optimization
    Expression filter = 2;
  }
}

// ConsistentPartitionWindowRel provides the ability to perform calculations across sets of rows
// that are related to the current query row. It can be used to execute window functions where
// all the windows share the same partitioning and ordering.
message ConsistentPartitionWindowRel {
  // Fields shared by all relational operators.
  RelCommon common = 1;
  // The relation the window functions are evaluated over.
  Rel input = 2;
  // The window functions to compute; they all share the partitioning and
  // ordering given by `partition_expressions` and `sorts`.
  repeated WindowRelFunction window_functions = 3;
  // Expressions defining the partitioning shared by all window functions.
  repeated Expression partition_expressions = 4;
  // Ordering shared by all window functions.
  repeated SortField sorts = 5;

  // Extension payload; the type is declared in substrait/extensions/extensions.proto.
  substrait.extensions.AdvancedExtension advanced_extension = 10;

  // This message mirrors the `WindowFunction` message but removes the fields defining the partition,
  // sorts, and bounds, since those must be consistent across the various functions in this rel.  Refer
  // to the `WindowFunction` message for a description of these fields.
  //
  // NOTE(review): the field numbers below are not in declaration order;
  // presumably they match the numbers of the corresponding fields of
  // `Expression.WindowFunction` -- confirm before adding new fields.
  message WindowRelFunction {
    uint32 function_reference = 1;

    repeated FunctionArgument arguments = 9;

    repeated FunctionOption options = 11;

    Type output_type = 7;

    AggregationPhase phase = 6;

    AggregateFunction.AggregationInvocation invocation = 10;

    Expression.WindowFunction.Bound lower_bound = 5;

    Expression.WindowFunction.Bound upper_bound = 4;

    Expression.WindowFunction.BoundsType bounds_type = 12;
  }
}

// The ORDER BY (or sorting) relational operator. Besides describing a base relation, it includes a list of fields to sort on
message SortRel {
  // Fields shared by all relational operators.
  RelCommon common = 1;
  // The relation to sort.
  Rel input = 2;
  // The fields to sort on.
  repeated SortField sorts = 3;
  // Extension payload; the type is declared in substrait/extensions/extensions.proto.
  substrait.extensions.AdvancedExtension advanced_extension = 10;
}

// The relational operator capturing simple FILTERs (as in the WHERE clause of SQL)
message FilterRel {
  // Fields shared by all relational operators.
  RelCommon common = 1;
  // The relation to filter.
  Rel input = 2;
  // The filter condition (as in the WHERE clause of SQL).
  Expression condition = 3;
  // Extension payload; the type is declared in substrait/extensions/extensions.proto.
  substrait.extensions.AdvancedExtension advanced_extension = 10;
}

// The relational set operators (intersection/union/etc..)
message SetRel {
  // Fields shared by all relational operators.
  RelCommon common = 1;
  // The first input is the primary input, the remaining are secondary
  // inputs.  There must be at least two inputs.
  repeated Rel inputs = 2;
  // The set operation to apply to the inputs.
  SetOp op = 3;
  // Extension payload; the type is declared in substrait/extensions/extensions.proto.
  substrait.extensions.AdvancedExtension advanced_extension = 10;

  // The available set operations.  Values are grouped by operation family
  // rather than listed in numeric order, which is why 7 and 8 appear out of
  // sequence; the numbers are part of the wire format and must not be changed.
  enum SetOp {
    // Proto3 default when `op` is not set.
    SET_OP_UNSPECIFIED = 0;
    SET_OP_MINUS_PRIMARY = 1;
    SET_OP_MINUS_PRIMARY_ALL = 7;
    SET_OP_MINUS_MULTISET = 2;
    SET_OP_INTERSECTION_PRIMARY = 3;
    SET_OP_INTERSECTION_MULTISET = 4;
    SET_OP_INTERSECTION_MULTISET_ALL = 8;
    SET_OP_UNION_DISTINCT = 5;
    SET_OP_UNION_ALL = 6;
  }
}

// Stub to support extension with a single input
message ExtensionSingleRel {
  // Fields shared by all relational operators.
  RelCommon common = 1;
  // The single input relation.
  Rel input = 2;
  // Opaque, extension-defined description of the operation.
  google.protobuf.Any detail = 3;
}

// Stub to support extension with zero inputs
message ExtensionLeafRel {
  // Fields shared by all relational operators.
  RelCommon common = 1;
  // Opaque, extension-defined description of the operation.
  google.protobuf.Any detail = 2;
}

// Stub to support extension with multiple inputs
message ExtensionMultiRel {
  // Fields shared by all relational operators.
  RelCommon common = 1;
  // The input relations.
  repeated Rel inputs = 2;
  // Opaque, extension-defined description of the operation.
  google.protobuf.Any detail = 3;
}

// A redistribution operation
message ExchangeRel {
  // Fields shared by all relational operators.
  RelCommon common = 1;
  // The relation whose records are redistributed.
  Rel input = 2;
  // NOTE(review): presumably the number of partitions the data is
  // redistributed into -- confirm against the spec.
  int32 partition_count = 3;
  // The targets that receive the redistributed data (see `ExchangeTarget`).
  repeated ExchangeTarget targets = 4;

  // the type of exchange used
  oneof exchange_kind {
    ScatterFields scatter_by_fields = 5;
    SingleBucketExpression single_target = 6;
    MultiBucketExpression multi_target = 7;
    RoundRobin round_robin = 8;
    Broadcast broadcast = 9;
  }

  // Extension payload; the type is declared in substrait/extensions/extensions.proto.
  substrait.extensions.AdvancedExtension advanced_extension = 10;

  // NOTE(review): presumably records are distributed based on the values of
  // the referenced fields -- confirm against the spec.
  message ScatterFields {
    repeated Expression.FieldReference fields = 1;
  }

  // Returns a single bucket number per record.
  message SingleBucketExpression {
    Expression expression = 1;
  }

  // Returns zero or more bucket numbers per record
  message MultiBucketExpression {
    Expression expression = 1;
    // NOTE(review): presumably indicates whether the returned bucket numbers
    // are limited to `partition_count` -- confirm against the spec.
    bool constrained_to_count = 2;
  }

  // Send all data to every target.
  message Broadcast {}

  // Route records to targets in round-robin fashion; see `exact` for whether
  // this must hold per record or only approximately.
  message RoundRobin {
    // whether the round robin behavior is required to be exact (per record) or
    // approximate. Defaults to approximate.
    bool exact = 1;
  }

  // The message to describe partition targets of an exchange
  message ExchangeTarget {
    // Describes the partition id(s) to send. If this is empty, all data is sent
    // to this target.
    repeated int32 partition_id = 1;

    // How the target is addressed: by URI or by an opaque, extension-defined
    // message.
    oneof target_type {
      string uri = 2;
      google.protobuf.Any extended = 3;
    }
  }
}

// Duplicates records by emitting one or more rows per input row.  The number of rows emitted per
// input row is the same for all input rows.
//
// In addition to a field being emitted per input field an extra int64 field is emitted which
// contains a zero-indexed ordinal corresponding to the duplicate definition.
message ExpandRel {
  // Fields shared by all relational operators.
  RelCommon common = 1;
  // The relation whose records are duplicated.
  Rel input = 2;
  // There should be one definition here for each input field.  Any fields beyond the provided
  // definitions will be emitted as is (as if a consistent_field record with an identity
  // expression was provided).
  repeated ExpandField fields = 4;

  // Definition of a single output field; exactly how it behaves across
  // duplicates depends on which member of `field_type` is set.
  message ExpandField {
    oneof field_type {
      // Field that switches output based on which duplicate is being output.  Every
      // switching_field should contain the same number of duplicates (so that the output rows
      // are of consistent size and type).  If there are not enough switching field definitions
      // to match the other field definitions NULL will be returned to fill the extras.
      SwitchingField switching_field = 2;

      // Field that outputs the same value no matter which duplicate is being output.  Equivalent
      // to a switching_field that lists the same expression multiple times.
      Expression consistent_field = 3;
    }
  }

  // The expressions a switching field evaluates, one per duplicate.
  message SwitchingField {
    // All duplicates must return the same type class but may differ in nullability.  The effective
    // type of the output field will be nullable if any of the duplicate expressions are nullable.
    repeated Expression duplicates = 1;
  }
}

// A relation with output field names.
//
// This is for use at the root of a `Rel` tree.
message RelRoot {
  // The relation at the root of the `Rel` tree
  Rel input = 1;
  // Output field names, listed in depth-first order
  repeated string names = 2;
}

// A relation (used internally in a plan)
message Rel {
  // The concrete relational operator.  Being a oneof, at most one member is
  // set.  Members are grouped by kind rather than listed in numeric order; the
  // field numbers are part of the wire format and must not be changed.
  oneof rel_type {
    ReadRel read = 1;
    FilterRel filter = 2;
    FetchRel fetch = 3;
    AggregateRel aggregate = 4;
    SortRel sort = 5;
    JoinRel join = 6;
    ProjectRel project = 7;
    SetRel set = 8;
    ExtensionSingleRel extension_single = 9;
    ExtensionMultiRel extension_multi = 10;
    ExtensionLeafRel extension_leaf = 11;
    CrossRel cross = 12;
    ReferenceRel reference = 21;
    WriteRel write = 19;
    DdlRel ddl = 20;
    // Physical relations
    HashJoinRel hash_join = 13;
    MergeJoinRel merge_join = 14;
    NestedLoopJoinRel nested_loop_join = 18;
    ConsistentPartitionWindowRel window = 17;
    ExchangeRel exchange = 15;
    ExpandRel expand = 16;
  }
}

// A base object for writing (e.g., a table or a view).
message NamedObjectWrite {
  // The list of string is used to represent namespacing (e.g., mydb.mytable).
  // This assumes shared catalog between systems exchanging a message.
  repeated string names = 1;
  // Extension payload; the type is declared in substrait/extensions/extensions.proto.
  substrait.extensions.AdvancedExtension advanced_extension = 10;
}

// A stub type that can be used to extend/introduce new table types outside
// the specification.
message ExtensionObject {
  // Opaque, extension-defined description of the object.
  google.protobuf.Any detail = 1;
}

// The operator describing a DDL operation (create, create-or-replace, alter,
// drop) on a table or a view, together with the resulting schema and defaults.
message DdlRel {
  // Definition of which type of object we are operating on
  oneof write_type {
    NamedObjectWrite named_object = 1;
    ExtensionObject extension_object = 2;
  }

  // The columns that will be modified (representing after-image of a schema change)
  NamedStruct table_schema = 3;
  // The default values for the columns (representing after-image of a schema change)
  // E.g., in case of an ALTER TABLE that changes some of the column default values, we expect
  // the table_defaults Struct to report a full list of default values reflecting the result of applying
  // the ALTER TABLE operator successfully
  Expression.Literal.Struct table_defaults = 4;

  // Which type of object we operate on
  DdlObject object = 5;

  // The type of operation to perform
  DdlOp op = 6;

  // The body of the CREATE VIEW
  Rel view_definition = 7;
  // Fields shared by all relational operators.
  RelCommon common = 8;

  // The kinds of object a DDL operation can target.
  enum DdlObject {
    // Proto3 default when `object` is not set.
    DDL_OBJECT_UNSPECIFIED = 0;
    // A Table object in the system
    DDL_OBJECT_TABLE = 1;
    // A View object in the system
    DDL_OBJECT_VIEW = 2;
  }

  // The DDL operations that can be performed.
  enum DdlOp {
    // Proto3 default when `op` is not set.
    DDL_OP_UNSPECIFIED = 0;
    // A create operation (for any object)
    DDL_OP_CREATE = 1;
    // A create operation if the object does not exist, or replaces it (equivalent to a DROP + CREATE) if the object already exists
    DDL_OP_CREATE_OR_REPLACE = 2;
    // An operation that modifies the schema (e.g., column names, types, default values) for the target object
    DDL_OP_ALTER = 3;
    // An operation that removes an object from the system
    DDL_OP_DROP = 4;
    // An operation that removes an object from the system (without throwing an exception if the object did not exist)
    DDL_OP_DROP_IF_EXIST = 5;
  }
  //TODO add PK/constraints/indexes/etc..?
}

// The operator that modifies the content of a database (operates on 1 table at a time, but record-selection/source can be
// based on joining of multiple tables).
message WriteRel {
  // Definition of which TABLE we are operating on
  oneof write_type {
    NamedObjectWrite named_table = 1;
    ExtensionObject extension_table = 2;
  }

  // The schema of the table (must align with Rel input (e.g., number of leaf fields must match))
  NamedStruct table_schema = 3;

  // The type of operation to perform
  WriteOp op = 4;

  // The relation that determines the records to add/remove/modify
  // the schema must match with table_schema. Default values must be explicitly stated
  // in a ProjectRel at the top of the input. The match must also
  // occur in case of DELETE to ensure multi-engine plans are unequivocal.
  Rel input = 5;

  // Output mode determines what is the output of executing this rel
  OutputMode output = 6;
  // Fields shared by all relational operators.
  RelCommon common = 7;

  // The write operations that can be performed.
  enum WriteOp {
    // Proto3 default when `op` is not set.
    WRITE_OP_UNSPECIFIED = 0;
    // The insert of new records in a table
    WRITE_OP_INSERT = 1;
    // The removal of records from a table
    WRITE_OP_DELETE = 2;
    // The modification of existing records within a table
    WRITE_OP_UPDATE = 3;
    // The creation of a new table, and the insert of new records in the table
    WRITE_OP_CTAS = 4;
  }

  // What the operator returns after performing the write.
  enum OutputMode {
    // Proto3 default when `output` is not set.
    OUTPUT_MODE_UNSPECIFIED = 0;
    // return no records at all
    OUTPUT_MODE_NO_OUTPUT = 1;
    // this mode makes the operator return all the records INSERTED/DELETED/UPDATED by the operator.
    // The operator returns the AFTER-image of any change. This can be further manipulated by upstream
    // operators (e.g., returning the typical "count of modified records").
    // For scenarios in which the BEFORE image is required, the user must implement a spool (via references to
    // subplans in the body of the Rel input) and return those with another PlanRel.relations.
    OUTPUT_MODE_MODIFIED_RECORDS = 2;
  }
}

// Hash joins and merge joins are a specialization of the general join where the join
// expression is a series of comparisons between fields that are ANDed together.  The
// behavior of each comparison is configurable through `ComparisonType`.
message ComparisonJoinKey {
  // The key to compare from the left table
  Expression.FieldReference left = 1;
  // The key to compare from the right table
  Expression.FieldReference right = 2;
  // Describes how to compare the two keys
  ComparisonType comparison = 3;

  // Most joins will use one of the following behaviors.  To avoid the complexity
  // of a function lookup we define the common behaviors here
  enum SimpleComparisonType {
    // Proto3 default when no simple comparison type is set.
    SIMPLE_COMPARISON_TYPE_UNSPECIFIED = 0;
    // Returns true only if both values are equal and not null
    SIMPLE_COMPARISON_TYPE_EQ = 1;
    // Returns true if both values are equal and not null
    // Returns true if both values are null
    // Returns false if one value is null and the other value is not null
    //
    // This can be expressed as a = b OR (isnull(a) AND isnull(b))
    SIMPLE_COMPARISON_TYPE_IS_NOT_DISTINCT_FROM = 2;
    // Returns true if both values are equal and not null
    // Returns true if either value is null
    //
    // This can be expressed as a = b OR isnull(a = b)
    SIMPLE_COMPARISON_TYPE_MIGHT_EQUAL = 3;
  }

  // Describes how the relation should consider if two rows are a match
  message ComparisonType {
    // Either a predefined simple comparison or a reference to a custom
    // comparison function.
    oneof inner_type {
      // One of the simple comparison behaviors is used
      SimpleComparisonType simple = 1;
      // A custom comparison behavior is used.  This can happen, for example, when using
      // collations, where we might want to do something like a case-insensitive comparison.
      //
      // This must be a binary function with a boolean return type
      uint32 custom_function_reference = 2;
    }
  }
}

// The hash equijoin join operator will build a hash table out of the right input based on a set of join keys.
// It will then probe that hash table for incoming inputs, finding matches.
//
// Two rows are a match if the comparison function returns true for all keys
message HashJoinRel {
  // Fields shared by all relational operators.
  RelCommon common = 1;
  // Left input of the join.
  Rel left = 2;
  // Right input of the join; the hash table is built from this input.
  Rel right = 3;
  // These fields are deprecated in favor of `keys`.  If they are set then
  // the two lists (left_keys and right_keys) must have the same length and
  // the comparison function is considered to be
  // `ComparisonJoinKey.SimpleComparisonType` `SIMPLE_COMPARISON_TYPE_EQ`
  repeated Expression.FieldReference left_keys = 4 [deprecated = true];
  repeated Expression.FieldReference right_keys = 5 [deprecated = true];
  // One or more keys to join on.  The relation is invalid if this is empty
  // (unless the deprecated left_keys/right_keys fields are being used).
  //
  // If a custom comparison function is used then it must be consistent with
  // the hash function used for the keys.
  //
  // In other words, the hash function must return the same hash code when the
  // comparison returns true.  For example, if the comparison function is
  // "equals ignoring case" then the hash function must return the same hash
  // code for strings that differ only by case.  Note: the hash function is not
  // specified here.  It is the responsibility of the consumer to find an appropriate
  // hash function for a given comparison function or to reject the plan if it cannot
  // do so.
  repeated ComparisonJoinKey keys = 8;
  // NOTE(review): presumably a filter applied to the rows produced by the
  // join -- confirm against the spec.
  Expression post_join_filter = 6;

  // Which kind of join to perform.
  JoinType type = 7;

  // NOTE: the numeric values here match the `JoinType` enums nested in
  // MergeJoinRel and NestedLoopJoinRel but differ from `JoinRel.JoinType`
  // (e.g. 6 is RIGHT_SEMI here but LEFT_ANTI there).
  enum JoinType {
    // Proto3 default when `type` is not set.
    JOIN_TYPE_UNSPECIFIED = 0;
    JOIN_TYPE_INNER = 1;
    JOIN_TYPE_OUTER = 2;
    JOIN_TYPE_LEFT = 3;
    JOIN_TYPE_RIGHT = 4;
    JOIN_TYPE_LEFT_SEMI = 5;
    JOIN_TYPE_RIGHT_SEMI = 6;
    JOIN_TYPE_LEFT_ANTI = 7;
    JOIN_TYPE_RIGHT_ANTI = 8;
    JOIN_TYPE_LEFT_SINGLE = 9;
    JOIN_TYPE_RIGHT_SINGLE = 10;
    JOIN_TYPE_LEFT_MARK = 11;
    JOIN_TYPE_RIGHT_MARK = 12;
  }

  // Extension payload; the type is declared in substrait/extensions/extensions.proto.
  substrait.extensions.AdvancedExtension advanced_extension = 10;
}

// The merge equijoin does a join by taking advantage of two sets that are sorted on the join keys.
// This allows the join operation to be done in a streaming fashion.
message MergeJoinRel {
  // Fields shared by all relational operators.
  RelCommon common = 1;
  // Left input of the join.
  Rel left = 2;
  // Right input of the join.
  Rel right = 3;
  // These fields are deprecated in favor of `keys`.  If they are set then
  // the two lists (left_keys and right_keys) must have the same length and
  // the comparison function is considered to be
  // `ComparisonJoinKey.SimpleComparisonType` `SIMPLE_COMPARISON_TYPE_EQ`
  repeated Expression.FieldReference left_keys = 4 [deprecated = true];
  repeated Expression.FieldReference right_keys = 5 [deprecated = true];
  // One or more keys to join on.  The relation is invalid if this is empty
  // (unless the deprecated left_keys/right_keys fields are being used).
  //
  // If a custom comparison function is used then it must be consistent with
  // the ordering of the input data.  For example, if the comparison function
  // is "<" then we generally expect the data to be sorted in ascending order.
  //
  // If the comparison function is something like "less than ignoring case" then
  // the data should be sorted appropriately (e.g. both "A" and "a" should come
  // before "b")
  //
  // The sort order is not specified here.  It is typically the responsibility of
  // the producer to ensure the plan sorts the data if needed (although the consumer
  // is free to do so as well).  If possible, the consumer should verify the sort
  // order and reject invalid plans.
  repeated ComparisonJoinKey keys = 8;
  // NOTE(review): presumably a filter applied to the rows produced by the
  // join -- confirm against the spec.
  Expression post_join_filter = 6;

  // Which kind of join to perform.
  JoinType type = 7;

  // NOTE: the numeric values here match the `JoinType` enums nested in
  // HashJoinRel and NestedLoopJoinRel but differ from `JoinRel.JoinType`
  // (e.g. 6 is RIGHT_SEMI here but LEFT_ANTI there).
  enum JoinType {
    // Proto3 default when `type` is not set.
    JOIN_TYPE_UNSPECIFIED = 0;
    JOIN_TYPE_INNER = 1;
    JOIN_TYPE_OUTER = 2;
    JOIN_TYPE_LEFT = 3;
    JOIN_TYPE_RIGHT = 4;
    JOIN_TYPE_LEFT_SEMI = 5;
    JOIN_TYPE_RIGHT_SEMI = 6;
    JOIN_TYPE_LEFT_ANTI = 7;
    JOIN_TYPE_RIGHT_ANTI = 8;
    JOIN_TYPE_LEFT_SINGLE = 9;
    JOIN_TYPE_RIGHT_SINGLE = 10;
    JOIN_TYPE_LEFT_MARK = 11;
    JOIN_TYPE_RIGHT_MARK = 12;
  }

  // Extension payload; the type is declared in substrait/extensions/extensions.proto.
  substrait.extensions.AdvancedExtension advanced_extension = 10;
}

// The nested loop join (NLJ) operator will hold the entire right input and iterate over it using the
// left input, evaluating the join expression on the Cartesian product of all rows.
message NestedLoopJoinRel {
  // Fields shared by all relational operators.
  RelCommon common = 1;
  // Left input; used to iterate over the held right input.
  Rel left = 2;
  // Right input; held in its entirety by the operator.
  Rel right = 3;
  // The join expression.
  // optional, defaults to true (a cartesian join)
  Expression expression = 4;

  // Which kind of join to perform.
  JoinType type = 5;

  // NOTE: the numeric values here match the `JoinType` enums nested in
  // HashJoinRel and MergeJoinRel but differ from `JoinRel.JoinType`
  // (e.g. 6 is RIGHT_SEMI here but LEFT_ANTI there).
  enum JoinType {
    // Proto3 default when `type` is not set.
    JOIN_TYPE_UNSPECIFIED = 0;
    JOIN_TYPE_INNER = 1;
    JOIN_TYPE_OUTER = 2;
    JOIN_TYPE_LEFT = 3;
    JOIN_TYPE_RIGHT = 4;
    JOIN_TYPE_LEFT_SEMI = 5;
    JOIN_TYPE_RIGHT_SEMI = 6;
    JOIN_TYPE_LEFT_ANTI = 7;
    JOIN_TYPE_RIGHT_ANTI = 8;
    JOIN_TYPE_LEFT_SINGLE = 9;
    JOIN_TYPE_RIGHT_SINGLE = 10;
    JOIN_TYPE_LEFT_MARK = 11;
    JOIN_TYPE_RIGHT_MARK = 12;
  }

  // Extension payload; the type is declared in substrait/extensions/extensions.proto.
  substrait.extensions.AdvancedExtension advanced_extension = 10;
}

// The argument of a function
message FunctionArgument {
  // The kind of argument.  Being a oneof, at most one member is set.
  oneof arg_type {
    // An enum argument, given as a string.  This replaces the deprecated
    // `Expression.Enum` for passing enum literals to functions.
    string enum = 1;
    // A type argument.
    Type type = 2;
    // A value argument, given as an expression.
    Expression value = 3;
  }
}

// An optional function argument.  Typically used for specifying behavior in
// invalid or corner cases. Carried in the `options` list of a function call
// message (e.g. Expression.ScalarFunction.options).
message FunctionOption {
  // Name of the option to set. If the consumer does not recognize the
  // option, it must reject the plan. The name is matched case-insensitively
  // with option names defined for the function.
  string name = 1;

  // List of behavior options allowed by the producer, in order of preference.
  // At least one must be specified; to leave an option unspecified, simply
  // don't add an entry for it to the function's `options` list. The consumer
  // must use the first entry from this list that it supports. If the consumer
  // supports none of the specified entries, it must reject the plan. Each
  // entry is matched case-insensitively and must match one of the option
  // values defined for the option.
  repeated string preference = 2;
}

message Expression {
  // The kind of expression; at most one member is set. The referenced message
  // types are nested inside Expression.
  oneof rex_type {
    // A constant value.
    Literal literal = 1;
    // A reference to (part of) a field; see FieldReference.
    FieldReference selection = 2;
    // A scalar function call.
    ScalarFunction scalar_function = 3;
    // A window function call.
    WindowFunction window_function = 5;
    IfThen if_then = 6;
    SwitchExpression switch_expression = 7;
    SingularOrList singular_or_list = 8;
    MultiOrList multi_or_list = 9;
    Cast cast = 11;
    // A subquery (scalar, IN, EXISTS/UNIQUE or ANY/ALL).
    Subquery subquery = 12;
    // Dynamically constructs a nested (struct/list/map) value.
    Nested nested = 13;

    // deprecated: enum literals are only sensible in the context of
    // function arguments, for which FunctionArgument should now be
    // used
    Enum enum = 10 [deprecated = true];
  }

  // Deprecated enum literal, only reachable through the deprecated `enum`
  // member of rex_type. Enum arguments are now bound with
  // FunctionArgument.enum, which is a plain string.
  message Enum {
    option deprecated = true;

    oneof enum_kind {
      // The enum option, given by name.
      string specified = 1;
      // Marker indicating that no option was chosen.
      Empty unspecified = 2;
    }

    // Field-less marker message used by `unspecified`.
    message Empty {
      option deprecated = true;
    }
  }

  // A constant value. The literal_type oneof selects which kind of value is
  // carried; `nullable` and `type_variation_reference` refine its type.
  message Literal {
    oneof literal_type {
      bool boolean = 1;
      // i8 and i16 values are carried in int32 fields.
      int32 i8 = 2;
      int32 i16 = 3;
      int32 i32 = 5;
      int64 i64 = 7;
      float fp32 = 10;
      double fp64 = 11;
      string string = 12;
      bytes binary = 13;
      // Timestamp in units of microseconds since the UNIX epoch.
      // Deprecated in favor of `precision_timestamp`
      int64 timestamp = 14 [deprecated = true];
      // Date in units of days since the UNIX epoch.
      int32 date = 16;
      // Time in units of microseconds past midnight
      int64 time = 17;
      IntervalYearToMonth interval_year_to_month = 19;
      IntervalDayToSecond interval_day_to_second = 20;
      IntervalCompound interval_compound = 36;
      string fixed_char = 21;
      VarChar var_char = 22;
      bytes fixed_binary = 23;
      Decimal decimal = 24;
      // Timestamp with explicit sub-second precision and unspecified timezone.
      PrecisionTimestamp precision_timestamp = 34;
      // Timestamp with explicit sub-second precision, expressed in UTC.
      PrecisionTimestamp precision_timestamp_tz = 35;
      Struct struct = 25;
      Map map = 26;
      // Timestamp in units of microseconds since the UNIX epoch.
      // Deprecated in favor of `precision_timestamp_tz`
      int64 timestamp_tz = 27 [deprecated = true];
      bytes uuid = 28;
      Type null = 29; // a typed null literal
      List list = 30;
      // An empty list; the list type is given explicitly because there are no
      // values to take it from.
      Type.List empty_list = 31;
      // An empty map; the map type is given explicitly because there are no
      // entries to take it from.
      Type.Map empty_map = 32;
      UserDefined user_defined = 33;
    }

    // Whether the literal_type above should be treated as a nullable type.
    // Applies to all members of the literal_type oneof EXCEPT:
    //  * Type null             (must be nullable by definition)
    //  * Type.List empty_list  (use Type.List::nullability)
    //  * Type.Map empty_map    (use Type.Map::nullability)
    bool nullable = 50;

    // Optionally points to a type_variation_anchor defined in this plan.
    // Applies to all members of the literal_type oneof other than the typed
    // null (which should directly declare the type variation).
    uint32 type_variation_reference = 51;

    // A varchar literal.
    message VarChar {
      // The string value.
      string value = 1;
      // NOTE(review): presumably the declared (maximum) length of the varchar
      // type rather than the length of `value` -- confirm.
      uint32 length = 2;
    }

    message Decimal {
      // Little-endian two's-complement integer representation of the complete
      // value (ignoring precision). Always 16 bytes in length.
      bytes value = 1;
      // The maximum number of digits allowed in the value.
      // The maximum precision is 38.
      int32 precision = 2;
      // Declared scale of the decimal literal.
      int32 scale = 3;
    }

    // Value type shared by `precision_timestamp` and `precision_timestamp_tz`.
    message PrecisionTimestamp {
      // Sub-second precision, 0 means the value given is in seconds, 3 is milliseconds, 6 microseconds, 9 is nanoseconds
      int32 precision = 1;
      // Time passed since 1970-01-01 00:00:00.000000 in UTC for PrecisionTimestampTZ and unspecified timezone for PrecisionTimestamp
      int64 value = 2;
    }

    // A map literal, given as a list of key/value literal pairs.
    message Map {
      message KeyValue {
        Literal key = 1;
        Literal value = 2;
      }

      repeated KeyValue key_values = 1;
    }

    message IntervalYearToMonth {
      int32 years = 1;
      int32 months = 2;
    }

    message IntervalDayToSecond {
      int32 days = 1;
      int32 seconds = 2;

      // Consumers should expect either (microseconds) to be set or (precision and subseconds) to be set
      oneof precision_mode {
        int32 microseconds = 3 [deprecated = true]; // use precision and subseconds below, they cover and replace microseconds.
        // Sub-second precision, 0 means the value given is in seconds, 3 is milliseconds, 6 microseconds, 9 is nanoseconds. Should be used with subseconds below.
        int32 precision = 4;
      }

      // The number of fractional seconds using 1e(-precision) units. Should only be used with precision field, not microseconds.
      int64 subseconds = 5;
    }

    // An interval made up of a year-to-month part and a day-to-second part.
    message IntervalCompound {
      IntervalYearToMonth interval_year_to_month = 1;
      IntervalDayToSecond interval_day_to_second = 2;
    }

    message Struct {
      // A possibly heterogeneously typed list of literals
      repeated Literal fields = 1;
    }

    message List {
      // A homogeneously typed list of literals
      repeated Literal values = 1;
    }

    message UserDefined {
      // Points to a type_anchor defined in this plan.
      uint32 type_reference = 1;

      // The parameters to be bound to the type class, if the type class is
      // parameterizable.
      repeated Type.Parameter type_parameters = 3;

      // A user-defined literal can be encoded in one of two ways.
      oneof val {
        // The value of the literal, serialized using some type-specific protobuf message
        google.protobuf.Any value = 2;
        // The value of the literal, serialized using the structure definition in its declaration
        Literal.Struct struct = 4;
      }
    }
  }

  // Expression to dynamically construct nested types.
  message Nested {
    // Whether the returned nested type is nullable.
    bool nullable = 1;

    // Optionally points to a type_variation_anchor defined in this plan for
    // the returned nested type.
    uint32 type_variation_reference = 2;

    // Which kind of nested value is constructed.
    oneof nested_type {
      Struct struct = 3;
      List list = 4;
      Map map = 5;
    }

    message Map {
      message KeyValue {
        // Mandatory key/value expressions.
        Expression key = 1;
        Expression value = 2;
      }

      // One or more key-value pairs. To specify an empty map, use
      // Literal.empty_map (otherwise type information would be missing).
      repeated KeyValue key_values = 1;
    }

    message Struct {
      // A list of zero or more possibly heterogeneously-typed expressions
      // that form the struct fields.
      repeated Expression fields = 1;
    }

    message List {
      // A homogeneously-typed list of one or more expressions that form the
      // list entries. To specify an empty list, use Literal.empty_list
      // (otherwise type information would be missing).
      repeated Expression values = 1;
    }
  }

  // A scalar function call.
  message ScalarFunction {
    // Points to a function_anchor defined in this plan, which must refer
    // to a scalar function in the associated YAML file. Required; avoid
    // using anchor/reference zero.
    // NOTE(review): WindowFunction and AggregateFunction instead state that 0
    // is a valid anchor/reference -- confirm which wording is intended.
    uint32 function_reference = 1;

    // The arguments to be bound to the function. This must have exactly the
    // number of arguments specified in the function definition, and the
    // argument types must also match exactly:
    //
    //  - Value arguments must be bound using FunctionArgument.value, and
    //    the expression in that must yield a value of a type that a function
    //    overload is defined for.
    //  - Type arguments must be bound using FunctionArgument.type.
    //  - Enum arguments must be bound using FunctionArgument.enum, with a
    //    string that case-insensitively matches one of the allowed options.
    repeated FunctionArgument arguments = 4;

    // Options to specify behavior for corner cases, or leave behavior
    // unspecified if the consumer does not need specific behavior in these
    // cases.
    repeated FunctionOption options = 5;

    // Must be set to the return type of the function, exactly as derived
    // using the declaration in the extension.
    Type output_type = 3;

    // Deprecated; use arguments instead.
    repeated Expression args = 2 [deprecated = true];
  }

  // A window function call.
  message WindowFunction {
    // Points to a function_anchor defined in this plan. The function must be
    // either:
    //  - a window function, or
    //  - an aggregate function.
    //
    // An aggregate function referenced here should be treated as a window
    // function with Window Type STREAMING
    //
    // Required; 0 is considered to be a valid anchor/reference.
    uint32 function_reference = 1;

    // The arguments to be bound to the function. This must have exactly the
    // number of arguments specified in the function definition, and the
    // argument types must also match exactly:
    //
    //  - Value arguments must be bound using FunctionArgument.value, and
    //    the expression in that must yield a value of a type that a function
    //    overload is defined for.
    //  - Type arguments must be bound using FunctionArgument.type, and a
    //    function overload must be defined for that type.
    //  - Enum arguments must be bound using FunctionArgument.enum, with a
    //    string that case-insensitively matches one of the allowed options.
    repeated FunctionArgument arguments = 9;

    // Options to specify behavior for corner cases, or leave behavior
    // unspecified if the consumer does not need specific behavior in these
    // cases.
    repeated FunctionOption options = 11;

    // Must be set to the return type of the function, exactly as derived
    // using the declaration in the extension.
    Type output_type = 7;

    // Describes which part of the window function to perform within the
    // context of distributed algorithms. Required. Must be set to
    // INITIAL_TO_RESULT for window functions that are not decomposable.
    AggregationPhase phase = 6;

    // If specified, the records that are part of the window defined by
    // upper_bound and lower_bound are ordered according to this list
    // before they are aggregated. The first sort field has the highest
    // priority; only if a sort field determines two records to be equivalent
    // is the next field queried. This field is optional, and is only allowed
    // if the window function is defined to support sorting.
    repeated SortField sorts = 3;

    // Specifies whether equivalent records are merged before being aggregated.
    // Optional, defaults to AGGREGATION_INVOCATION_ALL.
    AggregateFunction.AggregationInvocation invocation = 10;

    // When one or more partition expressions are specified, two records are
    // considered to be in the same partition if and only if these expressions
    // yield an equal record of values for both. When computing the window
    // function, only the subset of records within the bounds that are also in
    // the same partition as the current record are aggregated.
    repeated Expression partitions = 2;

    // Defines the bounds type: ROWS or RANGE. See BoundsType for how each one
    // interprets lower_bound and upper_bound.
    BoundsType bounds_type = 12;

    // Defines the record relative to the current record from which the window
    // extends. The bound is inclusive. If the lower bound indexes a record
    // greater than the upper bound, TODO (null range/no records passed?
    // wrapping around as if lower/upper were swapped? error? null?).
    // Optional; defaults to the start of the partition.
    Bound lower_bound = 5;

    // Defines the record relative to the current record up to which the window
    // extends. The bound is inclusive. If the upper bound indexes a record
    // less than the lower bound, TODO (null range/no records passed?
    // wrapping around as if lower/upper were swapped? error? null?).
    // Optional; defaults to the end of the partition.
    Bound upper_bound = 4;

    // Deprecated; use arguments instead.
    repeated Expression args = 8 [deprecated = true];

    // How lower_bound and upper_bound are interpreted.
    enum BoundsType {
      // Proto3 default; this is what `bounds_type` reads as when left unset.
      BOUNDS_TYPE_UNSPECIFIED = 0;
      // The lower and upper bound specify how many rows before and after the current row
      // the window should extend.
      BOUNDS_TYPE_ROWS = 1;
      // The lower and upper bound describe a range of values.  The window should include all rows
      // where the value of the ordering column is greater than or equal to (current_value - lower bound)
      // and less than or equal to (current_value + upper bound).  This bounds type is only valid if there
      // is a single ordering column.
      BOUNDS_TYPE_RANGE = 2;
    }

    // Defines one of the two boundaries for the window of a window function.
    message Bound {
      // Defines that the bound extends this far back from the current record.
      message Preceding {
        // A strictly positive integer specifying the number of records that
        // the window extends back from the current record. Required. Use
        // CurrentRow for offset zero and Following for negative offsets.
        int64 offset = 1;
      }

      // Defines that the bound extends this far ahead of the current record.
      message Following {
        // A strictly positive integer specifying the number of records that
        // the window extends ahead of the current record. Required. Use
        // CurrentRow for offset zero and Preceding for negative offsets.
        int64 offset = 1;
      }

      // Defines that the bound extends to or from the current record.
      message CurrentRow {}

      // Defines an "unbounded bound": for lower bounds this means the start
      // of the partition, and for upper bounds this means the end of the
      // partition.
      message Unbounded {}

      // Which kind of bound this is.
      oneof kind {
        // The bound extends some number of records behind the current record.
        Preceding preceding = 1;

        // The bound extends some number of records ahead of the current
        // record.
        Following following = 2;

        // The bound extends to the current record.
        CurrentRow current_row = 3;

        // The bound extends to the start of the partition or the end of the
        // partition, depending on whether this represents the upper or lower
        // bound.
        Unbounded unbounded = 4;
      }
    }
  }

  // A chain of if/then clauses with a fallback expression.
  message IfThen {
    // A list of one or more IfClauses
    repeated IfClause ifs = 1;
    // The returned Expression if no IfClauses are satisfied
    Expression else = 2;

    // A single condition/result pair.
    message IfClause {
      // The condition of this clause.
      Expression if = 1;
      // NOTE(review): presumably the expression returned when `if` is
      // satisfied -- confirm.
      Expression then = 2;
    }
  }

  // A cast expression.
  // NOTE(review): presumably converts `input` to `type`; the comments below
  // are inferred from the field names -- confirm against the specification.
  message Cast {
    // Presumably the type to cast to.
    Type type = 1;
    // Presumably the expression whose value is cast.
    Expression input = 2;
    // Selects what happens when the cast fails; see FailureBehavior.
    FailureBehavior failure_behavior = 3;

    enum FailureBehavior {
      // Proto3 default; this is what `failure_behavior` reads as when unset.
      FAILURE_BEHAVIOR_UNSPECIFIED = 0;
      FAILURE_BEHAVIOR_RETURN_NULL = 1;
      FAILURE_BEHAVIOR_THROW_EXCEPTION = 2;
    }
  }

  // A switch-style expression.
  // NOTE(review): presumably `match` is compared against the `if` literal of
  // each IfValue in order, yielding the corresponding `then`, with `else` as
  // the fallback -- confirm against the specification.
  message SwitchExpression {
    // The expression being switched on. (Declared first but numbered 3.)
    Expression match = 3;
    // The candidate literal/result pairs.
    repeated IfValue ifs = 1;
    // The fallback expression.
    Expression else = 2;

    // A single literal/result pair.
    message IfValue {
      Literal if = 1;
      Expression then = 2;
    }
  }

  // A single expression together with a list of candidate expressions.
  // NOTE(review): presumably tests whether `value` equals any entry of
  // `options` (an IN-list) -- confirm against the specification.
  message SingularOrList {
    Expression value = 1;
    repeated Expression options = 2;
  }

  // A list of expressions together with a list of candidate records.
  // NOTE(review): presumably the multi-column counterpart of SingularOrList,
  // testing whether the tuple `value` equals any Record in `options` --
  // confirm against the specification.
  message MultiOrList {
    repeated Expression value = 1;
    repeated Record options = 2;

    // One candidate, given as a list of expressions.
    message Record {
      repeated Expression fields = 1;
    }
  }

  // A function whose implementation is embedded in the plan as bytes, either
  // as a Python pickle or as WebAssembly.
  // NOTE(review): this message is not a member of Expression.rex_type --
  // confirm whether and where it is meant to be used.
  message EmbeddedFunction {
    // The arguments passed to the function.
    repeated Expression arguments = 1;
    // The type of the function's result.
    Type output_type = 2;
    // The embedded implementation.
    oneof kind {
      PythonPickleFunction python_pickle_function = 3;
      WebAssemblyFunction web_assembly_function = 4;
    }

    message PythonPickleFunction {
      // The function, as bytes (presumably pickled -- confirm).
      bytes function = 1;
      // NOTE(review): presumably things that must be available before the
      // function can run; the expected format is not stated here.
      repeated string prerequisite = 2;
    }

    message WebAssemblyFunction {
      // The WebAssembly payload, as bytes.
      bytes script = 1;
      // NOTE(review): presumably things that must be available before the
      // function can run; the expected format is not stated here.
      repeated string prerequisite = 2;
    }
  }

  // A way to reference the inner property of a complex record. Can reference
  // either a map key by literal, a struct field by the ordinal position of
  // the desired field or a particular element in an array. Supports
  // expressions that would roughly translate to something similar to:
  // a.b[2].c['my_map_key'].x where a,b,c and x are struct field references
  // (ordinalized in the internal representation here), [2] is a list offset
  // and ['my_map_key'] is a reference into a map field.
  //
  // Segments are chained through the `child` field of each alternative, so a
  // path is represented as a linked list of segments.
  message ReferenceSegment {
    // Which kind of step this segment takes.
    oneof reference_type {
      MapKey map_key = 1;
      StructField struct_field = 2;
      ListElement list_element = 3;
    }

    message MapKey {
      // Literal-based reference to a specific possible value in the map.
      Literal map_key = 1;

      // Optional child segment
      ReferenceSegment child = 2;
    }

    message StructField {
      // Zero-indexed ordinal position of the field in the struct.
      int32 field = 1;

      // Optional child segment
      ReferenceSegment child = 2;
    }

    message ListElement {
      // Zero-indexed ordinal position of the element in the list.
      int32 offset = 1;

      // Optional child segment
      ReferenceSegment child = 2;
    }
  }

  // A reference that takes an existing subtype and selectively removes fields
  // from it. For example, one might initially have an inner struct with 100
  // fields but a particular operation needs to interact with only 2 of
  // those 100 fields. In this situation, one would use a mask expression to
  // eliminate the 98 fields that are not relevant to the rest of the operation
  // pipeline.
  //
  // Note that this does not fundamentally alter the structure of data beyond
  // the elimination of unnecessary elements.
  message MaskExpression {
    // The top-level selection, which is always a struct selection.
    StructSelect select = 1;
    // NOTE(review): presumably controls whether a struct that ends up with a
    // single selected field is kept as a struct -- confirm.
    bool maintain_singular_struct = 2;

    // A selection applied to a nested value; the member used corresponds to
    // the kind of value (struct, list or map) being masked.
    message Select {
      oneof type {
        StructSelect struct = 1;
        ListSelect list = 2;
        MapSelect map = 3;
      }
    }

    // Selects a subset of the fields of a struct.
    message StructSelect {
      repeated StructItem struct_items = 1;
    }

    // One selected struct field.
    message StructItem {
      // The selected field (presumably its zero-indexed ordinal, as in
      // ReferenceSegment.StructField -- confirm).
      int32 field = 1;
      // Optional selection applied within the selected field.
      Select child = 2;
    }

    // Selects elements and/or slices of a list.
    message ListSelect {
      repeated ListSelectItem selection = 1;
      // Optional selection applied within the selected list elements.
      Select child = 2;

      // Either a single element or a slice of the list.
      message ListSelectItem {
        oneof type {
          ListElement item = 1;
          ListSlice slice = 2;
        }

        message ListElement {
          // The selected element (presumably its position in the list --
          // confirm).
          int32 field = 1;
        }

        // NOTE(review): whether `end` is inclusive, and whether negative
        // values are allowed, is not stated here.
        message ListSlice {
          int32 start = 1;
          int32 end = 2;
        }
      }
    }

    // Selects entries of a map, either by key or by key expression.
    message MapSelect {
      oneof select {
        MapKey key = 1;
        MapKeyExpression expression = 2;
      }

      // Optional selection applied within the selected map values.
      Select child = 3;

      message MapKey {
        string map_key = 1;
      }

      // NOTE(review): the syntax of the key expression string is not stated
      // here.
      message MapKeyExpression {
        string map_key_expression = 1;
      }
    }
  }

  // A reference to an inner part of a complex object. Can reference a
  // single element or a masked version of elements
  message FieldReference {
    // Whether this is composed of a single element reference or a masked
    // element subtree
    oneof reference_type {
      ReferenceSegment direct_reference = 1;
      MaskExpression masked_reference = 2;
    }

    // Whether this reference has an origin of a root struct or is based on the
    // output of an expression. When this is a RootReference and direct_reference
    // above is used, the direct_reference must be of a type StructField.
    oneof root_type {
      // The reference is applied to the output of this expression.
      Expression expression = 3;
      // The reference is applied to the root incoming record.
      RootReference root_reference = 4;
      // The reference is applied to a record of an enclosing (outer) query.
      OuterReference outer_reference = 5;
    }

    // Singleton that expresses this FieldReference is rooted off the root
    // incoming record type
    message RootReference {}

    // A root reference for the outer relation's subquery
    message OuterReference {
      // Number of subquery boundaries to traverse up for this field's reference
      //
      // This value must be >= 1
      uint32 steps_out = 1;
    }
  }

  // Subquery relation expression
  message Subquery {
    // Which form of subquery expression this is.
    oneof subquery_type {
      // Scalar subquery
      Scalar scalar = 1;
      // x IN y predicate
      InPredicate in_predicate = 2;
      // EXISTS/UNIQUE predicate
      SetPredicate set_predicate = 3;
      // ANY/ALL predicate
      SetComparison set_comparison = 4;
    }

    // A subquery with one row and one column. This is often an aggregate
    // though not required to be.
    message Scalar {
      // The relation producing that row.
      Rel input = 1;
    }

    // Predicate checking that the left expression is contained in the right
    // subquery
    //
    // Examples:
    //
    // x IN (SELECT * FROM t)
    // (x, y) IN (SELECT a, b FROM t)
    message InPredicate {
      // The left-hand expressions (`x`, or `x` and `y`, in the examples).
      repeated Expression needles = 1;
      // The right-hand subquery that is searched.
      Rel haystack = 2;
    }

    // A predicate over a set of rows in the form of a subquery
    // EXISTS and UNIQUE are common SQL forms of this operation.
    message SetPredicate {
      enum PredicateOp {
        // Proto3 default; this is what `predicate_op` reads as when unset.
        PREDICATE_OP_UNSPECIFIED = 0;
        PREDICATE_OP_EXISTS = 1;
        PREDICATE_OP_UNIQUE = 2;
      }
      // TODO: should allow expressions
      PredicateOp predicate_op = 1;
      // The subquery whose rows the predicate is evaluated over.
      Rel tuples = 2;
    }

    // A subquery comparison using ANY or ALL.
    // Examples:
    //
    // SELECT *
    // FROM t1
    // WHERE x < ANY(SELECT y from t2)
    message SetComparison {
      enum ComparisonOp {
        // Proto3 default; this is what `comparison_op` reads as when unset.
        COMPARISON_OP_UNSPECIFIED = 0;
        COMPARISON_OP_EQ = 1;
        COMPARISON_OP_NE = 2;
        COMPARISON_OP_LT = 3;
        COMPARISON_OP_GT = 4;
        COMPARISON_OP_LE = 5;
        COMPARISON_OP_GE = 6;
      }

      enum ReductionOp {
        // Proto3 default; this is what `reduction_op` reads as when unset.
        REDUCTION_OP_UNSPECIFIED = 0;
        REDUCTION_OP_ANY = 1;
        REDUCTION_OP_ALL = 2;
      }

      // ANY or ALL
      ReductionOp reduction_op = 1;
      // A comparison operator
      ComparisonOp comparison_op = 2;
      // Left side of the expression (`x` in the example).
      Expression left = 3;
      // Right side of the expression (the subquery in the example).
      Rel right = 4;
    }
  }
}

// The description of a field to sort on (including the direction of sorting and null semantics)
message SortField {
  // The expression whose value is sorted on.
  Expression expr = 1;

  // How values of `expr` are ordered: either one of the predefined
  // directions or a referenced comparison function.
  oneof sort_kind {
    SortDirection direction = 2;
    // NOTE(review): presumably points to a function_anchor defined in this
    // plan that refers to a comparison function -- confirm.
    uint32 comparison_function_reference = 3;
  }
  // Predefined sort orders; each combines a direction with the placement of
  // nulls.
  enum SortDirection {
    // Proto3 default; this is the enum's zero value.
    SORT_DIRECTION_UNSPECIFIED = 0;
    SORT_DIRECTION_ASC_NULLS_FIRST = 1;
    SORT_DIRECTION_ASC_NULLS_LAST = 2;
    SORT_DIRECTION_DESC_NULLS_FIRST = 3;
    SORT_DIRECTION_DESC_NULLS_LAST = 4;
    // NOTE(review): semantics are not described here -- presumably equal
    // values are grouped together without a defined order between groups;
    // confirm against the specification.
    SORT_DIRECTION_CLUSTERED = 5;
  }
}

// Describes which part of an aggregation or window function to perform within
// the context of distributed algorithms.
enum AggregationPhase {
  // Implies `INTERMEDIATE_TO_RESULT`.
  // NOTE(review): the `phase` fields of Expression.WindowFunction and
  // AggregateFunction are documented as required, so producers should
  // probably set an explicit phase rather than rely on this default --
  // confirm.
  AGGREGATION_PHASE_UNSPECIFIED = 0;

  // Specifies that the function should be run only up to the point of
  // generating an intermediate value, to be further aggregated later using
  // INTERMEDIATE_TO_INTERMEDIATE or INTERMEDIATE_TO_RESULT.
  AGGREGATION_PHASE_INITIAL_TO_INTERMEDIATE = 1;

  // Specifies that the inputs of the aggregate or window function are the
  // intermediate values of the function, and that the output should also be
  // an intermediate value, to be further aggregated later using
  // INTERMEDIATE_TO_INTERMEDIATE or INTERMEDIATE_TO_RESULT.
  AGGREGATION_PHASE_INTERMEDIATE_TO_INTERMEDIATE = 2;

  // A complete invocation: the function should aggregate the given set of
  // inputs to yield a single return value. This style must be used for
  // aggregate or window functions that are not decomposable.
  AGGREGATION_PHASE_INITIAL_TO_RESULT = 3;

  // Specifies that the inputs of the aggregate or window function are the
  // intermediate values of the function, generated previously using
  // INITIAL_TO_INTERMEDIATE and possibly INTERMEDIATE_TO_INTERMEDIATE calls.
  // This call should combine the intermediate values to yield the final
  // return value.
  AGGREGATION_PHASE_INTERMEDIATE_TO_RESULT = 4;
}

// An aggregate function.
message AggregateFunction {
  // Points to a function_anchor defined in this plan, which must refer
  // to an aggregate function in the associated YAML file. Required; 0 is
  // considered to be a valid anchor/reference.
  uint32 function_reference = 1;

  // The arguments to be bound to the function. This must have exactly the
  // number of arguments specified in the function definition, and the
  // argument types must also match exactly:
  //
  //  - Value arguments must be bound using FunctionArgument.value, and
  //    the expression in that must yield a value of a type that a function
  //    overload is defined for.
  //  - Type arguments must be bound using FunctionArgument.type, and a
  //    function overload must be defined for that type.
  //  - Enum arguments must be bound using FunctionArgument.enum, with a
  //    string that case-insensitively matches one of the allowed options.
  //
  // NOTE(review): this comment previously described optional enum arguments
  // in terms of Enum.specified / Enum.unspecified. Those belong to the
  // deprecated Expression.Enum message; FunctionArgument.enum is a plain
  // string and has no "unspecified" form. Confirm how optional enum
  // arguments are meant to be expressed (presumably via `options` below).
  repeated FunctionArgument arguments = 7;

  // Options to specify behavior for corner cases, or leave behavior
  // unspecified if the consumer does not need specific behavior in these
  // cases.
  repeated FunctionOption options = 8;

  // Must be set to the return type of the function, exactly as derived
  // using the declaration in the extension.
  Type output_type = 5;

  // Describes which part of the aggregation to perform within the context of
  // distributed algorithms. Required. Must be set to INITIAL_TO_RESULT for
  // aggregate functions that are not decomposable.
  AggregationPhase phase = 4;

  // If specified, the aggregated records are ordered according to this list
  // before they are aggregated. The first sort field has the highest
  // priority; only if a sort field determines two records to be equivalent is
  // the next field queried. This field is optional.
  repeated SortField sorts = 3;

  // Specifies whether equivalent records are merged before being aggregated.
  // Optional, defaults to AGGREGATION_INVOCATION_ALL.
  AggregationInvocation invocation = 6;

  // Deprecated; use arguments instead.
  repeated Expression args = 2 [deprecated = true];

  // Method in which equivalent records are merged before being aggregated.
  enum AggregationInvocation {
    // This default value implies AGGREGATION_INVOCATION_ALL.
    AGGREGATION_INVOCATION_UNSPECIFIED = 0;

    // Use all values in the aggregation calculation.
    AGGREGATION_INVOCATION_ALL = 1;

    // Use only distinct values in the aggregation calculation.
    AGGREGATION_INVOCATION_DISTINCT = 2;
  }
}

// This rel is used to create references to another relation tree.
// If the referenced tree is a RelRoot, its field names are ignored.
message ReferenceRel {
  // Identifies the referenced tree.
  // NOTE(review): presumably an index into the plan's list of relation
  // trees -- confirm whether it is zero-based.
  int32 subtree_ordinal = 1;
}