// SPDX-License-Identifier: Apache-2.0
syntax = "proto3";
// This proto file defines the tree structure that corresponds to the internal
// representation of the validator's parse result. The top-level message type
// for this is ParseResult.
package substrait.validator;
import "google/protobuf/any.proto";
import "google/protobuf/empty.proto";
import "substrait/validator/simple_extensions.proto";
import "substrait/validator/type_system.proto";
option csharp_namespace = "Substrait.Validator.Protobuf";
option java_multiple_files = true;
option java_package = "io.substrait.validator.proto";
// Top-level message that the validator returns as the result of parsing a
// substrait.Plan.
message ParseResult {
  // The root of the parse result tree.
  Node root = 1;
}
// A node of the validator's parse result tree.
//
// Unlike substrait.Plan and its children, the nodes of this tree deliberately
// carry no typing information: every node is simply a Node. This lets a
// consumer walk the entire tree without in-depth knowledge of how Substrait
// works, which in turn decouples it from changes to the Substrait
// specification. After all, these trees are an intermediate format for
// converting Substrait plans into more human-friendly representations; they
// are not meant for programmatically dealing with the semantics of Substrait
// itself. That is what the validator is for.
//
// In particular, collecting all diagnostics emitted by the validator only
// requires the consumer to use the Node, Node.Data, Node.Child, and of course
// the Diagnostic message types.
//
// A consumer that does need additional information from the original
// substrait.Plan can relate every node back to its corresponding message via
// the path information associated with the nodes.
message Node {
  // The kind of node this is.
  oneof node_type {
    // A protobuf message. Its fields are described using Field,
    // RepeatedField, and OneOfField messages in data.
    ProtoMessage proto_message = 1;

    // A protobuf primitive or enum.
    ProtoPrimitive proto_primitive = 2;

    // Placeholder that is inserted when a required oneof field was not
    // populated in the input.
    google.protobuf.Empty proto_missing_oneof = 3;

    // Special case of proto_primitive, used for references to anchors
    // defined elsewhere in the plan.
    NodeReference node_reference = 4;

    // A YAML map/object. Its keys are represented using Field messages in
    // data.
    google.protobuf.Empty yaml_map = 6;

    // A YAML array. Its elements are represented using ArrayElement messages
    // in data.
    google.protobuf.Empty yaml_array = 7;

    // A YAML primitive.
    PrimitiveData yaml_primitive = 8;

    // Special case for string primitives that were interpreted and resolved
    // as a URI. Such a node has a single child node with path `data`, which
    // represents the parse result of the referred file.
    string resolved_uri = 9;

    // An abstract syntax tree node, used for representing complex YAML string
    // parse results.
    google.protobuf.Empty ast_node = 10;

    // No longer used; the more generic resolved_uri variant is used instead.
    YamlReference yaml_reference = 5 [deprecated = true];
  }

  // The semantic classifications that a node can have.
  enum Class {
    // Default value: no semantic classification was specified.
    CLASS_UNSPECIFIED = 0;

    // The node represents a data type.
    CLASS_TYPE = 1;

    // The node represents an expression.
    CLASS_EXPRESSION = 2;

    // The node represents a relation.
    CLASS_RELATION = 3;
  }

  // Semantic classification of this node.
  Class class = 13;

  // Optional brief description of the node. Should not contain newlines or
  // other non-span formatting information.
  Comment brief = 14;

  // Optional summary of the node. In contrast to brief, a summary may contain
  // paragraph-level formatting information.
  Comment summary = 15;

  // The validator attempts type resolution for the following kinds of nodes:
  //  - type-like nodes resolve to said type;
  //  - expression-like nodes resolve to the type returned by the expression;
  //  - relation-like nodes resolve to the schema (as a named struct) returned
  //    by the relation.
  // For such nodes this field is populated even if resolution fails, to
  // indicate that there is supposed to be a type; the type kind will then be
  // set to "unresolved." For nodes that don't have a logical Substrait type,
  // the field is not populated.
  DataType data_type = 16;

  // Data associated with the node. Note that some variants are illegal
  // depending on the node type (for example, a primitive does not have
  // fields, so it makes no sense for Field data to appear).
  repeated Data data = 31;

  // A single piece of data attached to a node.
  message Data {
    oneof kind {
      // A child node in the tree.
      Child child = 1;

      // A diagnostic message.
      Diagnostic diagnostic = 2;

      // An (intermediate) data type.
      DataType data_type = 3;

      // Unstructured additional information about the node or something in
      // it.
      Comment comment = 4;

      // Pointer to a function extension.
      FunctionUsage function_usage = 5;

      // Simple extension definition.
      ExtensionDefinition extension_definition = 6;
    }
  }

  // A child node in the tree, together with how it relates to its parent.
  message Child {
    // Path element that identifies the relation of this child node to its
    // parent.
    Path.Element path = 1;

    // The child node itself.
    Node node = 2;

    // Whether the validator recognized/expected the field or element that
    // this child represents. Fields/elements may go unrecognized simply
    // because validation is not implemented for them yet. Either way, an
    // unrecognized child means that the subtree represented by this node
    // could not be validated.
    bool recognized = 3;
  }

  // Information about a protobuf message.
  message ProtoMessage {
    // Full protobuf path of the message type, for example "substrait.Plan".
    string path = 1;
  }

  // Information about a protobuf primitive.
  message ProtoPrimitive {
    // Logically compatible protobuf name of the primitive type, for example
    // uint32 for any 32-bit unsigned data storage type.
    string path = 1;

    // The value of the primitive.
    PrimitiveData data = 2;
  }

  // Information about the reference part of a reference/anchor pair.
  message NodeReference {
    // Integer value shared by the reference and the anchor.
    uint64 value = 1;

    // Absolute path to the referenced node, i.e. the node that contains the
    // anchor field.
    Path path = 2;
  }

  // Information about a reference to a YAML file.
  message YamlReference {
    option deprecated = true;

    // URI of the YAML file.
    string uri = 1;
  }

  // The value of a primitive data element.
  message PrimitiveData {
    // Note: a YAML null is represented by simply not populating this field.
    oneof data {
      // Boolean value.
      bool boolean = 1;

      // Unsigned integer value.
      uint64 unsigned = 2;

      // Signed integer value.
      int64 signed = 3;

      // Floating-point value.
      double real = 4;

      // String value.
      string unicode = 5;

      // Raw byte string value.
      bytes binary = 6;

      // NOTE(review): presumably the name of an enum variant; confirm
      // against the validator implementation.
      string variant = 7;

      // Arbitrary message wrapped in a google.protobuf.Any.
      google.protobuf.Any any = 8;
    }
  }
}
// Absolute path to a node in the tree.
message Path {
  // Name of the root node. Currently always set to `plan`.
  string root = 1;

  // The elements that make up the path. The first element selects a child
  // node of the root node, the second selects one of that node's children,
  // and so on.
  repeated Element elements = 2;

  // A single step of a path.
  message Element {
    oneof kind {
      // Step into a protobuf field or YAML map entry.
      Field field = 1;

      // Step into an element of a protobuf repeated field.
      RepeatedField repeated_field = 2;

      // Step into a variant of a protobuf oneof field.
      OneOfField oneof_field = 3;

      // Step into an element of a YAML array.
      ArrayElement array_element = 4;
    }
  }

  // Path element used for protobuf fields and YAML maps.
  // The canonical representation is `.<field>` when field matches
  // [a-zA-Z_][a-zA-Z0-9_]*, and `."<field>"` using \\ and \" escape
  // sequences when it does not (note that the latter can only happen for
  // YAML map keys).
  message Field {
    string field = 1;
  }

  // Path element used for elements of protobuf repeated fields.
  // The canonical representation is `.<field>[<index>]`.
  message RepeatedField {
    string field = 1;
    uint64 index = 2;
  }

  // Path element used for protobuf oneof fields.
  // The canonical representation is `.<field>{<variant>}`.
  message OneOfField {
    string field = 1;
    string variant = 2;
  }

  // Path element used for YAML arrays.
  // The canonical representation is `[<index>]`.
  message ArrayElement {
    // NOTE(review): this uses field number 2 and leaves 1 unused, presumably
    // to line up with RepeatedField.index; confirm before ever assigning 1.
    uint64 index = 2;
  }
}
// A diagnostic message emitted by the validator.
message Diagnostic {
  // Error level.
  enum Level {
    // Default value: no level was specified.
    LEVEL_UNSPECIFIED = 0;

    // Information diagnostic. Has no bearing on the validity of the plan.
    LEVEL_INFO = 1;

    // Warning diagnostic. When warning diagnostics are present, the plan may
    // or may not be valid, for example because the validator was unable to
    // access a referenced YAML file, or because enhancements using
    // protobuf's Any type were used.
    LEVEL_WARNING = 2;

    // Error diagnostic. When error diagnostics are present, the plan is
    // invalid.
    LEVEL_ERROR = 3;
  }

  // The original error level/severity of this diagnostic.
  Level original_level = 1;

  // The error level/severity of this diagnostic after it was adjusted
  // according to the validator configuration.
  Level adjusted_level = 2;

  // The machine-readable code for this diagnostic.
  uint32 cause = 3;

  // The human-readable message for this diagnostic.
  string msg = 4;

  // A path associated with this diagnostic. Usually this is the path of the
  // node that the diagnostic is attached to, but not necessarily: a
  // diagnostic about a duplicate definition, for example, may refer back to
  // the previous or first definition.
  Path path = 5;
}
// A comment made by the validator that is only intended to be interpreted by
// people.
message Comment {
  // A comment consists of one or more "elements," which define formatting
  // information.
  repeated Element elements = 1;

  // A single formatting element of a comment.
  message Element {
    oneof kind {
      // A span of text.
      Span span = 1;

      // A newline, i.e. the next span should start on the next line.
      google.protobuf.Empty new_line = 2;

      // Opens a new unordered list. The next span starts the text of the
      // next item. Use list_next elements to advance to the next list item;
      // newlines can be used to add paragraphs without bullet points. Every
      // list_open should be matched with a list_close. Lists may be nested.
      google.protobuf.Empty list_open = 3;

      // Advances to the next list item.
      google.protobuf.Empty list_next = 4;

      // Closes the current list.
      google.protobuf.Empty list_close = 5;
    }
  }

  // A span of text.
  message Span {
    // The text of this span. Should not include newlines.
    string text = 1;

    // Set when this span of text should link to something.
    oneof link {
      // Link to a path in the tree.
      Path path = 2;

      // Link to a web page.
      string url = 3;
    }
  }
}
// Describes a usage of a function.
message FunctionUsage {
  // URI of the YAML file in which the function is (supposed to be) defined,
  // if known.
  string uri = 1;

  // Compound name of the function. Uniquely identifies a function
  // implementation within the scope of the URI.
  string compound_name = 2;

  // Simple name of the function. Uniquely identifies the function behavior
  // description within the scope of the URI. When the function has only a
  // single implementation, this name may also be used to refer to it.
  string simple_name = 3;

  // When nonzero, points to a function extension definition elsewhere in the
  // tree. All extension definitions can be gathered by traversing the tree
  // and looking for ExtensionDefinition messages in the data associated with
  // each node. Note that extension IDs are only unique within a single tree.
  uint64 extension_id = 4;
}