substrait_validator/output/extension/simple/
function.rs

// SPDX-License-Identifier: Apache-2.0

//! Module for representing simple function extensions.
4
5use crate::input::yaml;
6use crate::output::extension;
7use crate::output::type_system::meta;
8use std::collections::HashMap;
9use std::sync::Arc;
10
11/// The definition of a function implementation.
12#[derive(Clone, Debug)]
13pub struct Definition {
14    /// Unique number within the tree that can be used to refer to this
15    /// extension when exporting in protobuf form.
16    pub extension_id: u64,
17
18    /// Link to information common to a set of function implementations going by
19    /// the same name.
20    pub common: Arc<Common>,
21
22    /// The derived compound name of this function.
23    pub compound_name: String,
24
25    /// The expected arguments of the function.
26    pub arguments: Vec<ArgumentSlot>,
27
28    /// The options of the function.
29    pub options: HashMap<OptionName, OptionValues>,
30
31    /// Specifies the variadic behavior of the last argument slot, if any.
32    pub variadic: VariadicBehavior,
33
34    /// Whether this function is session-dependent. If set, evaluation of the
35    /// function may differ from session to session.
36    pub session_dependent: bool,
37
38    /// Whether this function is deterministic; if set, the function can be
39    /// assumed to always return the same value for the same input.
40    pub deterministic: bool,
41
42    /// How the function deals with nullability. Note that this information is
43    /// also captured in the parsed argument patterns and return type
44    /// derivation by means of nullability captures.
45    pub nullability_handling: NullabilityHandling,
46
47    /// The type derivation program used to derive the return type.
48    pub return_type: meta::Program,
49
50    /// Implementation map. This is not yet specified in Substrait, so this is
51    /// still very generic.
52    pub implementations: HashMap<String, yaml::Value>,
53}
54
55/// Information common to a group of function implementations with the same name.
56#[derive(Clone, Debug)]
57pub struct Common {
58    // The simple name of the function.
59    pub name: String,
60
61    /// An optional human-readable description of the behavior of the function.
62    pub description: Option<String>,
63
64    /// The function type; scalar, aggregate, or window.
65    pub function_type: Type,
66}
67
/// The kind of a function.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum Type {
    /// Scalar function: converts a single value to a single value.
    Scalar,

    /// Aggregate function: reduces a list of values to a single value.
    Aggregate,

    /// Window function: reduces a window within a list of values to a single
    /// value.
    Window,
}
81
82/// Information about a function argument slot (a.k.a. parameter outside of
83/// Substrait).
84#[derive(Clone, Debug)]
85pub struct ArgumentSlot {
86    /// Optional argument name to aid human understanding.
87    pub name: Option<String>,
88
89    /// Optional description of the argument.
90    pub description: Option<String>,
91
92    /// The type of argument.
93    pub argument_type: ArgumentSlotType,
94}
95
96/// An argument type, along with information specific to that type.
97#[derive(Clone, Debug)]
98pub enum ArgumentSlotType {
99    /// This argument slot accepts a value at runtime. The data type of this
100    /// value must match the contained pattern.
101    Value(ValueArgumentSlot),
102
103    /// This argument slot accepts a data type without a value. The data type
104    /// is matched against the contained pattern.
105    Type(TypeArgumentSlot),
106
107    /// This argument slot accepts an enumeration option.
108    Enumeration(EnumerationArgumentSlot),
109}
110
111/// Definition of a value argument slot.
112#[derive(Clone, Debug)]
113pub struct ValueArgumentSlot {
114    /// The expected data type.
115    pub pattern: meta::pattern::Value,
116
117    /// If true, the argument slot must be bound to a literal.
118    pub constant: bool,
119}
120
121/// Definition of a data type argument slot.
122#[derive(Clone, Debug)]
123pub struct TypeArgumentSlot {
124    /// The expected data type. Normally, the pattern used here is a binding
125    /// that hasn't been used previously.
126    pub pattern: meta::pattern::Value,
127}
128
/// Describes an argument slot that takes an enumeration option.
#[derive(Debug, Clone)]
pub struct EnumerationArgumentSlot {
    /// The options that can be passed to this enumeration, spelled in their
    /// original case convention. They are to be matched case-insensitively.
    pub options: Vec<String>,

    /// When false, the enumeration argument can explicitly be left
    /// unspecified. The choice of option is then up to the consumer, which
    /// is to pick the first option, in definition order, that it supports.
    pub required: bool,
}
141
/// Definition of a function option name.
///
/// This type is used as the key type of the function option map, so it
/// derives `PartialEq`, `Eq`, and `Hash`; without those, no entry could be
/// inserted into or looked up in a `HashMap` keyed by it. Two option names
/// are equal exactly when their `name` strings are equal.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct OptionName {
    /// A human-readable name for this option.
    pub name: String,
}
148
/// Definition of the valid values for a function option.
#[derive(Debug, Clone)]
pub struct OptionValues {
    /// The strings that are valid for this option.
    pub values: Vec<String>,
}
155
156/// Definition of the variadic behavior of the last argument slot.
157#[derive(Clone, Debug)]
158pub struct VariadicBehavior {
159    /// The specified "parameter consistency", the semantics of which are
160    /// already captured by the derived patterns.
161    pub parameter_consistency: ParameterConsistency,
162
163    /// The minimum number of arguments that have to match the last slot.
164    pub min: usize,
165
166    /// The maximum number of arguments that can match the last slot.
167    pub max: usize,
168}
169
/// The consistency requirement, as specified, for the variadic arguments that
/// are passed to the last argument slot.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum ParameterConsistency {
    /// The arguments have to be consistent, meaning that a binding can only
    /// be bound to a single metavalue. The pattern matching logic behaves
    /// this way by default when a pattern is matched more than once.
    Consistent,

    /// The arguments are allowed to be inconsistent, meaning that a binding
    /// can match a different metavalue every time the pattern is matched.
    /// The patterns capture this by having all normal binding patterns
    /// converted to inconsistent bindings when syntactic sugar is evaluated.
    Inconsistent,
}
185
/// The nullability behavior that was specified for a function.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum NullabilityHandling {
    /// The function can capture any combination of nullability for its
    /// arguments, and its output types are non-nullable if and only if none
    /// of the arguments are nullable. In the patterns, all top-level
    /// nullability specifiers are replaced with an inconsistent binding that
    /// is named with something not yet used for anything else. Top-level
    /// bindings that were not yet overriding nullability are furthermore
    /// promoted to bindings that do override nullability, using that same
    /// inconsistent binding for the nullability specifier.
    Mirror,

    /// The function can capture any combination of nullability for its
    /// arguments, while the nullability of the output types is not modified.
    /// In the patterns, all top-level nullability specifiers in the argument
    /// patterns are replaced with Any patterns.
    DeclaredOutput,

    /// The nullability of the arguments and of the output types is exactly
    /// as specified, so the patterns need no changes.
    Discrete,
}
209
210/// A reference to a completed function namespace.
211pub type NamespaceReference = extension::namespace::Reference<Definition>;
212
213/// A potentially mutable function namespace definition.
214pub type NamespaceDefinition = extension::namespace::Definition<Definition>;
215
216/// A to-be-resolved reference to a function. Includes the name and URI even if
217/// unresolved.
218pub type UnresolvedReference = extension::reference::Data<Definition>;
219
220/// The result of a name resolution. May consist of any number of definitions
221/// that are ambiguously referred to. The results may be further refined at a
222/// later stage. For functions, this is used to allow referring to functions
223/// by their simple name rather than by their compound name even if Substrait
224/// explicitly does not allow this due to ambiguity, in order to give better
225/// error messages than just "function not found".
226pub type ResolutionResult = extension::namespace::ResolutionResult<Definition>;
227
228/// A potentially unresolved reference to a function implementation. Includes
229/// the name and URI even if unresolved.
230pub type Reference = extension::Reference<Definition>;