substrait_validator/output/extension/simple/function.rs
1// SPDX-License-Identifier: Apache-2.0
2
3//! Module for representing simple function extensions.
4
5use crate::input::yaml;
6use crate::output::extension;
7use crate::output::type_system::meta;
8use std::collections::HashMap;
9use std::sync::Arc;
10
11/// The definition of a function implementation.
12#[derive(Clone, Debug)]
13pub struct Definition {
14 /// Unique number within the tree that can be used to refer to this
15 /// extension when exporting in protobuf form.
16 pub extension_id: u64,
17
18 /// Link to information common to a set of function implementations going by
19 /// the same name.
20 pub common: Arc<Common>,
21
22 /// The derived compound name of this function.
23 pub compound_name: String,
24
25 /// The expected arguments of the function.
26 pub arguments: Vec<ArgumentSlot>,
27
28 /// The options of the function.
29 pub options: HashMap<OptionName, OptionValues>,
30
31 /// Specifies the variadic behavior of the last argument slot, if any.
32 pub variadic: VariadicBehavior,
33
34 /// Whether this function is session-dependent. If set, evaluation of the
35 /// function may differ from session to session.
36 pub session_dependent: bool,
37
38 /// Whether this function is deterministic; if set, the function can be
39 /// assumed to always return the same value for the same input.
40 pub deterministic: bool,
41
42 /// How the function deals with nullability. Note that this information is
43 /// also captured in the parsed argument patterns and return type
44 /// derivation by means of nullability captures.
45 pub nullability_handling: NullabilityHandling,
46
47 /// The type derivation program used to derive the return type.
48 pub return_type: meta::Program,
49
50 /// Implementation map. This is not yet specified in Substrait, so this is
51 /// still very generic.
52 pub implementations: HashMap<String, yaml::Value>,
53}
54
55/// Information common to a group of function implementations with the same name.
56#[derive(Clone, Debug)]
57pub struct Common {
58 // The simple name of the function.
59 pub name: String,
60
61 /// An optional human-readable description of the behavior of the function.
62 pub description: Option<String>,
63
64 /// The function type; scalar, aggregate, or window.
65 pub function_type: Type,
66}
67
/// The kind of a function.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Type {
    /// Scalar function: converts a single value to a single value.
    Scalar,

    /// Aggregate function: reduces a list of values to a single value.
    Aggregate,

    /// Window function: reduces a window within a list of values to a single
    /// value.
    Window,
}
81
82/// Information about a function argument slot (a.k.a. parameter outside of
83/// Substrait).
84#[derive(Clone, Debug)]
85pub struct ArgumentSlot {
86 /// Optional argument name to aid human understanding.
87 pub name: Option<String>,
88
89 /// Optional description of the argument.
90 pub description: Option<String>,
91
92 /// The type of argument.
93 pub argument_type: ArgumentSlotType,
94}
95
96/// An argument type, along with information specific to that type.
97#[derive(Clone, Debug)]
98pub enum ArgumentSlotType {
99 /// This argument slot accepts a value at runtime. The data type of this
100 /// value must match the contained pattern.
101 Value(ValueArgumentSlot),
102
103 /// This argument slot accepts a data type without a value. The data type
104 /// is matched against the contained pattern.
105 Type(TypeArgumentSlot),
106
107 /// This argument slot accepts an enumeration option.
108 Enumeration(EnumerationArgumentSlot),
109}
110
111/// Definition of a value argument slot.
112#[derive(Clone, Debug)]
113pub struct ValueArgumentSlot {
114 /// The expected data type.
115 pub pattern: meta::pattern::Value,
116
117 /// If true, the argument slot must be bound to a literal.
118 pub constant: bool,
119}
120
121/// Definition of a data type argument slot.
122#[derive(Clone, Debug)]
123pub struct TypeArgumentSlot {
124 /// The expected data type. Normally, the pattern used here is a binding
125 /// that hasn't been used previously.
126 pub pattern: meta::pattern::Value,
127}
128
/// Describes an argument slot that takes an enumeration option.
#[derive(Debug, Clone)]
pub struct EnumerationArgumentSlot {
    /// The options that may be passed to this enumeration, spelled in their
    /// original case convention. They are to be matched case-insensitively.
    pub options: Vec<String>,

    /// When false, the enumeration argument may explicitly be left
    /// unspecified. The consumer then gets to choose the option, and is to
    /// pick the first one in definition order that it supports.
    pub required: bool,
}
141
/// Definition of a function option name.
///
/// This type is used as the key of the option map in a function definition,
/// so it implements `Eq` and `Hash`; without those, nothing could be inserted
/// into or looked up in a `HashMap` keyed by it. Equality and hashing are
/// derived from `name` as-is, so two option names are equal exactly when
/// their strings are equal.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct OptionName {
    /// A human-readable name for this option.
    pub name: String,
}
148
/// The set of values that are valid for a function option.
#[derive(Debug, Clone)]
pub struct OptionValues {
    /// The strings that are valid for this option.
    pub values: Vec<String>,
}
155
156/// Definition of the variadic behavior of the last argument slot.
157#[derive(Clone, Debug)]
158pub struct VariadicBehavior {
159 /// The specified "parameter consistency", the semantics of which are
160 /// already captured by the derived patterns.
161 pub parameter_consistency: ParameterConsistency,
162
163 /// The minimum number of arguments that have to match the last slot.
164 pub min: usize,
165
166 /// The maximum number of arguments that can match the last slot.
167 pub max: usize,
168}
169
/// The consistency requirement that was specified for the variadic arguments
/// passed to the last argument slot.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParameterConsistency {
    /// Arguments have to be consistent, i.e. a binding can only be bound to
    /// one metavalue. When a pattern is matched more than once, this is what
    /// the pattern matching logic does by default.
    Consistent,

    /// Arguments are allowed to be inconsistent, i.e. a binding can match a
    /// different metavalue every time the pattern is matched. The patterns
    /// capture this by having all normal binding patterns converted to
    /// inconsistent bindings when syntactic sugar is evaluated.
    Inconsistent,
}
185
/// The nullability behavior that was specified for a function.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NullabilityHandling {
    /// The function can capture any combination of nullability for its
    /// arguments, and its output types are non-nullable if and only if none
    /// of the arguments are nullable. The patterns capture this as follows:
    /// all top-level nullability specifiers are replaced with an inconsistent
    /// binding, named with something that is not yet used for anything else.
    /// In addition, top-level bindings that did not yet override nullability
    /// are promoted to bindings that do, with the same inconsistent binding
    /// serving as their nullability specifier.
    Mirror,

    /// The function can capture any combination of nullability for its
    /// arguments, while the nullability of the output types is left
    /// unmodified. The patterns capture this by having all top-level
    /// nullability specifiers in the argument patterns replaced with Any
    /// patterns.
    DeclaredOutput,

    /// The nullability of both the arguments and the output types is exactly
    /// as specified, so the patterns need no changes.
    Discrete,
}
209
210/// A reference to a completed function namespace.
211pub type NamespaceReference = extension::namespace::Reference<Definition>;
212
213/// A potentially mutable function namespace definition.
214pub type NamespaceDefinition = extension::namespace::Definition<Definition>;
215
216/// A to-be-resolved reference to a function. Includes the name and URI even if
217/// unresolved.
218pub type UnresolvedReference = extension::reference::Data<Definition>;
219
220/// The result of a name resolution. May consist of any number of definitions
221/// that are ambiguously referred to. The results may be further refined at a
222/// later stage. For functions, this is used to allow referring to functions
223/// by their simple name rather than by their compound name even if Substrait
224/// explicitly does not allow this due to ambiguity, in order to give better
225/// error messages than just "function not found".
226pub type ResolutionResult = extension::namespace::ResolutionResult<Definition>;
227
228/// A potentially unresolved reference to a function implementation. Includes
229/// the name and URI even if unresolved.
230pub type Reference = extension::Reference<Definition>;