udf/traits.rs
1//! Module containing traits to be implemented by a user
2//!
3//! A basic UDF just needs to implement [`BasicUdf`]. An aggregate UDF needs to
4//! implement both [`BasicUdf`] and [`AggregateUdf`].
5
6use core::fmt::Debug;
7use std::num::NonZeroU8;
8
9use crate::types::{ArgList, UdfCfg};
10use crate::ProcessError;
11
12/// This trait specifies the functions needed for a standard (non-aggregate) UDF
13///
14/// Implement this on any struct in order to create a UDF. That struct can
15/// either be empty (usually the case for simple functions), or contain data
16/// that will be shared among all the UDF functions.
17///
18/// If the UDF is basic (non-aggregate), the process is:
19///
20/// - Caller (SQL server) calls `init()` with basic argument information
21/// - `init()` function (defined here) validates the arguments, does
22/// configuration (if needed), and configures and returns the `Self` struct
23/// - For each row, the caller calls `process(...)` with the relevant arguments
24/// - `process()` function (defined here) accepts an instance of `self` (created
25/// during init) and updates it as needed, and produces a result for that row
26///
27/// The UDF specification also calls out a `deinit()` function to deallocate any
28/// memory, but this is not needed here (handled by this wrapper).
29pub trait BasicUdf: Sized {
30 /// This type represents the return type of the UDF function.
31 ///
32 /// There are a lot of options, with some rules to follow. Warning! tedious
33 /// explanation below, just skip to the next section if you don't need the
34 /// details.
35 ///
36 /// - `f64` (real), `i64` (integer), and `[u8]` (string/blob) are the three
37 /// fundamental types
38 /// - Any `Return` can be an `Option<something>` if the result is
39 /// potentially nullable
40 /// - There is no meaningful difference between `String`, `Vec<u8>`, `str`,
41 /// and `[u8]` - return whichever is most convenient (following the below
42 /// rules). Any of these types are acceptable for returning `string` or
43 /// `decimal` types.
44 /// - Out of these buffer options, prefer returning `&'static str` or
45 /// `&'static [u8]` where possible. These are usable when only returning
46 /// const/static values.
47 /// - "Owned allocated" types (`String`, `Vec<u8>`) are the next preference
48 /// for buffer types, and can be used whenever
49 /// - If you have an owned type that updates itself, you can store the
50 /// relevant `String` or `Vec<u8>` in your struct and return a `&'a str`
51 /// or `&'a [u8]` that references them. This is useful for something like
52 /// a `concat` function that updates its result string with each call
53 /// (GATs allow this to work).
54 ///
55 /// Choosing a type may seem tricky at first but anything that successfully
56 /// compiles will likely work. The flow chart below helps clarify some of
57 /// the decisions making:
58 ///
59 /// ```text
60 /// Desired Use Option<T> if the result may be null
61 /// Return Type
62 /// ┉┉┉┉┉┉┉┉┉┉┉┉┉
63 /// ╭─────────────╮
64 /// │ integer ├─> i64 / Option<i64>
65 /// ╰─────────────╯
66 /// ╭─────────────╮
67 /// │ float ├─> f64 / Option<f64>
68 /// ╰─────────────╯
69 /// ╭───────────╮
70 /// ╭─────────────╮ │ static ├─> &'static str / Option<&'static str>
71 /// │ utf8 string ├─> │ │
72 /// ╰─────────────╯ │ │ ╭───────────────╮
73 /// │ dynamic ├─> │ independent ├─> String / Option<String>
74 /// ╰───────────╯ │ │
75 /// │ self-updating ├─> &'a str / Option<&'a str>
76 /// ╰───────────────╯
77 /// ╭─────────────╮ ╭───────────╮
78 /// │ non utf8 │ │ static ├─> &'static [u8] / Option<&'static [u8]>
79 /// │ string/blob ├─> │ │
80 /// ╰─────────────╯ │ │ ╭───────────────╮
81 /// │ dynamic ├─> │ independent ├─> Vec<u8> / Option<Vec<u8>>
82 /// ╰───────────╯ │ │
83 /// │ self-updating ├─> &'a [u8] / Option<&'a [u8]>
84 /// ╰───────────────╯
85 /// ```
86 type Returns<'a>
87 where
88 Self: 'a;
89
90 /// This is the initialization function
91 ///
92 /// It is expected that this function do the following:
93 ///
94 /// - Check that arguments are the proper type
95 /// - Check whether the arguments are const and have a usable value (can
96 /// provide some optimizations)
97 ///
98 /// # Errors
99 ///
100 /// If your function is not able to work with the given arguments, return a
101 /// helpful error message explaining why. Max error size is
102 /// `MYSQL_ERRMSG_SIZE` (512) bits, and will be truncated if any longer.
103 ///
104 /// `MySql` recommends keeping these error messages under 80 characters to
105 /// fit in a terminal, but personal I'd prefer a helpful message over
106 /// something useless that fits in one line.
107 ///
108 /// Error handling options are limited in all other functions, so make sure
109 /// you check thoroughly for any possible errors that may arise, to the best
110 /// of your ability. These may include:
111 ///
112 /// - Incorrect argument quantity or position
113 /// - Incorrect argument types
114 /// - Values that are `maybe_null()` when you cannot accept them
115 fn init(cfg: &UdfCfg<Init>, args: &ArgList<Init>) -> Result<Self, String>;
116
117 /// Process the actual values and return a result
118 ///
119 /// If you are unfamiliar with Rust, don't worry too much about the `'a` you
120 /// see thrown around a lot. They are lifetime annotations and more or less
121 /// say, "`self` lives at least as long as my return type does so I can
122 /// return a reference to it, but `args` may not last as long so I cannot
123 /// return a reference to that".
124 ///
125 /// # Arguments
126 ///
127 /// - `args`: Iterable list of arguments of the `Process` type
128 /// - `error`: This is only applicable when using aggregate functions and
129 /// can otherwise be ignored. If using aggregate functions, this provides
130 /// the current error value as described in [`AggregateUdf::add()`].
131 ///
132 /// # Return Value
133 ///
134 /// Assuming success, this function must return something of type
135 /// `Self::Returns`. This will be the value for the row (standard functions)
136 /// or for the entire group (aggregate functions).
137 ///
138 /// # Errors
139 ///
140 /// If there is some sort of unrecoverable problem at this point, just
141 /// return a [`ProcessError`]. This will make the SQL server return `NULL`.
142 /// As mentioned, there really aren't any good error handling options at
143 /// this point other than that, so try to catch all possible errors in
144 /// [`BasicUdf::init`].
145 ///
146 /// [`ProcessError`] is just an empty type.
147 fn process<'a>(
148 &'a mut self,
149 cfg: &UdfCfg<Process>,
150 args: &ArgList<Process>,
151 error: Option<NonZeroU8>,
152 ) -> Result<Self::Returns<'a>, ProcessError>;
153}
154
155/// This trait must be implemented if this function performs aggregation.
156///
157/// The basics of aggregation are simple:
158///
159/// - `init` is called once per result set (same as non-aggregate)
160/// - `clear` is called once per group within the result set, and should reset
161/// your struct
162/// - `add` is called once per row in the group, and should add the current row
163/// to the struct as needed
164/// - `process` is called at the end of each group, and should produce the
165/// result value for that group
166///
167/// # Aggregate Error Handling
168///
169/// Error handling for aggregate functions is weird, and does not lend itself to
170/// easy understandability. The following is my best understanding of the
171/// process:
172///
173/// - Any aggregate function may set a nonzero error (Represented here in return
174/// value by `Err(NonZeroU8)`). The value is not important, can be something
175/// internal
176/// - These errors do not stop the remaining `add()`/`remove()` functions from
177/// being called, but these functions do receive the error (and so may choose
178/// to do nothing if there is an error set)
179/// - Errors are not reset on `clear()`; you must do this manually (Hence
180/// `error` being mutable in this function signature)
181///
182/// In order to enforce some of these constraints, we use `NonZeroU8` to
183/// represent error types (which has the nice side effect of being optimizable).
184/// Unfortunately, it is somewhat cumbersome to use, e.g.: `return
185/// Err(NonZeroU8::new(1).unwrap());`
186pub trait AggregateUdf: BasicUdf {
187 /// Clear is run once at the beginning of each aggregate group and should
188 /// reset everything needed in the struct.
189 ///
190 /// # Errors
191 ///
192 /// The `error` arg provides the error value from the previous group, and
193 /// this function may choose to reset it (that is probably a good idea to
194 /// do). `error` will be `None` if there is currently no error.
195 ///
196 /// To clear the error, simply return `Ok(())`.
197 ///
198 /// Return an error if something goes wrong within this function, or if you
199 /// would like to propegate the previous error.
200 fn clear(&mut self, cfg: &UdfCfg<Process>, error: Option<NonZeroU8>) -> Result<(), NonZeroU8>;
201
202 /// Add an item to the aggregate
203 ///
204 /// Usually this is implemented by adding something to an intemdiate value
205 /// inside the core struct type.
206 ///
207 /// # Errors
208 ///
209 /// Hit a problem? Return an integer, which may or may not be meaningful to
210 /// you. This can be done with `return Err(NonZeroU8::new(1).unwrap());`.
211 ///
212 /// The `error` argument tells you if there has been an error at some point,
213 /// and the return value also detemines whether to propegate/modify the
214 /// error (probably what you want) or clear it (I can't think of any good
215 /// reason to do this in `add()`). If you would like to propegate the error
216 /// without action, just add the following as the first line of the
217 /// function:
218 ///
219 /// ```
220 /// # use std::num::NonZeroU8;
221 /// # fn tmp(error: Option<NonZeroU8>) -> Result<(), NonZeroU8> {
222 /// error.map_or(Ok(()), Err)?;
223 /// # Ok(())
224 /// # }
225 /// ```
226 ///
227 /// If you do this,
228 fn add(
229 &mut self,
230 cfg: &UdfCfg<Process>,
231 args: &ArgList<Process>,
232 error: Option<NonZeroU8>,
233 ) -> Result<(), NonZeroU8>;
234
235 /// Remove only applies to `MariaDB`, for use with window functions; i.e.,
236 /// `remove` will be called on a row that should be removed from the current
237 /// set (has moved out of the window).
238 ///
239 /// This is optional; a default is supplied so no action is needed. If you
240 /// would like to use `remove`, just reimplement it.
241 ///
242 /// <https://mariadb.com/kb/en/user-defined-functions-calling-sequences/#x_remove>
243 ///
244 /// # Errors
245 ///
246 /// Errors are handled the same as with [`AggregateUdf::add()`], see the
247 /// description there
248 #[inline]
249 #[allow(unused_variables)] // Allow without an underscore for cleaner docs
250 fn remove(
251 &mut self,
252 cfg: &UdfCfg<Process>,
253 args: &ArgList<Process>,
254 error: Option<NonZeroU8>,
255 ) -> Result<(), NonZeroU8> {
256 unimplemented!()
257 }
258}
259
/// Marker trait for a state of the UDF: either [`Init`] or [`Process`]
///
/// The types implementing this trait in this module are zero-sized; they are
/// used to control what operations are allowed at different times.
pub trait UdfState: Debug + PartialEq {}

/// Typestate marker for the initialization phase
///
/// A zero-sized type whose only purpose is to let specific methods be
/// implemented solely on types that were created during the `init` function.
#[derive(Debug, PartialEq, Eq)]
pub struct Init;

impl UdfState for Init {}

/// Typestate marker for the processing phase
///
/// A zero-sized type indicating that a type was created in the `process`
/// function.
#[derive(Debug, PartialEq, Eq)]
pub struct Process;

impl UdfState for Process {}