udf/
traits.rs

1//! Module containing traits to be implemented by a user
2//!
3//! A basic UDF just needs to implement [`BasicUdf`]. An aggregate UDF needs to
4//! implement both [`BasicUdf`] and [`AggregateUdf`].
5
6use core::fmt::Debug;
7use std::num::NonZeroU8;
8
9use crate::types::{ArgList, UdfCfg};
10use crate::ProcessError;
11
12/// This trait specifies the functions needed for a standard (non-aggregate) UDF
13///
14/// Implement this on any struct in order to create a UDF. That struct can
15/// either be empty (usually the case for simple functions), or contain data
16/// that will be shared among all the UDF functions.
17///
18/// If the UDF is basic (non-aggregate), the process is:
19///
20/// - Caller (SQL server) calls `init()` with basic argument information
21/// - `init()` function (defined here) validates the arguments, does
22///   configuration (if needed), and configures and returns the `Self` struct
23/// - For each row, the caller calls `process(...)` with the relevant arguments
24/// - `process()` function (defined here) accepts an instance of `self` (created
25///   during init) and updates it as needed, and produces a result for that row
26///
27/// The UDF specification also calls out a `deinit()` function to deallocate any
28/// memory, but this is not needed here (handled by this wrapper).
29pub trait BasicUdf: Sized {
30    /// This type represents the return type of the UDF function.
31    ///
32    /// There are a lot of options, with some rules to follow. Warning! tedious
33    /// explanation below, just skip to the next section if you don't need the
34    /// details.
35    ///
36    /// - `f64` (real), `i64` (integer), and `[u8]` (string/blob) are the three
37    ///   fundamental types
38    /// - Any `Return` can be an `Option<something>` if the result is
39    ///   potentially nullable
40    /// - There is no meaningful difference between `String`, `Vec<u8>`, `str`,
41    ///   and `[u8]` - return whichever is most convenient (following the below
42    ///   rules). Any of these types are acceptable for returning `string` or
43    ///   `decimal` types.
44    /// - Out of these buffer options, prefer returning `&'static str` or
45    ///   `&'static [u8]` where possible. These are usable when only returning
46    ///   const/static values.
47    /// - "Owned allocated" types (`String`, `Vec<u8>`) are the next preference
48    ///   for buffer types, and can be used whenever
49    /// - If you have an owned type that updates itself, you can store the
50    ///   relevant `String` or `Vec<u8>` in your struct and return a `&'a str`
51    ///   or `&'a [u8]` that references them. This is useful for something like
52    ///   a `concat` function that updates its result string with each call
53    ///   (GATs allow this to work).
54    ///
55    /// Choosing a type may seem tricky at first but anything that successfully
56    /// compiles will likely work. The flow chart below helps clarify some of
57    /// the decisions making:
58    ///
59    /// ```text
60    ///     Desired                Use Option<T> if the result may be null
61    ///   Return Type
62    ///  ┉┉┉┉┉┉┉┉┉┉┉┉┉
63    /// ╭─────────────╮
64    /// │   integer   ├─> i64 / Option<i64>
65    /// ╰─────────────╯
66    /// ╭─────────────╮
67    /// │    float    ├─> f64 / Option<f64>
68    /// ╰─────────────╯
69    ///                   ╭───────────╮
70    /// ╭─────────────╮   │  static   ├─> &'static str / Option<&'static str>
71    /// │ utf8 string ├─> │           │
72    /// ╰─────────────╯   │           │   ╭───────────────╮
73    ///                   │  dynamic  ├─> │  independent  ├─> String / Option<String>
74    ///                   ╰───────────╯   │               │
75    ///                                   │ self-updating ├─> &'a str / Option<&'a str>
76    ///                                   ╰───────────────╯
77    /// ╭─────────────╮   ╭───────────╮
78    /// │  non utf8   │   │  static   ├─> &'static [u8] / Option<&'static [u8]>
79    /// │ string/blob ├─> │           │
80    /// ╰─────────────╯   │           │   ╭───────────────╮
81    ///                   │  dynamic  ├─> │  independent  ├─> Vec<u8> / Option<Vec<u8>>
82    ///                   ╰───────────╯   │               │
83    ///                                   │ self-updating ├─> &'a [u8] / Option<&'a [u8]>
84    ///                                   ╰───────────────╯
85    /// ```
86    type Returns<'a>
87    where
88        Self: 'a;
89
90    /// This is the initialization function
91    ///
92    /// It is expected that this function do the following:
93    ///
94    /// - Check that arguments are the proper type
95    /// - Check whether the arguments are const and have a usable value (can
96    ///   provide some optimizations)
97    ///
98    /// # Errors
99    ///
100    /// If your function is not able to work with the given arguments, return a
101    /// helpful error message explaining why. Max error size is
102    /// `MYSQL_ERRMSG_SIZE` (512) bits, and will be truncated if any longer.
103    ///
104    /// `MySql` recommends keeping these error messages under 80 characters to
105    /// fit in a terminal, but personal I'd prefer a helpful message over
106    /// something useless that fits in one line.
107    ///
108    /// Error handling options are limited in all other functions, so make sure
109    /// you check thoroughly for any possible errors that may arise, to the best
110    /// of your ability. These may include:
111    ///
112    /// - Incorrect argument quantity or position
113    /// - Incorrect argument types
114    /// - Values that are `maybe_null()` when you cannot accept them
115    fn init(cfg: &UdfCfg<Init>, args: &ArgList<Init>) -> Result<Self, String>;
116
117    /// Process the actual values and return a result
118    ///
119    /// If you are unfamiliar with Rust, don't worry too much about the `'a` you
120    /// see thrown around a lot. They are lifetime annotations and more or less
121    /// say, "`self` lives at least as long as my return type does so I can
122    /// return a reference to it, but `args` may not last as long so I cannot
123    /// return a reference to that".
124    ///
125    /// # Arguments
126    ///
127    /// - `args`: Iterable list of arguments of the `Process` type
128    /// - `error`: This is only applicable when using aggregate functions and
129    ///   can otherwise be ignored. If using aggregate functions, this provides
130    ///   the current error value as described in [`AggregateUdf::add()`].
131    ///
132    /// # Return Value
133    ///
134    /// Assuming success, this function must return something of type
135    /// `Self::Returns`. This will be the value for the row (standard functions)
136    /// or for the entire group (aggregate functions).
137    ///
138    /// # Errors
139    ///
140    /// If there is some sort of unrecoverable problem at this point, just
141    /// return a [`ProcessError`]. This will make the SQL server return `NULL`.
142    /// As mentioned, there really aren't any good error handling options at
143    /// this point other than that, so try to catch all possible errors in
144    /// [`BasicUdf::init`].
145    ///
146    /// [`ProcessError`] is just an empty type.
147    fn process<'a>(
148        &'a mut self,
149        cfg: &UdfCfg<Process>,
150        args: &ArgList<Process>,
151        error: Option<NonZeroU8>,
152    ) -> Result<Self::Returns<'a>, ProcessError>;
153}
154
155/// This trait must be implemented if this function performs aggregation.
156///
157/// The basics of aggregation are simple:
158///
159/// - `init` is called once per result set (same as non-aggregate)
160/// - `clear` is called once per group within the result set, and should reset
161///   your struct
162/// - `add` is called once per row in the group, and should add the current row
163///   to the struct as needed
164/// - `process` is called at the end of each group, and should produce the
165///   result value for that group
166///
167/// # Aggregate Error Handling
168///
169/// Error handling for aggregate functions is weird, and does not lend itself to
170/// easy understandability. The following is my best understanding of the
171/// process:
172///
173/// - Any aggregate function may set a nonzero error (Represented here in return
174///   value by `Err(NonZeroU8)`). The value is not important, can be something
175///   internal
176/// - These errors do not stop the remaining `add()`/`remove()` functions from
177///   being called, but these functions do receive the error (and so may choose
178///   to do nothing if there is an error set)
179/// - Errors are not reset on `clear()`; you must do this manually (Hence
180///   `error` being mutable in this function signature)
181///
182/// In order to enforce some of these constraints, we use `NonZeroU8` to
183/// represent error types (which has the nice side effect of being optimizable).
184/// Unfortunately, it is somewhat cumbersome to use, e.g.: `return
185/// Err(NonZeroU8::new(1).unwrap());`
186pub trait AggregateUdf: BasicUdf {
187    /// Clear is run once at the beginning of each aggregate group and should
188    /// reset everything needed in the struct.
189    ///
190    /// # Errors
191    ///
192    /// The `error` arg provides the error value from the previous group, and
193    /// this function may choose to reset it (that is probably a good idea to
194    /// do). `error` will be `None` if there is currently no error.
195    ///
196    /// To clear the error, simply return `Ok(())`.
197    ///
198    /// Return an error if something goes wrong within this function, or if you
199    /// would like to propegate the previous error.
200    fn clear(&mut self, cfg: &UdfCfg<Process>, error: Option<NonZeroU8>) -> Result<(), NonZeroU8>;
201
202    /// Add an item to the aggregate
203    ///
204    /// Usually this is implemented by adding something to an intemdiate value
205    /// inside the core struct type.
206    ///
207    /// # Errors
208    ///
209    /// Hit a problem? Return an integer, which may or may not be meaningful to
210    /// you. This can be done with `return Err(NonZeroU8::new(1).unwrap());`.
211    ///
212    /// The `error` argument tells you if there has been an error at some point,
213    /// and the return value also detemines whether to propegate/modify the
214    /// error (probably what you want) or clear it (I can't think of any good
215    /// reason to do this in `add()`). If you would like to propegate the error
216    /// without action, just add the following as the first line of the
217    /// function:
218    ///
219    /// ```
220    /// # use std::num::NonZeroU8;
221    /// # fn tmp(error: Option<NonZeroU8>) -> Result<(), NonZeroU8> {
222    /// error.map_or(Ok(()), Err)?;
223    /// # Ok(())
224    /// # }
225    /// ```
226    ///
227    /// If you do this,
228    fn add(
229        &mut self,
230        cfg: &UdfCfg<Process>,
231        args: &ArgList<Process>,
232        error: Option<NonZeroU8>,
233    ) -> Result<(), NonZeroU8>;
234
235    /// Remove only applies to `MariaDB`, for use with window functions; i.e.,
236    /// `remove` will be called on a row that should be removed from the current
237    /// set (has moved out of the window).
238    ///
239    /// This is optional; a default is supplied so no action is needed. If you
240    /// would like to use `remove`, just reimplement it.
241    ///
242    /// <https://mariadb.com/kb/en/user-defined-functions-calling-sequences/#x_remove>
243    ///
244    /// # Errors
245    ///
246    /// Errors are handled the same as with [`AggregateUdf::add()`], see the
247    /// description there
248    #[inline]
249    #[allow(unused_variables)] // Allow without an underscore for cleaner docs
250    fn remove(
251        &mut self,
252        cfg: &UdfCfg<Process>,
253        args: &ArgList<Process>,
254        error: Option<NonZeroU8>,
255    ) -> Result<(), NonZeroU8> {
256        unimplemented!()
257    }
258}
259
/// Marker trait for the phase a UDF is currently in
///
/// The implementors are the zero-sized [`Init`] and [`Process`] types. The
/// trait exists only at the type level, to control which operations are
/// allowed at different times.
pub trait UdfState: PartialEq + Debug {}

/// Typestate marker: the UDF is in its initialization phase
///
/// A zero-sized type. Its only job is to let specific methods be implemented
/// solely on types that were created during the `init` function.
#[derive(Eq, PartialEq, Debug)]
pub struct Init;

impl UdfState for Init {}

/// Typestate marker: the UDF is in its processing phase
///
/// A zero-sized type that indicates a type was created in the `process`
/// function.
#[derive(Eq, PartialEq, Debug)]
pub struct Process;

impl UdfState for Process {}