Skip to main content

quack_rs/table/
builder.rs

1// SPDX-License-Identifier: MIT
2// Copyright 2026 Tom F. <https://github.com/tomtom215/>
3// My way of giving something small back to the open source community
4// and encouraging more Rust development!
5
6//! Builder for registering `DuckDB` table functions.
7//!
8//! Table functions are the backbone of "real" `DuckDB` extensions: they are
9//! SELECT-able, support projection pushdown, named parameters, and can
10//! produce arbitrary output schemas determined at query-parse time.
11//!
12//! # Table function lifecycle
13//!
14//! ```text
15//! 1. bind       — parse args, declare output columns, optionally set cardinality hint
16//! 2. init       — allocate global scan state (shared across threads)
17//! 3. local_init — allocate per-thread scan state (optional)
18//! 4. scan       — fill one output chunk; set chunk size to 0 when exhausted
19//! ```
20//!
21//! # Example: A constant table function
22//!
23//! ```rust,no_run
24//! use quack_rs::table::{TableFunctionBuilder, FfiBindData, FfiInitData};
25//! use quack_rs::types::TypeId;
26//! use libduckdb_sys::{
27//!     duckdb_bind_info, duckdb_init_info, duckdb_function_info,
28//!     duckdb_data_chunk, duckdb_data_chunk_set_size,
29//! };
30//!
31//! struct Config { limit: u64 }
32//! struct State  { emitted: u64 }
33//!
34//! unsafe extern "C" fn bind(info: duckdb_bind_info) {
35//!     unsafe {
36//!         // Declare the output schema.
37//!         quack_rs::table::BindInfo::new(info)
38//!             .add_result_column("n", TypeId::BigInt);
39//!         // Store bind-time configuration.
40//!         FfiBindData::<Config>::set(info, Config { limit: 100 });
41//!     }
42//! }
43//!
44//! unsafe extern "C" fn init(info: duckdb_init_info) {
45//!     unsafe { FfiInitData::<State>::set(info, State { emitted: 0 }); }
46//! }
47//!
48//! unsafe extern "C" fn scan(info: duckdb_function_info, output: duckdb_data_chunk) {
49//!     // scan logic
50//! }
51//!
52//! // fn register(con: libduckdb_sys::duckdb_connection) -> Result<(), quack_rs::error::ExtensionError> {
53//! //     unsafe {
54//! //         TableFunctionBuilder::new("my_table_fn")
55//! //             .bind(bind)
56//! //             .init(init)
57//! //             .scan(scan)
58//! //             .register(con)
59//! //     }
60//! // }
61//! ```
62
63use std::ffi::CString;
64use std::os::raw::c_void;
65
66use libduckdb_sys::{
67    duckdb_bind_info, duckdb_connection, duckdb_create_table_function, duckdb_data_chunk,
68    duckdb_destroy_table_function, duckdb_function_info, duckdb_init_info,
69    duckdb_register_table_function, duckdb_table_function_add_named_parameter,
70    duckdb_table_function_add_parameter, duckdb_table_function_set_bind,
71    duckdb_table_function_set_extra_info, duckdb_table_function_set_function,
72    duckdb_table_function_set_init, duckdb_table_function_set_local_init,
73    duckdb_table_function_set_name, duckdb_table_function_supports_projection_pushdown,
74    DuckDBSuccess,
75};
76
77use crate::error::ExtensionError;
78use crate::types::{LogicalType, TypeId};
79use crate::validate::validate_function_name;
80
/// The bind callback: declare output columns, read parameters, store bind data.
///
/// This is the signature accepted by [`TableFunctionBuilder::bind`].
pub type BindFn = unsafe extern "C" fn(info: duckdb_bind_info);

/// The init callback: allocate global scan state.
///
/// The same signature is accepted by both [`TableFunctionBuilder::init`] and
/// [`TableFunctionBuilder::local_init`].
pub type InitFn = unsafe extern "C" fn(info: duckdb_init_info);

/// The scan callback: fill one output chunk; set chunk size to 0 when done.
///
/// This is the signature accepted by [`TableFunctionBuilder::scan`].
pub type ScanFn = unsafe extern "C" fn(info: duckdb_function_info, output: duckdb_data_chunk);

/// The extra-info destructor callback: called by `DuckDB` to free function-level extra data.
///
/// Supplied together with the data pointer to [`TableFunctionBuilder::extra_info`].
pub type ExtraDestroyFn = unsafe extern "C" fn(data: *mut c_void);
92
/// A named parameter specification: (name, type).
///
/// The two variants differ only in how the parameter's type is stored.
enum NamedParam {
    /// The type is given as a plain [`TypeId`]; a [`LogicalType`] is built from
    /// it when the function is registered.
    Simple {
        /// Parameter name, already converted to a C string.
        name: CString,
        /// Type identifier of the parameter.
        type_id: TypeId,
    },
    /// The type is given as a caller-constructed [`LogicalType`].
    Logical {
        /// Parameter name, already converted to a C string.
        name: CString,
        /// Full logical type of the parameter.
        logical_type: LogicalType,
    },
}
104
/// Builder for registering a `DuckDB` table function.
///
/// Table functions are the most powerful extension type — they can return
/// arbitrary result schemas, support named parameters, projection pushdown,
/// and parallel execution.
///
/// # Required fields
///
/// - [`bind`][TableFunctionBuilder::bind]: must be set.
/// - [`init`][TableFunctionBuilder::init]: must be set.
/// - [`scan`][TableFunctionBuilder::scan]: must be set.
///
/// # Optional features
///
/// - [`param`][TableFunctionBuilder::param]: positional parameters.
/// - [`named_param`][TableFunctionBuilder::named_param]: named parameters (`name := value`).
/// - [`local_init`][TableFunctionBuilder::local_init]: per-thread init (enables parallel scan).
/// - [`projection_pushdown`][TableFunctionBuilder::projection_pushdown]: hint projection info to `DuckDB`.
/// - [`extra_info`][TableFunctionBuilder::extra_info]: function-level data available in all callbacks.
#[must_use]
pub struct TableFunctionBuilder {
    /// Function name, stored as a C string so it can be passed to `DuckDB` directly.
    name: CString,
    /// Positional parameters declared with a plain [`TypeId`], in insertion order.
    params: Vec<TypeId>,
    /// Positional parameters declared with a [`LogicalType`]. Each entry carries the
    /// overall positional index it had when it was added, so that registration can
    /// interleave these with `params` in the original call order.
    logical_params: Vec<(usize, LogicalType)>,
    /// Named parameters, in insertion order.
    named_params: Vec<NamedParam>,
    /// Bind callback; registration fails if this is `None`.
    bind: Option<BindFn>,
    /// Global init callback; registration fails if this is `None`.
    init: Option<InitFn>,
    /// Optional per-thread init callback.
    local_init: Option<InitFn>,
    /// Scan callback; registration fails if this is `None`.
    scan: Option<ScanFn>,
    /// Whether projection pushdown is advertised to `DuckDB` (defaults to `false`).
    projection_pushdown: bool,
    /// Optional function-level data pointer together with its destructor.
    extra_info: Option<(*mut c_void, ExtraDestroyFn)>,
}
137
138impl TableFunctionBuilder {
139    /// Creates a new builder for a table function with the given name.
140    ///
141    /// # Panics
142    ///
143    /// Panics if `name` contains an interior null byte.
144    pub fn new(name: &str) -> Self {
145        Self {
146            name: CString::new(name).expect("function name must not contain null bytes"),
147            params: Vec::new(),
148            logical_params: Vec::new(),
149            named_params: Vec::new(),
150            bind: None,
151            init: None,
152            local_init: None,
153            scan: None,
154            projection_pushdown: false,
155            extra_info: None,
156        }
157    }
158
159    /// Creates a new builder with function name validation.
160    ///
161    /// # Errors
162    ///
163    /// Returns `ExtensionError` if the name is invalid.
164    pub fn try_new(name: &str) -> Result<Self, ExtensionError> {
165        validate_function_name(name)?;
166        let c_name = CString::new(name)
167            .map_err(|_| ExtensionError::new("function name contains interior null byte"))?;
168        Ok(Self {
169            name: c_name,
170            params: Vec::new(),
171            logical_params: Vec::new(),
172            named_params: Vec::new(),
173            bind: None,
174            init: None,
175            local_init: None,
176            scan: None,
177            projection_pushdown: false,
178            extra_info: None,
179        })
180    }
181
182    /// Returns the function name.
183    ///
184    /// Useful for introspection and for [`MockRegistrar`][crate::testing::MockRegistrar].
185    pub fn name(&self) -> &str {
186        self.name.to_str().unwrap_or("")
187    }
188
189    /// Adds a positional parameter with the given type.
190    pub fn param(mut self, type_id: TypeId) -> Self {
191        self.params.push(type_id);
192        self
193    }
194
195    /// Adds a positional parameter with a complex [`LogicalType`].
196    ///
197    /// Use this for parameterized types that [`TypeId`] cannot express, such as
198    /// `LIST(BIGINT)`, `MAP(VARCHAR, INTEGER)`, or `STRUCT(...)`.
199    pub fn param_logical(mut self, logical_type: LogicalType) -> Self {
200        let position = self.params.len() + self.logical_params.len();
201        self.logical_params.push((position, logical_type));
202        self
203    }
204
205    /// Adds a named parameter (e.g., `my_fn(path := 'data.csv')`).
206    ///
207    /// Named parameters are accessed in the bind callback via
208    /// `duckdb_bind_get_named_parameter`.
209    ///
210    /// # Panics
211    ///
212    /// Panics if `name` contains an interior null byte.
213    pub fn named_param(mut self, name: &str, type_id: TypeId) -> Self {
214        self.named_params.push(NamedParam::Simple {
215            name: CString::new(name).expect("parameter name must not contain null bytes"),
216            type_id,
217        });
218        self
219    }
220
221    /// Adds a named parameter with a complex [`LogicalType`].
222    ///
223    /// Use this for parameterized types that [`TypeId`] cannot express.
224    ///
225    /// # Panics
226    ///
227    /// Panics if `name` contains an interior null byte.
228    pub fn named_param_logical(mut self, name: &str, logical_type: LogicalType) -> Self {
229        self.named_params.push(NamedParam::Logical {
230            name: CString::new(name).expect("parameter name must not contain null bytes"),
231            logical_type,
232        });
233        self
234    }
235
236    /// Sets the bind callback.
237    ///
238    /// The bind callback is called once at query-parse time. It must:
239    /// - Declare all output columns via [`crate::table::BindInfo::add_result_column`].
240    /// - Optionally read parameters and store bind data via [`crate::table::FfiBindData::set`].
241    pub fn bind(mut self, f: BindFn) -> Self {
242        self.bind = Some(f);
243        self
244    }
245
246    /// Sets the global init callback.
247    ///
248    /// Called once per query. Use [`crate::table::FfiInitData::set`] to store global scan state.
249    pub fn init(mut self, f: InitFn) -> Self {
250        self.init = Some(f);
251        self
252    }
253
254    /// Sets the per-thread local init callback (optional).
255    ///
256    /// When set, `DuckDB` calls this once per worker thread. Use [`crate::table::FfiLocalInitData::set`]
257    /// to store thread-local scan state. Setting a local init enables parallel scanning.
258    pub fn local_init(mut self, f: InitFn) -> Self {
259        self.local_init = Some(f);
260        self
261    }
262
263    /// Sets the scan callback.
264    ///
265    /// Called repeatedly until all rows are produced. Set the output chunk's size
266    /// to `0` (via `duckdb_data_chunk_set_size(output, 0)`) to signal end of stream.
267    pub fn scan(mut self, f: ScanFn) -> Self {
268        self.scan = Some(f);
269        self
270    }
271
    /// Enables or disables projection pushdown support (default: disabled).
    ///
    /// When enabled, `DuckDB` informs the `init` callback which columns were
    /// requested. Use `duckdb_init_get_column_count` and `duckdb_init_get_column_index`
    /// in your init callback to skip producing unrequested columns.
    ///
    /// The flag is only stored here; it is passed to `DuckDB` when
    /// [`register`][TableFunctionBuilder::register] is called.
    pub const fn projection_pushdown(mut self, enable: bool) -> Self {
        self.projection_pushdown = enable;
        self
    }
281
282    /// Sets function-level extra info shared across all callbacks.
283    ///
284    /// This data is available via `duckdb_function_get_extra_info` and
285    /// `duckdb_bind_get_extra_info` in all callbacks. The `destroy` callback
286    /// is called by `DuckDB` when the function is dropped.
287    ///
288    /// # Safety
289    ///
290    /// `data` must remain valid until `DuckDB` calls `destroy`. The typical pattern
291    /// is to box your data: `Box::into_raw(Box::new(my_data)).cast()`.
292    pub unsafe fn extra_info(mut self, data: *mut c_void, destroy: ExtraDestroyFn) -> Self {
293        self.extra_info = Some((data, destroy));
294        self
295    }
296
297    /// Registers the table function on the given connection.
298    ///
299    /// # Errors
300    ///
301    /// Returns `ExtensionError` if:
302    /// - The bind, init, or scan callback was not set.
303    /// - `DuckDB` reports a registration failure.
304    ///
305    /// # Safety
306    ///
307    /// `con` must be a valid, open `duckdb_connection`.
308    pub unsafe fn register(self, con: duckdb_connection) -> Result<(), ExtensionError> {
309        let bind = self
310            .bind
311            .ok_or_else(|| ExtensionError::new("bind callback not set"))?;
312        let init = self
313            .init
314            .ok_or_else(|| ExtensionError::new("init callback not set"))?;
315        let scan = self
316            .scan
317            .ok_or_else(|| ExtensionError::new("scan callback not set"))?;
318
319        // SAFETY: creates a new table function handle.
320        let func = unsafe { duckdb_create_table_function() };
321
322        // SAFETY: func is a valid newly created handle.
323        unsafe {
324            duckdb_table_function_set_name(func, self.name.as_ptr());
325        }
326
327        // Add positional parameters: merge simple TypeId params and complex LogicalType
328        // params in the order they were added (tracked by position).
329        {
330            let mut simple_idx = 0;
331            let mut logical_idx = 0;
332            let total = self.params.len() + self.logical_params.len();
333            for pos in 0..total {
334                if logical_idx < self.logical_params.len()
335                    && self.logical_params[logical_idx].0 == pos
336                {
337                    unsafe {
338                        duckdb_table_function_add_parameter(
339                            func,
340                            self.logical_params[logical_idx].1.as_raw(),
341                        );
342                    }
343                    logical_idx += 1;
344                } else if simple_idx < self.params.len() {
345                    let lt = LogicalType::new(self.params[simple_idx]);
346                    unsafe {
347                        duckdb_table_function_add_parameter(func, lt.as_raw());
348                    }
349                    simple_idx += 1;
350                }
351            }
352        }
353
354        // Add named parameters.
355        for np in &self.named_params {
356            match np {
357                NamedParam::Simple { name, type_id } => {
358                    let lt = LogicalType::new(*type_id);
359                    unsafe {
360                        duckdb_table_function_add_named_parameter(func, name.as_ptr(), lt.as_raw());
361                    }
362                }
363                NamedParam::Logical { name, logical_type } => unsafe {
364                    duckdb_table_function_add_named_parameter(
365                        func,
366                        name.as_ptr(),
367                        logical_type.as_raw(),
368                    );
369                },
370            }
371        }
372
373        // Set callbacks.
374        // SAFETY: func is valid; callbacks are valid extern "C" fn pointers.
375        unsafe {
376            duckdb_table_function_set_bind(func, Some(bind));
377            duckdb_table_function_set_init(func, Some(init));
378            duckdb_table_function_set_function(func, Some(scan));
379        }
380
381        // Set optional local init.
382        if let Some(local_init) = self.local_init {
383            // SAFETY: func is valid; local_init is a valid extern "C" fn pointer.
384            unsafe {
385                duckdb_table_function_set_local_init(func, Some(local_init));
386            }
387        }
388
389        // Configure projection pushdown.
390        // SAFETY: func is valid.
391        unsafe {
392            duckdb_table_function_supports_projection_pushdown(func, self.projection_pushdown);
393        }
394
395        // Set extra info if provided.
396        if let Some((data, destroy)) = self.extra_info {
397            // SAFETY: func is valid; data and destroy are provided by caller.
398            unsafe {
399                duckdb_table_function_set_extra_info(func, data, Some(destroy));
400            }
401        }
402
403        // Register.
404        // SAFETY: con and func are valid.
405        let result = unsafe { duckdb_register_table_function(con, func) };
406
407        // Always destroy the function handle; ownership transferred to DuckDB on success.
408        // SAFETY: func was created above.
409        unsafe {
410            duckdb_destroy_table_function(&mut { func });
411        }
412
413        if result == DuckDBSuccess {
414            Ok(())
415        } else {
416            Err(ExtensionError::new(format!(
417                "duckdb_register_table_function failed for '{}'",
418                self.name.to_string_lossy()
419            )))
420        }
421    }
422}
423
/// Unit tests for the builder's bookkeeping. None of them call into `DuckDB`:
/// they only inspect the state accumulated by the builder methods.
#[cfg(test)]
mod tests {
    use super::*;

    /// The name passed to `new` is stored unchanged.
    #[test]
    fn builder_stores_name() {
        let b = TableFunctionBuilder::new("my_table_fn");
        assert_eq!(b.name.to_str().unwrap(), "my_table_fn");
    }

    /// Positional parameters are stored in insertion order.
    #[test]
    fn builder_stores_params() {
        let b = TableFunctionBuilder::new("f")
            .param(TypeId::Varchar)
            .param(TypeId::BigInt);
        assert_eq!(b.params.len(), 2);
        assert_eq!(b.params[0], TypeId::Varchar);
        assert_eq!(b.params[1], TypeId::BigInt);
    }

    /// Named parameters added via `named_param` are stored as `Simple` entries,
    /// in insertion order, with their names intact.
    #[test]
    fn builder_stores_named_params() {
        let b = TableFunctionBuilder::new("f")
            .named_param("path", TypeId::Varchar)
            .named_param("limit", TypeId::BigInt);
        assert_eq!(b.named_params.len(), 2);
        match &b.named_params[0] {
            NamedParam::Simple { name, .. } => assert_eq!(name.to_str().unwrap(), "path"),
            NamedParam::Logical { .. } => panic!("expected Simple"),
        }
        match &b.named_params[1] {
            NamedParam::Simple { name, .. } => assert_eq!(name.to_str().unwrap(), "limit"),
            NamedParam::Logical { .. } => panic!("expected Simple"),
        }
    }

    /// The three required callbacks are recorded once set.
    #[test]
    fn builder_stores_callbacks() {
        unsafe extern "C" fn my_bind(_: duckdb_bind_info) {}
        unsafe extern "C" fn my_init(_: duckdb_init_info) {}
        unsafe extern "C" fn my_scan(_: duckdb_function_info, _: duckdb_data_chunk) {}

        let b = TableFunctionBuilder::new("f")
            .bind(my_bind)
            .init(my_init)
            .scan(my_scan);
        assert!(b.bind.is_some());
        assert!(b.init.is_some());
        assert!(b.scan.is_some());
    }

    /// Enabling projection pushdown flips the stored flag.
    #[test]
    fn builder_projection_pushdown() {
        let b = TableFunctionBuilder::new("f").projection_pushdown(true);
        assert!(b.projection_pushdown);
    }

    /// A well-formed name passes validation.
    #[test]
    fn try_new_valid_name() {
        assert!(TableFunctionBuilder::try_new("read_csv_ext").is_ok());
    }

    /// Empty and mixed-case names are rejected by validation.
    #[test]
    fn try_new_invalid_name() {
        assert!(TableFunctionBuilder::try_new("").is_err());
        assert!(TableFunctionBuilder::try_new("MyFunc").is_err());
    }

    /// A name with an interior null byte yields an error rather than a panic.
    #[test]
    fn try_new_null_byte_rejected() {
        assert!(TableFunctionBuilder::try_new("func\0name").is_err());
    }

    /// A logical parameter added after a simple one records position 1.
    #[test]
    fn param_logical_position_tracking() {
        // Create a fake LogicalType from a dangling (non-null) pointer.
        let fake_lt = unsafe { LogicalType::from_raw(std::ptr::NonNull::dangling().as_ptr()) };

        // Build with one simple param followed by one logical param.
        //
        // The builder is wrapped in `ManuallyDrop` immediately so that the fake
        // LogicalType is never dropped (which would call
        // duckdb_destroy_logical_type on the invalid pointer) — not even when one
        // of the assertions below fails and the test unwinds.
        let b = std::mem::ManuallyDrop::new(
            TableFunctionBuilder::new("f")
                .param(TypeId::Integer)
                .param_logical(fake_lt),
        );

        assert_eq!(b.params.len(), 1);
        assert_eq!(b.logical_params.len(), 1);
        assert_eq!(b.logical_params[0].0, 1); // position should be 1, not 0
    }
}