Skip to main content

quack_rs/table/
builder.rs

1// SPDX-License-Identifier: MIT
2// Copyright 2026 Tom F. <https://github.com/tomtom215/>
3// My way of giving something small back to the open source community
4// and encouraging more Rust development!
5
6//! Builder for registering `DuckDB` table functions.
7//!
8//! Table functions are the backbone of "real" `DuckDB` extensions: they are
9//! SELECT-able, support projection pushdown, named parameters, and can
10//! produce arbitrary output schemas determined at query-parse time.
11//!
12//! # Table function lifecycle
13//!
14//! ```text
15//! 1. bind       — parse args, declare output columns, optionally set cardinality hint
16//! 2. init       — allocate global scan state (shared across threads)
17//! 3. local_init — allocate per-thread scan state (optional)
18//! 4. scan       — fill one output chunk; set chunk size to 0 when exhausted
19//! ```
20//!
21//! # Example: A constant table function
22//!
23//! ```rust,no_run
24//! use quack_rs::table::{TableFunctionBuilder, FfiBindData, FfiInitData};
25//! use quack_rs::types::TypeId;
26//! use libduckdb_sys::{
27//!     duckdb_bind_info, duckdb_init_info, duckdb_function_info,
28//!     duckdb_data_chunk, duckdb_data_chunk_set_size,
29//! };
30//!
31//! struct Config { limit: u64 }
32//! struct State  { emitted: u64 }
33//!
34//! unsafe extern "C" fn bind(info: duckdb_bind_info) {
35//!     unsafe {
36//!         // Declare the output schema.
37//!         quack_rs::table::BindInfo::new(info)
38//!             .add_result_column("n", TypeId::BigInt);
39//!         // Store bind-time configuration.
40//!         FfiBindData::<Config>::set(info, Config { limit: 100 });
41//!     }
42//! }
43//!
44//! unsafe extern "C" fn init(info: duckdb_init_info) {
45//!     unsafe { FfiInitData::<State>::set(info, State { emitted: 0 }); }
46//! }
47//!
48//! unsafe extern "C" fn scan(info: duckdb_function_info, output: duckdb_data_chunk) {
49//!     // scan logic
50//! }
51//!
52//! // fn register(con: libduckdb_sys::duckdb_connection) -> Result<(), quack_rs::error::ExtensionError> {
53//! //     unsafe {
54//! //         TableFunctionBuilder::new("my_table_fn")
55//! //             .bind(bind)
56//! //             .init(init)
57//! //             .scan(scan)
58//! //             .register(con)
59//! //     }
60//! // }
61//! ```
62
63use std::ffi::CString;
64use std::os::raw::c_void;
65
66use libduckdb_sys::{
67    duckdb_bind_info, duckdb_connection, duckdb_create_table_function, duckdb_data_chunk,
68    duckdb_destroy_table_function, duckdb_function_info, duckdb_init_info,
69    duckdb_register_table_function, duckdb_table_function_add_named_parameter,
70    duckdb_table_function_add_parameter, duckdb_table_function_set_bind,
71    duckdb_table_function_set_extra_info, duckdb_table_function_set_function,
72    duckdb_table_function_set_init, duckdb_table_function_set_local_init,
73    duckdb_table_function_set_name, duckdb_table_function_supports_projection_pushdown,
74    DuckDBSuccess,
75};
76
77use crate::error::ExtensionError;
78use crate::types::{LogicalType, TypeId};
79use crate::validate::validate_function_name;
80
/// The bind callback: declare output columns, read parameters, store bind data.
///
/// Installed with [`TableFunctionBuilder::bind`]. Receives the raw
/// `duckdb_bind_info` handle for the query being bound.
pub type BindFn = unsafe extern "C" fn(info: duckdb_bind_info);

/// The init callback: allocate global scan state.
///
/// Installed with [`TableFunctionBuilder::init`]. The same signature is also
/// used for the optional per-thread callback installed with
/// [`TableFunctionBuilder::local_init`].
pub type InitFn = unsafe extern "C" fn(info: duckdb_init_info);

/// The scan callback: fill one output chunk; set chunk size to 0 when done.
///
/// Installed with [`TableFunctionBuilder::scan`]. `output` is the chunk the
/// callback writes its rows into.
pub type ScanFn = unsafe extern "C" fn(info: duckdb_function_info, output: duckdb_data_chunk);

/// The extra-info destructor callback: called by `DuckDB` to free function-level extra data.
///
/// Supplied together with the data pointer to [`TableFunctionBuilder::extra_info`];
/// `data` is that same pointer.
pub type ExtraDestroyFn = unsafe extern "C" fn(data: *mut c_void);
92
/// A named parameter specification: (name, type).
///
/// The two variants differ only in how the type is described; both are
/// forwarded to `duckdb_table_function_add_named_parameter` at registration.
enum NamedParam {
    /// A parameter whose type is a plain [`TypeId`]; a [`LogicalType`] is
    /// created from it when the function is registered.
    Simple {
        // Parameter name as a NUL-terminated C string.
        name: CString,
        // Simple type identifier supplied by the caller.
        type_id: TypeId,
    },
    /// A parameter whose type is a caller-supplied [`LogicalType`], used
    /// as-is when the function is registered.
    Logical {
        // Parameter name as a NUL-terminated C string.
        name: CString,
        // Fully constructed logical type supplied by the caller.
        logical_type: LogicalType,
    },
}
104
/// Builder for registering a `DuckDB` table function.
///
/// Table functions are the most powerful extension type — they can return
/// arbitrary result schemas, support named parameters, projection pushdown,
/// and parallel execution.
///
/// # Required fields
///
/// - [`bind`][TableFunctionBuilder::bind]: must be set.
/// - [`init`][TableFunctionBuilder::init]: must be set.
/// - [`scan`][TableFunctionBuilder::scan]: must be set.
///
/// # Optional features
///
/// - [`param`][TableFunctionBuilder::param]: positional parameters.
/// - [`named_param`][TableFunctionBuilder::named_param]: named parameters (`name := value`).
/// - [`local_init`][TableFunctionBuilder::local_init]: per-thread init (enables parallel scan).
/// - [`projection_pushdown`][TableFunctionBuilder::projection_pushdown]: opt in to receiving
///   the projected column list from `DuckDB`.
/// - [`extra_info`][TableFunctionBuilder::extra_info]: function-level data available in all callbacks.
#[must_use]
pub struct TableFunctionBuilder {
    // Function name as a NUL-terminated C string, ready to hand to DuckDB.
    name: CString,
    // Positional parameters declared with a simple `TypeId`, in insertion order.
    params: Vec<TypeId>,
    // Positional parameters declared with a full `LogicalType`. The `usize` is
    // the parameter's position among ALL positional parameters (simple and
    // logical combined) at the time it was added, so `register` can interleave
    // the two lists in declaration order.
    logical_params: Vec<(usize, LogicalType)>,
    // Named parameters, in insertion order.
    named_params: Vec<NamedParam>,
    // Required bind callback; `None` until `bind` is called.
    bind: Option<BindFn>,
    // Required global init callback; `None` until `init` is called.
    init: Option<InitFn>,
    // Optional per-thread init callback.
    local_init: Option<InitFn>,
    // Required scan callback; `None` until `scan` is called.
    scan: Option<ScanFn>,
    // Whether projection pushdown is advertised to DuckDB (default: false).
    projection_pushdown: bool,
    // Optional function-level data pointer paired with its destructor.
    extra_info: Option<(*mut c_void, ExtraDestroyFn)>,
}
137
138impl TableFunctionBuilder {
139    /// Creates a new builder for a table function with the given name.
140    ///
141    /// # Panics
142    ///
143    /// Panics if `name` contains an interior null byte.
144    pub fn new(name: &str) -> Self {
145        Self {
146            name: CString::new(name).expect("function name must not contain null bytes"),
147            params: Vec::new(),
148            logical_params: Vec::new(),
149            named_params: Vec::new(),
150            bind: None,
151            init: None,
152            local_init: None,
153            scan: None,
154            projection_pushdown: false,
155            extra_info: None,
156        }
157    }
158
159    /// Creates a new builder with function name validation.
160    ///
161    /// # Errors
162    ///
163    /// Returns `ExtensionError` if the name is invalid.
164    pub fn try_new(name: &str) -> Result<Self, ExtensionError> {
165        validate_function_name(name)?;
166        let c_name = CString::new(name)
167            .map_err(|_| ExtensionError::new("function name contains interior null byte"))?;
168        Ok(Self {
169            name: c_name,
170            params: Vec::new(),
171            logical_params: Vec::new(),
172            named_params: Vec::new(),
173            bind: None,
174            init: None,
175            local_init: None,
176            scan: None,
177            projection_pushdown: false,
178            extra_info: None,
179        })
180    }
181
182    /// Returns the function name.
183    ///
184    /// Useful for introspection and for [`MockRegistrar`][crate::testing::MockRegistrar].
185    pub fn name(&self) -> &str {
186        self.name.to_str().unwrap_or("")
187    }
188
189    /// Adds a positional parameter with the given type.
190    pub fn param(mut self, type_id: TypeId) -> Self {
191        self.params.push(type_id);
192        self
193    }
194
195    /// Adds a positional parameter with a complex [`LogicalType`].
196    ///
197    /// Use this for parameterized types that [`TypeId`] cannot express, such as
198    /// `LIST(BIGINT)`, `MAP(VARCHAR, INTEGER)`, or `STRUCT(...)`.
199    #[mutants::skip] // position arithmetic tested via E2E (requires DuckDB runtime)
200    pub fn param_logical(mut self, logical_type: LogicalType) -> Self {
201        let position = self.params.len() + self.logical_params.len();
202        self.logical_params.push((position, logical_type));
203        self
204    }
205
206    /// Adds a named parameter (e.g., `my_fn(path := 'data.csv')`).
207    ///
208    /// Named parameters are accessed in the bind callback via
209    /// `duckdb_bind_get_named_parameter`.
210    ///
211    /// # Panics
212    ///
213    /// Panics if `name` contains an interior null byte.
214    pub fn named_param(mut self, name: &str, type_id: TypeId) -> Self {
215        self.named_params.push(NamedParam::Simple {
216            name: CString::new(name).expect("parameter name must not contain null bytes"),
217            type_id,
218        });
219        self
220    }
221
222    /// Adds a named parameter with a complex [`LogicalType`].
223    ///
224    /// Use this for parameterized types that [`TypeId`] cannot express.
225    ///
226    /// # Panics
227    ///
228    /// Panics if `name` contains an interior null byte.
229    pub fn named_param_logical(mut self, name: &str, logical_type: LogicalType) -> Self {
230        self.named_params.push(NamedParam::Logical {
231            name: CString::new(name).expect("parameter name must not contain null bytes"),
232            logical_type,
233        });
234        self
235    }
236
237    /// Sets the bind callback.
238    ///
239    /// The bind callback is called once at query-parse time. It must:
240    /// - Declare all output columns via [`crate::table::BindInfo::add_result_column`].
241    /// - Optionally read parameters and store bind data via [`crate::table::FfiBindData::set`].
242    pub fn bind(mut self, f: BindFn) -> Self {
243        self.bind = Some(f);
244        self
245    }
246
247    /// Sets the global init callback.
248    ///
249    /// Called once per query. Use [`crate::table::FfiInitData::set`] to store global scan state.
250    pub fn init(mut self, f: InitFn) -> Self {
251        self.init = Some(f);
252        self
253    }
254
255    /// Sets the per-thread local init callback (optional).
256    ///
257    /// When set, `DuckDB` calls this once per worker thread. Use [`crate::table::FfiLocalInitData::set`]
258    /// to store thread-local scan state. Setting a local init enables parallel scanning.
259    pub fn local_init(mut self, f: InitFn) -> Self {
260        self.local_init = Some(f);
261        self
262    }
263
264    /// Sets the scan callback.
265    ///
266    /// Called repeatedly until all rows are produced. Set the output chunk's size
267    /// to `0` (via `duckdb_data_chunk_set_size(output, 0)`) to signal end of stream.
268    pub fn scan(mut self, f: ScanFn) -> Self {
269        self.scan = Some(f);
270        self
271    }
272
273    /// Enables or disables projection pushdown support (default: disabled).
274    ///
275    /// When enabled, `DuckDB` informs the `init` callback which columns were
276    /// requested. Use `duckdb_init_get_column_count` and `duckdb_init_get_column_index`
277    /// in your init callback to skip producing unrequested columns.
278    pub const fn projection_pushdown(mut self, enable: bool) -> Self {
279        self.projection_pushdown = enable;
280        self
281    }
282
283    /// Sets function-level extra info shared across all callbacks.
284    ///
285    /// This data is available via `duckdb_function_get_extra_info` and
286    /// `duckdb_bind_get_extra_info` in all callbacks. The `destroy` callback
287    /// is called by `DuckDB` when the function is dropped.
288    ///
289    /// # Safety
290    ///
291    /// `data` must remain valid until `DuckDB` calls `destroy`. The typical pattern
292    /// is to box your data: `Box::into_raw(Box::new(my_data)).cast()`.
293    pub unsafe fn extra_info(mut self, data: *mut c_void, destroy: ExtraDestroyFn) -> Self {
294        self.extra_info = Some((data, destroy));
295        self
296    }
297
298    /// Registers the table function on the given connection.
299    ///
300    /// # Errors
301    ///
302    /// Returns `ExtensionError` if:
303    /// - The bind, init, or scan callback was not set.
304    /// - `DuckDB` reports a registration failure.
305    ///
306    /// # Safety
307    ///
308    /// `con` must be a valid, open `duckdb_connection`.
309    pub unsafe fn register(self, con: duckdb_connection) -> Result<(), ExtensionError> {
310        let bind = self
311            .bind
312            .ok_or_else(|| ExtensionError::new("bind callback not set"))?;
313        let init = self
314            .init
315            .ok_or_else(|| ExtensionError::new("init callback not set"))?;
316        let scan = self
317            .scan
318            .ok_or_else(|| ExtensionError::new("scan callback not set"))?;
319
320        // SAFETY: creates a new table function handle.
321        let mut func = unsafe { duckdb_create_table_function() };
322
323        // SAFETY: func is a valid newly created handle.
324        unsafe {
325            duckdb_table_function_set_name(func, self.name.as_ptr());
326        }
327
328        // Add positional parameters: merge simple TypeId params and complex LogicalType
329        // params in the order they were added (tracked by position).
330        {
331            let mut simple_idx = 0;
332            let mut logical_idx = 0;
333            let total = self.params.len() + self.logical_params.len();
334            for pos in 0..total {
335                if logical_idx < self.logical_params.len()
336                    && self.logical_params[logical_idx].0 == pos
337                {
338                    unsafe {
339                        duckdb_table_function_add_parameter(
340                            func,
341                            self.logical_params[logical_idx].1.as_raw(),
342                        );
343                    }
344                    logical_idx += 1;
345                } else if simple_idx < self.params.len() {
346                    let lt = LogicalType::new(self.params[simple_idx]);
347                    unsafe {
348                        duckdb_table_function_add_parameter(func, lt.as_raw());
349                    }
350                    simple_idx += 1;
351                }
352            }
353        }
354
355        // Add named parameters.
356        for np in &self.named_params {
357            match np {
358                NamedParam::Simple { name, type_id } => {
359                    let lt = LogicalType::new(*type_id);
360                    unsafe {
361                        duckdb_table_function_add_named_parameter(func, name.as_ptr(), lt.as_raw());
362                    }
363                }
364                NamedParam::Logical { name, logical_type } => unsafe {
365                    duckdb_table_function_add_named_parameter(
366                        func,
367                        name.as_ptr(),
368                        logical_type.as_raw(),
369                    );
370                },
371            }
372        }
373
374        // Set callbacks.
375        // SAFETY: func is valid; callbacks are valid extern "C" fn pointers.
376        unsafe {
377            duckdb_table_function_set_bind(func, Some(bind));
378            duckdb_table_function_set_init(func, Some(init));
379            duckdb_table_function_set_function(func, Some(scan));
380        }
381
382        // Set optional local init.
383        if let Some(local_init) = self.local_init {
384            // SAFETY: func is valid; local_init is a valid extern "C" fn pointer.
385            unsafe {
386                duckdb_table_function_set_local_init(func, Some(local_init));
387            }
388        }
389
390        // Configure projection pushdown.
391        // SAFETY: func is valid.
392        unsafe {
393            duckdb_table_function_supports_projection_pushdown(func, self.projection_pushdown);
394        }
395
396        // Set extra info if provided.
397        if let Some((data, destroy)) = self.extra_info {
398            // SAFETY: func is valid; data and destroy are provided by caller.
399            unsafe {
400                duckdb_table_function_set_extra_info(func, data, Some(destroy));
401            }
402        }
403
404        // Register.
405        // SAFETY: con and func are valid.
406        let result = unsafe { duckdb_register_table_function(con, func) };
407
408        // Always destroy the function handle; ownership transferred to DuckDB on success.
409        // SAFETY: func was created above.
410        unsafe {
411            duckdb_destroy_table_function(&raw mut func);
412        }
413
414        if result == DuckDBSuccess {
415            Ok(())
416        } else {
417            Err(ExtensionError::new(format!(
418                "duckdb_register_table_function failed for '{}'",
419                self.name.to_string_lossy()
420            )))
421        }
422    }
423}
424
#[cfg(test)]
mod tests {
    //! Unit tests for the builder's bookkeeping. None of these call
    //! `register`, so no `DuckDB` runtime is required.

    use super::*;

    /// The name passed to `new` is stored and exposed through `name()`.
    #[test]
    fn builder_stores_name() {
        let b = TableFunctionBuilder::new("my_table_fn");
        assert_eq!(b.name.to_str().unwrap(), "my_table_fn");
        assert_eq!(b.name(), "my_table_fn");
    }

    /// Simple positional parameters are kept in insertion order.
    #[test]
    fn builder_stores_params() {
        let b = TableFunctionBuilder::new("f")
            .param(TypeId::Varchar)
            .param(TypeId::BigInt);
        assert_eq!(b.params.len(), 2);
        assert_eq!(b.params[0], TypeId::Varchar);
        assert_eq!(b.params[1], TypeId::BigInt);
    }

    /// Named parameters are kept in insertion order as `Simple` variants.
    #[test]
    fn builder_stores_named_params() {
        let b = TableFunctionBuilder::new("f")
            .named_param("path", TypeId::Varchar)
            .named_param("limit", TypeId::BigInt);
        assert_eq!(b.named_params.len(), 2);
        match &b.named_params[0] {
            NamedParam::Simple { name, .. } => assert_eq!(name.to_str().unwrap(), "path"),
            NamedParam::Logical { .. } => panic!("expected Simple"),
        }
        match &b.named_params[1] {
            NamedParam::Simple { name, .. } => assert_eq!(name.to_str().unwrap(), "limit"),
            NamedParam::Logical { .. } => panic!("expected Simple"),
        }
    }

    /// All three required callbacks are recorded once set.
    #[test]
    fn builder_stores_callbacks() {
        unsafe extern "C" fn my_bind(_: duckdb_bind_info) {}
        unsafe extern "C" fn my_init(_: duckdb_init_info) {}
        unsafe extern "C" fn my_scan(_: duckdb_function_info, _: duckdb_data_chunk) {}

        let b = TableFunctionBuilder::new("f")
            .bind(my_bind)
            .init(my_init)
            .scan(my_scan);
        assert!(b.bind.is_some());
        assert!(b.init.is_some());
        assert!(b.scan.is_some());
    }

    /// `projection_pushdown(true)` flips the flag from its default.
    #[test]
    fn builder_projection_pushdown() {
        let b = TableFunctionBuilder::new("f").projection_pushdown(true);
        assert!(b.projection_pushdown);
    }

    #[test]
    fn try_new_valid_name() {
        assert!(TableFunctionBuilder::try_new("read_csv_ext").is_ok());
    }

    #[test]
    fn try_new_invalid_name() {
        assert!(TableFunctionBuilder::try_new("").is_err());
        assert!(TableFunctionBuilder::try_new("MyFunc").is_err());
    }

    #[test]
    fn try_new_null_byte_rejected() {
        assert!(TableFunctionBuilder::try_new("func\0name").is_err());
    }

    /// A logical parameter added after a simple one records position 1.
    #[test]
    fn param_logical_position_tracking() {
        // Create a fake LogicalType from a dangling (non-null) pointer.
        let fake_lt = unsafe { LogicalType::from_raw(std::ptr::NonNull::dangling().as_ptr()) };

        // Build with one simple param followed by one logical param.
        //
        // The builder is wrapped in `ManuallyDrop` BEFORE any assertion runs so
        // that Drop never calls duckdb_destroy_logical_type on the invalid
        // pointer — not even while unwinding from a failed assertion, which a
        // trailing `mem::forget` would not protect against.
        let b = std::mem::ManuallyDrop::new(
            TableFunctionBuilder::new("f")
                .param(TypeId::Integer)
                .param_logical(fake_lt),
        );

        assert_eq!(b.params.len(), 1);
        assert_eq!(b.logical_params.len(), 1);
        assert_eq!(b.logical_params[0].0, 1); // position should be 1, not 0
    }
}