quack_rs/table/builder.rs
1// SPDX-License-Identifier: MIT
2// Copyright 2026 Tom F. <https://github.com/tomtom215/>
3// My way of giving something small back to the open source community
4// and encouraging more Rust development!
5
6//! Builder for registering `DuckDB` table functions.
7//!
//! Table functions are the backbone of "real" `DuckDB` extensions: they are
//! SELECT-able, support projection pushdown and named parameters, and can
//! produce arbitrary output schemas determined at query-parse time.
11//!
12//! # Table function lifecycle
13//!
14//! ```text
15//! 1. bind — parse args, declare output columns, optionally set cardinality hint
16//! 2. init — allocate global scan state (shared across threads)
17//! 3. local_init — allocate per-thread scan state (optional)
18//! 4. scan — fill one output chunk; set chunk size to 0 when exhausted
19//! ```
20//!
21//! # Example: A constant table function
22//!
23//! ```rust,no_run
24//! use quack_rs::table::{TableFunctionBuilder, FfiBindData, FfiInitData};
25//! use quack_rs::types::TypeId;
26//! use libduckdb_sys::{
27//! duckdb_bind_info, duckdb_init_info, duckdb_function_info,
28//! duckdb_data_chunk, duckdb_data_chunk_set_size,
29//! };
30//!
31//! struct Config { limit: u64 }
32//! struct State { emitted: u64 }
33//!
34//! unsafe extern "C" fn bind(info: duckdb_bind_info) {
35//! unsafe {
36//! // Declare the output schema.
37//! quack_rs::table::BindInfo::new(info)
38//! .add_result_column("n", TypeId::BigInt);
39//! // Store bind-time configuration.
40//! FfiBindData::<Config>::set(info, Config { limit: 100 });
41//! }
42//! }
43//!
44//! unsafe extern "C" fn init(info: duckdb_init_info) {
45//! unsafe { FfiInitData::<State>::set(info, State { emitted: 0 }); }
46//! }
47//!
48//! unsafe extern "C" fn scan(info: duckdb_function_info, output: duckdb_data_chunk) {
49//! // scan logic
50//! }
51//!
52//! // fn register(con: libduckdb_sys::duckdb_connection) -> Result<(), quack_rs::error::ExtensionError> {
53//! // unsafe {
54//! // TableFunctionBuilder::new("my_table_fn")
55//! // .bind(bind)
56//! // .init(init)
57//! // .scan(scan)
58//! // .register(con)
59//! // }
60//! // }
61//! ```
62
63use std::ffi::CString;
64use std::os::raw::c_void;
65
66use libduckdb_sys::{
67 duckdb_bind_info, duckdb_connection, duckdb_create_table_function, duckdb_data_chunk,
68 duckdb_destroy_table_function, duckdb_function_info, duckdb_init_info,
69 duckdb_register_table_function, duckdb_table_function_add_named_parameter,
70 duckdb_table_function_add_parameter, duckdb_table_function_set_bind,
71 duckdb_table_function_set_extra_info, duckdb_table_function_set_function,
72 duckdb_table_function_set_init, duckdb_table_function_set_local_init,
73 duckdb_table_function_set_name, duckdb_table_function_supports_projection_pushdown,
74 DuckDBSuccess,
75};
76
77use crate::error::ExtensionError;
78use crate::types::{LogicalType, TypeId};
79use crate::validate::validate_function_name;
80
81/// The bind callback: declare output columns, read parameters, store bind data.
82pub type BindFn = unsafe extern "C" fn(info: duckdb_bind_info);
83
84/// The init callback: allocate global scan state.
85pub type InitFn = unsafe extern "C" fn(info: duckdb_init_info);
86
87/// The scan callback: fill one output chunk; set chunk size to 0 when done.
88pub type ScanFn = unsafe extern "C" fn(info: duckdb_function_info, output: duckdb_data_chunk);
89
/// Signature of the destructor for function-level extra info; `DuckDB` calls
/// it to free the data registered via [`TableFunctionBuilder::extra_info`].
pub type ExtraDestroyFn = unsafe extern "C" fn(extra: *mut c_void);
92
93/// A named parameter specification: (name, type).
94enum NamedParam {
95 Simple {
96 name: CString,
97 type_id: TypeId,
98 },
99 Logical {
100 name: CString,
101 logical_type: LogicalType,
102 },
103}
104
105/// Builder for registering a `DuckDB` table function.
106///
107/// Table functions are the most powerful extension type — they can return
108/// arbitrary result schemas, support named parameters, projection pushdown,
109/// and parallel execution.
110///
111/// # Required fields
112///
113/// - [`bind`][TableFunctionBuilder::bind]: must be set.
114/// - [`init`][TableFunctionBuilder::init]: must be set.
115/// - [`scan`][TableFunctionBuilder::scan]: must be set.
116///
117/// # Optional features
118///
119/// - [`param`][TableFunctionBuilder::param]: positional parameters.
120/// - [`named_param`][TableFunctionBuilder::named_param]: named parameters (`name := value`).
121/// - [`local_init`][TableFunctionBuilder::local_init]: per-thread init (enables parallel scan).
122/// - [`projection_pushdown`][TableFunctionBuilder::projection_pushdown]: hint projection info to `DuckDB`.
123/// - [`extra_info`][TableFunctionBuilder::extra_info]: function-level data available in all callbacks.
124#[must_use]
125pub struct TableFunctionBuilder {
126 name: CString,
127 params: Vec<TypeId>,
128 logical_params: Vec<(usize, LogicalType)>,
129 named_params: Vec<NamedParam>,
130 bind: Option<BindFn>,
131 init: Option<InitFn>,
132 local_init: Option<InitFn>,
133 scan: Option<ScanFn>,
134 projection_pushdown: bool,
135 extra_info: Option<(*mut c_void, ExtraDestroyFn)>,
136}
137
138impl TableFunctionBuilder {
139 /// Creates a new builder for a table function with the given name.
140 ///
141 /// # Panics
142 ///
143 /// Panics if `name` contains an interior null byte.
144 pub fn new(name: &str) -> Self {
145 Self {
146 name: CString::new(name).expect("function name must not contain null bytes"),
147 params: Vec::new(),
148 logical_params: Vec::new(),
149 named_params: Vec::new(),
150 bind: None,
151 init: None,
152 local_init: None,
153 scan: None,
154 projection_pushdown: false,
155 extra_info: None,
156 }
157 }
158
159 /// Creates a new builder with function name validation.
160 ///
161 /// # Errors
162 ///
163 /// Returns `ExtensionError` if the name is invalid.
164 pub fn try_new(name: &str) -> Result<Self, ExtensionError> {
165 validate_function_name(name)?;
166 let c_name = CString::new(name)
167 .map_err(|_| ExtensionError::new("function name contains interior null byte"))?;
168 Ok(Self {
169 name: c_name,
170 params: Vec::new(),
171 logical_params: Vec::new(),
172 named_params: Vec::new(),
173 bind: None,
174 init: None,
175 local_init: None,
176 scan: None,
177 projection_pushdown: false,
178 extra_info: None,
179 })
180 }
181
182 /// Returns the function name.
183 ///
184 /// Useful for introspection and for [`MockRegistrar`][crate::testing::MockRegistrar].
185 pub fn name(&self) -> &str {
186 self.name.to_str().unwrap_or("")
187 }
188
189 /// Adds a positional parameter with the given type.
190 pub fn param(mut self, type_id: TypeId) -> Self {
191 self.params.push(type_id);
192 self
193 }
194
195 /// Adds a positional parameter with a complex [`LogicalType`].
196 ///
197 /// Use this for parameterized types that [`TypeId`] cannot express, such as
198 /// `LIST(BIGINT)`, `MAP(VARCHAR, INTEGER)`, or `STRUCT(...)`.
199 #[mutants::skip] // position arithmetic tested via E2E (requires DuckDB runtime)
200 pub fn param_logical(mut self, logical_type: LogicalType) -> Self {
201 let position = self.params.len() + self.logical_params.len();
202 self.logical_params.push((position, logical_type));
203 self
204 }
205
206 /// Adds a named parameter (e.g., `my_fn(path := 'data.csv')`).
207 ///
208 /// Named parameters are accessed in the bind callback via
209 /// `duckdb_bind_get_named_parameter`.
210 ///
211 /// # Panics
212 ///
213 /// Panics if `name` contains an interior null byte.
214 pub fn named_param(mut self, name: &str, type_id: TypeId) -> Self {
215 self.named_params.push(NamedParam::Simple {
216 name: CString::new(name).expect("parameter name must not contain null bytes"),
217 type_id,
218 });
219 self
220 }
221
222 /// Adds a named parameter with a complex [`LogicalType`].
223 ///
224 /// Use this for parameterized types that [`TypeId`] cannot express.
225 ///
226 /// # Panics
227 ///
228 /// Panics if `name` contains an interior null byte.
229 pub fn named_param_logical(mut self, name: &str, logical_type: LogicalType) -> Self {
230 self.named_params.push(NamedParam::Logical {
231 name: CString::new(name).expect("parameter name must not contain null bytes"),
232 logical_type,
233 });
234 self
235 }
236
237 /// Sets the bind callback.
238 ///
239 /// The bind callback is called once at query-parse time. It must:
240 /// - Declare all output columns via [`crate::table::BindInfo::add_result_column`].
241 /// - Optionally read parameters and store bind data via [`crate::table::FfiBindData::set`].
242 pub fn bind(mut self, f: BindFn) -> Self {
243 self.bind = Some(f);
244 self
245 }
246
247 /// Sets the global init callback.
248 ///
249 /// Called once per query. Use [`crate::table::FfiInitData::set`] to store global scan state.
250 pub fn init(mut self, f: InitFn) -> Self {
251 self.init = Some(f);
252 self
253 }
254
255 /// Sets the per-thread local init callback (optional).
256 ///
257 /// When set, `DuckDB` calls this once per worker thread. Use [`crate::table::FfiLocalInitData::set`]
258 /// to store thread-local scan state. Setting a local init enables parallel scanning.
259 pub fn local_init(mut self, f: InitFn) -> Self {
260 self.local_init = Some(f);
261 self
262 }
263
264 /// Sets the scan callback.
265 ///
266 /// Called repeatedly until all rows are produced. Set the output chunk's size
267 /// to `0` (via `duckdb_data_chunk_set_size(output, 0)`) to signal end of stream.
268 pub fn scan(mut self, f: ScanFn) -> Self {
269 self.scan = Some(f);
270 self
271 }
272
273 /// Enables or disables projection pushdown support (default: disabled).
274 ///
275 /// When enabled, `DuckDB` informs the `init` callback which columns were
276 /// requested. Use `duckdb_init_get_column_count` and `duckdb_init_get_column_index`
277 /// in your init callback to skip producing unrequested columns.
278 pub const fn projection_pushdown(mut self, enable: bool) -> Self {
279 self.projection_pushdown = enable;
280 self
281 }
282
283 /// Sets function-level extra info shared across all callbacks.
284 ///
285 /// This data is available via `duckdb_function_get_extra_info` and
286 /// `duckdb_bind_get_extra_info` in all callbacks. The `destroy` callback
287 /// is called by `DuckDB` when the function is dropped.
288 ///
289 /// # Safety
290 ///
291 /// `data` must remain valid until `DuckDB` calls `destroy`. The typical pattern
292 /// is to box your data: `Box::into_raw(Box::new(my_data)).cast()`.
293 pub unsafe fn extra_info(mut self, data: *mut c_void, destroy: ExtraDestroyFn) -> Self {
294 self.extra_info = Some((data, destroy));
295 self
296 }
297
298 /// Registers the table function on the given connection.
299 ///
300 /// # Errors
301 ///
302 /// Returns `ExtensionError` if:
303 /// - The bind, init, or scan callback was not set.
304 /// - `DuckDB` reports a registration failure.
305 ///
306 /// # Safety
307 ///
308 /// `con` must be a valid, open `duckdb_connection`.
309 pub unsafe fn register(self, con: duckdb_connection) -> Result<(), ExtensionError> {
310 let bind = self
311 .bind
312 .ok_or_else(|| ExtensionError::new("bind callback not set"))?;
313 let init = self
314 .init
315 .ok_or_else(|| ExtensionError::new("init callback not set"))?;
316 let scan = self
317 .scan
318 .ok_or_else(|| ExtensionError::new("scan callback not set"))?;
319
320 // SAFETY: creates a new table function handle.
321 let mut func = unsafe { duckdb_create_table_function() };
322
323 // SAFETY: func is a valid newly created handle.
324 unsafe {
325 duckdb_table_function_set_name(func, self.name.as_ptr());
326 }
327
328 // Add positional parameters: merge simple TypeId params and complex LogicalType
329 // params in the order they were added (tracked by position).
330 {
331 let mut simple_idx = 0;
332 let mut logical_idx = 0;
333 let total = self.params.len() + self.logical_params.len();
334 for pos in 0..total {
335 if logical_idx < self.logical_params.len()
336 && self.logical_params[logical_idx].0 == pos
337 {
338 unsafe {
339 duckdb_table_function_add_parameter(
340 func,
341 self.logical_params[logical_idx].1.as_raw(),
342 );
343 }
344 logical_idx += 1;
345 } else if simple_idx < self.params.len() {
346 let lt = LogicalType::new(self.params[simple_idx]);
347 unsafe {
348 duckdb_table_function_add_parameter(func, lt.as_raw());
349 }
350 simple_idx += 1;
351 }
352 }
353 }
354
355 // Add named parameters.
356 for np in &self.named_params {
357 match np {
358 NamedParam::Simple { name, type_id } => {
359 let lt = LogicalType::new(*type_id);
360 unsafe {
361 duckdb_table_function_add_named_parameter(func, name.as_ptr(), lt.as_raw());
362 }
363 }
364 NamedParam::Logical { name, logical_type } => unsafe {
365 duckdb_table_function_add_named_parameter(
366 func,
367 name.as_ptr(),
368 logical_type.as_raw(),
369 );
370 },
371 }
372 }
373
374 // Set callbacks.
375 // SAFETY: func is valid; callbacks are valid extern "C" fn pointers.
376 unsafe {
377 duckdb_table_function_set_bind(func, Some(bind));
378 duckdb_table_function_set_init(func, Some(init));
379 duckdb_table_function_set_function(func, Some(scan));
380 }
381
382 // Set optional local init.
383 if let Some(local_init) = self.local_init {
384 // SAFETY: func is valid; local_init is a valid extern "C" fn pointer.
385 unsafe {
386 duckdb_table_function_set_local_init(func, Some(local_init));
387 }
388 }
389
390 // Configure projection pushdown.
391 // SAFETY: func is valid.
392 unsafe {
393 duckdb_table_function_supports_projection_pushdown(func, self.projection_pushdown);
394 }
395
396 // Set extra info if provided.
397 if let Some((data, destroy)) = self.extra_info {
398 // SAFETY: func is valid; data and destroy are provided by caller.
399 unsafe {
400 duckdb_table_function_set_extra_info(func, data, Some(destroy));
401 }
402 }
403
404 // Register.
405 // SAFETY: con and func are valid.
406 let result = unsafe { duckdb_register_table_function(con, func) };
407
408 // Always destroy the function handle; ownership transferred to DuckDB on success.
409 // SAFETY: func was created above.
410 unsafe {
411 duckdb_destroy_table_function(&raw mut func);
412 }
413
414 if result == DuckDBSuccess {
415 Ok(())
416 } else {
417 Err(ExtensionError::new(format!(
418 "duckdb_register_table_function failed for '{}'",
419 self.name.to_string_lossy()
420 )))
421 }
422 }
423}
424
/// Unit tests for the builder's stored state; none of them need a `DuckDB`
/// runtime because they never call `register`.
#[cfg(test)]
mod tests {
    use super::*;
    use std::mem::ManuallyDrop;

    #[test]
    fn builder_stores_name() {
        let b = TableFunctionBuilder::new("my_table_fn");
        assert_eq!(b.name.to_str().unwrap(), "my_table_fn");
        // The public accessor must agree with the stored C string.
        assert_eq!(b.name(), "my_table_fn");
    }

    #[test]
    fn builder_stores_params() {
        let b = TableFunctionBuilder::new("f")
            .param(TypeId::Varchar)
            .param(TypeId::BigInt);
        assert_eq!(b.params.len(), 2);
        assert_eq!(b.params[0], TypeId::Varchar);
        assert_eq!(b.params[1], TypeId::BigInt);
    }

    #[test]
    fn builder_stores_named_params() {
        let b = TableFunctionBuilder::new("f")
            .named_param("path", TypeId::Varchar)
            .named_param("limit", TypeId::BigInt);
        assert_eq!(b.named_params.len(), 2);
        match &b.named_params[0] {
            NamedParam::Simple { name, .. } => assert_eq!(name.to_str().unwrap(), "path"),
            NamedParam::Logical { .. } => panic!("expected Simple"),
        }
        match &b.named_params[1] {
            NamedParam::Simple { name, .. } => assert_eq!(name.to_str().unwrap(), "limit"),
            NamedParam::Logical { .. } => panic!("expected Simple"),
        }
    }

    #[test]
    fn builder_stores_callbacks() {
        unsafe extern "C" fn my_bind(_: duckdb_bind_info) {}
        unsafe extern "C" fn my_init(_: duckdb_init_info) {}
        unsafe extern "C" fn my_scan(_: duckdb_function_info, _: duckdb_data_chunk) {}

        let b = TableFunctionBuilder::new("f")
            .bind(my_bind)
            .init(my_init)
            .scan(my_scan);
        assert!(b.bind.is_some());
        assert!(b.init.is_some());
        assert!(b.scan.is_some());
    }

    #[test]
    fn builder_projection_pushdown() {
        let b = TableFunctionBuilder::new("f").projection_pushdown(true);
        assert!(b.projection_pushdown);
    }

    #[test]
    fn try_new_valid_name() {
        assert!(TableFunctionBuilder::try_new("read_csv_ext").is_ok());
    }

    #[test]
    fn try_new_invalid_name() {
        assert!(TableFunctionBuilder::try_new("").is_err());
        assert!(TableFunctionBuilder::try_new("MyFunc").is_err());
    }

    #[test]
    fn try_new_null_byte_rejected() {
        assert!(TableFunctionBuilder::try_new("func\0name").is_err());
    }

    #[test]
    fn param_logical_position_tracking() {
        // Create a fake LogicalType from a dangling (non-null) pointer.
        // Dropping it would call duckdb_destroy_logical_type on the invalid
        // pointer, so the builder that owns it must never be dropped.
        let fake_lt = unsafe { LogicalType::from_raw(std::ptr::NonNull::dangling().as_ptr()) };

        // Build with one simple param followed by one logical param, and wrap
        // the builder in ManuallyDrop right away: unlike a trailing
        // `mem::forget`, this also suppresses the drop when one of the
        // assertions below fails and the test unwinds.
        let b = ManuallyDrop::new(
            TableFunctionBuilder::new("f")
                .param(TypeId::Integer)
                .param_logical(fake_lt),
        );

        assert_eq!(b.params.len(), 1);
        assert_eq!(b.logical_params.len(), 1);
        assert_eq!(b.logical_params[0].0, 1); // position should be 1, not 0
    }
}