quack_rs/table/builder.rs
1// SPDX-License-Identifier: MIT
2// Copyright 2026 Tom F. <https://github.com/tomtom215/>
3// My way of giving something small back to the open source community
4// and encouraging more Rust development!
5
6//! Builder for registering `DuckDB` table functions.
7//!
//! Table functions are the backbone of "real" `DuckDB` extensions: they are
//! SELECT-able, support projection pushdown and named parameters, and can
//! produce arbitrary output schemas determined at query-parse time.
11//!
12//! # Table function lifecycle
13//!
14//! ```text
15//! 1. bind — parse args, declare output columns, optionally set cardinality hint
16//! 2. init — allocate global scan state (shared across threads)
17//! 3. local_init — allocate per-thread scan state (optional)
18//! 4. scan — fill one output chunk; set chunk size to 0 when exhausted
19//! ```
20//!
21//! # Example: A constant table function
22//!
23//! ```rust,no_run
24//! use quack_rs::table::{TableFunctionBuilder, FfiBindData, FfiInitData};
25//! use quack_rs::types::TypeId;
26//! use libduckdb_sys::{
27//! duckdb_bind_info, duckdb_init_info, duckdb_function_info,
28//! duckdb_data_chunk, duckdb_data_chunk_set_size,
29//! };
30//!
31//! struct Config { limit: u64 }
32//! struct State { emitted: u64 }
33//!
34//! unsafe extern "C" fn bind(info: duckdb_bind_info) {
35//! unsafe {
36//! // Declare the output schema.
37//! quack_rs::table::BindInfo::new(info)
38//! .add_result_column("n", TypeId::BigInt);
39//! // Store bind-time configuration.
40//! FfiBindData::<Config>::set(info, Config { limit: 100 });
41//! }
42//! }
43//!
44//! unsafe extern "C" fn init(info: duckdb_init_info) {
45//! unsafe { FfiInitData::<State>::set(info, State { emitted: 0 }); }
46//! }
47//!
48//! unsafe extern "C" fn scan(info: duckdb_function_info, output: duckdb_data_chunk) {
49//! // scan logic
50//! }
51//!
52//! // fn register(con: libduckdb_sys::duckdb_connection) -> Result<(), quack_rs::error::ExtensionError> {
53//! // unsafe {
54//! // TableFunctionBuilder::new("my_table_fn")
55//! // .bind(bind)
56//! // .init(init)
57//! // .scan(scan)
58//! // .register(con)
59//! // }
60//! // }
61//! ```
62
63use std::ffi::CString;
64use std::os::raw::c_void;
65
66use libduckdb_sys::{
67 duckdb_bind_info, duckdb_connection, duckdb_create_table_function, duckdb_data_chunk,
68 duckdb_destroy_table_function, duckdb_function_info, duckdb_init_info,
69 duckdb_register_table_function, duckdb_table_function_add_named_parameter,
70 duckdb_table_function_add_parameter, duckdb_table_function_set_bind,
71 duckdb_table_function_set_extra_info, duckdb_table_function_set_function,
72 duckdb_table_function_set_init, duckdb_table_function_set_local_init,
73 duckdb_table_function_set_name, duckdb_table_function_supports_projection_pushdown,
74 DuckDBSuccess,
75};
76
77use crate::error::ExtensionError;
78use crate::types::{LogicalType, TypeId};
79use crate::validate::validate_function_name;
80
/// The bind callback: declare output columns, read parameters, store bind data.
///
/// Supplied to the builder through [`TableFunctionBuilder::bind`].
pub type BindFn = unsafe extern "C" fn(info: duckdb_bind_info);

/// The init callback: allocate global scan state.
///
/// The same signature is accepted by both [`TableFunctionBuilder::init`] and
/// [`TableFunctionBuilder::local_init`].
pub type InitFn = unsafe extern "C" fn(info: duckdb_init_info);

/// The scan callback: fill one output chunk; set chunk size to 0 when done.
///
/// Supplied to the builder through [`TableFunctionBuilder::scan`].
pub type ScanFn = unsafe extern "C" fn(info: duckdb_function_info, output: duckdb_data_chunk);

/// The extra-info destructor callback: called by `DuckDB` to free function-level extra data.
///
/// Receives the same raw pointer that was passed to [`TableFunctionBuilder::extra_info`].
pub type ExtraDestroyFn = unsafe extern "C" fn(data: *mut c_void);
92
/// A named parameter specification: (name, type).
///
/// The two variants differ only in how the parameter type is described: either
/// by a plain [`TypeId`] or by an already-constructed [`LogicalType`].
enum NamedParam {
    /// A named parameter whose type is given as a [`TypeId`]; the matching
    /// [`LogicalType`] is built from it when the function is registered.
    Simple {
        /// Parameter name as a NUL-terminated C string.
        name: CString,
        /// Type of the parameter.
        type_id: TypeId,
    },
    /// A named parameter whose type is given directly as a [`LogicalType`].
    Logical {
        /// Parameter name as a NUL-terminated C string.
        name: CString,
        /// Type of the parameter, owned by this entry.
        logical_type: LogicalType,
    },
}
104
/// Builder for registering a `DuckDB` table function.
///
/// Table functions are the most powerful extension type — they can return
/// arbitrary result schemas, support named parameters, projection pushdown,
/// and parallel execution.
///
/// # Required fields
///
/// - [`bind`][TableFunctionBuilder::bind]: must be set.
/// - [`init`][TableFunctionBuilder::init]: must be set.
/// - [`scan`][TableFunctionBuilder::scan]: must be set.
///
/// # Optional features
///
/// - [`param`][TableFunctionBuilder::param]: positional parameters.
/// - [`named_param`][TableFunctionBuilder::named_param]: named parameters (`name := value`).
/// - [`local_init`][TableFunctionBuilder::local_init]: per-thread init (enables parallel scan).
/// - [`projection_pushdown`][TableFunctionBuilder::projection_pushdown]: hint projection info to `DuckDB`.
/// - [`extra_info`][TableFunctionBuilder::extra_info]: function-level data available in all callbacks.
#[must_use]
pub struct TableFunctionBuilder {
    /// Function name as a NUL-terminated C string.
    name: CString,
    /// Positional parameters declared with a plain [`TypeId`], in the order they were added.
    params: Vec<TypeId>,
    /// Positional parameters declared with a [`LogicalType`], each paired with its
    /// zero-based position in the combined positional parameter list.
    logical_params: Vec<(usize, LogicalType)>,
    /// Named parameters, in the order they were added.
    named_params: Vec<NamedParam>,
    /// Bind callback; `None` until [`bind`][TableFunctionBuilder::bind] is called.
    bind: Option<BindFn>,
    /// Global init callback; `None` until [`init`][TableFunctionBuilder::init] is called.
    init: Option<InitFn>,
    /// Optional per-thread init callback.
    local_init: Option<InitFn>,
    /// Scan callback; `None` until [`scan`][TableFunctionBuilder::scan] is called.
    scan: Option<ScanFn>,
    /// Whether projection pushdown is reported as supported (starts out `false`).
    projection_pushdown: bool,
    /// Optional function-level extra data together with its destructor.
    extra_info: Option<(*mut c_void, ExtraDestroyFn)>,
}
137
138impl TableFunctionBuilder {
139 /// Creates a new builder for a table function with the given name.
140 ///
141 /// # Panics
142 ///
143 /// Panics if `name` contains an interior null byte.
144 pub fn new(name: &str) -> Self {
145 Self {
146 name: CString::new(name).expect("function name must not contain null bytes"),
147 params: Vec::new(),
148 logical_params: Vec::new(),
149 named_params: Vec::new(),
150 bind: None,
151 init: None,
152 local_init: None,
153 scan: None,
154 projection_pushdown: false,
155 extra_info: None,
156 }
157 }
158
159 /// Creates a new builder with function name validation.
160 ///
161 /// # Errors
162 ///
163 /// Returns `ExtensionError` if the name is invalid.
164 pub fn try_new(name: &str) -> Result<Self, ExtensionError> {
165 validate_function_name(name)?;
166 let c_name = CString::new(name)
167 .map_err(|_| ExtensionError::new("function name contains interior null byte"))?;
168 Ok(Self {
169 name: c_name,
170 params: Vec::new(),
171 logical_params: Vec::new(),
172 named_params: Vec::new(),
173 bind: None,
174 init: None,
175 local_init: None,
176 scan: None,
177 projection_pushdown: false,
178 extra_info: None,
179 })
180 }
181
182 /// Returns the function name.
183 ///
184 /// Useful for introspection and for [`MockRegistrar`][crate::testing::MockRegistrar].
185 pub fn name(&self) -> &str {
186 self.name.to_str().unwrap_or("")
187 }
188
189 /// Adds a positional parameter with the given type.
190 pub fn param(mut self, type_id: TypeId) -> Self {
191 self.params.push(type_id);
192 self
193 }
194
195 /// Adds a positional parameter with a complex [`LogicalType`].
196 ///
197 /// Use this for parameterized types that [`TypeId`] cannot express, such as
198 /// `LIST(BIGINT)`, `MAP(VARCHAR, INTEGER)`, or `STRUCT(...)`.
199 pub fn param_logical(mut self, logical_type: LogicalType) -> Self {
200 let position = self.params.len() + self.logical_params.len();
201 self.logical_params.push((position, logical_type));
202 self
203 }
204
205 /// Adds a named parameter (e.g., `my_fn(path := 'data.csv')`).
206 ///
207 /// Named parameters are accessed in the bind callback via
208 /// `duckdb_bind_get_named_parameter`.
209 ///
210 /// # Panics
211 ///
212 /// Panics if `name` contains an interior null byte.
213 pub fn named_param(mut self, name: &str, type_id: TypeId) -> Self {
214 self.named_params.push(NamedParam::Simple {
215 name: CString::new(name).expect("parameter name must not contain null bytes"),
216 type_id,
217 });
218 self
219 }
220
221 /// Adds a named parameter with a complex [`LogicalType`].
222 ///
223 /// Use this for parameterized types that [`TypeId`] cannot express.
224 ///
225 /// # Panics
226 ///
227 /// Panics if `name` contains an interior null byte.
228 pub fn named_param_logical(mut self, name: &str, logical_type: LogicalType) -> Self {
229 self.named_params.push(NamedParam::Logical {
230 name: CString::new(name).expect("parameter name must not contain null bytes"),
231 logical_type,
232 });
233 self
234 }
235
236 /// Sets the bind callback.
237 ///
238 /// The bind callback is called once at query-parse time. It must:
239 /// - Declare all output columns via [`crate::table::BindInfo::add_result_column`].
240 /// - Optionally read parameters and store bind data via [`crate::table::FfiBindData::set`].
241 pub fn bind(mut self, f: BindFn) -> Self {
242 self.bind = Some(f);
243 self
244 }
245
246 /// Sets the global init callback.
247 ///
248 /// Called once per query. Use [`crate::table::FfiInitData::set`] to store global scan state.
249 pub fn init(mut self, f: InitFn) -> Self {
250 self.init = Some(f);
251 self
252 }
253
254 /// Sets the per-thread local init callback (optional).
255 ///
256 /// When set, `DuckDB` calls this once per worker thread. Use [`crate::table::FfiLocalInitData::set`]
257 /// to store thread-local scan state. Setting a local init enables parallel scanning.
258 pub fn local_init(mut self, f: InitFn) -> Self {
259 self.local_init = Some(f);
260 self
261 }
262
263 /// Sets the scan callback.
264 ///
265 /// Called repeatedly until all rows are produced. Set the output chunk's size
266 /// to `0` (via `duckdb_data_chunk_set_size(output, 0)`) to signal end of stream.
267 pub fn scan(mut self, f: ScanFn) -> Self {
268 self.scan = Some(f);
269 self
270 }
271
272 /// Enables or disables projection pushdown support (default: disabled).
273 ///
274 /// When enabled, `DuckDB` informs the `init` callback which columns were
275 /// requested. Use `duckdb_init_get_column_count` and `duckdb_init_get_column_index`
276 /// in your init callback to skip producing unrequested columns.
277 pub const fn projection_pushdown(mut self, enable: bool) -> Self {
278 self.projection_pushdown = enable;
279 self
280 }
281
282 /// Sets function-level extra info shared across all callbacks.
283 ///
284 /// This data is available via `duckdb_function_get_extra_info` and
285 /// `duckdb_bind_get_extra_info` in all callbacks. The `destroy` callback
286 /// is called by `DuckDB` when the function is dropped.
287 ///
288 /// # Safety
289 ///
290 /// `data` must remain valid until `DuckDB` calls `destroy`. The typical pattern
291 /// is to box your data: `Box::into_raw(Box::new(my_data)).cast()`.
292 pub unsafe fn extra_info(mut self, data: *mut c_void, destroy: ExtraDestroyFn) -> Self {
293 self.extra_info = Some((data, destroy));
294 self
295 }
296
297 /// Registers the table function on the given connection.
298 ///
299 /// # Errors
300 ///
301 /// Returns `ExtensionError` if:
302 /// - The bind, init, or scan callback was not set.
303 /// - `DuckDB` reports a registration failure.
304 ///
305 /// # Safety
306 ///
307 /// `con` must be a valid, open `duckdb_connection`.
308 pub unsafe fn register(self, con: duckdb_connection) -> Result<(), ExtensionError> {
309 let bind = self
310 .bind
311 .ok_or_else(|| ExtensionError::new("bind callback not set"))?;
312 let init = self
313 .init
314 .ok_or_else(|| ExtensionError::new("init callback not set"))?;
315 let scan = self
316 .scan
317 .ok_or_else(|| ExtensionError::new("scan callback not set"))?;
318
319 // SAFETY: creates a new table function handle.
320 let func = unsafe { duckdb_create_table_function() };
321
322 // SAFETY: func is a valid newly created handle.
323 unsafe {
324 duckdb_table_function_set_name(func, self.name.as_ptr());
325 }
326
327 // Add positional parameters: merge simple TypeId params and complex LogicalType
328 // params in the order they were added (tracked by position).
329 {
330 let mut simple_idx = 0;
331 let mut logical_idx = 0;
332 let total = self.params.len() + self.logical_params.len();
333 for pos in 0..total {
334 if logical_idx < self.logical_params.len()
335 && self.logical_params[logical_idx].0 == pos
336 {
337 unsafe {
338 duckdb_table_function_add_parameter(
339 func,
340 self.logical_params[logical_idx].1.as_raw(),
341 );
342 }
343 logical_idx += 1;
344 } else if simple_idx < self.params.len() {
345 let lt = LogicalType::new(self.params[simple_idx]);
346 unsafe {
347 duckdb_table_function_add_parameter(func, lt.as_raw());
348 }
349 simple_idx += 1;
350 }
351 }
352 }
353
354 // Add named parameters.
355 for np in &self.named_params {
356 match np {
357 NamedParam::Simple { name, type_id } => {
358 let lt = LogicalType::new(*type_id);
359 unsafe {
360 duckdb_table_function_add_named_parameter(func, name.as_ptr(), lt.as_raw());
361 }
362 }
363 NamedParam::Logical { name, logical_type } => unsafe {
364 duckdb_table_function_add_named_parameter(
365 func,
366 name.as_ptr(),
367 logical_type.as_raw(),
368 );
369 },
370 }
371 }
372
373 // Set callbacks.
374 // SAFETY: func is valid; callbacks are valid extern "C" fn pointers.
375 unsafe {
376 duckdb_table_function_set_bind(func, Some(bind));
377 duckdb_table_function_set_init(func, Some(init));
378 duckdb_table_function_set_function(func, Some(scan));
379 }
380
381 // Set optional local init.
382 if let Some(local_init) = self.local_init {
383 // SAFETY: func is valid; local_init is a valid extern "C" fn pointer.
384 unsafe {
385 duckdb_table_function_set_local_init(func, Some(local_init));
386 }
387 }
388
389 // Configure projection pushdown.
390 // SAFETY: func is valid.
391 unsafe {
392 duckdb_table_function_supports_projection_pushdown(func, self.projection_pushdown);
393 }
394
395 // Set extra info if provided.
396 if let Some((data, destroy)) = self.extra_info {
397 // SAFETY: func is valid; data and destroy are provided by caller.
398 unsafe {
399 duckdb_table_function_set_extra_info(func, data, Some(destroy));
400 }
401 }
402
403 // Register.
404 // SAFETY: con and func are valid.
405 let result = unsafe { duckdb_register_table_function(con, func) };
406
407 // Always destroy the function handle; ownership transferred to DuckDB on success.
408 // SAFETY: func was created above.
409 unsafe {
410 duckdb_destroy_table_function(&mut { func });
411 }
412
413 if result == DuckDBSuccess {
414 Ok(())
415 } else {
416 Err(ExtensionError::new(format!(
417 "duckdb_register_table_function failed for '{}'",
418 self.name.to_string_lossy()
419 )))
420 }
421 }
422}
423
#[cfg(test)]
mod tests {
    use super::*;

    /// The name handed to `new` is stored unchanged.
    #[test]
    fn builder_stores_name() {
        let builder = TableFunctionBuilder::new("my_table_fn");
        let stored = builder.name.to_str().unwrap();
        assert_eq!(stored, "my_table_fn");
    }

    /// Simple positional parameters are kept in insertion order.
    #[test]
    fn builder_stores_params() {
        let builder = TableFunctionBuilder::new("f")
            .param(TypeId::Varchar)
            .param(TypeId::BigInt);
        assert_eq!(builder.params, [TypeId::Varchar, TypeId::BigInt]);
    }

    /// Named parameters added via `named_param` are stored as `Simple` entries,
    /// in insertion order, with their names intact.
    #[test]
    fn builder_stores_named_params() {
        let builder = TableFunctionBuilder::new("f")
            .named_param("path", TypeId::Varchar)
            .named_param("limit", TypeId::BigInt);

        let names: Vec<&str> = builder
            .named_params
            .iter()
            .map(|entry| match entry {
                NamedParam::Simple { name, .. } => name.to_str().unwrap(),
                NamedParam::Logical { .. } => panic!("expected Simple"),
            })
            .collect();

        assert_eq!(names, ["path", "limit"]);
    }

    /// Each callback setter populates its corresponding slot.
    #[test]
    fn builder_stores_callbacks() {
        unsafe extern "C" fn noop_bind(_: duckdb_bind_info) {}
        unsafe extern "C" fn noop_init(_: duckdb_init_info) {}
        unsafe extern "C" fn noop_scan(_: duckdb_function_info, _: duckdb_data_chunk) {}

        let builder = TableFunctionBuilder::new("f")
            .bind(noop_bind)
            .init(noop_init)
            .scan(noop_scan);

        assert!(builder.bind.is_some());
        assert!(builder.init.is_some());
        assert!(builder.scan.is_some());
    }

    /// Enabling projection pushdown is reflected in the builder state.
    #[test]
    fn builder_projection_pushdown() {
        let builder = TableFunctionBuilder::new("f").projection_pushdown(true);
        assert!(builder.projection_pushdown);
    }

    /// A well-formed name passes validation.
    #[test]
    fn try_new_valid_name() {
        let outcome = TableFunctionBuilder::try_new("read_csv_ext");
        assert!(outcome.is_ok());
    }

    /// Empty and mixed-case names are rejected.
    #[test]
    fn try_new_invalid_name() {
        for rejected in ["", "MyFunc"] {
            assert!(TableFunctionBuilder::try_new(rejected).is_err());
        }
    }

    /// A name containing an interior NUL byte is rejected rather than panicking.
    #[test]
    fn try_new_null_byte_rejected() {
        let outcome = TableFunctionBuilder::try_new("func\0name");
        assert!(outcome.is_err());
    }

    /// A logical parameter added after a simple one records position 1.
    #[test]
    fn param_logical_position_tracking() {
        // Fabricate a LogicalType from a dangling (non-null) pointer. It must
        // never be dropped, because Drop would call duckdb_destroy_logical_type
        // on an invalid pointer; the builder is leaked at the end for that reason.
        let placeholder =
            unsafe { LogicalType::from_raw(std::ptr::NonNull::dangling().as_ptr()) };

        // One simple parameter first, then the logical one.
        let builder = TableFunctionBuilder::new("f")
            .param(TypeId::Integer)
            .param_logical(placeholder);

        assert_eq!(builder.params.len(), 1);
        assert_eq!(builder.logical_params.len(), 1);

        // The logical parameter sits after the simple one: position 1, not 0.
        let (position, _) = &builder.logical_params[0];
        assert_eq!(*position, 1);

        // Leak the whole builder so the fake LogicalType inside it is never dropped.
        std::mem::forget(builder);
    }
}