quack_rs/table/bind_data.rs
1// SPDX-License-Identifier: MIT
2// Copyright 2026 Tom F. <https://github.com/tomtom215/>
3// My way of giving something small back to the open source community
4// and encouraging more Rust development!
5
6//! Type-safe bind data management for table functions.
7//!
8//! [`FfiBindData<T>`] stores user-defined data during the `bind` phase of a table
9//! function and provides safe retrieval in the `init` and `scan` phases.
10//!
11//! # `DuckDB` table function lifecycle
12//!
13//! ```text
14//! bind → stores bind_data (T)
15//! init → reads bind_data to set up global state
16//! local_init → reads bind_data to set up per-thread state (optional)
17//! scan → reads bind_data + init_data + local_init_data, fills output chunk
18//! ```
19//!
20//! # Example
21//!
22//! ```rust,no_run
23//! use quack_rs::table::FfiBindData;
24//! use libduckdb_sys::{duckdb_bind_info, duckdb_function_info};
25//!
26//! struct MyConfig { path: String }
27//!
28//! unsafe extern "C" fn my_bind(info: duckdb_bind_info) {
29//! // store bind data
30//! unsafe { FfiBindData::<MyConfig>::set(info, MyConfig { path: "data.csv".into() }); }
31//! }
32//!
33//! unsafe extern "C" fn my_scan(info: duckdb_function_info, _output: libduckdb_sys::duckdb_data_chunk) {
34//! // retrieve bind data in scan
35//! if let Some(cfg) = unsafe { FfiBindData::<MyConfig>::get_from_function(info) } {
36//! let _ = &cfg.path;
37//! }
38//! }
39//! ```
40
41use std::os::raw::c_void;
42
43use libduckdb_sys::{
44 duckdb_bind_info, duckdb_bind_set_bind_data, duckdb_function_get_bind_data,
45 duckdb_function_info, duckdb_init_get_bind_data, duckdb_init_info,
46};
47
/// Typed entry point for attaching a Rust value to a `DuckDB` table function
/// as bind data.
///
/// The struct holds no data of its own — only a `PhantomData<T>` marker — and
/// exists so that the associated functions which store, fetch, and free the
/// value all agree on the same `T`.
///
/// # Memory model
///
/// - [`set`][FfiBindData::set] moves the `T` into a `Box`, turns it into a raw
///   pointer with `Box::into_raw`, and hands that pointer to `DuckDB` together
///   with [`destroy`][FfiBindData::destroy] as its destructor.
/// - From then on `DuckDB` owns the allocation; when it invokes `destroy`, the
///   pointer is turned back into a `Box<T>` and dropped.
/// - The retrieval functions only hand out shared borrows of the `T`, meant to
///   be used within the callback that requested them.
pub struct FfiBindData<T>
where
    T: 'static,
{
    // Zero-sized marker tying this type to `T` without storing a value.
    _marker: std::marker::PhantomData<T>,
}
63
64impl<T: 'static> FfiBindData<T> {
65 /// Stores `data` as the bind data for this table function invocation.
66 ///
67 /// Call this inside your `bind` callback to save configuration that will
68 /// be accessed in `init` and `scan` callbacks.
69 ///
70 /// # Safety
71 ///
72 /// - `info` must be a valid `duckdb_bind_info` provided by `DuckDB` in a bind callback.
73 /// - Must be called at most once per bind invocation; calling twice leaks the first allocation.
74 pub unsafe fn set(info: duckdb_bind_info, data: T) {
75 let raw = Box::into_raw(Box::new(data)).cast::<c_void>();
76 // SAFETY: info is valid; raw is a non-null heap allocation owned by DuckDB after this call.
77 unsafe {
78 duckdb_bind_set_bind_data(info, raw, Some(Self::destroy));
79 }
80 }
81
82 /// Retrieves a shared reference to the bind data from a bind callback.
83 ///
84 /// Returns `None` if no bind data was set or the pointer is null.
85 ///
86 /// # Safety
87 ///
88 /// - `info` must be a valid `duckdb_bind_info`.
89 /// - No mutable reference to the same data must exist.
90 /// - The returned reference is valid for the duration of the bind callback.
91 pub const fn get_from_bind<'a>(info: duckdb_bind_info) -> Option<&'a T> {
92 // Note: duckdb_bind_get_extra_info retrieves the extra_info set on the *function*,
93 // not the bind_data. There is no "get bind data from bind info" in the C API —
94 // that is intentional: bind data is write-only during bind and read-only afterward.
95 // If you need to read data you set, store it in the closure or a pre-existing struct.
96 //
97 // This method is provided for completeness via duckdb_bind_get_extra_info
98 // which retrieves function-level extra_info, not bind_data. Users who need
99 // to read data inside their own bind callback should pass it differently.
100 //
101 // For bind_data retrieval in *init* and *scan*, use get_from_init / get_from_function.
102 let _ = info; // Suppress unused variable warning; this design choice is intentional.
103 None
104 }
105
106 /// Retrieves a shared reference to the bind data from a global init callback.
107 ///
108 /// Returns `None` if no bind data was set or the pointer is null.
109 ///
110 /// # Safety
111 ///
112 /// - `info` must be a valid `duckdb_init_info`.
113 /// - No mutable reference to the same data must exist simultaneously.
114 /// - The returned reference is valid for the duration of the init callback.
115 pub unsafe fn get_from_init<'a>(info: duckdb_init_info) -> Option<&'a T> {
116 // SAFETY: info is valid per caller's contract.
117 let raw = unsafe { duckdb_init_get_bind_data(info) };
118 if raw.is_null() {
119 return None;
120 }
121 // SAFETY: raw was set by set() via Box::into_raw. It is non-null and valid.
122 // No mutable reference exists per caller's contract.
123 Some(unsafe { &*raw.cast::<T>() })
124 }
125
126 /// Retrieves a shared reference to the bind data from a scan callback.
127 ///
128 /// Returns `None` if no bind data was set or the pointer is null.
129 ///
130 /// # Safety
131 ///
132 /// - `info` must be a valid `duckdb_function_info` from a scan callback.
133 /// - No mutable reference to the same data must exist simultaneously.
134 /// - The returned reference is valid for the duration of the scan callback.
135 pub unsafe fn get_from_function<'a>(info: duckdb_function_info) -> Option<&'a T> {
136 // SAFETY: info is valid per caller's contract.
137 let raw = unsafe { duckdb_function_get_bind_data(info) };
138 if raw.is_null() {
139 return None;
140 }
141 // SAFETY: raw was set by set() via Box::into_raw. It is non-null and valid.
142 Some(unsafe { &*raw.cast::<T>() })
143 }
144
145 /// The destroy callback passed to `duckdb_bind_set_bind_data`.
146 ///
147 /// `DuckDB` calls this when the query is complete. It drops the `Box<T>`.
148 ///
149 /// # Safety
150 ///
151 /// - `ptr` must have been allocated by [`set`][FfiBindData::set] via `Box::into_raw`.
152 /// - Must be called exactly once (`DuckDB` guarantees this for bind data destroyers).
153 pub unsafe extern "C" fn destroy(ptr: *mut c_void) {
154 if !ptr.is_null() {
155 // SAFETY: ptr was created by Box::into_raw(Box::<T>::new(...)) in set().
156 // DuckDB calls this exactly once.
157 unsafe { drop(Box::from_raw(ptr.cast::<T>())) };
158 }
159 }
160}
161
#[cfg(test)]
mod tests {
    use super::*;

    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::sync::Arc;

    /// Plain payload for tests that only exercise the pointer plumbing.
    // The field is never read; it just gives the payload a non-zero size.
    #[allow(dead_code)]
    struct Config {
        value: i32,
    }

    /// Payload that counts how many times it has been dropped, so a test can
    /// observe whether `destroy` really runs the destructor.
    struct DropProbe(Arc<AtomicUsize>);

    impl Drop for DropProbe {
        fn drop(&mut self) {
            self.0.fetch_add(1, Ordering::SeqCst);
        }
    }

    #[test]
    fn destroy_null_is_noop() {
        // Must not panic or crash
        // SAFETY: destroy explicitly tolerates a null pointer.
        unsafe { FfiBindData::<Config>::destroy(std::ptr::null_mut()) };
    }

    #[test]
    fn destroy_allocated_box() {
        let boxed = Box::new(Config { value: 42 });
        let raw = Box::into_raw(boxed).cast::<c_void>();
        // SAFETY: raw is a valid Box-allocated pointer.
        unsafe { FfiBindData::<Config>::destroy(raw) };
        // If we reach here without panic/UB, the test passes.
    }

    #[test]
    fn destroy_runs_drop_exactly_once() {
        let drops = Arc::new(AtomicUsize::new(0));
        let raw = Box::into_raw(Box::new(DropProbe(Arc::clone(&drops)))).cast::<c_void>();
        // Nothing has been dropped while the value is only held as a raw pointer.
        assert_eq!(drops.load(Ordering::SeqCst), 0);

        // SAFETY: raw came from Box::into_raw of a Box<DropProbe> and is destroyed once.
        unsafe { FfiBindData::<DropProbe>::destroy(raw) };

        assert_eq!(drops.load(Ordering::SeqCst), 1);
    }

    #[test]
    fn get_from_bind_returns_none() {
        // get_from_bind is intentionally unimplemented (returns None by design)
        // Test that calling it with null doesn't panic.
        let result = FfiBindData::<Config>::get_from_bind(std::ptr::null_mut());
        assert!(result.is_none());
    }
}
193}