Skip to main content

quack_rs/vector/
writer.rs

1// SPDX-License-Identifier: MIT
2// Copyright 2026 Tom F. <https://github.com/tomtom215/>
3// My way of giving something small back to the open source community
4// and encouraging more Rust development!
5
6//! Safe typed writing to `DuckDB` result vectors.
7//!
8//! [`VectorWriter`] provides safe methods for writing typed values and NULL
9//! flags to a `DuckDB` output vector from within a `finalize` callback.
10//!
11//! # Pitfall L4: `ensure_validity_writable`
12//!
13//! When writing NULL values, you must call `duckdb_vector_ensure_validity_writable`
14//! before `duckdb_vector_get_validity`. If you skip this call, `get_validity`
15//! returns an uninitialized pointer that will cause a segfault or silent corruption.
16//!
17//! [`VectorWriter::set_null`] calls `ensure_validity_writable` automatically.
18
19use libduckdb_sys::{
20    duckdb_validity_set_row_invalid, duckdb_validity_set_row_valid, duckdb_vector,
21    duckdb_vector_assign_string_element_len, duckdb_vector_ensure_validity_writable,
22    duckdb_vector_get_data, duckdb_vector_get_validity, idx_t,
23};
24
25/// A typed writer for a `DuckDB` output vector in a `finalize` callback.
26///
27/// # Example
28///
29/// ```rust,no_run
30/// use quack_rs::vector::VectorWriter;
31/// use libduckdb_sys::duckdb_vector;
32///
33/// // Inside finalize:
34/// // let mut writer = unsafe { VectorWriter::new(result_vector) };
35/// // for row in 0..count {
36/// //     if let Some(val) = compute_result(row) {
37/// //         unsafe { writer.write_i64(row, val) };
38/// //     } else {
39/// //         unsafe { writer.set_null(row) };
40/// //     }
41/// // }
42/// ```
43pub struct VectorWriter {
44    vector: duckdb_vector,
45    data: *mut u8,
46}
47
48impl VectorWriter {
49    /// Creates a new `VectorWriter` for the given result vector.
50    ///
51    /// # Safety
52    ///
53    /// `vector` must be a valid `DuckDB` output vector obtained in a `finalize`
54    /// callback. The vector must not be destroyed while this writer is live.
55    pub unsafe fn new(vector: duckdb_vector) -> Self {
56        // SAFETY: Caller guarantees vector is valid.
57        let data = unsafe { duckdb_vector_get_data(vector) }.cast::<u8>();
58        Self { vector, data }
59    }
60
61    /// Creates a `VectorWriter` directly from a raw `duckdb_vector` handle.
62    ///
63    /// Use this when you need to write into a child vector (e.g., a STRUCT field
64    /// or LIST element vector) obtained from
65    /// [`StructVector::get_child`][crate::vector::complex::StructVector::get_child] or
66    /// [`ListVector::get_child`][crate::vector::complex::ListVector::get_child].
67    ///
68    /// # Safety
69    ///
70    /// `vector` must be a valid, writable `duckdb_vector`. The vector must not be
71    /// destroyed while this writer is live.
72    pub unsafe fn from_vector(vector: duckdb_vector) -> Self {
73        // SAFETY: caller guarantees vector is valid.
74        let data = unsafe { duckdb_vector_get_data(vector) }.cast::<u8>();
75        Self { vector, data }
76    }
77
78    /// Writes an `i8` (TINYINT) value at row `idx`.
79    ///
80    /// # Safety
81    ///
82    /// - `idx` must be within the vector's capacity.
83    /// - The vector must have `TINYINT` type.
84    #[inline]
85    pub const unsafe fn write_i8(&mut self, idx: usize, value: i8) {
86        // SAFETY: data points to a valid writable TINYINT array. idx is in bounds.
87        unsafe { core::ptr::write_unaligned(self.data.add(idx).cast::<i8>(), value) };
88    }
89
90    /// Writes an `i16` (SMALLINT) value at row `idx`.
91    ///
92    /// # Safety
93    ///
94    /// See [`write_i8`][Self::write_i8].
95    #[inline]
96    pub const unsafe fn write_i16(&mut self, idx: usize, value: i16) {
97        // SAFETY: 2-byte aligned write to valid SMALLINT vector.
98        unsafe { core::ptr::write_unaligned(self.data.add(idx * 2).cast::<i16>(), value) };
99    }
100
101    /// Writes an `i32` (INTEGER) value at row `idx`.
102    ///
103    /// # Safety
104    ///
105    /// See [`write_i8`][Self::write_i8].
106    #[inline]
107    pub const unsafe fn write_i32(&mut self, idx: usize, value: i32) {
108        // SAFETY: 4-byte aligned write to valid INTEGER vector.
109        unsafe { core::ptr::write_unaligned(self.data.add(idx * 4).cast::<i32>(), value) };
110    }
111
112    /// Writes an `i64` (BIGINT / TIMESTAMP) value at row `idx`.
113    ///
114    /// # Safety
115    ///
116    /// See [`write_i8`][Self::write_i8].
117    #[inline]
118    pub const unsafe fn write_i64(&mut self, idx: usize, value: i64) {
119        // SAFETY: 8-byte aligned write to valid BIGINT vector.
120        unsafe { core::ptr::write_unaligned(self.data.add(idx * 8).cast::<i64>(), value) };
121    }
122
123    /// Writes a `u8` (UTINYINT) value at row `idx`.
124    ///
125    /// # Safety
126    ///
127    /// See [`write_i8`][Self::write_i8].
128    #[inline]
129    pub const unsafe fn write_u8(&mut self, idx: usize, value: u8) {
130        // SAFETY: 1-byte write to valid UTINYINT vector.
131        unsafe { *self.data.add(idx) = value };
132    }
133
134    /// Writes a `u32` (UINTEGER) value at row `idx`.
135    ///
136    /// # Safety
137    ///
138    /// See [`write_i8`][Self::write_i8].
139    #[inline]
140    pub const unsafe fn write_u32(&mut self, idx: usize, value: u32) {
141        // SAFETY: 4-byte aligned write to valid UINTEGER vector.
142        unsafe { core::ptr::write_unaligned(self.data.add(idx * 4).cast::<u32>(), value) };
143    }
144
145    /// Writes a `u64` (UBIGINT) value at row `idx`.
146    ///
147    /// # Safety
148    ///
149    /// See [`write_i8`][Self::write_i8].
150    #[inline]
151    pub const unsafe fn write_u64(&mut self, idx: usize, value: u64) {
152        // SAFETY: 8-byte aligned write to valid UBIGINT vector.
153        unsafe { core::ptr::write_unaligned(self.data.add(idx * 8).cast::<u64>(), value) };
154    }
155
156    /// Writes an `f32` (FLOAT) value at row `idx`.
157    ///
158    /// # Safety
159    ///
160    /// See [`write_i8`][Self::write_i8].
161    #[inline]
162    pub const unsafe fn write_f32(&mut self, idx: usize, value: f32) {
163        // SAFETY: 4-byte aligned write to valid FLOAT vector.
164        unsafe { core::ptr::write_unaligned(self.data.add(idx * 4).cast::<f32>(), value) };
165    }
166
167    /// Writes an `f64` (DOUBLE) value at row `idx`.
168    ///
169    /// # Safety
170    ///
171    /// See [`write_i8`][Self::write_i8].
172    #[inline]
173    pub const unsafe fn write_f64(&mut self, idx: usize, value: f64) {
174        // SAFETY: 8-byte aligned write to valid DOUBLE vector.
175        unsafe { core::ptr::write_unaligned(self.data.add(idx * 8).cast::<f64>(), value) };
176    }
177
178    /// Writes a `bool` (BOOLEAN) value at row `idx`.
179    ///
180    /// Booleans are stored as a single byte: `1` for `true`, `0` for `false`.
181    ///
182    /// # Safety
183    ///
184    /// - `idx` must be within the vector's capacity.
185    /// - The vector must have `BOOLEAN` type.
186    #[inline]
187    pub unsafe fn write_bool(&mut self, idx: usize, value: bool) {
188        // SAFETY: BOOLEAN stored as 1 byte.
189        unsafe { *self.data.add(idx) = u8::from(value) };
190    }
191
192    /// Writes an `i128` (HUGEINT) value at row `idx`.
193    ///
194    /// `DuckDB` stores HUGEINT as `{ lower: u64, upper: i64 }` in little-endian
195    /// layout, totaling 16 bytes per value.
196    ///
197    /// # Safety
198    ///
199    /// - `idx` must be within the vector's capacity.
200    /// - The vector must have `HUGEINT` type.
201    #[inline]
202    pub const unsafe fn write_i128(&mut self, idx: usize, value: i128) {
203        // SAFETY: HUGEINT = { lower: u64, upper: i64 } = 16 bytes.
204        let base = unsafe { self.data.add(idx * 16) };
205        #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
206        let lower = value as u64;
207        #[allow(clippy::cast_possible_truncation)]
208        let upper = (value >> 64) as i64;
209        unsafe {
210            core::ptr::write_unaligned(base.cast::<u64>(), lower);
211            core::ptr::write_unaligned(base.add(8).cast::<i64>(), upper);
212        }
213    }
214
215    /// Writes a `u16` (USMALLINT) value at row `idx`.
216    ///
217    /// # Safety
218    ///
219    /// See [`write_i8`][Self::write_i8].
220    #[inline]
221    pub const unsafe fn write_u16(&mut self, idx: usize, value: u16) {
222        // SAFETY: 2-byte aligned write to valid USMALLINT vector.
223        unsafe { core::ptr::write_unaligned(self.data.add(idx * 2).cast::<u16>(), value) };
224    }
225
226    /// Writes a VARCHAR string value at row `idx`.
227    ///
228    /// This uses `duckdb_vector_assign_string_element_len` which handles both
229    /// the inline (≤12 bytes) and pointer (>12 bytes) storage formats
230    /// automatically. `DuckDB` manages the memory for the string data.
231    ///
232    /// # Note on very long strings
233    ///
234    /// If `value.len()` exceeds `idx_t::MAX` (2^64 − 1 on 64-bit platforms),
235    /// the length is silently clamped to `idx_t::MAX`. In practice, this limit
236    /// is unreachable on any current hardware (≈18 exabytes), so no explicit
237    /// error path is provided.
238    ///
239    /// # Safety
240    ///
241    /// - `idx` must be within the vector's capacity.
242    /// - The vector must have `VARCHAR` type.
243    pub unsafe fn write_varchar(&mut self, idx: usize, value: &str) {
244        // SAFETY: self.vector is valid per constructor's contract.
245        // duckdb_vector_assign_string_element_len copies the string data.
246        unsafe {
247            duckdb_vector_assign_string_element_len(
248                self.vector,
249                idx as idx_t,
250                value.as_ptr().cast::<std::os::raw::c_char>(),
251                idx_t::try_from(value.len()).unwrap_or(idx_t::MAX),
252            );
253        }
254    }
255
256    /// Writes a `DATE` value at row `idx` as days since the Unix epoch.
257    ///
258    /// `DuckDB` stores DATE as a 4-byte `i32`. This is a semantic alias for
259    /// [`write_i32`][Self::write_i32].
260    ///
261    /// # Safety
262    ///
263    /// - `idx` must be within the vector's capacity.
264    /// - The vector must have `DATE` type.
265    #[inline]
266    pub const unsafe fn write_date(&mut self, idx: usize, days_since_epoch: i32) {
267        // SAFETY: DATE is stored as i32.
268        unsafe { self.write_i32(idx, days_since_epoch) };
269    }
270
271    /// Writes a `TIMESTAMP` value at row `idx` as microseconds since the Unix epoch.
272    ///
273    /// `DuckDB` stores TIMESTAMP as an 8-byte `i64`. This is a semantic alias for
274    /// [`write_i64`][Self::write_i64].
275    ///
276    /// # Safety
277    ///
278    /// - `idx` must be within the vector's capacity.
279    /// - The vector must have `TIMESTAMP` type.
280    #[inline]
281    pub const unsafe fn write_timestamp(&mut self, idx: usize, micros_since_epoch: i64) {
282        // SAFETY: TIMESTAMP is stored as i64.
283        unsafe { self.write_i64(idx, micros_since_epoch) };
284    }
285
286    /// Writes a `TIME` value at row `idx` as microseconds since midnight.
287    ///
288    /// `DuckDB` stores TIME as an 8-byte `i64`. This is a semantic alias for
289    /// [`write_i64`][Self::write_i64].
290    ///
291    /// # Safety
292    ///
293    /// - `idx` must be within the vector's capacity.
294    /// - The vector must have `TIME` type.
295    #[inline]
296    pub const unsafe fn write_time(&mut self, idx: usize, micros_since_midnight: i64) {
297        // SAFETY: TIME is stored as i64.
298        unsafe { self.write_i64(idx, micros_since_midnight) };
299    }
300
301    /// Writes an INTERVAL value at row `idx`.
302    ///
303    /// `DuckDB` stores INTERVAL as `{ months: i32, days: i32, micros: i64 }` in a
304    /// 16-byte layout. This method writes all three components at the correct offsets.
305    ///
306    /// # Safety
307    ///
308    /// - `idx` must be within the vector's capacity.
309    /// - The vector must have `INTERVAL` type.
310    #[inline]
311    pub const unsafe fn write_interval(
312        &mut self,
313        idx: usize,
314        value: crate::interval::DuckInterval,
315    ) {
316        // SAFETY: INTERVAL = { months: i32 @ 0, days: i32 @ 4, micros: i64 @ 8 } = 16 bytes.
317        let base = unsafe { self.data.add(idx * 16) };
318        unsafe {
319            core::ptr::write_unaligned(base.cast::<i32>(), value.months);
320            core::ptr::write_unaligned(base.add(4).cast::<i32>(), value.days);
321            core::ptr::write_unaligned(base.add(8).cast::<i64>(), value.micros);
322        }
323    }
324
325    /// Writes a `BLOB` (binary) value at row `idx`.
326    ///
327    /// This uses the same underlying storage as VARCHAR — `DuckDB` stores BLOBs
328    /// using `duckdb_vector_assign_string_element_len`, which copies the data.
329    ///
330    /// # Safety
331    ///
332    /// - `idx` must be within the vector's capacity.
333    /// - The vector must have `BLOB` type.
334    pub unsafe fn write_blob(&mut self, idx: usize, value: &[u8]) {
335        // SAFETY: BLOB uses the same storage as VARCHAR.
336        unsafe {
337            duckdb_vector_assign_string_element_len(
338                self.vector,
339                idx as idx_t,
340                value.as_ptr().cast::<std::os::raw::c_char>(),
341                idx_t::try_from(value.len()).unwrap_or(idx_t::MAX),
342            );
343        }
344    }
345
346    /// Writes a `UUID` value at row `idx`.
347    ///
348    /// `DuckDB` stores UUID as a HUGEINT (128-bit integer). This is a semantic
349    /// alias for [`write_i128`][Self::write_i128].
350    ///
351    /// # Safety
352    ///
353    /// - `idx` must be within the vector's capacity.
354    /// - The vector must have `UUID` type.
355    #[inline]
356    pub const unsafe fn write_uuid(&mut self, idx: usize, value: i128) {
357        // SAFETY: UUID is stored as HUGEINT (i128).
358        unsafe { self.write_i128(idx, value) };
359    }
360
361    /// Writes a VARCHAR string value at row `idx`.
362    ///
363    /// This is an alias for [`write_varchar`][VectorWriter::write_varchar] provided
364    /// for discoverability — extension authors often look for `write_str` first.
365    ///
366    /// # Safety
367    ///
368    /// - `idx` must be within the vector's capacity.
369    /// - The vector must have `VARCHAR` type.
370    #[inline]
371    pub unsafe fn write_str(&mut self, idx: usize, value: &str) {
372        // SAFETY: Delegates to write_varchar; same contract.
373        unsafe { self.write_varchar(idx, value) };
374    }
375
376    /// Marks row `idx` as NULL in the output vector.
377    ///
378    /// # Pitfall L4: `ensure_validity_writable`
379    ///
380    /// This method calls `duckdb_vector_ensure_validity_writable` before
381    /// `duckdb_vector_get_validity`, which is required before writing any NULL
382    /// flags. Forgetting this call returns an uninitialized pointer.
383    ///
384    /// # Safety
385    ///
386    /// - `idx` must be within the vector's capacity.
387    pub unsafe fn set_null(&mut self, idx: usize) {
388        // SAFETY: self.vector is valid per constructor's contract.
389        // PITFALL L4: must call ensure_validity_writable before get_validity for NULL output.
390        unsafe {
391            duckdb_vector_ensure_validity_writable(self.vector);
392        }
393        // SAFETY: ensure_validity_writable allocates the bitmap; it is now safe to read.
394        let validity = unsafe { duckdb_vector_get_validity(self.vector) };
395        // SAFETY: validity is now initialized and idx is in bounds per caller's contract.
396        unsafe {
397            duckdb_validity_set_row_invalid(validity, idx as idx_t);
398        }
399    }
400
401    /// Marks row `idx` as valid (non-NULL) in the output vector.
402    ///
403    /// Use this to undo a previous [`set_null`][Self::set_null] call for a row,
404    /// or to explicitly mark a row as valid after writing its value.
405    ///
406    /// Like [`set_null`][Self::set_null], this calls `ensure_validity_writable`
407    /// before modifying the validity bitmap.
408    ///
409    /// # Safety
410    ///
411    /// - `idx` must be within the vector's capacity.
412    pub unsafe fn set_valid(&mut self, idx: usize) {
413        // SAFETY: self.vector is valid per constructor's contract.
414        unsafe {
415            duckdb_vector_ensure_validity_writable(self.vector);
416        }
417        let validity = unsafe { duckdb_vector_get_validity(self.vector) };
418        // SAFETY: validity is now initialized and idx is in bounds per caller's contract.
419        unsafe {
420            duckdb_validity_set_row_valid(validity, idx as idx_t);
421        }
422    }
423
424    /// Returns the underlying raw vector handle.
425    #[must_use]
426    #[inline]
427    pub const fn as_raw(&self) -> duckdb_vector {
428        self.vector
429    }
430}
431
#[cfg(test)]
mod tests {
    // Functional coverage of VectorWriter needs a live DuckDB instance and
    // lives in tests/integration_test.rs. The unit tests in this module only
    // check the struct layout and pure-Rust logic.

    use super::VectorWriter;

    #[test]
    fn size_of_vector_writer() {
        // The writer holds two pointer-sized fields (vector handle + data
        // pointer), so it should be exactly two machine words.
        let word = core::mem::size_of::<usize>();
        assert_eq!(core::mem::size_of::<VectorWriter>(), 2 * word);
    }
}
445}