quack_rs/vector/writer.rs
1// SPDX-License-Identifier: MIT
2// Copyright 2026 Tom F. <https://github.com/tomtom215/>
3// My way of giving something small back to the open source community
4// and encouraging more Rust development!
5
6//! Safe typed writing to `DuckDB` result vectors.
7//!
8//! [`VectorWriter`] provides safe methods for writing typed values and NULL
9//! flags to a `DuckDB` output vector from within a `finalize` callback.
10//!
11//! # Pitfall L4: `ensure_validity_writable`
12//!
//! When writing NULL values, you must call `duckdb_vector_ensure_validity_writable`
//! before `duckdb_vector_get_validity`. If you skip this call, `get_validity` may
//! hand back a pointer that is not safe to write through (`DuckDB` documents that it
//! can be NULL while every row is still valid), and marking a row through it will
//! cause a segfault or silent corruption.
16//!
17//! [`VectorWriter::set_null`] calls `ensure_validity_writable` automatically.
18
19use libduckdb_sys::{
20 duckdb_validity_set_row_invalid, duckdb_validity_set_row_valid, duckdb_vector,
21 duckdb_vector_assign_string_element_len, duckdb_vector_ensure_validity_writable,
22 duckdb_vector_get_data, duckdb_vector_get_validity, idx_t,
23};
24
25/// A typed writer for a `DuckDB` output vector in a `finalize` callback.
26///
27/// # Example
28///
29/// ```rust,no_run
30/// use quack_rs::vector::VectorWriter;
31/// use libduckdb_sys::duckdb_vector;
32///
33/// // Inside finalize:
34/// // let mut writer = unsafe { VectorWriter::new(result_vector) };
35/// // for row in 0..count {
36/// // if let Some(val) = compute_result(row) {
37/// // unsafe { writer.write_i64(row, val) };
38/// // } else {
39/// // unsafe { writer.set_null(row) };
40/// // }
41/// // }
42/// ```
43pub struct VectorWriter {
44 vector: duckdb_vector,
45 data: *mut u8,
46}
47
48impl VectorWriter {
49 /// Creates a new `VectorWriter` for the given result vector.
50 ///
51 /// # Safety
52 ///
53 /// `vector` must be a valid `DuckDB` output vector obtained in a `finalize`
54 /// callback. The vector must not be destroyed while this writer is live.
55 pub unsafe fn new(vector: duckdb_vector) -> Self {
56 // SAFETY: Caller guarantees vector is valid.
57 let data = unsafe { duckdb_vector_get_data(vector) }.cast::<u8>();
58 Self { vector, data }
59 }
60
61 /// Creates a `VectorWriter` directly from a raw `duckdb_vector` handle.
62 ///
63 /// Use this when you need to write into a child vector (e.g., a STRUCT field
64 /// or LIST element vector) obtained from
65 /// [`StructVector::get_child`][crate::vector::complex::StructVector::get_child] or
66 /// [`ListVector::get_child`][crate::vector::complex::ListVector::get_child].
67 ///
68 /// # Safety
69 ///
70 /// `vector` must be a valid, writable `duckdb_vector`. The vector must not be
71 /// destroyed while this writer is live.
72 pub unsafe fn from_vector(vector: duckdb_vector) -> Self {
73 // SAFETY: caller guarantees vector is valid.
74 let data = unsafe { duckdb_vector_get_data(vector) }.cast::<u8>();
75 Self { vector, data }
76 }
77
78 /// Writes an `i8` (TINYINT) value at row `idx`.
79 ///
80 /// # Safety
81 ///
82 /// - `idx` must be within the vector's capacity.
83 /// - The vector must have `TINYINT` type.
84 #[inline]
85 pub const unsafe fn write_i8(&mut self, idx: usize, value: i8) {
86 // SAFETY: data points to a valid writable TINYINT array. idx is in bounds.
87 unsafe { core::ptr::write_unaligned(self.data.add(idx).cast::<i8>(), value) };
88 }
89
90 /// Writes an `i16` (SMALLINT) value at row `idx`.
91 ///
92 /// # Safety
93 ///
94 /// See [`write_i8`][Self::write_i8].
95 #[inline]
96 pub const unsafe fn write_i16(&mut self, idx: usize, value: i16) {
97 // SAFETY: 2-byte aligned write to valid SMALLINT vector.
98 unsafe { core::ptr::write_unaligned(self.data.add(idx * 2).cast::<i16>(), value) };
99 }
100
101 /// Writes an `i32` (INTEGER) value at row `idx`.
102 ///
103 /// # Safety
104 ///
105 /// See [`write_i8`][Self::write_i8].
106 #[inline]
107 pub const unsafe fn write_i32(&mut self, idx: usize, value: i32) {
108 // SAFETY: 4-byte aligned write to valid INTEGER vector.
109 unsafe { core::ptr::write_unaligned(self.data.add(idx * 4).cast::<i32>(), value) };
110 }
111
112 /// Writes an `i64` (BIGINT / TIMESTAMP) value at row `idx`.
113 ///
114 /// # Safety
115 ///
116 /// See [`write_i8`][Self::write_i8].
117 #[inline]
118 pub const unsafe fn write_i64(&mut self, idx: usize, value: i64) {
119 // SAFETY: 8-byte aligned write to valid BIGINT vector.
120 unsafe { core::ptr::write_unaligned(self.data.add(idx * 8).cast::<i64>(), value) };
121 }
122
123 /// Writes a `u8` (UTINYINT) value at row `idx`.
124 ///
125 /// # Safety
126 ///
127 /// See [`write_i8`][Self::write_i8].
128 #[inline]
129 pub const unsafe fn write_u8(&mut self, idx: usize, value: u8) {
130 // SAFETY: 1-byte write to valid UTINYINT vector.
131 unsafe { *self.data.add(idx) = value };
132 }
133
134 /// Writes a `u32` (UINTEGER) value at row `idx`.
135 ///
136 /// # Safety
137 ///
138 /// See [`write_i8`][Self::write_i8].
139 #[inline]
140 pub const unsafe fn write_u32(&mut self, idx: usize, value: u32) {
141 // SAFETY: 4-byte aligned write to valid UINTEGER vector.
142 unsafe { core::ptr::write_unaligned(self.data.add(idx * 4).cast::<u32>(), value) };
143 }
144
145 /// Writes a `u64` (UBIGINT) value at row `idx`.
146 ///
147 /// # Safety
148 ///
149 /// See [`write_i8`][Self::write_i8].
150 #[inline]
151 pub const unsafe fn write_u64(&mut self, idx: usize, value: u64) {
152 // SAFETY: 8-byte aligned write to valid UBIGINT vector.
153 unsafe { core::ptr::write_unaligned(self.data.add(idx * 8).cast::<u64>(), value) };
154 }
155
156 /// Writes an `f32` (FLOAT) value at row `idx`.
157 ///
158 /// # Safety
159 ///
160 /// See [`write_i8`][Self::write_i8].
161 #[inline]
162 pub const unsafe fn write_f32(&mut self, idx: usize, value: f32) {
163 // SAFETY: 4-byte aligned write to valid FLOAT vector.
164 unsafe { core::ptr::write_unaligned(self.data.add(idx * 4).cast::<f32>(), value) };
165 }
166
167 /// Writes an `f64` (DOUBLE) value at row `idx`.
168 ///
169 /// # Safety
170 ///
171 /// See [`write_i8`][Self::write_i8].
172 #[inline]
173 pub const unsafe fn write_f64(&mut self, idx: usize, value: f64) {
174 // SAFETY: 8-byte aligned write to valid DOUBLE vector.
175 unsafe { core::ptr::write_unaligned(self.data.add(idx * 8).cast::<f64>(), value) };
176 }
177
178 /// Writes a `bool` (BOOLEAN) value at row `idx`.
179 ///
180 /// Booleans are stored as a single byte: `1` for `true`, `0` for `false`.
181 ///
182 /// # Safety
183 ///
184 /// - `idx` must be within the vector's capacity.
185 /// - The vector must have `BOOLEAN` type.
186 #[inline]
187 pub unsafe fn write_bool(&mut self, idx: usize, value: bool) {
188 // SAFETY: BOOLEAN stored as 1 byte.
189 unsafe { *self.data.add(idx) = u8::from(value) };
190 }
191
192 /// Writes an `i128` (HUGEINT) value at row `idx`.
193 ///
194 /// `DuckDB` stores HUGEINT as `{ lower: u64, upper: i64 }` in little-endian
195 /// layout, totaling 16 bytes per value.
196 ///
197 /// # Safety
198 ///
199 /// - `idx` must be within the vector's capacity.
200 /// - The vector must have `HUGEINT` type.
201 #[inline]
202 pub const unsafe fn write_i128(&mut self, idx: usize, value: i128) {
203 // SAFETY: HUGEINT = { lower: u64, upper: i64 } = 16 bytes.
204 let base = unsafe { self.data.add(idx * 16) };
205 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
206 let lower = value as u64;
207 #[allow(clippy::cast_possible_truncation)]
208 let upper = (value >> 64) as i64;
209 unsafe {
210 core::ptr::write_unaligned(base.cast::<u64>(), lower);
211 core::ptr::write_unaligned(base.add(8).cast::<i64>(), upper);
212 }
213 }
214
215 /// Writes a `u16` (USMALLINT) value at row `idx`.
216 ///
217 /// # Safety
218 ///
219 /// See [`write_i8`][Self::write_i8].
220 #[inline]
221 pub const unsafe fn write_u16(&mut self, idx: usize, value: u16) {
222 // SAFETY: 2-byte aligned write to valid USMALLINT vector.
223 unsafe { core::ptr::write_unaligned(self.data.add(idx * 2).cast::<u16>(), value) };
224 }
225
226 /// Writes a VARCHAR string value at row `idx`.
227 ///
228 /// This uses `duckdb_vector_assign_string_element_len` which handles both
229 /// the inline (≤12 bytes) and pointer (>12 bytes) storage formats
230 /// automatically. `DuckDB` manages the memory for the string data.
231 ///
232 /// # Note on very long strings
233 ///
234 /// If `value.len()` exceeds `idx_t::MAX` (2^64 − 1 on 64-bit platforms),
235 /// the length is silently clamped to `idx_t::MAX`. In practice, this limit
236 /// is unreachable on any current hardware (≈18 exabytes), so no explicit
237 /// error path is provided.
238 ///
239 /// # Safety
240 ///
241 /// - `idx` must be within the vector's capacity.
242 /// - The vector must have `VARCHAR` type.
243 pub unsafe fn write_varchar(&mut self, idx: usize, value: &str) {
244 // SAFETY: self.vector is valid per constructor's contract.
245 // duckdb_vector_assign_string_element_len copies the string data.
246 unsafe {
247 duckdb_vector_assign_string_element_len(
248 self.vector,
249 idx as idx_t,
250 value.as_ptr().cast::<std::os::raw::c_char>(),
251 idx_t::try_from(value.len()).unwrap_or(idx_t::MAX),
252 );
253 }
254 }
255
256 /// Writes a `DATE` value at row `idx` as days since the Unix epoch.
257 ///
258 /// `DuckDB` stores DATE as a 4-byte `i32`. This is a semantic alias for
259 /// [`write_i32`][Self::write_i32].
260 ///
261 /// # Safety
262 ///
263 /// - `idx` must be within the vector's capacity.
264 /// - The vector must have `DATE` type.
265 #[inline]
266 pub const unsafe fn write_date(&mut self, idx: usize, days_since_epoch: i32) {
267 // SAFETY: DATE is stored as i32.
268 unsafe { self.write_i32(idx, days_since_epoch) };
269 }
270
271 /// Writes a `TIMESTAMP` value at row `idx` as microseconds since the Unix epoch.
272 ///
273 /// `DuckDB` stores TIMESTAMP as an 8-byte `i64`. This is a semantic alias for
274 /// [`write_i64`][Self::write_i64].
275 ///
276 /// # Safety
277 ///
278 /// - `idx` must be within the vector's capacity.
279 /// - The vector must have `TIMESTAMP` type.
280 #[inline]
281 pub const unsafe fn write_timestamp(&mut self, idx: usize, micros_since_epoch: i64) {
282 // SAFETY: TIMESTAMP is stored as i64.
283 unsafe { self.write_i64(idx, micros_since_epoch) };
284 }
285
286 /// Writes a `TIME` value at row `idx` as microseconds since midnight.
287 ///
288 /// `DuckDB` stores TIME as an 8-byte `i64`. This is a semantic alias for
289 /// [`write_i64`][Self::write_i64].
290 ///
291 /// # Safety
292 ///
293 /// - `idx` must be within the vector's capacity.
294 /// - The vector must have `TIME` type.
295 #[inline]
296 pub const unsafe fn write_time(&mut self, idx: usize, micros_since_midnight: i64) {
297 // SAFETY: TIME is stored as i64.
298 unsafe { self.write_i64(idx, micros_since_midnight) };
299 }
300
301 /// Writes an INTERVAL value at row `idx`.
302 ///
303 /// `DuckDB` stores INTERVAL as `{ months: i32, days: i32, micros: i64 }` in a
304 /// 16-byte layout. This method writes all three components at the correct offsets.
305 ///
306 /// # Safety
307 ///
308 /// - `idx` must be within the vector's capacity.
309 /// - The vector must have `INTERVAL` type.
310 #[inline]
311 pub const unsafe fn write_interval(
312 &mut self,
313 idx: usize,
314 value: crate::interval::DuckInterval,
315 ) {
316 // SAFETY: INTERVAL = { months: i32 @ 0, days: i32 @ 4, micros: i64 @ 8 } = 16 bytes.
317 let base = unsafe { self.data.add(idx * 16) };
318 unsafe {
319 core::ptr::write_unaligned(base.cast::<i32>(), value.months);
320 core::ptr::write_unaligned(base.add(4).cast::<i32>(), value.days);
321 core::ptr::write_unaligned(base.add(8).cast::<i64>(), value.micros);
322 }
323 }
324
325 /// Writes a `BLOB` (binary) value at row `idx`.
326 ///
327 /// This uses the same underlying storage as VARCHAR — `DuckDB` stores BLOBs
328 /// using `duckdb_vector_assign_string_element_len`, which copies the data.
329 ///
330 /// # Safety
331 ///
332 /// - `idx` must be within the vector's capacity.
333 /// - The vector must have `BLOB` type.
334 pub unsafe fn write_blob(&mut self, idx: usize, value: &[u8]) {
335 // SAFETY: BLOB uses the same storage as VARCHAR.
336 unsafe {
337 duckdb_vector_assign_string_element_len(
338 self.vector,
339 idx as idx_t,
340 value.as_ptr().cast::<std::os::raw::c_char>(),
341 idx_t::try_from(value.len()).unwrap_or(idx_t::MAX),
342 );
343 }
344 }
345
346 /// Writes a `UUID` value at row `idx`.
347 ///
348 /// `DuckDB` stores UUID as a HUGEINT (128-bit integer). This is a semantic
349 /// alias for [`write_i128`][Self::write_i128].
350 ///
351 /// # Safety
352 ///
353 /// - `idx` must be within the vector's capacity.
354 /// - The vector must have `UUID` type.
355 #[inline]
356 pub const unsafe fn write_uuid(&mut self, idx: usize, value: i128) {
357 // SAFETY: UUID is stored as HUGEINT (i128).
358 unsafe { self.write_i128(idx, value) };
359 }
360
361 /// Writes a VARCHAR string value at row `idx`.
362 ///
363 /// This is an alias for [`write_varchar`][VectorWriter::write_varchar] provided
364 /// for discoverability — extension authors often look for `write_str` first.
365 ///
366 /// # Safety
367 ///
368 /// - `idx` must be within the vector's capacity.
369 /// - The vector must have `VARCHAR` type.
370 #[inline]
371 pub unsafe fn write_str(&mut self, idx: usize, value: &str) {
372 // SAFETY: Delegates to write_varchar; same contract.
373 unsafe { self.write_varchar(idx, value) };
374 }
375
376 /// Marks row `idx` as NULL in the output vector.
377 ///
378 /// # Pitfall L4: `ensure_validity_writable`
379 ///
380 /// This method calls `duckdb_vector_ensure_validity_writable` before
381 /// `duckdb_vector_get_validity`, which is required before writing any NULL
382 /// flags. Forgetting this call returns an uninitialized pointer.
383 ///
384 /// # Safety
385 ///
386 /// - `idx` must be within the vector's capacity.
387 pub unsafe fn set_null(&mut self, idx: usize) {
388 // SAFETY: self.vector is valid per constructor's contract.
389 // PITFALL L4: must call ensure_validity_writable before get_validity for NULL output.
390 unsafe {
391 duckdb_vector_ensure_validity_writable(self.vector);
392 }
393 // SAFETY: ensure_validity_writable allocates the bitmap; it is now safe to read.
394 let validity = unsafe { duckdb_vector_get_validity(self.vector) };
395 // SAFETY: validity is now initialized and idx is in bounds per caller's contract.
396 unsafe {
397 duckdb_validity_set_row_invalid(validity, idx as idx_t);
398 }
399 }
400
401 /// Marks row `idx` as valid (non-NULL) in the output vector.
402 ///
403 /// Use this to undo a previous [`set_null`][Self::set_null] call for a row,
404 /// or to explicitly mark a row as valid after writing its value.
405 ///
406 /// Like [`set_null`][Self::set_null], this calls `ensure_validity_writable`
407 /// before modifying the validity bitmap.
408 ///
409 /// # Safety
410 ///
411 /// - `idx` must be within the vector's capacity.
412 pub unsafe fn set_valid(&mut self, idx: usize) {
413 // SAFETY: self.vector is valid per constructor's contract.
414 unsafe {
415 duckdb_vector_ensure_validity_writable(self.vector);
416 }
417 let validity = unsafe { duckdb_vector_get_validity(self.vector) };
418 // SAFETY: validity is now initialized and idx is in bounds per caller's contract.
419 unsafe {
420 duckdb_validity_set_row_valid(validity, idx as idx_t);
421 }
422 }
423
424 /// Returns the underlying raw vector handle.
425 #[must_use]
426 #[inline]
427 pub const fn as_raw(&self) -> duckdb_vector {
428 self.vector
429 }
430}
431
#[cfg(test)]
mod tests {
    // Functional tests for VectorWriter require a live DuckDB instance and are
    // located in tests/integration_test.rs. Unit tests here verify the struct
    // layout and any pure-Rust logic.

    use super::VectorWriter;
    use std::mem::size_of;

    /// The writer holds exactly two pointer-sized fields (vector handle and
    /// data pointer), so it should occupy two machine words.
    #[test]
    fn size_of_vector_writer() {
        let two_pointers = 2 * size_of::<usize>();
        assert_eq!(size_of::<VectorWriter>(), two_pointers);
    }
}