Skip to main content

quack_rs/vector/
complex.rs

// SPDX-License-Identifier: MIT
// Copyright 2026 Tom F. <https://github.com/tomtom215/>
// My way of giving something small back to the open source community
// and encouraging more Rust development!

//! Complex type vector operations: STRUCT fields, LIST elements, MAP entries.
//!
//! `DuckDB` stores complex types as nested vectors:
//!
//! - **STRUCT**: a parent vector with N child vectors, one per field.
//! - **LIST**: a parent vector holding `duckdb_list_entry { offset, length }` per row,
//!   plus a single flat child vector containing all elements end-to-end.
//! - **MAP**: stored as `LIST<STRUCT{key, value}>` — the list's child vector is a
//!   STRUCT with two children: `key` (index 0) and `value` (index 1).
//!
//! # Reading vs writing
//!
//! - Use [`StructVector`] / [`ListVector`] / [`MapVector`] to access child vectors
//!   from input or output vectors.
//! - Child vectors are themselves `duckdb_vector` handles — pass them to
//!   [`VectorReader`] or
//!   [`VectorWriter`] to read/write the actual values.
//!
//! # Example: Reading a STRUCT column
//!
//! ```rust,no_run
//! use quack_rs::vector::{VectorReader, complex::StructVector};
//! use libduckdb_sys::{duckdb_data_chunk, duckdb_data_chunk_get_vector};
//!
//! // Inside a table function scan callback:
//! // let parent_vec = unsafe { duckdb_data_chunk_get_vector(chunk, 0) };
//! // let x_vec = unsafe { StructVector::get_child(parent_vec, 0) }; // field index 0
//! // let x_reader = unsafe { VectorReader::from_vector(x_vec, row_count) };
//! // let x: f64 = unsafe { x_reader.read_f64(row_idx) };
//! ```
//!
//! # Example: Writing a LIST column
//!
//! ```rust,no_run
//! use quack_rs::vector::{VectorWriter, complex::ListVector};
//! use libduckdb_sys::{duckdb_data_chunk_get_vector, duckdb_data_chunk};
//!
//! // Inside a scan callback:
//! // let list_vec = unsafe { duckdb_data_chunk_get_vector(output, 0) };
//! // // Write 3 elements for row 0: [10, 20, 30]
//! // unsafe { ListVector::reserve(list_vec, 3) };
//! // unsafe { ListVector::set_size(list_vec, 3) };
//! // // Write the list offset/length entry for row 0.
//! // unsafe { ListVector::set_entry(list_vec, 0, 0, 3) }; // row=0, offset=0, length=3
//! // // Write values into the child vector.
//! // let child = unsafe { ListVector::get_child(list_vec) };
//! // let mut writer = unsafe { VectorWriter::from_vector(child) };
//! // unsafe { writer.write_i64(0, 10); writer.write_i64(1, 20); writer.write_i64(2, 30); }
//! ```
55
56use libduckdb_sys::{
57    duckdb_array_vector_get_child, duckdb_list_entry, duckdb_list_vector_get_child,
58    duckdb_list_vector_get_size, duckdb_list_vector_reserve, duckdb_list_vector_set_size,
59    duckdb_struct_vector_get_child, duckdb_vector, duckdb_vector_get_data, idx_t,
60};
61
62use crate::vector::{VectorReader, VectorWriter};
63
64// ─── STRUCT ──────────────────────────────────────────────────────────────────
65
66/// Operations on STRUCT vectors (accessing child field vectors).
67pub struct StructVector;
68
69impl StructVector {
70    /// Returns the child vector for the given field index of a STRUCT vector.
71    ///
72    /// Field indices correspond to the order of fields in the STRUCT type definition.
73    ///
74    /// # Safety
75    ///
76    /// - `vector` must be a valid `DuckDB` STRUCT vector.
77    /// - `field_idx` must be a valid field index (0 ≤ `field_idx` < number of struct fields).
78    /// - The returned vector is borrowed from `vector` and must not outlive it.
79    #[inline]
80    #[must_use]
81    pub unsafe fn get_child(vector: duckdb_vector, field_idx: usize) -> duckdb_vector {
82        // SAFETY: caller guarantees vector is a valid STRUCT vector and field_idx is valid.
83        unsafe { duckdb_struct_vector_get_child(vector, field_idx as idx_t) }
84    }
85
86    /// Creates a [`VectorReader`] for the given field of a STRUCT vector.
87    ///
88    /// # Safety
89    ///
90    /// - `vector` must be a valid `DuckDB` STRUCT vector.
91    /// - `field_idx` must be a valid field index.
92    /// - `row_count` must match the number of rows in the parent chunk.
93    pub unsafe fn field_reader(
94        vector: duckdb_vector,
95        field_idx: usize,
96        row_count: usize,
97    ) -> VectorReader {
98        let child = unsafe { Self::get_child(vector, field_idx) };
99        // SAFETY: child is a valid vector with row_count rows.
100        unsafe { VectorReader::from_vector(child, row_count) }
101    }
102
103    /// Creates a [`VectorWriter`] for the given field of a STRUCT vector.
104    ///
105    /// # Safety
106    ///
107    /// - `vector` must be a valid `DuckDB` STRUCT vector.
108    /// - `field_idx` must be a valid field index.
109    pub unsafe fn field_writer(vector: duckdb_vector, field_idx: usize) -> VectorWriter {
110        let child = unsafe { Self::get_child(vector, field_idx) };
111        // SAFETY: child is a valid writable vector.
112        unsafe { VectorWriter::from_vector(child) }
113    }
114}
115
116// ─── LIST ────────────────────────────────────────────────────────────────────
117
118/// Operations on LIST vectors.
119///
120/// A LIST vector stores a `duckdb_list_entry { offset: u64, length: u64 }` per row
121/// in the parent vector, and all element values in a flat child vector.
122///
123/// # Write workflow
124///
125/// 1. [`reserve`][ListVector::reserve] — ensure child vector has capacity.
126/// 2. Write element values into the child via [`get_child`][ListVector::get_child] + [`VectorWriter`].
127/// 3. [`set_size`][ListVector::set_size] — tell `DuckDB` how many elements were written.
128/// 4. [`set_entry`][ListVector::set_entry] — write the offset/length for each parent row.
129pub struct ListVector;
130
131impl ListVector {
132    /// Returns the child vector containing all list elements (flat, across all rows).
133    ///
134    /// # Safety
135    ///
136    /// - `vector` must be a valid `DuckDB` LIST vector.
137    /// - The returned handle is borrowed from `vector`.
138    #[inline]
139    #[must_use]
140    pub unsafe fn get_child(vector: duckdb_vector) -> duckdb_vector {
141        // SAFETY: caller guarantees vector is a valid LIST vector.
142        unsafe { duckdb_list_vector_get_child(vector) }
143    }
144
145    /// Returns the total number of elements currently in the child vector.
146    ///
147    /// # Safety
148    ///
149    /// `vector` must be a valid `DuckDB` LIST vector.
150    #[inline]
151    #[must_use]
152    pub unsafe fn get_size(vector: duckdb_vector) -> usize {
153        usize::try_from(unsafe { duckdb_list_vector_get_size(vector) }).unwrap_or(0)
154    }
155
156    /// Sets the number of elements in the child vector.
157    ///
158    /// Call after writing all element values. `DuckDB` uses this to know how many
159    /// child elements are valid.
160    ///
161    /// # Safety
162    ///
163    /// - `vector` must be a valid `DuckDB` LIST vector.
164    /// - `size` must equal the number of elements written into the child vector.
165    #[inline]
166    pub unsafe fn set_size(vector: duckdb_vector, size: usize) {
167        // SAFETY: caller guarantees vector is valid.
168        unsafe { duckdb_list_vector_set_size(vector, size as idx_t) };
169    }
170
171    /// Reserves capacity in the child vector for at least `capacity` elements.
172    ///
173    /// Call before writing elements to ensure the child vector has enough space.
174    ///
175    /// # Safety
176    ///
177    /// `vector` must be a valid `DuckDB` LIST vector.
178    #[inline]
179    pub unsafe fn reserve(vector: duckdb_vector, capacity: usize) {
180        // SAFETY: caller guarantees vector is valid.
181        unsafe { duckdb_list_vector_reserve(vector, capacity as idx_t) };
182    }
183
184    /// Writes the offset/length metadata entry for a parent row.
185    ///
186    /// This tells `DuckDB` where in the flat child vector this row's elements start
187    /// and how many elements it has.
188    ///
189    /// # Safety
190    ///
191    /// - `vector` must be a valid `DuckDB` LIST vector.
192    /// - `row_idx` must be a valid row index in the parent vector.
193    /// - `offset + length` must not exceed the size of the child vector.
194    pub unsafe fn set_entry(vector: duckdb_vector, row_idx: usize, offset: u64, length: u64) {
195        // SAFETY: vector is valid; we write to the parent vector's data at row_idx.
196        let data = unsafe { duckdb_vector_get_data(vector) };
197        // The parent stores duckdb_list_entry per row. Each entry is { offset: u64, length: u64 }.
198        let entry_ptr = unsafe { data.cast::<duckdb_list_entry>().add(row_idx) };
199        // SAFETY: entry_ptr is in bounds for the allocated vector.
200        unsafe {
201            (*entry_ptr).offset = offset;
202            (*entry_ptr).length = length;
203        }
204    }
205
206    /// Returns the `duckdb_list_entry` for a given row (for reading).
207    ///
208    /// # Safety
209    ///
210    /// - `vector` must be a valid `DuckDB` LIST vector.
211    /// - `row_idx` must be a valid row index.
212    #[must_use]
213    pub unsafe fn get_entry(vector: duckdb_vector, row_idx: usize) -> duckdb_list_entry {
214        let data = unsafe { duckdb_vector_get_data(vector) };
215        let entry_ptr = unsafe { data.cast::<duckdb_list_entry>().add(row_idx) };
216        // SAFETY: entry_ptr is valid and initialized by DuckDB or a prior set_entry call.
217        unsafe { core::ptr::read_unaligned(entry_ptr) }
218    }
219
220    /// Creates a [`VectorWriter`] for the child vector (elements).
221    ///
222    /// # Safety
223    ///
224    /// - `vector` must be a valid `DuckDB` LIST vector.
225    /// - The child must have been reserved with at least `capacity` elements.
226    pub unsafe fn child_writer(vector: duckdb_vector) -> VectorWriter {
227        let child = unsafe { Self::get_child(vector) };
228        unsafe { VectorWriter::from_vector(child) }
229    }
230
231    /// Creates a [`VectorReader`] for the child vector (reading list elements).
232    ///
233    /// # Safety
234    ///
235    /// - `vector` must be a valid `DuckDB` LIST vector.
236    /// - `element_count` must equal the total number of elements in the child.
237    pub unsafe fn child_reader(vector: duckdb_vector, element_count: usize) -> VectorReader {
238        let child = unsafe { Self::get_child(vector) };
239        unsafe { VectorReader::from_vector(child, element_count) }
240    }
241}
242
243// ─── MAP ─────────────────────────────────────────────────────────────────────
244
245/// Operations on MAP vectors.
246///
247/// `DuckDB` stores maps as `LIST<STRUCT{key: K, value: V}>`.
248/// The child of the list vector is a STRUCT vector with two fields:
249/// - field index 0: keys
250/// - field index 1: values
251///
252/// # Example
253///
254/// ```rust,no_run
255/// use quack_rs::vector::complex::MapVector;
256/// use libduckdb_sys::duckdb_vector;
257///
258/// // Reading MAP keys from a MAP vector:
259/// // let keys_vec = unsafe { MapVector::keys(map_vector) };
260/// // let vals_vec = unsafe { MapVector::values(map_vector) };
261/// ```
262pub struct MapVector;
263
264impl MapVector {
265    /// Returns the child STRUCT vector (contains both keys and values as fields).
266    ///
267    /// # Safety
268    ///
269    /// `vector` must be a valid `DuckDB` MAP vector.
270    #[inline]
271    #[must_use]
272    pub unsafe fn struct_child(vector: duckdb_vector) -> duckdb_vector {
273        // MAP is LIST<STRUCT{key,value}>, so the list child is a STRUCT vector.
274        unsafe { duckdb_list_vector_get_child(vector) }
275    }
276
277    /// Returns the keys vector (STRUCT field 0 of the MAP's child).
278    ///
279    /// # Safety
280    ///
281    /// `vector` must be a valid `DuckDB` MAP vector.
282    #[inline]
283    #[must_use]
284    pub unsafe fn keys(vector: duckdb_vector) -> duckdb_vector {
285        let struct_vec = unsafe { Self::struct_child(vector) };
286        // SAFETY: MAP child STRUCT always has key at field 0, value at field 1.
287        unsafe { duckdb_struct_vector_get_child(struct_vec, 0) }
288    }
289
290    /// Returns the values vector (STRUCT field 1 of the MAP's child).
291    ///
292    /// # Safety
293    ///
294    /// `vector` must be a valid `DuckDB` MAP vector.
295    #[inline]
296    #[must_use]
297    pub unsafe fn values(vector: duckdb_vector) -> duckdb_vector {
298        let struct_vec = unsafe { Self::struct_child(vector) };
299        // SAFETY: MAP child STRUCT always has key at field 0, value at field 1.
300        unsafe { duckdb_struct_vector_get_child(struct_vec, 1) }
301    }
302
303    /// Returns the total number of key-value pairs across all rows.
304    ///
305    /// # Safety
306    ///
307    /// `vector` must be a valid `DuckDB` MAP vector.
308    #[inline]
309    #[must_use]
310    pub unsafe fn total_entry_count(vector: duckdb_vector) -> usize {
311        usize::try_from(unsafe { duckdb_list_vector_get_size(vector) }).unwrap_or(0)
312    }
313
314    /// Reserves capacity in the MAP's child vector for at least `capacity` entries.
315    ///
316    /// # Safety
317    ///
318    /// `vector` must be a valid `DuckDB` MAP vector.
319    #[inline]
320    pub unsafe fn reserve(vector: duckdb_vector, capacity: usize) {
321        unsafe { duckdb_list_vector_reserve(vector, capacity as idx_t) };
322    }
323
324    /// Sets the total number of key-value entries written.
325    ///
326    /// # Safety
327    ///
328    /// `vector` must be a valid `DuckDB` MAP vector.
329    #[inline]
330    pub unsafe fn set_size(vector: duckdb_vector, size: usize) {
331        unsafe { duckdb_list_vector_set_size(vector, size as idx_t) };
332    }
333
334    /// Writes the offset/length metadata for a parent MAP row.
335    ///
336    /// This has the same semantics as [`ListVector::set_entry`], since MAP is a LIST.
337    ///
338    /// # Safety
339    ///
340    /// Same as [`ListVector::set_entry`].
341    #[inline]
342    pub unsafe fn set_entry(vector: duckdb_vector, row_idx: usize, offset: u64, length: u64) {
343        // SAFETY: same layout as ListVector.
344        unsafe { ListVector::set_entry(vector, row_idx, offset, length) };
345    }
346
347    /// Returns the `duckdb_list_entry` for a given MAP row (for reading).
348    ///
349    /// # Safety
350    ///
351    /// Same as [`ListVector::get_entry`].
352    #[must_use]
353    pub unsafe fn get_entry(vector: duckdb_vector, row_idx: usize) -> duckdb_list_entry {
354        unsafe { ListVector::get_entry(vector, row_idx) }
355    }
356
357    /// Creates a [`VectorWriter`] for the keys vector (STRUCT field 0).
358    ///
359    /// # Safety
360    ///
361    /// `vector` must be a valid `DuckDB` MAP vector.
362    pub unsafe fn key_writer(vector: duckdb_vector) -> VectorWriter {
363        let keys = unsafe { Self::keys(vector) };
364        // SAFETY: keys is a valid writable child vector.
365        unsafe { VectorWriter::from_vector(keys) }
366    }
367
368    /// Creates a [`VectorWriter`] for the values vector (STRUCT field 1).
369    ///
370    /// # Safety
371    ///
372    /// `vector` must be a valid `DuckDB` MAP vector.
373    pub unsafe fn value_writer(vector: duckdb_vector) -> VectorWriter {
374        let vals = unsafe { Self::values(vector) };
375        // SAFETY: vals is a valid writable child vector.
376        unsafe { VectorWriter::from_vector(vals) }
377    }
378
379    /// Creates a [`VectorReader`] for the keys vector.
380    ///
381    /// # Safety
382    ///
383    /// - `vector` must be a valid `DuckDB` MAP vector.
384    /// - `element_count` must equal the total number of key-value entries.
385    pub unsafe fn key_reader(vector: duckdb_vector, element_count: usize) -> VectorReader {
386        let keys = unsafe { Self::keys(vector) };
387        // SAFETY: keys is a valid vector with element_count elements.
388        unsafe { VectorReader::from_vector(keys, element_count) }
389    }
390
391    /// Creates a [`VectorReader`] for the values vector.
392    ///
393    /// # Safety
394    ///
395    /// - `vector` must be a valid `DuckDB` MAP vector.
396    /// - `element_count` must equal the total number of key-value entries.
397    pub unsafe fn value_reader(vector: duckdb_vector, element_count: usize) -> VectorReader {
398        let vals = unsafe { Self::values(vector) };
399        // SAFETY: vals is a valid vector with element_count elements.
400        unsafe { VectorReader::from_vector(vals, element_count) }
401    }
402}
403
404// ─── ARRAY ──────────────────────────────────────────────────────────────────
405
406/// Helpers for working with `ARRAY` vectors (fixed-size arrays).
407pub struct ArrayVector;
408
409impl ArrayVector {
410    /// Returns the child vector of an array vector.
411    ///
412    /// # Safety
413    ///
414    /// - `vector` must be a valid `DuckDB` ARRAY vector.
415    /// - The returned handle is borrowed from `vector` and must not outlive it.
416    #[inline]
417    #[must_use]
418    pub unsafe fn get_child(vector: duckdb_vector) -> duckdb_vector {
419        unsafe { duckdb_array_vector_get_child(vector) }
420    }
421}
422
#[cfg(test)]
mod tests {
    use libduckdb_sys::duckdb_list_entry;

    /// The list accessors step through the parent buffer in units of
    /// `duckdb_list_entry`, so the struct must be exactly two `u64`s wide.
    #[test]
    fn list_entry_layout() {
        // Verify duckdb_list_entry has the expected size (2 × u64 = 16 bytes).
        assert_eq!(
            core::mem::size_of::<duckdb_list_entry>(),
            16,
            "duckdb_list_entry should be {{ offset: u64, length: u64 }}"
        );
    }

    /// Exercises the cast-and-offset addressing used by `ListVector::set_entry`
    /// and `ListVector::get_entry` on a plain in-memory buffer.
    ///
    /// The real functions cannot be called here because obtaining the data
    /// pointer requires a live DuckDB vector.
    #[test]
    fn set_and_get_list_entry() {
        // Stand-in for the list parent vector's data buffer: three rows.
        let mut rows = [
            duckdb_list_entry {
                offset: 0,
                length: 0,
            },
            duckdb_list_entry {
                offset: 0,
                length: 0,
            },
            duckdb_list_entry {
                offset: 0,
                length: 0,
            },
        ];
        // The accessors start from an untyped data pointer, so do the same here.
        let data: *mut core::ffi::c_void = rows.as_mut_ptr().cast();

        // Write row 1 (offset=5, length=3), addressing the slot the same way
        // the accessors do: cast to the entry type, then step by row index.
        // SAFETY: index 1 is inside the three-element buffer.
        unsafe {
            let entry_ptr = data.cast::<duckdb_list_entry>().add(1);
            (*entry_ptr).offset = 5;
            (*entry_ptr).length = 3;
        }

        // Read row 1 back through the same addressing.
        // SAFETY: index 1 is inside the buffer and was initialized above.
        let read_back =
            unsafe { core::ptr::read_unaligned(data.cast::<duckdb_list_entry>().add(1)) };
        assert_eq!(read_back.offset, 5);
        assert_eq!(read_back.length, 3);

        // The write must land on row 1 only; its neighbours stay zeroed.
        assert_eq!(rows[0].offset, 0);
        assert_eq!(rows[0].length, 0);
        assert_eq!(rows[1].offset, 5);
        assert_eq!(rows[1].length, 3);
        assert_eq!(rows[2].offset, 0);
        assert_eq!(rows[2].length, 0);
    }
}