quack_rs/vector/complex.rs
1// SPDX-License-Identifier: MIT
2// Copyright 2026 Tom F. <https://github.com/tomtom215/>
3// My way of giving something small back to the open source community
4// and encouraging more Rust development!
5
6//! Complex type vector operations: STRUCT fields, LIST elements, MAP entries.
7//!
8//! `DuckDB` stores complex types as nested vectors:
9//!
10//! - **STRUCT**: a parent vector with N child vectors, one per field.
11//! - **LIST**: a parent vector holding `duckdb_list_entry { offset, length }` per row,
12//! plus a single flat child vector containing all elements end-to-end.
13//! - **MAP**: stored as `LIST<STRUCT{key, value}>` — the list's child vector is a
14//! STRUCT with two children: `key` (index 0) and `value` (index 1).
15//!
16//! # Reading vs writing
17//!
18//! - Use [`StructVector`] / [`ListVector`] / [`MapVector`] to access child vectors
19//! from input or output vectors.
20//! - Child vectors are themselves `duckdb_vector` handles — pass them to
21//! [`VectorReader`] or
22//! [`VectorWriter`] to read/write the actual values.
23//!
24//! # Example: Reading a STRUCT column
25//!
26//! ```rust,no_run
27//! use quack_rs::vector::{VectorReader, complex::StructVector};
28//! use libduckdb_sys::{duckdb_data_chunk, duckdb_data_chunk_get_vector};
29//!
30//! // Inside a table function scan callback:
31//! // let parent_vec = unsafe { duckdb_data_chunk_get_vector(chunk, 0) };
//! // let x_vec = unsafe { StructVector::get_child(parent_vec, 0) }; // field index 0
33//! // let x_reader = unsafe { VectorReader::from_vector(x_vec, row_count) };
34//! // let x: f64 = unsafe { x_reader.read_f64(row_idx) };
35//! ```
36//!
37//! # Example: Writing a LIST column
38//!
39//! ```rust,no_run
40//! use quack_rs::vector::{VectorWriter, complex::ListVector};
41//! use libduckdb_sys::{duckdb_data_chunk_get_vector, duckdb_data_chunk};
42//!
43//! // Inside a scan callback:
44//! // let list_vec = unsafe { duckdb_data_chunk_get_vector(output, 0) };
45//! // // Write 3 elements for row 0: [10, 20, 30]
//! // unsafe { ListVector::reserve(list_vec, 3) };
//! // // Write values into the child vector.
//! // let child = unsafe { ListVector::get_child(list_vec) };
//! // let mut writer = unsafe { VectorWriter::from_vector(child) };
//! // unsafe { writer.write_i64(0, 10); writer.write_i64(1, 20); writer.write_i64(2, 30); }
//! // // Tell DuckDB how many child elements were written.
//! // unsafe { ListVector::set_size(list_vec, 3) };
//! // // Write the list offset/length entry for row 0.
//! // unsafe { ListVector::set_entry(list_vec, 0, 0, 3) }; // row=0, offset=0, length=3
54//! ```
55
56use libduckdb_sys::{
57 duckdb_array_vector_get_child, duckdb_list_entry, duckdb_list_vector_get_child,
58 duckdb_list_vector_get_size, duckdb_list_vector_reserve, duckdb_list_vector_set_size,
59 duckdb_struct_vector_get_child, duckdb_vector, duckdb_vector_get_data, idx_t,
60};
61
62use crate::vector::{VectorReader, VectorWriter};
63
64// ─── STRUCT ──────────────────────────────────────────────────────────────────
65
66/// Operations on STRUCT vectors (accessing child field vectors).
67pub struct StructVector;
68
69impl StructVector {
70 /// Returns the child vector for the given field index of a STRUCT vector.
71 ///
72 /// Field indices correspond to the order of fields in the STRUCT type definition.
73 ///
74 /// # Safety
75 ///
76 /// - `vector` must be a valid `DuckDB` STRUCT vector.
77 /// - `field_idx` must be a valid field index (0 ≤ `field_idx` < number of struct fields).
78 /// - The returned vector is borrowed from `vector` and must not outlive it.
79 #[inline]
80 #[must_use]
81 pub unsafe fn get_child(vector: duckdb_vector, field_idx: usize) -> duckdb_vector {
82 // SAFETY: caller guarantees vector is a valid STRUCT vector and field_idx is valid.
83 unsafe { duckdb_struct_vector_get_child(vector, field_idx as idx_t) }
84 }
85
86 /// Creates a [`VectorReader`] for the given field of a STRUCT vector.
87 ///
88 /// # Safety
89 ///
90 /// - `vector` must be a valid `DuckDB` STRUCT vector.
91 /// - `field_idx` must be a valid field index.
92 /// - `row_count` must match the number of rows in the parent chunk.
93 pub unsafe fn field_reader(
94 vector: duckdb_vector,
95 field_idx: usize,
96 row_count: usize,
97 ) -> VectorReader {
98 let child = unsafe { Self::get_child(vector, field_idx) };
99 // SAFETY: child is a valid vector with row_count rows.
100 unsafe { VectorReader::from_vector(child, row_count) }
101 }
102
103 /// Creates a [`VectorWriter`] for the given field of a STRUCT vector.
104 ///
105 /// # Safety
106 ///
107 /// - `vector` must be a valid `DuckDB` STRUCT vector.
108 /// - `field_idx` must be a valid field index.
109 pub unsafe fn field_writer(vector: duckdb_vector, field_idx: usize) -> VectorWriter {
110 let child = unsafe { Self::get_child(vector, field_idx) };
111 // SAFETY: child is a valid writable vector.
112 unsafe { VectorWriter::from_vector(child) }
113 }
114}
115
116// ─── LIST ────────────────────────────────────────────────────────────────────
117
118/// Operations on LIST vectors.
119///
120/// A LIST vector stores a `duckdb_list_entry { offset: u64, length: u64 }` per row
121/// in the parent vector, and all element values in a flat child vector.
122///
123/// # Write workflow
124///
125/// 1. [`reserve`][ListVector::reserve] — ensure child vector has capacity.
126/// 2. Write element values into the child via [`get_child`][ListVector::get_child] + [`VectorWriter`].
127/// 3. [`set_size`][ListVector::set_size] — tell `DuckDB` how many elements were written.
128/// 4. [`set_entry`][ListVector::set_entry] — write the offset/length for each parent row.
129pub struct ListVector;
130
131impl ListVector {
132 /// Returns the child vector containing all list elements (flat, across all rows).
133 ///
134 /// # Safety
135 ///
136 /// - `vector` must be a valid `DuckDB` LIST vector.
137 /// - The returned handle is borrowed from `vector`.
138 #[inline]
139 #[must_use]
140 pub unsafe fn get_child(vector: duckdb_vector) -> duckdb_vector {
141 // SAFETY: caller guarantees vector is a valid LIST vector.
142 unsafe { duckdb_list_vector_get_child(vector) }
143 }
144
145 /// Returns the total number of elements currently in the child vector.
146 ///
147 /// # Safety
148 ///
149 /// `vector` must be a valid `DuckDB` LIST vector.
150 #[inline]
151 #[must_use]
152 pub unsafe fn get_size(vector: duckdb_vector) -> usize {
153 usize::try_from(unsafe { duckdb_list_vector_get_size(vector) }).unwrap_or(0)
154 }
155
156 /// Sets the number of elements in the child vector.
157 ///
158 /// Call after writing all element values. `DuckDB` uses this to know how many
159 /// child elements are valid.
160 ///
161 /// # Safety
162 ///
163 /// - `vector` must be a valid `DuckDB` LIST vector.
164 /// - `size` must equal the number of elements written into the child vector.
165 #[inline]
166 pub unsafe fn set_size(vector: duckdb_vector, size: usize) {
167 // SAFETY: caller guarantees vector is valid.
168 unsafe { duckdb_list_vector_set_size(vector, size as idx_t) };
169 }
170
171 /// Reserves capacity in the child vector for at least `capacity` elements.
172 ///
173 /// Call before writing elements to ensure the child vector has enough space.
174 ///
175 /// # Safety
176 ///
177 /// `vector` must be a valid `DuckDB` LIST vector.
178 #[inline]
179 pub unsafe fn reserve(vector: duckdb_vector, capacity: usize) {
180 // SAFETY: caller guarantees vector is valid.
181 unsafe { duckdb_list_vector_reserve(vector, capacity as idx_t) };
182 }
183
184 /// Writes the offset/length metadata entry for a parent row.
185 ///
186 /// This tells `DuckDB` where in the flat child vector this row's elements start
187 /// and how many elements it has.
188 ///
189 /// # Safety
190 ///
191 /// - `vector` must be a valid `DuckDB` LIST vector.
192 /// - `row_idx` must be a valid row index in the parent vector.
193 /// - `offset + length` must not exceed the size of the child vector.
194 pub unsafe fn set_entry(vector: duckdb_vector, row_idx: usize, offset: u64, length: u64) {
195 // SAFETY: vector is valid; we write to the parent vector's data at row_idx.
196 let data = unsafe { duckdb_vector_get_data(vector) };
197 // The parent stores duckdb_list_entry per row. Each entry is { offset: u64, length: u64 }.
198 let entry_ptr = unsafe { data.cast::<duckdb_list_entry>().add(row_idx) };
199 // SAFETY: entry_ptr is in bounds for the allocated vector.
200 unsafe {
201 (*entry_ptr).offset = offset;
202 (*entry_ptr).length = length;
203 }
204 }
205
206 /// Returns the `duckdb_list_entry` for a given row (for reading).
207 ///
208 /// # Safety
209 ///
210 /// - `vector` must be a valid `DuckDB` LIST vector.
211 /// - `row_idx` must be a valid row index.
212 #[must_use]
213 pub unsafe fn get_entry(vector: duckdb_vector, row_idx: usize) -> duckdb_list_entry {
214 let data = unsafe { duckdb_vector_get_data(vector) };
215 let entry_ptr = unsafe { data.cast::<duckdb_list_entry>().add(row_idx) };
216 // SAFETY: entry_ptr is valid and initialized by DuckDB or a prior set_entry call.
217 unsafe { core::ptr::read_unaligned(entry_ptr) }
218 }
219
220 /// Creates a [`VectorWriter`] for the child vector (elements).
221 ///
222 /// # Safety
223 ///
224 /// - `vector` must be a valid `DuckDB` LIST vector.
225 /// - The child must have been reserved with at least `capacity` elements.
226 pub unsafe fn child_writer(vector: duckdb_vector) -> VectorWriter {
227 let child = unsafe { Self::get_child(vector) };
228 unsafe { VectorWriter::from_vector(child) }
229 }
230
231 /// Creates a [`VectorReader`] for the child vector (reading list elements).
232 ///
233 /// # Safety
234 ///
235 /// - `vector` must be a valid `DuckDB` LIST vector.
236 /// - `element_count` must equal the total number of elements in the child.
237 pub unsafe fn child_reader(vector: duckdb_vector, element_count: usize) -> VectorReader {
238 let child = unsafe { Self::get_child(vector) };
239 unsafe { VectorReader::from_vector(child, element_count) }
240 }
241}
242
243// ─── MAP ─────────────────────────────────────────────────────────────────────
244
245/// Operations on MAP vectors.
246///
247/// `DuckDB` stores maps as `LIST<STRUCT{key: K, value: V}>`.
248/// The child of the list vector is a STRUCT vector with two fields:
249/// - field index 0: keys
250/// - field index 1: values
251///
252/// # Example
253///
254/// ```rust,no_run
255/// use quack_rs::vector::complex::MapVector;
256/// use libduckdb_sys::duckdb_vector;
257///
258/// // Reading MAP keys from a MAP vector:
259/// // let keys_vec = unsafe { MapVector::keys(map_vector) };
260/// // let vals_vec = unsafe { MapVector::values(map_vector) };
261/// ```
262pub struct MapVector;
263
264impl MapVector {
265 /// Returns the child STRUCT vector (contains both keys and values as fields).
266 ///
267 /// # Safety
268 ///
269 /// `vector` must be a valid `DuckDB` MAP vector.
270 #[inline]
271 #[must_use]
272 pub unsafe fn struct_child(vector: duckdb_vector) -> duckdb_vector {
273 // MAP is LIST<STRUCT{key,value}>, so the list child is a STRUCT vector.
274 unsafe { duckdb_list_vector_get_child(vector) }
275 }
276
277 /// Returns the keys vector (STRUCT field 0 of the MAP's child).
278 ///
279 /// # Safety
280 ///
281 /// `vector` must be a valid `DuckDB` MAP vector.
282 #[inline]
283 #[must_use]
284 pub unsafe fn keys(vector: duckdb_vector) -> duckdb_vector {
285 let struct_vec = unsafe { Self::struct_child(vector) };
286 // SAFETY: MAP child STRUCT always has key at field 0, value at field 1.
287 unsafe { duckdb_struct_vector_get_child(struct_vec, 0) }
288 }
289
290 /// Returns the values vector (STRUCT field 1 of the MAP's child).
291 ///
292 /// # Safety
293 ///
294 /// `vector` must be a valid `DuckDB` MAP vector.
295 #[inline]
296 #[must_use]
297 pub unsafe fn values(vector: duckdb_vector) -> duckdb_vector {
298 let struct_vec = unsafe { Self::struct_child(vector) };
299 // SAFETY: MAP child STRUCT always has key at field 0, value at field 1.
300 unsafe { duckdb_struct_vector_get_child(struct_vec, 1) }
301 }
302
303 /// Returns the total number of key-value pairs across all rows.
304 ///
305 /// # Safety
306 ///
307 /// `vector` must be a valid `DuckDB` MAP vector.
308 #[inline]
309 #[must_use]
310 pub unsafe fn total_entry_count(vector: duckdb_vector) -> usize {
311 usize::try_from(unsafe { duckdb_list_vector_get_size(vector) }).unwrap_or(0)
312 }
313
314 /// Reserves capacity in the MAP's child vector for at least `capacity` entries.
315 ///
316 /// # Safety
317 ///
318 /// `vector` must be a valid `DuckDB` MAP vector.
319 #[inline]
320 pub unsafe fn reserve(vector: duckdb_vector, capacity: usize) {
321 unsafe { duckdb_list_vector_reserve(vector, capacity as idx_t) };
322 }
323
324 /// Sets the total number of key-value entries written.
325 ///
326 /// # Safety
327 ///
328 /// `vector` must be a valid `DuckDB` MAP vector.
329 #[inline]
330 pub unsafe fn set_size(vector: duckdb_vector, size: usize) {
331 unsafe { duckdb_list_vector_set_size(vector, size as idx_t) };
332 }
333
334 /// Writes the offset/length metadata for a parent MAP row.
335 ///
336 /// This has the same semantics as [`ListVector::set_entry`], since MAP is a LIST.
337 ///
338 /// # Safety
339 ///
340 /// Same as [`ListVector::set_entry`].
341 #[inline]
342 pub unsafe fn set_entry(vector: duckdb_vector, row_idx: usize, offset: u64, length: u64) {
343 // SAFETY: same layout as ListVector.
344 unsafe { ListVector::set_entry(vector, row_idx, offset, length) };
345 }
346
347 /// Returns the `duckdb_list_entry` for a given MAP row (for reading).
348 ///
349 /// # Safety
350 ///
351 /// Same as [`ListVector::get_entry`].
352 #[must_use]
353 pub unsafe fn get_entry(vector: duckdb_vector, row_idx: usize) -> duckdb_list_entry {
354 unsafe { ListVector::get_entry(vector, row_idx) }
355 }
356
357 /// Creates a [`VectorWriter`] for the keys vector (STRUCT field 0).
358 ///
359 /// # Safety
360 ///
361 /// `vector` must be a valid `DuckDB` MAP vector.
362 pub unsafe fn key_writer(vector: duckdb_vector) -> VectorWriter {
363 let keys = unsafe { Self::keys(vector) };
364 // SAFETY: keys is a valid writable child vector.
365 unsafe { VectorWriter::from_vector(keys) }
366 }
367
368 /// Creates a [`VectorWriter`] for the values vector (STRUCT field 1).
369 ///
370 /// # Safety
371 ///
372 /// `vector` must be a valid `DuckDB` MAP vector.
373 pub unsafe fn value_writer(vector: duckdb_vector) -> VectorWriter {
374 let vals = unsafe { Self::values(vector) };
375 // SAFETY: vals is a valid writable child vector.
376 unsafe { VectorWriter::from_vector(vals) }
377 }
378
379 /// Creates a [`VectorReader`] for the keys vector.
380 ///
381 /// # Safety
382 ///
383 /// - `vector` must be a valid `DuckDB` MAP vector.
384 /// - `element_count` must equal the total number of key-value entries.
385 pub unsafe fn key_reader(vector: duckdb_vector, element_count: usize) -> VectorReader {
386 let keys = unsafe { Self::keys(vector) };
387 // SAFETY: keys is a valid vector with element_count elements.
388 unsafe { VectorReader::from_vector(keys, element_count) }
389 }
390
391 /// Creates a [`VectorReader`] for the values vector.
392 ///
393 /// # Safety
394 ///
395 /// - `vector` must be a valid `DuckDB` MAP vector.
396 /// - `element_count` must equal the total number of key-value entries.
397 pub unsafe fn value_reader(vector: duckdb_vector, element_count: usize) -> VectorReader {
398 let vals = unsafe { Self::values(vector) };
399 // SAFETY: vals is a valid vector with element_count elements.
400 unsafe { VectorReader::from_vector(vals, element_count) }
401 }
402}
403
404// ─── ARRAY ──────────────────────────────────────────────────────────────────
405
406/// Helpers for working with `ARRAY` vectors (fixed-size arrays).
407pub struct ArrayVector;
408
409impl ArrayVector {
410 /// Returns the child vector of an array vector.
411 ///
412 /// # Safety
413 ///
414 /// - `vector` must be a valid `DuckDB` ARRAY vector.
415 /// - The returned handle is borrowed from `vector` and must not outlive it.
416 #[inline]
417 #[must_use]
418 pub unsafe fn get_child(vector: duckdb_vector) -> duckdb_vector {
419 unsafe { duckdb_array_vector_get_child(vector) }
420 }
421}
422
#[cfg(test)]
mod tests {
    use libduckdb_sys::duckdb_list_entry;

    /// The list accessors index the parent buffer in units of
    /// `duckdb_list_entry`, so its size must be exactly two `u64`s.
    #[test]
    fn list_entry_layout() {
        // Verify duckdb_list_entry has the expected size (2 × u64 = 16 bytes).
        assert_eq!(
            core::mem::size_of::<duckdb_list_entry>(),
            16,
            "duckdb_list_entry should be {{ offset: u64, length: u64 }}"
        );
    }

    /// Exercises the row-indexed pointer arithmetic used by the list accessors
    /// on a plain buffer; the real accessors need a `DuckDB` runtime to obtain
    /// the data pointer, so they cannot be called here.
    #[test]
    fn set_and_get_list_entry() {
        // Stand-in for the list parent vector's data buffer (three rows).
        let mut rows = [
            duckdb_list_entry {
                offset: 0,
                length: 0,
            },
            duckdb_list_entry {
                offset: 0,
                length: 0,
            },
            duckdb_list_entry {
                offset: 0,
                length: 0,
            },
        ];
        let base: *mut duckdb_list_entry = rows.as_mut_ptr();

        // Write entry for row 1: offset=5, length=3.
        // SAFETY: row 1 lies inside the three-element buffer.
        unsafe {
            let entry_ptr = base.add(1);
            (*entry_ptr).offset = 5;
            (*entry_ptr).length = 3;
        }

        // Read the same row back through the pointer.
        // SAFETY: row 1 lies inside the buffer and was initialised above.
        let entry = unsafe { core::ptr::read_unaligned(base.add(1)) };
        assert_eq!(entry.offset, 5);
        assert_eq!(entry.length, 3);

        // Neighbouring rows must be untouched by the indexed write.
        assert_eq!(rows[0].offset, 0);
        assert_eq!(rows[0].length, 0);
        assert_eq!(rows[2].offset, 0);
        assert_eq!(rows[2].length, 0);
    }
}