quack-rs 0.12.0

Production-grade Rust SDK for building DuckDB loadable extensions
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
// SPDX-License-Identifier: MIT
// Copyright 2026 Tom F. <https://github.com/tomtom215/>
// My way of giving something small back to the open source community
// and encouraging more Rust development!

//! Complex type vector operations: STRUCT fields, LIST elements, MAP entries, ARRAY children.
//!
//! `DuckDB` stores complex types as nested vectors:
//!
//! - **STRUCT**: a parent vector with N child vectors, one per field.
//! - **LIST**: a parent vector holding `duckdb_list_entry { offset, length }` per row,
//!   plus a single flat child vector containing all elements end-to-end.
//! - **MAP**: stored as `LIST<STRUCT{key, value}>` — the list's child vector is a
//!   STRUCT with two children: `key` (index 0) and `value` (index 1).
//!
//! # Reading vs writing
//!
//! - Use [`StructVector`] / [`ListVector`] / [`MapVector`] / [`ArrayVector`] to access
//!   child vectors from input or output vectors.
//! - Child vectors are themselves `duckdb_vector` handles — pass them to
//!   [`VectorReader`] or
//!   [`VectorWriter`] to read/write the actual values.
//!
//! # Example: Reading a STRUCT column
//!
//! ```rust,no_run
//! use quack_rs::vector::{VectorReader, complex::StructVector};
//! use libduckdb_sys::{duckdb_data_chunk, duckdb_data_chunk_get_vector};
//!
//! // Inside a table function scan callback:
//! // let parent_vec = unsafe { duckdb_data_chunk_get_vector(chunk, 0) };
//! // let x_vec = StructVector::get_child(parent_vec, 0); // field index 0
//! // let x_reader = unsafe { VectorReader::from_vector(x_vec, row_count) };
//! // let x: f64 = unsafe { x_reader.read_f64(row_idx) };
//! ```
//!
//! # Example: Writing a LIST column
//!
//! ```rust,no_run
//! use quack_rs::vector::{VectorWriter, complex::ListVector};
//! use libduckdb_sys::{duckdb_data_chunk_get_vector, duckdb_data_chunk};
//!
//! // Inside a scan callback:
//! // let list_vec = unsafe { duckdb_data_chunk_get_vector(output, 0) };
//! // // Write 3 elements for row 0: [10, 20, 30]
//! // ListVector::reserve(list_vec, 3);
//! // ListVector::set_size(list_vec, 3);
//! // // Write the list offset/length entry for row 0.
//! // ListVector::set_entry(list_vec, 0, 0, 3); // row=0, offset=0, length=3
//! // // Write values into the child vector.
//! // let child = ListVector::get_child(list_vec);
//! // let mut writer = unsafe { VectorWriter::from_vector(child) };
//! // unsafe { writer.write_i64(0, 10); writer.write_i64(1, 20); writer.write_i64(2, 30); }
//! ```

use libduckdb_sys::{
    duckdb_array_vector_get_child, duckdb_list_entry, duckdb_list_vector_get_child,
    duckdb_list_vector_get_size, duckdb_list_vector_reserve, duckdb_list_vector_set_size,
    duckdb_struct_vector_get_child, duckdb_vector, duckdb_vector_get_data, idx_t,
};

use crate::vector::{VectorReader, VectorWriter};

// ─── STRUCT ──────────────────────────────────────────────────────────────────

/// Accessors for the per-field child vectors of a STRUCT vector.
pub struct StructVector;

impl StructVector {
    /// Fetches the child vector backing one field of a STRUCT vector.
    ///
    /// Fields are numbered in the order they appear in the STRUCT type definition.
    ///
    /// # Safety
    ///
    /// - `vector` must be a valid `DuckDB` STRUCT vector.
    /// - `field_idx` must be in range (0 ≤ `field_idx` < number of struct fields).
    /// - The returned handle borrows from `vector` and must not outlive it.
    #[inline]
    #[must_use]
    pub unsafe fn get_child(vector: duckdb_vector, field_idx: usize) -> duckdb_vector {
        let index = field_idx as idx_t;
        // SAFETY: the caller guarantees `vector` is a valid STRUCT vector and that
        // `field_idx` names one of its fields.
        unsafe { duckdb_struct_vector_get_child(vector, index) }
    }

    /// Builds a [`VectorReader`] over a single field of a STRUCT vector.
    ///
    /// # Safety
    ///
    /// - `vector` must be a valid `DuckDB` STRUCT vector.
    /// - `field_idx` must be a valid field index.
    /// - `row_count` must match the number of rows in the parent chunk.
    pub unsafe fn field_reader(
        vector: duckdb_vector,
        field_idx: usize,
        row_count: usize,
    ) -> VectorReader {
        // SAFETY: the caller upholds the `get_child` contract, so the field handle is
        // valid, and guarantees that it holds `row_count` rows.
        unsafe {
            let field = Self::get_child(vector, field_idx);
            VectorReader::from_vector(field, row_count)
        }
    }

    /// Builds a [`VectorWriter`] over a single field of a STRUCT vector.
    ///
    /// # Safety
    ///
    /// - `vector` must be a valid `DuckDB` STRUCT vector.
    /// - `field_idx` must be a valid field index.
    pub unsafe fn field_writer(vector: duckdb_vector, field_idx: usize) -> VectorWriter {
        // SAFETY: the caller upholds the `get_child` contract, so the field handle is a
        // valid writable vector.
        unsafe {
            let field = Self::get_child(vector, field_idx);
            VectorWriter::from_vector(field)
        }
    }
}

// ─── LIST ────────────────────────────────────────────────────────────────────

/// Operations on LIST vectors.
///
/// A LIST vector stores a `duckdb_list_entry { offset: u64, length: u64 }` per row
/// in the parent vector, and all element values in a flat child vector.
///
/// # Write workflow
///
/// 1. [`reserve`][ListVector::reserve] — ensure child vector has capacity.
/// 2. Write element values into the child via [`get_child`][ListVector::get_child] + [`VectorWriter`].
/// 3. [`set_size`][ListVector::set_size] — tell `DuckDB` how many elements were written.
/// 4. [`set_entry`][ListVector::set_entry] — write the offset/length for each parent row.
pub struct ListVector;

impl ListVector {
    /// Returns the child vector containing all list elements (flat, across all rows).
    ///
    /// # Safety
    ///
    /// - `vector` must be a valid `DuckDB` LIST vector.
    /// - The returned handle is borrowed from `vector`.
    #[inline]
    #[must_use]
    pub unsafe fn get_child(vector: duckdb_vector) -> duckdb_vector {
        // SAFETY: caller guarantees vector is a valid LIST vector.
        unsafe { duckdb_list_vector_get_child(vector) }
    }

    /// Returns the total number of elements currently in the child vector.
    ///
    /// # Safety
    ///
    /// `vector` must be a valid `DuckDB` LIST vector.
    #[inline]
    #[must_use]
    pub unsafe fn get_size(vector: duckdb_vector) -> usize {
        usize::try_from(unsafe { duckdb_list_vector_get_size(vector) }).unwrap_or(0)
    }

    /// Sets the number of elements in the child vector.
    ///
    /// Call after writing all element values. `DuckDB` uses this to know how many
    /// child elements are valid.
    ///
    /// # Safety
    ///
    /// - `vector` must be a valid `DuckDB` LIST vector.
    /// - `size` must equal the number of elements written into the child vector.
    #[inline]
    pub unsafe fn set_size(vector: duckdb_vector, size: usize) {
        // SAFETY: caller guarantees vector is valid.
        unsafe { duckdb_list_vector_set_size(vector, size as idx_t) };
    }

    /// Reserves capacity in the child vector for at least `capacity` elements.
    ///
    /// Call before writing elements to ensure the child vector has enough space.
    ///
    /// # Safety
    ///
    /// `vector` must be a valid `DuckDB` LIST vector.
    #[inline]
    pub unsafe fn reserve(vector: duckdb_vector, capacity: usize) {
        // SAFETY: caller guarantees vector is valid.
        unsafe { duckdb_list_vector_reserve(vector, capacity as idx_t) };
    }

    /// Writes the offset/length metadata entry for a parent row.
    ///
    /// This tells `DuckDB` where in the flat child vector this row's elements start
    /// and how many elements it has.
    ///
    /// # Safety
    ///
    /// - `vector` must be a valid `DuckDB` LIST vector.
    /// - `row_idx` must be a valid row index in the parent vector.
    /// - `offset + length` must not exceed the size of the child vector.
    pub unsafe fn set_entry(vector: duckdb_vector, row_idx: usize, offset: u64, length: u64) {
        // SAFETY: vector is valid; we write to the parent vector's data at row_idx.
        let data = unsafe { duckdb_vector_get_data(vector) };
        // The parent stores duckdb_list_entry per row. Each entry is { offset: u64, length: u64 }.
        let entry_ptr = unsafe { data.cast::<duckdb_list_entry>().add(row_idx) };
        // SAFETY: entry_ptr is in bounds for the allocated vector.
        unsafe {
            (*entry_ptr).offset = offset;
            (*entry_ptr).length = length;
        }
    }

    /// Returns the `duckdb_list_entry` for a given row (for reading).
    ///
    /// # Safety
    ///
    /// - `vector` must be a valid `DuckDB` LIST vector.
    /// - `row_idx` must be a valid row index.
    #[must_use]
    pub unsafe fn get_entry(vector: duckdb_vector, row_idx: usize) -> duckdb_list_entry {
        let data = unsafe { duckdb_vector_get_data(vector) };
        let entry_ptr = unsafe { data.cast::<duckdb_list_entry>().add(row_idx) };
        // SAFETY: entry_ptr is valid and initialized by DuckDB or a prior set_entry call.
        unsafe { core::ptr::read_unaligned(entry_ptr) }
    }

    /// Creates a [`VectorWriter`] for the child vector (elements).
    ///
    /// # Safety
    ///
    /// - `vector` must be a valid `DuckDB` LIST vector.
    /// - The child must have been reserved with at least `capacity` elements.
    pub unsafe fn child_writer(vector: duckdb_vector) -> VectorWriter {
        let child = unsafe { Self::get_child(vector) };
        unsafe { VectorWriter::from_vector(child) }
    }

    /// Creates a [`VectorReader`] for the child vector (reading list elements).
    ///
    /// # Safety
    ///
    /// - `vector` must be a valid `DuckDB` LIST vector.
    /// - `element_count` must equal the total number of elements in the child.
    pub unsafe fn child_reader(vector: duckdb_vector, element_count: usize) -> VectorReader {
        let child = unsafe { Self::get_child(vector) };
        unsafe { VectorReader::from_vector(child, element_count) }
    }
}

// ─── MAP ─────────────────────────────────────────────────────────────────────

/// Operations on MAP vectors.
///
/// `DuckDB` represents a MAP as `LIST<STRUCT{key: K, value: V}>`: the list's child
/// vector is a STRUCT vector with exactly two fields:
/// - field index 0: keys
/// - field index 1: values
///
/// # Example
///
/// ```rust,no_run
/// use quack_rs::vector::complex::MapVector;
/// use libduckdb_sys::duckdb_vector;
///
/// // Reading MAP keys from a MAP vector:
/// // let keys_vec = unsafe { MapVector::keys(map_vector) };
/// // let vals_vec = unsafe { MapVector::values(map_vector) };
/// ```
pub struct MapVector;

impl MapVector {
    /// Position of the key field inside the MAP's child STRUCT.
    const KEY_FIELD: idx_t = 0;
    /// Position of the value field inside the MAP's child STRUCT.
    const VALUE_FIELD: idx_t = 1;

    /// Returns the MAP's child STRUCT vector, whose fields are the keys and the values.
    ///
    /// # Safety
    ///
    /// `vector` must be a valid `DuckDB` MAP vector.
    #[inline]
    #[must_use]
    pub unsafe fn struct_child(vector: duckdb_vector) -> duckdb_vector {
        // SAFETY: a MAP is a LIST<STRUCT{key,value}>, so the LIST child accessor applies
        // and yields the STRUCT vector.
        unsafe { ListVector::get_child(vector) }
    }

    /// Returns the keys vector (field 0 of the MAP's child STRUCT).
    ///
    /// # Safety
    ///
    /// `vector` must be a valid `DuckDB` MAP vector.
    #[inline]
    #[must_use]
    pub unsafe fn keys(vector: duckdb_vector) -> duckdb_vector {
        // SAFETY: the MAP's child STRUCT always carries the key at field 0.
        unsafe { duckdb_struct_vector_get_child(Self::struct_child(vector), Self::KEY_FIELD) }
    }

    /// Returns the values vector (field 1 of the MAP's child STRUCT).
    ///
    /// # Safety
    ///
    /// `vector` must be a valid `DuckDB` MAP vector.
    #[inline]
    #[must_use]
    pub unsafe fn values(vector: duckdb_vector) -> duckdb_vector {
        // SAFETY: the MAP's child STRUCT always carries the value at field 1.
        unsafe { duckdb_struct_vector_get_child(Self::struct_child(vector), Self::VALUE_FIELD) }
    }

    /// Returns how many key-value pairs the MAP holds in total, summed over all rows.
    ///
    /// # Safety
    ///
    /// `vector` must be a valid `DuckDB` MAP vector.
    #[inline]
    #[must_use]
    pub unsafe fn total_entry_count(vector: duckdb_vector) -> usize {
        // SAFETY: a MAP shares the LIST layout, so the LIST size query applies.
        unsafe { ListVector::get_size(vector) }
    }

    /// Ensures the MAP's child vector can hold at least `capacity` entries.
    ///
    /// # Safety
    ///
    /// `vector` must be a valid `DuckDB` MAP vector.
    #[inline]
    pub unsafe fn reserve(vector: duckdb_vector, capacity: usize) {
        // SAFETY: a MAP shares the LIST layout, so the LIST reserve call applies.
        unsafe { ListVector::reserve(vector, capacity) };
    }

    /// Records the total number of key-value entries that have been written.
    ///
    /// # Safety
    ///
    /// `vector` must be a valid `DuckDB` MAP vector.
    #[inline]
    pub unsafe fn set_size(vector: duckdb_vector, size: usize) {
        // SAFETY: a MAP shares the LIST layout, so the LIST size setter applies.
        unsafe { ListVector::set_size(vector, size) };
    }

    /// Stores the offset/length metadata for one parent MAP row.
    ///
    /// Semantics are identical to [`ListVector::set_entry`], because a MAP is a LIST.
    ///
    /// # Safety
    ///
    /// Same as [`ListVector::set_entry`].
    #[inline]
    pub unsafe fn set_entry(vector: duckdb_vector, row_idx: usize, offset: u64, length: u64) {
        // SAFETY: the MAP parent vector uses the same per-row entry layout as a LIST.
        unsafe { ListVector::set_entry(vector, row_idx, offset, length) };
    }

    /// Reads the `duckdb_list_entry` describing one MAP row.
    ///
    /// # Safety
    ///
    /// Same as [`ListVector::get_entry`].
    #[must_use]
    pub unsafe fn get_entry(vector: duckdb_vector, row_idx: usize) -> duckdb_list_entry {
        // SAFETY: the MAP parent vector uses the same per-row entry layout as a LIST.
        unsafe { ListVector::get_entry(vector, row_idx) }
    }

    /// Builds a [`VectorWriter`] over the keys vector (STRUCT field 0).
    ///
    /// # Safety
    ///
    /// `vector` must be a valid `DuckDB` MAP vector.
    pub unsafe fn key_writer(vector: duckdb_vector) -> VectorWriter {
        // SAFETY: the keys handle comes from a valid MAP vector and is writable.
        unsafe { VectorWriter::from_vector(Self::keys(vector)) }
    }

    /// Builds a [`VectorWriter`] over the values vector (STRUCT field 1).
    ///
    /// # Safety
    ///
    /// `vector` must be a valid `DuckDB` MAP vector.
    pub unsafe fn value_writer(vector: duckdb_vector) -> VectorWriter {
        // SAFETY: the values handle comes from a valid MAP vector and is writable.
        unsafe { VectorWriter::from_vector(Self::values(vector)) }
    }

    /// Builds a [`VectorReader`] over the keys vector.
    ///
    /// # Safety
    ///
    /// - `vector` must be a valid `DuckDB` MAP vector.
    /// - `element_count` must equal the total number of key-value entries.
    pub unsafe fn key_reader(vector: duckdb_vector, element_count: usize) -> VectorReader {
        // SAFETY: the keys handle is valid and the caller vouches for `element_count`.
        unsafe { VectorReader::from_vector(Self::keys(vector), element_count) }
    }

    /// Builds a [`VectorReader`] over the values vector.
    ///
    /// # Safety
    ///
    /// - `vector` must be a valid `DuckDB` MAP vector.
    /// - `element_count` must equal the total number of key-value entries.
    pub unsafe fn value_reader(vector: duckdb_vector, element_count: usize) -> VectorReader {
        // SAFETY: the values handle is valid and the caller vouches for `element_count`.
        unsafe { VectorReader::from_vector(Self::values(vector), element_count) }
    }
}

// ─── ARRAY ──────────────────────────────────────────────────────────────────

/// Helpers for working with `ARRAY` vectors (fixed-size arrays).
///
/// This is a stateless namespace type: it has no fields and only groups the
/// associated functions below.
pub struct ArrayVector;

impl ArrayVector {
    /// Returns the child vector of an array vector.
    ///
    /// This is a thin wrapper around `duckdb_array_vector_get_child`; the handle is
    /// passed through unchanged.
    // NOTE(review): presumably the child stores the elements of every row back to
    // back, as the LIST child does — confirm against the DuckDB C API docs.
    ///
    /// # Safety
    ///
    /// - `vector` must be a valid `DuckDB` ARRAY vector.
    /// - The returned handle is borrowed from `vector` and must not outlive it.
    #[inline]
    #[must_use]
    pub unsafe fn get_child(vector: duckdb_vector) -> duckdb_vector {
        // SAFETY: caller guarantees vector is a valid ARRAY vector.
        unsafe { duckdb_array_vector_get_child(vector) }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use libduckdb_sys::duckdb_list_entry;

    /// The per-row accessors index the parent buffer as an array of
    /// `duckdb_list_entry`, so the entry must be exactly two `u64`s wide.
    #[test]
    fn list_entry_layout() {
        // Verify duckdb_list_entry has the expected size (2 × u64 = 16 bytes).
        assert_eq!(
            core::mem::size_of::<duckdb_list_entry>(),
            16,
            "duckdb_list_entry should be {{ offset: u64, length: u64 }}"
        );
    }

    /// Exercises the `.add(row_idx)` pointer arithmetic used by the entry accessors
    /// on a stand-in buffer. No FFI call is possible without a DuckDB runtime, so the
    /// accessors themselves are not invoked here.
    #[test]
    fn set_and_get_list_entry() {
        // Simulate the list parent vector data buffer (three rows, all zeroed).
        let mut rows = [
            duckdb_list_entry {
                offset: 0,
                length: 0,
            },
            duckdb_list_entry {
                offset: 0,
                length: 0,
            },
            duckdb_list_entry {
                offset: 0,
                length: 0,
            },
        ];
        let base = rows.as_mut_ptr();
        // Use a non-zero row so that the index arithmetic is actually checked.
        let row_idx = 1;

        // Write entry for row 1: offset=5, length=3, then read it back the same way
        // a reader would.
        // SAFETY: `row_idx` is less than `rows.len()`, so the pointer stays in bounds.
        let read_back = unsafe {
            let entry_ptr = base.add(row_idx);
            (*entry_ptr).offset = 5;
            (*entry_ptr).length = 3;
            core::ptr::read_unaligned(entry_ptr)
        };
        assert_eq!(read_back.offset, 5);
        assert_eq!(read_back.length, 3);

        // Only the targeted row may change; its neighbours must stay zeroed.
        assert_eq!((rows[0].offset, rows[0].length), (0, 0));
        assert_eq!((rows[1].offset, rows[1].length), (5, 3));
        assert_eq!((rows[2].offset, rows[2].length), (0, 0));
    }
}