Skip to main content

quack_rs/
data_chunk.rs

// SPDX-License-Identifier: MIT
// Copyright 2026 Tom F. <https://github.com/tomtom215/>
// My way of giving something small back to the open source community
// and encouraging more Rust development!

//! Ergonomic wrapper around `DuckDB` data chunks.
//!
//! [`DataChunk`] provides safe, convenient access to the vectors and metadata
//! of a `duckdb_data_chunk`, eliminating the raw FFI calls that extension
//! authors currently need to write in every scan callback.
//!
//! # Example
//!
//! ```rust,no_run
//! use quack_rs::data_chunk::DataChunk;
//! use quack_rs::vector::{VectorWriter, VectorReader};
//! use libduckdb_sys::{duckdb_function_info, duckdb_data_chunk};
//!
//! unsafe extern "C" fn my_scan(info: duckdb_function_info, output: duckdb_data_chunk) {
//!     let chunk = unsafe { DataChunk::from_raw(output) };
//!     let mut writer = unsafe { chunk.writer(0) };
//!     unsafe { writer.write_i64(0, 42) };
//!     unsafe { chunk.set_size(1) };
//! }
//! ```
26
27use libduckdb_sys::{
28    duckdb_data_chunk, duckdb_data_chunk_get_column_count, duckdb_data_chunk_get_size,
29    duckdb_data_chunk_get_vector, duckdb_data_chunk_set_size, duckdb_vector, idx_t,
30};
31
32use crate::chunk_writer::ChunkWriter;
33use crate::vector::complex::StructVector;
34use crate::vector::{StructReader, StructWriter, VectorReader, VectorWriter};
35
36/// A non-owning wrapper around a `duckdb_data_chunk`.
37///
38/// This wrapper does **not** destroy the chunk on drop — `DuckDB` owns the
39/// chunk and manages its lifetime. `DataChunk` simply provides ergonomic
40/// methods for accessing vectors and metadata within callback functions.
41pub struct DataChunk {
42    raw: duckdb_data_chunk,
43}
44
45impl DataChunk {
46    /// Wraps a raw `duckdb_data_chunk` handle.
47    ///
48    /// # Safety
49    ///
50    /// `raw` must be a valid `duckdb_data_chunk` obtained from a `DuckDB`
51    /// callback (e.g., a scan callback's `output` parameter or an aggregate
52    /// `update` callback's `input` chunk). The chunk must remain valid for
53    /// the lifetime of this wrapper.
54    #[inline]
55    #[must_use]
56    pub const unsafe fn from_raw(raw: duckdb_data_chunk) -> Self {
57        Self { raw }
58    }
59
60    /// Returns the number of rows in this data chunk.
61    #[inline]
62    #[must_use]
63    pub fn size(&self) -> usize {
64        // SAFETY: self.raw is valid per constructor contract.
65        usize::try_from(unsafe { duckdb_data_chunk_get_size(self.raw) }).unwrap_or(0)
66    }
67
68    /// Sets the number of rows in this data chunk.
69    ///
70    /// Call this in scan callbacks after writing output rows. Set to `0` to
71    /// signal end of stream.
72    ///
73    /// # Safety
74    ///
75    /// `size` must not exceed the chunk's capacity (typically 2048).
76    #[inline]
77    pub unsafe fn set_size(&self, size: usize) {
78        // SAFETY: self.raw is valid per constructor contract.
79        unsafe { duckdb_data_chunk_set_size(self.raw, size as idx_t) };
80    }
81
82    /// Returns the number of columns in this data chunk.
83    #[inline]
84    #[must_use]
85    pub fn column_count(&self) -> usize {
86        // SAFETY: self.raw is valid per constructor contract.
87        usize::try_from(unsafe { duckdb_data_chunk_get_column_count(self.raw) }).unwrap_or(0)
88    }
89
90    /// Returns the raw `duckdb_vector` handle for the given column index.
91    ///
92    /// # Safety
93    ///
94    /// `col_idx` must be less than [`column_count`][DataChunk::column_count].
95    #[inline]
96    #[must_use]
97    pub unsafe fn vector(&self, col_idx: usize) -> duckdb_vector {
98        // SAFETY: self.raw is valid and col_idx is in bounds per caller's contract.
99        unsafe { duckdb_data_chunk_get_vector(self.raw, col_idx as idx_t) }
100    }
101
102    /// Creates a [`VectorWriter`] for the given column index.
103    ///
104    /// # Safety
105    ///
106    /// - `col_idx` must be less than [`column_count`][DataChunk::column_count].
107    /// - The chunk must be a writable output chunk (not a read-only input chunk).
108    pub unsafe fn writer(&self, col_idx: usize) -> VectorWriter {
109        let vec = unsafe { self.vector(col_idx) };
110        // SAFETY: vec is a valid writable vector from the output chunk.
111        unsafe { VectorWriter::from_vector(vec) }
112    }
113
114    /// Creates a [`VectorReader`] for the given column index.
115    ///
116    /// The reader's row count is set to this chunk's current [`size`][DataChunk::size].
117    ///
118    /// # Safety
119    ///
120    /// `col_idx` must be less than [`column_count`][DataChunk::column_count].
121    pub unsafe fn reader(&self, col_idx: usize) -> VectorReader {
122        // SAFETY: self.raw is valid; col_idx is in bounds per caller's contract.
123        unsafe { VectorReader::new(self.raw, col_idx) }
124    }
125
126    /// Creates a [`StructReader`] for a STRUCT column at the given index.
127    ///
128    /// This is a convenience method that combines [`vector`][Self::vector] with
129    /// [`StructReader::new`].
130    ///
131    /// # Safety
132    ///
133    /// - `col_idx` must be less than [`column_count`][Self::column_count].
134    /// - The column at `col_idx` must have a STRUCT type with `field_count` fields.
135    pub unsafe fn struct_reader(&self, col_idx: usize, field_count: usize) -> StructReader {
136        let vec = unsafe { self.vector(col_idx) };
137        // SAFETY: vec is a valid STRUCT vector per caller's contract.
138        unsafe { StructReader::new(vec, field_count, self.size()) }
139    }
140
141    /// Creates a [`VectorReader`] for a field of a STRUCT column.
142    ///
143    /// Convenience for accessing a specific field in a STRUCT input column.
144    ///
145    /// # Safety
146    ///
147    /// - `col_idx` must be less than [`column_count`][Self::column_count].
148    /// - The column at `col_idx` must have a STRUCT type.
149    /// - `field_idx` must be a valid field index within the STRUCT.
150    pub unsafe fn struct_field_reader(&self, col_idx: usize, field_idx: usize) -> VectorReader {
151        let vec = unsafe { self.vector(col_idx) };
152        // SAFETY: vec is a valid STRUCT vector per caller's contract.
153        unsafe { StructVector::field_reader(vec, field_idx, self.size()) }
154    }
155
156    /// Creates a [`StructWriter`] for a STRUCT column at the given index.
157    ///
158    /// This is a convenience method that combines [`vector`][Self::vector] with
159    /// [`StructWriter::new`].
160    ///
161    /// # Safety
162    ///
163    /// - `col_idx` must be less than [`column_count`][Self::column_count].
164    /// - The column at `col_idx` must have a STRUCT type with `field_count` fields.
165    /// - The chunk must be a writable output chunk.
166    pub unsafe fn struct_writer(&self, col_idx: usize, field_count: usize) -> StructWriter {
167        let vec = unsafe { self.vector(col_idx) };
168        // SAFETY: vec is a valid STRUCT vector per caller's contract.
169        unsafe { StructWriter::new(vec, field_count) }
170    }
171
172    /// Creates a [`ChunkWriter`] for this output data chunk.
173    ///
174    /// The [`ChunkWriter`] tracks rows via [`next_row()`][ChunkWriter::next_row]
175    /// and automatically calls `set_size` on drop.
176    ///
177    /// # Safety
178    ///
179    /// This chunk must be a valid, writable output chunk from a table function
180    /// scan callback.
181    pub const unsafe fn into_chunk_writer(self) -> ChunkWriter {
182        // SAFETY: self.raw is valid per constructor's contract.
183        unsafe { ChunkWriter::new(self.raw) }
184    }
185
186    /// Returns the raw `duckdb_data_chunk` handle.
187    #[inline]
188    #[must_use]
189    pub const fn as_raw(&self) -> duckdb_data_chunk {
190        self.raw
191    }
192}
193
#[cfg(test)]
mod tests {
    use super::DataChunk;
    use std::mem::size_of;

    /// The wrapper stores only the raw handle, so it must have the same size
    /// as a `usize`.
    #[test]
    fn size_of_data_chunk() {
        let wrapper_size = size_of::<DataChunk>();
        let word_size = size_of::<usize>();
        assert_eq!(wrapper_size, word_size);
    }
}
205}