quack_rs/data_chunk.rs
1// SPDX-License-Identifier: MIT
2// Copyright 2026 Tom F. <https://github.com/tomtom215/>
3// My way of giving something small back to the open source community
4// and encouraging more Rust development!
5
6//! Ergonomic wrapper around `DuckDB` data chunks.
7//!
8//! [`DataChunk`] provides safe, convenient access to the vectors and metadata
9//! of a `duckdb_data_chunk`, eliminating the raw FFI calls that extension
10//! authors currently need to write in every scan callback.
11//!
12//! # Example
13//!
14//! ```rust,no_run
15//! use quack_rs::data_chunk::DataChunk;
16//! use quack_rs::vector::{VectorWriter, VectorReader};
17//! use libduckdb_sys::{duckdb_function_info, duckdb_data_chunk};
18//!
19//! unsafe extern "C" fn my_scan(info: duckdb_function_info, output: duckdb_data_chunk) {
20//! let chunk = unsafe { DataChunk::from_raw(output) };
21//! let mut writer = unsafe { chunk.writer(0) };
22//! unsafe { writer.write_i64(0, 42) };
23//! unsafe { chunk.set_size(1) };
24//! }
25//! ```
26
27use libduckdb_sys::{
28 duckdb_data_chunk, duckdb_data_chunk_get_column_count, duckdb_data_chunk_get_size,
29 duckdb_data_chunk_get_vector, duckdb_data_chunk_set_size, duckdb_vector, idx_t,
30};
31
32use crate::chunk_writer::ChunkWriter;
33use crate::vector::complex::StructVector;
34use crate::vector::{StructReader, StructWriter, VectorReader, VectorWriter};
35
36/// A non-owning wrapper around a `duckdb_data_chunk`.
37///
38/// This wrapper does **not** destroy the chunk on drop — `DuckDB` owns the
39/// chunk and manages its lifetime. `DataChunk` simply provides ergonomic
40/// methods for accessing vectors and metadata within callback functions.
41pub struct DataChunk {
42 raw: duckdb_data_chunk,
43}
44
45impl DataChunk {
46 /// Wraps a raw `duckdb_data_chunk` handle.
47 ///
48 /// # Safety
49 ///
50 /// `raw` must be a valid `duckdb_data_chunk` obtained from a `DuckDB`
51 /// callback (e.g., a scan callback's `output` parameter or an aggregate
52 /// `update` callback's `input` chunk). The chunk must remain valid for
53 /// the lifetime of this wrapper.
54 #[inline]
55 #[must_use]
56 pub const unsafe fn from_raw(raw: duckdb_data_chunk) -> Self {
57 Self { raw }
58 }
59
60 /// Returns the number of rows in this data chunk.
61 #[inline]
62 #[must_use]
63 pub fn size(&self) -> usize {
64 // SAFETY: self.raw is valid per constructor contract.
65 usize::try_from(unsafe { duckdb_data_chunk_get_size(self.raw) }).unwrap_or(0)
66 }
67
68 /// Sets the number of rows in this data chunk.
69 ///
70 /// Call this in scan callbacks after writing output rows. Set to `0` to
71 /// signal end of stream.
72 ///
73 /// # Safety
74 ///
75 /// `size` must not exceed the chunk's capacity (typically 2048).
76 #[inline]
77 pub unsafe fn set_size(&self, size: usize) {
78 // SAFETY: self.raw is valid per constructor contract.
79 unsafe { duckdb_data_chunk_set_size(self.raw, size as idx_t) };
80 }
81
82 /// Returns the number of columns in this data chunk.
83 #[inline]
84 #[must_use]
85 pub fn column_count(&self) -> usize {
86 // SAFETY: self.raw is valid per constructor contract.
87 usize::try_from(unsafe { duckdb_data_chunk_get_column_count(self.raw) }).unwrap_or(0)
88 }
89
90 /// Returns the raw `duckdb_vector` handle for the given column index.
91 ///
92 /// # Safety
93 ///
94 /// `col_idx` must be less than [`column_count`][DataChunk::column_count].
95 #[inline]
96 #[must_use]
97 pub unsafe fn vector(&self, col_idx: usize) -> duckdb_vector {
98 // SAFETY: self.raw is valid and col_idx is in bounds per caller's contract.
99 unsafe { duckdb_data_chunk_get_vector(self.raw, col_idx as idx_t) }
100 }
101
102 /// Creates a [`VectorWriter`] for the given column index.
103 ///
104 /// # Safety
105 ///
106 /// - `col_idx` must be less than [`column_count`][DataChunk::column_count].
107 /// - The chunk must be a writable output chunk (not a read-only input chunk).
108 pub unsafe fn writer(&self, col_idx: usize) -> VectorWriter {
109 let vec = unsafe { self.vector(col_idx) };
110 // SAFETY: vec is a valid writable vector from the output chunk.
111 unsafe { VectorWriter::from_vector(vec) }
112 }
113
114 /// Creates a [`VectorReader`] for the given column index.
115 ///
116 /// The reader's row count is set to this chunk's current [`size`][DataChunk::size].
117 ///
118 /// # Safety
119 ///
120 /// `col_idx` must be less than [`column_count`][DataChunk::column_count].
121 pub unsafe fn reader(&self, col_idx: usize) -> VectorReader {
122 // SAFETY: self.raw is valid; col_idx is in bounds per caller's contract.
123 unsafe { VectorReader::new(self.raw, col_idx) }
124 }
125
126 /// Creates a [`StructReader`] for a STRUCT column at the given index.
127 ///
128 /// This is a convenience method that combines [`vector`][Self::vector] with
129 /// [`StructReader::new`].
130 ///
131 /// # Safety
132 ///
133 /// - `col_idx` must be less than [`column_count`][Self::column_count].
134 /// - The column at `col_idx` must have a STRUCT type with `field_count` fields.
135 pub unsafe fn struct_reader(&self, col_idx: usize, field_count: usize) -> StructReader {
136 let vec = unsafe { self.vector(col_idx) };
137 // SAFETY: vec is a valid STRUCT vector per caller's contract.
138 unsafe { StructReader::new(vec, field_count, self.size()) }
139 }
140
141 /// Creates a [`VectorReader`] for a field of a STRUCT column.
142 ///
143 /// Convenience for accessing a specific field in a STRUCT input column.
144 ///
145 /// # Safety
146 ///
147 /// - `col_idx` must be less than [`column_count`][Self::column_count].
148 /// - The column at `col_idx` must have a STRUCT type.
149 /// - `field_idx` must be a valid field index within the STRUCT.
150 pub unsafe fn struct_field_reader(&self, col_idx: usize, field_idx: usize) -> VectorReader {
151 let vec = unsafe { self.vector(col_idx) };
152 // SAFETY: vec is a valid STRUCT vector per caller's contract.
153 unsafe { StructVector::field_reader(vec, field_idx, self.size()) }
154 }
155
156 /// Creates a [`StructWriter`] for a STRUCT column at the given index.
157 ///
158 /// This is a convenience method that combines [`vector`][Self::vector] with
159 /// [`StructWriter::new`].
160 ///
161 /// # Safety
162 ///
163 /// - `col_idx` must be less than [`column_count`][Self::column_count].
164 /// - The column at `col_idx` must have a STRUCT type with `field_count` fields.
165 /// - The chunk must be a writable output chunk.
166 pub unsafe fn struct_writer(&self, col_idx: usize, field_count: usize) -> StructWriter {
167 let vec = unsafe { self.vector(col_idx) };
168 // SAFETY: vec is a valid STRUCT vector per caller's contract.
169 unsafe { StructWriter::new(vec, field_count) }
170 }
171
172 /// Creates a [`ChunkWriter`] for this output data chunk.
173 ///
174 /// The [`ChunkWriter`] tracks rows via [`next_row()`][ChunkWriter::next_row]
175 /// and automatically calls `set_size` on drop.
176 ///
177 /// # Safety
178 ///
179 /// This chunk must be a valid, writable output chunk from a table function
180 /// scan callback.
181 pub const unsafe fn into_chunk_writer(self) -> ChunkWriter {
182 // SAFETY: self.raw is valid per constructor's contract.
183 unsafe { ChunkWriter::new(self.raw) }
184 }
185
186 /// Returns the raw `duckdb_data_chunk` handle.
187 #[inline]
188 #[must_use]
189 pub const fn as_raw(&self) -> duckdb_data_chunk {
190 self.raw
191 }
192}
193
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins the size of the wrapper to the size of a `usize`.
    #[test]
    fn size_of_data_chunk() {
        let wrapper_bytes = std::mem::size_of::<DataChunk>();
        let word_bytes = std::mem::size_of::<usize>();
        assert_eq!(wrapper_bytes, word_bytes);
    }
}
205}