duckdb/core/
vector.rs

1use std::{any::Any, ffi::CString, slice};
2
3use libduckdb_sys::{
4    duckdb_array_type_array_size, duckdb_array_vector_get_child, duckdb_validity_row_is_valid, DuckDbString,
5};
6
7use super::LogicalTypeHandle;
8use crate::ffi::{
9    duckdb_list_entry, duckdb_list_vector_get_child, duckdb_list_vector_get_size, duckdb_list_vector_reserve,
10    duckdb_list_vector_set_size, duckdb_struct_type_child_count, duckdb_struct_type_child_name,
11    duckdb_struct_vector_get_child, duckdb_validity_set_row_invalid, duckdb_vector,
12    duckdb_vector_assign_string_element, duckdb_vector_assign_string_element_len,
13    duckdb_vector_ensure_validity_writable, duckdb_vector_get_column_type, duckdb_vector_get_data,
14    duckdb_vector_get_validity, duckdb_vector_size,
15};
16
/// Vector trait.
///
/// Gives access to the implementing value as [`Any`], so that code holding a
/// `dyn Vector` can downcast it back to the concrete vector type.
pub trait Vector {
    /// Returns a shared reference to the value behind this trait object as
    /// [`Any`], suitable for `downcast_ref`.
    fn as_any(&self) -> &dyn Any;
    /// Returns a mutable reference to the value behind this trait object as
    /// [`Any`], suitable for `downcast_mut`.
    fn as_mut_any(&mut self) -> &mut dyn Any;
}
24
25/// A flat vector
26pub struct FlatVector {
27    ptr: duckdb_vector,
28    capacity: usize,
29}
30
31impl From<duckdb_vector> for FlatVector {
32    fn from(ptr: duckdb_vector) -> Self {
33        Self {
34            ptr,
35            capacity: unsafe { duckdb_vector_size() as usize },
36        }
37    }
38}
39
40impl Vector for FlatVector {
41    fn as_any(&self) -> &dyn Any {
42        self
43    }
44
45    fn as_mut_any(&mut self) -> &mut dyn Any {
46        self
47    }
48}
49
50impl FlatVector {
51    fn with_capacity(ptr: duckdb_vector, capacity: usize) -> Self {
52        Self { ptr, capacity }
53    }
54
55    /// Returns the capacity of the vector
56    pub fn capacity(&self) -> usize {
57        self.capacity
58    }
59
60    /// Returns true if the row at the given index is null
61    pub fn row_is_null(&self, row: u64) -> bool {
62        // use idx_t entry_idx = row_idx / 64; idx_t idx_in_entry = row_idx % 64; bool is_valid = validity_mask[entry_idx] & (1 « idx_in_entry);
63        // as the row is valid function is slower
64        let valid = unsafe {
65            let validity = duckdb_vector_get_validity(self.ptr);
66
67            // validity can return a NULL pointer if the entire vector is valid
68            if validity.is_null() {
69                return false;
70            }
71
72            duckdb_validity_row_is_valid(validity, row)
73        };
74
75        !valid
76    }
77
78    /// Returns an unsafe mutable pointer to the vector’s
79    pub fn as_mut_ptr<T>(&self) -> *mut T {
80        unsafe { duckdb_vector_get_data(self.ptr).cast() }
81    }
82
83    /// Returns a slice of the vector
84    pub fn as_slice<T>(&self) -> &[T] {
85        unsafe { slice::from_raw_parts(self.as_mut_ptr(), self.capacity()) }
86    }
87
88    /// Returns a slice of the vector up to a certain length
89    pub fn as_slice_with_len<T>(&self, len: usize) -> &[T] {
90        unsafe { slice::from_raw_parts(self.as_mut_ptr(), len) }
91    }
92
93    /// Returns a mutable slice of the vector
94    pub fn as_mut_slice<T>(&mut self) -> &mut [T] {
95        unsafe { slice::from_raw_parts_mut(self.as_mut_ptr(), self.capacity()) }
96    }
97
98    /// Returns a mutable slice of the vector up to a certain length
99    pub fn as_mut_slice_with_len<T>(&mut self, len: usize) -> &mut [T] {
100        unsafe { slice::from_raw_parts_mut(self.as_mut_ptr(), len) }
101    }
102
103    /// Returns the logical type of the vector
104    pub fn logical_type(&self) -> LogicalTypeHandle {
105        unsafe { LogicalTypeHandle::new(duckdb_vector_get_column_type(self.ptr)) }
106    }
107
108    /// Set row as null
109    pub fn set_null(&mut self, row: usize) {
110        unsafe {
111            duckdb_vector_ensure_validity_writable(self.ptr);
112            let idx = duckdb_vector_get_validity(self.ptr);
113            duckdb_validity_set_row_invalid(idx, row as u64);
114        }
115    }
116
117    /// Copy data to the vector.
118    pub fn copy<T: Copy>(&mut self, data: &[T]) {
119        assert!(data.len() <= self.capacity());
120        self.as_mut_slice::<T>()[0..data.len()].copy_from_slice(data);
121    }
122}
123
/// Trait for writing a single value of type `T` into a vector at a given index.
pub trait Inserter<T> {
    /// Inserts `value` at position `index`.
    ///
    /// Takes `&self`, so implementors perform the write through a shared reference.
    fn insert(&self, index: usize, value: T);
}
129
130impl Inserter<CString> for FlatVector {
131    fn insert(&self, index: usize, value: CString) {
132        unsafe {
133            duckdb_vector_assign_string_element(self.ptr, index as u64, value.as_ptr());
134        }
135    }
136}
137
138impl Inserter<&str> for FlatVector {
139    fn insert(&self, index: usize, value: &str) {
140        let cstr = CString::new(value.as_bytes()).unwrap();
141        unsafe {
142            duckdb_vector_assign_string_element(self.ptr, index as u64, cstr.as_ptr());
143        }
144    }
145}
146
147impl Inserter<&[u8]> for FlatVector {
148    fn insert(&self, index: usize, value: &[u8]) {
149        let value_size = value.len();
150        unsafe {
151            // This function also works for binary data. https://duckdb.org/docs/api/c/api#duckdb_vector_assign_string_element_len
152            duckdb_vector_assign_string_element_len(
153                self.ptr,
154                index as u64,
155                value.as_ptr() as *const ::std::os::raw::c_char,
156                value_size as u64,
157            );
158        }
159    }
160}
161
162/// A list vector.
163pub struct ListVector {
164    /// ListVector does not own the vector pointer.
165    entries: FlatVector,
166}
167
168impl From<duckdb_vector> for ListVector {
169    fn from(ptr: duckdb_vector) -> Self {
170        Self {
171            entries: FlatVector::from(ptr),
172        }
173    }
174}
175
176impl ListVector {
177    /// Returns the number of entries in the list vector.
178    pub fn len(&self) -> usize {
179        unsafe { duckdb_list_vector_get_size(self.entries.ptr) as usize }
180    }
181
182    /// Returns true if the list vector is empty.
183    pub fn is_empty(&self) -> bool {
184        self.len() == 0
185    }
186
187    /// Returns the child vector.
188    // TODO: not ideal interface. Where should we keep capacity.
189    pub fn child(&self, capacity: usize) -> FlatVector {
190        self.reserve(capacity);
191        FlatVector::with_capacity(unsafe { duckdb_list_vector_get_child(self.entries.ptr) }, capacity)
192    }
193
194    /// Take the child as [StructVector].
195    pub fn struct_child(&self, capacity: usize) -> StructVector {
196        self.reserve(capacity);
197        StructVector::from(unsafe { duckdb_list_vector_get_child(self.entries.ptr) })
198    }
199
200    /// Take the child as [ArrayVector].
201    pub fn array_child(&self) -> ArrayVector {
202        ArrayVector::from(unsafe { duckdb_list_vector_get_child(self.entries.ptr) })
203    }
204
205    /// Take the child as [ListVector].
206    pub fn list_child(&self) -> ListVector {
207        ListVector::from(unsafe { duckdb_list_vector_get_child(self.entries.ptr) })
208    }
209
210    /// Set primitive data to the child node.
211    pub fn set_child<T: Copy>(&self, data: &[T]) {
212        self.child(data.len()).copy(data);
213        self.set_len(data.len());
214    }
215
216    /// Set offset and length to the entry.
217    pub fn set_entry(&mut self, idx: usize, offset: usize, length: usize) {
218        self.entries.as_mut_slice::<duckdb_list_entry>()[idx].offset = offset as u64;
219        self.entries.as_mut_slice::<duckdb_list_entry>()[idx].length = length as u64;
220    }
221
222    /// Set row as null
223    pub fn set_null(&mut self, row: usize) {
224        unsafe {
225            duckdb_vector_ensure_validity_writable(self.entries.ptr);
226            let idx = duckdb_vector_get_validity(self.entries.ptr);
227            duckdb_validity_set_row_invalid(idx, row as u64);
228        }
229    }
230
231    /// Reserve the capacity for its child node.
232    fn reserve(&self, capacity: usize) {
233        unsafe {
234            duckdb_list_vector_reserve(self.entries.ptr, capacity as u64);
235        }
236    }
237
238    /// Set the length of the list vector.
239    pub fn set_len(&self, new_len: usize) {
240        unsafe {
241            duckdb_list_vector_set_size(self.entries.ptr, new_len as u64);
242        }
243    }
244}
245
246/// A array vector. (fixed-size list)
247pub struct ArrayVector {
248    ptr: duckdb_vector,
249}
250
251impl From<duckdb_vector> for ArrayVector {
252    fn from(ptr: duckdb_vector) -> Self {
253        Self { ptr }
254    }
255}
256
257impl ArrayVector {
258    /// Get the logical type of this ArrayVector.
259    pub fn logical_type(&self) -> LogicalTypeHandle {
260        unsafe { LogicalTypeHandle::new(duckdb_vector_get_column_type(self.ptr)) }
261    }
262
263    /// Returns the size of the array type.
264    pub fn get_array_size(&self) -> u64 {
265        let ty = self.logical_type();
266        unsafe { duckdb_array_type_array_size(ty.ptr) as u64 }
267    }
268
269    /// Returns the child vector.
270    /// capacity should be a multiple of the array size.
271    // TODO: not ideal interface. Where should we keep count.
272    pub fn child(&self, capacity: usize) -> FlatVector {
273        FlatVector::with_capacity(unsafe { duckdb_array_vector_get_child(self.ptr) }, capacity)
274    }
275
276    /// Set primitive data to the child node.
277    pub fn set_child<T: Copy>(&self, data: &[T]) {
278        self.child(data.len()).copy(data);
279    }
280
281    /// Set row as null
282    pub fn set_null(&mut self, row: usize) {
283        unsafe {
284            duckdb_vector_ensure_validity_writable(self.ptr);
285            let idx = duckdb_vector_get_validity(self.ptr);
286            duckdb_validity_set_row_invalid(idx, row as u64);
287        }
288    }
289}
290
291/// A struct vector.
292pub struct StructVector {
293    ptr: duckdb_vector,
294}
295
296impl From<duckdb_vector> for StructVector {
297    fn from(ptr: duckdb_vector) -> Self {
298        Self { ptr }
299    }
300}
301
302impl StructVector {
303    /// Returns the child by idx in the list vector.
304    pub fn child(&self, idx: usize, capacity: usize) -> FlatVector {
305        FlatVector::with_capacity(
306            unsafe { duckdb_struct_vector_get_child(self.ptr, idx as u64) },
307            capacity,
308        )
309    }
310
311    /// Take the child as [StructVector].
312    pub fn struct_vector_child(&self, idx: usize) -> StructVector {
313        Self::from(unsafe { duckdb_struct_vector_get_child(self.ptr, idx as u64) })
314    }
315
316    /// Take the child as [ListVector].
317    pub fn list_vector_child(&self, idx: usize) -> ListVector {
318        ListVector::from(unsafe { duckdb_struct_vector_get_child(self.ptr, idx as u64) })
319    }
320
321    /// Take the child as [ArrayVector].
322    pub fn array_vector_child(&self, idx: usize) -> ArrayVector {
323        ArrayVector::from(unsafe { duckdb_struct_vector_get_child(self.ptr, idx as u64) })
324    }
325
326    /// Get the logical type of this struct vector.
327    pub fn logical_type(&self) -> LogicalTypeHandle {
328        unsafe { LogicalTypeHandle::new(duckdb_vector_get_column_type(self.ptr)) }
329    }
330
331    /// Get the name of the child by idx.
332    pub fn child_name(&self, idx: usize) -> DuckDbString {
333        let logical_type = self.logical_type();
334        unsafe {
335            let child_name_ptr = duckdb_struct_type_child_name(logical_type.ptr, idx as u64);
336            DuckDbString::from_ptr(child_name_ptr)
337        }
338    }
339
340    /// Get the number of children.
341    pub fn num_children(&self) -> usize {
342        let logical_type = self.logical_type();
343        unsafe { duckdb_struct_type_child_count(logical_type.ptr) as usize }
344    }
345
346    /// Set row as null
347    pub fn set_null(&mut self, row: usize) {
348        unsafe {
349            duckdb_vector_ensure_validity_writable(self.ptr);
350            let idx = duckdb_vector_get_validity(self.ptr);
351            duckdb_validity_set_row_invalid(idx, row as u64);
352        }
353    }
354}