duckdb_extension_framework/data_chunk.rs
1use crate::duckly::{
2 duckdb_create_data_chunk, duckdb_data_chunk, duckdb_data_chunk_get_column_count,
3 duckdb_data_chunk_get_size, duckdb_data_chunk_get_vector, duckdb_data_chunk_reset,
4 duckdb_data_chunk_set_size, duckdb_destroy_data_chunk, duckdb_logical_type, idx_t,
5};
6use crate::{LogicalType, Vector};
7
8/// A Data Chunk represents a set of vectors.
9///
10/// The data chunk class is the intermediate representation used by the
11/// execution engine of DuckDB. It effectively represents a subset of a relation.
12/// It holds a set of vectors that all have the same length.
13///
14/// DataChunk is initialized using the DataChunk::Initialize function by
15/// providing it with a vector of TypeIds for the Vector members. By default,
16/// this function will also allocate a chunk of memory in the DataChunk for the
17/// vectors and all the vectors will be referencing vectors to the data owned by
18/// the chunk. The reason for this behavior is that the underlying vectors can
19/// become referencing vectors to other chunks as well (i.e. in the case an
20/// operator does not alter the data, such as a Filter operator which only adds a
21/// selection vector).
22///
23/// In addition to holding the data of the vectors, the DataChunk also owns the
24/// selection vector that underlying vectors can point to.
25#[derive(Debug)]
26pub struct DataChunk {
27 ptr: duckdb_data_chunk,
28 owned: bool,
29}
30
31impl DataChunk {
32 /// Creates an empty DataChunk with the specified set of types.
33 ///
34 /// # Arguments
35 /// - `types`: An array of types of the data chunk.
36 pub fn new(types: Vec<LogicalType>) -> Self {
37 let types: Vec<duckdb_logical_type> = types.iter().map(|x| x.typ).collect();
38 let mut types = types.into_boxed_slice();
39
40 let ptr = unsafe {
41 duckdb_create_data_chunk(types.as_mut_ptr(), types.len().try_into().unwrap())
42 };
43
44 Self { ptr, owned: true }
45 }
46
47 /// Retrieves the vector at the specified column index in the data chunk.
48 ///
49 /// The pointer to the vector is valid for as long as the chunk is alive.
50 /// It does NOT need to be destroyed.
51 ///
52 pub fn get_vector<T>(&self, column_index: idx_t) -> Vector<T> {
53 Vector::from(unsafe { duckdb_data_chunk_get_vector(self.ptr, column_index) })
54 }
55 /// Sets the current number of tuples in a data chunk.
56 pub fn set_size(&self, size: idx_t) {
57 unsafe { duckdb_data_chunk_set_size(self.ptr, size) };
58 }
59 /// Resets a data chunk, clearing the validity masks and setting the cardinality of the data chunk to 0.
60 pub fn reset(&self) {
61 unsafe { duckdb_data_chunk_reset(self.ptr) }
62 }
63 /// Retrieves the number of columns in a data chunk.
64 pub fn get_column_count(&self) -> idx_t {
65 unsafe { duckdb_data_chunk_get_column_count(self.ptr) }
66 }
67 /// Retrieves the current number of tuples in a data chunk.
68 pub fn get_size(&self) -> idx_t {
69 unsafe { duckdb_data_chunk_get_size(self.ptr) }
70 }
71}
72
73impl From<duckdb_data_chunk> for DataChunk {
74 fn from(ptr: duckdb_data_chunk) -> Self {
75 Self { ptr, owned: false }
76 }
77}
78
79impl Drop for DataChunk {
80 fn drop(&mut self) {
81 if self.owned {
82 unsafe { duckdb_destroy_data_chunk(&mut self.ptr) };
83 }
84 }
85}
86
#[cfg(test)]
mod test {
    use crate::constants::LogicalTypeId;
    use crate::{DataChunk, LogicalType};

    /// A chunk built from a single column type reports exactly one column.
    #[test]
    fn test_data_chunk_construction() {
        let column_types = vec![LogicalType::new(LogicalTypeId::Integer)];
        let chunk = DataChunk::new(column_types);

        assert_eq!(chunk.get_column_count(), 1);

        // Explicitly run the destructor of the owned chunk.
        drop(chunk);
    }
}