duckdb_extension_framework/
data_chunk.rs

1use crate::duckly::{
2    duckdb_create_data_chunk, duckdb_data_chunk, duckdb_data_chunk_get_column_count,
3    duckdb_data_chunk_get_size, duckdb_data_chunk_get_vector, duckdb_data_chunk_reset,
4    duckdb_data_chunk_set_size, duckdb_destroy_data_chunk, duckdb_logical_type, idx_t,
5};
6use crate::{LogicalType, Vector};
7
8/// A Data Chunk represents a set of vectors.
9///
10/// The data chunk class is the intermediate representation used by the
11/// execution engine of DuckDB. It effectively represents a subset of a relation.
12/// It holds a set of vectors that all have the same length.
13///
14/// DataChunk is initialized using the DataChunk::Initialize function by
15/// providing it with a vector of TypeIds for the Vector members. By default,
16/// this function will also allocate a chunk of memory in the DataChunk for the
17/// vectors and all the vectors will be referencing vectors to the data owned by
18/// the chunk. The reason for this behavior is that the underlying vectors can
19/// become referencing vectors to other chunks as well (i.e. in the case an
20/// operator does not alter the data, such as a Filter operator which only adds a
21/// selection vector).
22///
23/// In addition to holding the data of the vectors, the DataChunk also owns the
24/// selection vector that underlying vectors can point to.
25#[derive(Debug)]
26pub struct DataChunk {
27    ptr: duckdb_data_chunk,
28    owned: bool,
29}
30
31impl DataChunk {
32    /// Creates an empty DataChunk with the specified set of types.
33    ///
34    /// # Arguments
35    /// - `types`: An array of types of the data chunk.
36    pub fn new(types: Vec<LogicalType>) -> Self {
37        let types: Vec<duckdb_logical_type> = types.iter().map(|x| x.typ).collect();
38        let mut types = types.into_boxed_slice();
39
40        let ptr = unsafe {
41            duckdb_create_data_chunk(types.as_mut_ptr(), types.len().try_into().unwrap())
42        };
43
44        Self { ptr, owned: true }
45    }
46
47    /// Retrieves the vector at the specified column index in the data chunk.
48    ///
49    /// The pointer to the vector is valid for as long as the chunk is alive.
50    /// It does NOT need to be destroyed.
51    ///
52    pub fn get_vector<T>(&self, column_index: idx_t) -> Vector<T> {
53        Vector::from(unsafe { duckdb_data_chunk_get_vector(self.ptr, column_index) })
54    }
55    /// Sets the current number of tuples in a data chunk.
56    pub fn set_size(&self, size: idx_t) {
57        unsafe { duckdb_data_chunk_set_size(self.ptr, size) };
58    }
59    /// Resets a data chunk, clearing the validity masks and setting the cardinality of the data chunk to 0.
60    pub fn reset(&self) {
61        unsafe { duckdb_data_chunk_reset(self.ptr) }
62    }
63    /// Retrieves the number of columns in a data chunk.
64    pub fn get_column_count(&self) -> idx_t {
65        unsafe { duckdb_data_chunk_get_column_count(self.ptr) }
66    }
67    /// Retrieves the current number of tuples in a data chunk.
68    pub fn get_size(&self) -> idx_t {
69        unsafe { duckdb_data_chunk_get_size(self.ptr) }
70    }
71}
72
73impl From<duckdb_data_chunk> for DataChunk {
74    fn from(ptr: duckdb_data_chunk) -> Self {
75        Self { ptr, owned: false }
76    }
77}
78
79impl Drop for DataChunk {
80    fn drop(&mut self) {
81        if self.owned {
82            unsafe { duckdb_destroy_data_chunk(&mut self.ptr) };
83        }
84    }
85}
86
#[cfg(test)]
mod test {
    use crate::constants::LogicalTypeId;
    use crate::{DataChunk, LogicalType};

    /// A chunk built from a single `Integer` type reports exactly one column.
    #[test]
    fn test_data_chunk_construction() {
        let column_types = vec![LogicalType::new(LogicalTypeId::Integer)];
        let chunk = DataChunk::new(column_types);

        assert_eq!(chunk.get_column_count(), 1);

        // Dropped explicitly so the owned-chunk path of `Drop` runs inside the test.
        drop(chunk);
    }
}