pub struct DataChunk { /* private fields */ }
A batch of rows stored column-wise for vectorized processing.
Instead of storing rows like [(a1,b1), (a2,b2), ...], we store columns
like [a1,a2,...], [b1,b2,...]. This is cache-friendly for analytical
queries that touch few columns but many rows.
The optional SelectionVector lets you filter rows without copying data -
just mark which row indices are “selected”.
Example
use grafeo_core::execution::DataChunk;
use grafeo_core::execution::ValueVector;
use grafeo_common::types::Value;
// Create columns
let names = ValueVector::from_values(&[Value::from("Alice"), Value::from("Bob")]);
let ages = ValueVector::from_values(&[Value::from(30i64), Value::from(25i64)]);
// Bundle into a chunk
let chunk = DataChunk::new(vec![names, ages]);
assert_eq!(chunk.len(), 2);
Implementations
impl DataChunk
pub fn new(columns: Vec<ValueVector>) -> Self
Creates a new data chunk from existing vectors.
pub fn with_schema(column_types: &[LogicalType]) -> Self
Creates a new empty data chunk with the given schema.
pub fn with_capacity(column_types: &[LogicalType], capacity: usize) -> Self
Creates a new data chunk with the given schema and capacity.
pub fn column_count(&self) -> usize
Returns the number of columns.
pub fn columns(&self) -> &[ValueVector]
Returns all columns.
pub fn total_row_count(&self) -> usize
Returns the total number of rows (ignoring selection).
pub fn column(&self, index: usize) -> Option<&ValueVector>
Gets a column by index.
pub fn column_mut(&mut self, index: usize) -> Option<&mut ValueVector>
Gets a mutable column by index.
pub fn selection(&self) -> Option<&SelectionVector>
Returns the selection vector.
pub fn set_selection(&mut self, selection: SelectionVector)
Sets the selection vector.
pub fn clear_selection(&mut self)
Clears the selection vector (selects all rows).
pub fn flatten(&mut self)
Flattens the selection by copying only selected rows.
After this operation, the selection is None and the chunk's row count equals the number of previously selected rows.
pub fn selected_indices(&self) -> Box<dyn Iterator<Item = usize> + '_>
Returns an iterator over selected row indices.
pub fn concat(chunks: &[DataChunk]) -> DataChunk
Concatenates multiple chunks into a single chunk.
All chunks must have the same schema (same number and types of columns).
pub fn filter(&self, predicate: &SelectionVector) -> DataChunk
Applies a filter predicate and returns a new chunk with selected rows.
pub fn slice(&self, offset: usize, count: usize) -> DataChunk
Returns a slice of this chunk.
Returns a new DataChunk containing rows [offset, offset + count).
pub fn num_columns(&self) -> usize
Returns the number of columns.