pub struct DataChunk { /* private fields */ }
A batch of rows stored column-wise for vectorized processing.
Instead of storing rows like [(a1,b1), (a2,b2), ...], we store columns
like [a1,a2,...], [b1,b2,...]. This is cache-friendly for analytical
queries that touch few columns but many rows.
The optional SelectionVector lets you filter rows without copying data -
just mark which row indices are “selected”.
§Example
use grafeo_core::execution::DataChunk;
use grafeo_core::execution::ValueVector;
use grafeo_common::types::Value;
// Create columns
let names = ValueVector::from_values(&[Value::from("Alix"), Value::from("Gus")]);
let ages = ValueVector::from_values(&[Value::from(30i64), Value::from(25i64)]);
// Bundle into a chunk
let chunk = DataChunk::new(vec![names, ages]);
assert_eq!(chunk.len(), 2);
Implementations§
Source§impl DataChunk
impl DataChunk
Sourcepub fn new(columns: Vec<ValueVector>) -> Self
pub fn new(columns: Vec<ValueVector>) -> Self
Creates a new data chunk from existing vectors.
Sourcepub fn with_schema(column_types: &[LogicalType]) -> Self
pub fn with_schema(column_types: &[LogicalType]) -> Self
Creates a new empty data chunk with the given schema.
Sourcepub fn with_capacity(column_types: &[LogicalType], capacity: usize) -> Self
pub fn with_capacity(column_types: &[LogicalType], capacity: usize) -> Self
Creates a new data chunk with the given schema and capacity.
Sourcepub fn column_count(&self) -> usize
pub fn column_count(&self) -> usize
Returns the number of columns.
Sourcepub fn columns(&self) -> &[ValueVector]
pub fn columns(&self) -> &[ValueVector]
Returns all columns.
Sourcepub fn total_row_count(&self) -> usize
pub fn total_row_count(&self) -> usize
Returns the total number of rows (ignoring selection).
Sourcepub fn column(&self, index: usize) -> Option<&ValueVector>
pub fn column(&self, index: usize) -> Option<&ValueVector>
Gets a column by index.
Sourcepub fn column_mut(&mut self, index: usize) -> Option<&mut ValueVector>
pub fn column_mut(&mut self, index: usize) -> Option<&mut ValueVector>
Gets a mutable column by index.
Sourcepub fn selection(&self) -> Option<&SelectionVector>
pub fn selection(&self) -> Option<&SelectionVector>
Returns the selection vector.
Sourcepub fn set_selection(&mut self, selection: SelectionVector)
pub fn set_selection(&mut self, selection: SelectionVector)
Sets the selection vector.
Sourcepub fn clear_selection(&mut self)
pub fn clear_selection(&mut self)
Clears the selection vector (selects all rows).
Sourcepub fn set_zone_hints(&mut self, hints: ChunkZoneHints)
pub fn set_zone_hints(&mut self, hints: ChunkZoneHints)
Sets zone map hints for this chunk.
Zone map hints enable the filter operator to skip entire chunks when predicates can’t possibly match based on min/max statistics.
Sourcepub fn zone_hints(&self) -> Option<&ChunkZoneHints>
pub fn zone_hints(&self) -> Option<&ChunkZoneHints>
Returns zone map hints if available.
Used by the filter operator for chunk-level predicate pruning.
Sourcepub fn clear_zone_hints(&mut self)
pub fn clear_zone_hints(&mut self)
Clears zone map hints.
Sourcepub fn flatten(&mut self)
pub fn flatten(&mut self)
Flattens the selection by copying only selected rows.
After this operation, selection is None and count equals the previously selected row count.
Sourcepub fn selected_indices(&self) -> Box<dyn Iterator<Item = usize> + '_>
pub fn selected_indices(&self) -> Box<dyn Iterator<Item = usize> + '_>
Returns an iterator over selected row indices.
Sourcepub fn concat(chunks: &[DataChunk]) -> DataChunk
pub fn concat(chunks: &[DataChunk]) -> DataChunk
Concatenates multiple chunks into a single chunk.
All chunks must have the same schema (same number and types of columns).
Sourcepub fn filter(&self, predicate: &SelectionVector) -> DataChunk
pub fn filter(&self, predicate: &SelectionVector) -> DataChunk
Applies a filter predicate and returns a new chunk with selected rows.
Sourcepub fn slice(&self, offset: usize, count: usize) -> DataChunk
pub fn slice(&self, offset: usize, count: usize) -> DataChunk
Returns a slice of this chunk.
Returns a new DataChunk containing rows [offset, offset + count).
Sourcepub fn num_columns(&self) -> usize
pub fn num_columns(&self) -> usize
Returns the number of columns.
Trait Implementations§
Auto Trait Implementations§
impl Freeze for DataChunk
impl RefUnwindSafe for DataChunk
impl Send for DataChunk
impl Sync for DataChunk
impl Unpin for DataChunk
impl UnsafeUnpin for DataChunk
impl UnwindSafe for DataChunk
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more