Skip to main content

reifydb_core/value/column/data/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright (c) 2025 ReifyDB
3
4//! `ColumnData` is the polymorphic accessor (type, length, encoding, nullability, statistics, value extraction) that
5//! each concrete data implements. `Canonical` is the dense default data used for primitive-typed columns. The
6//! `CompareOp` and `SearchResult` enums express search-and-compare operations the engine performs over arrays during
7//! selection (binary search and range probing).
8
9pub mod canonical;
10
11use std::{any::Any, sync::Arc};
12
13use canonical::Canonical;
14use reifydb_type::{
15	Result,
16	util::bitvec::BitVec,
17	value::{Value, r#type::Type},
18};
19
20use crate::value::column::{
21	buffer::ColumnBuffer, encoding::EncodingId, mask::RowMask, nones::NoneBitmap, stats::StatsSet,
22};
23
/// Comparison operator used by the engine's search-and-compare operations over arrays.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompareOp {
	/// Equal to.
	Eq,
	/// Not equal to.
	Ne,
	/// Strictly less than.
	Lt,
	/// Less than or equal to.
	LtEq,
	/// Strictly greater than.
	Gt,
	/// Greater than or equal to.
	GtEq,
}
33
/// Outcome of a search over an array.
///
/// NOTE(review): the meaning of the `usize` payloads is not defined in this file. Presumably
/// `Found` carries the matching position and `NotFound` the insertion point, as with
/// `slice::binary_search` — confirm against the search implementations.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SearchResult {
	/// The searched value was located.
	Found(usize),
	/// The searched value is absent.
	NotFound(usize),
}
39
40pub trait ColumnData: Send + Sync + 'static {
41	fn ty(&self) -> Type;
42	fn len(&self) -> usize;
43	fn is_empty(&self) -> bool {
44		self.len() == 0
45	}
46	fn encoding(&self) -> EncodingId;
47
48	fn is_nullable(&self) -> bool;
49	fn nones(&self) -> Option<&NoneBitmap>;
50	fn is_defined(&self, idx: usize) -> bool {
51		!self.nones().map(|n| n.is_none(idx)).unwrap_or(false)
52	}
53
54	fn stats(&self) -> &StatsSet;
55
56	fn get_value(&self, idx: usize) -> Value;
57	fn iter(&self) -> Box<dyn Iterator<Item = Value> + '_> {
58		Box::new((0..self.len()).map(move |i| self.get_value(i)))
59	}
60	fn as_string(&self, idx: usize) -> String;
61
62	fn as_any(&self) -> &dyn Any;
63	fn as_any_mut(&mut self) -> &mut dyn Any;
64
65	fn children(&self) -> &[Column];
66	fn metadata(&self) -> &dyn Any;
67
68	fn to_canonical(&self) -> Result<Arc<Canonical>>;
69
70	fn filter(&self, mask: &RowMask) -> Result<Column> {
71		let canon = self.to_canonical()?;
72		Ok(Column::from_canonical(canonical_filter(&canon, mask)?))
73	}
74
75	fn take(&self, indices: &Column) -> Result<Column> {
76		let canon = self.to_canonical()?;
77		let idx = canon_indices(indices)?;
78		Ok(Column::from_canonical(canonical_take(&canon, &idx)?))
79	}
80
81	fn slice(&self, start: usize, end: usize) -> Result<Column> {
82		let canon = self.to_canonical()?;
83		Ok(Column::from_canonical(canonical_slice(&canon, start, end)?))
84	}
85}
86
87#[derive(Clone)]
88pub struct Column(Arc<dyn ColumnData>);
89
90impl Column {
91	pub fn from_data(data: Arc<dyn ColumnData>) -> Self {
92		Self(data)
93	}
94
95	pub fn from_canonical(canon: Canonical) -> Self {
96		Self(Arc::new(canon))
97	}
98
99	pub fn from_column_buffer(buffer: ColumnBuffer) -> Self {
100		Self::from_canonical(Canonical::from_buffer(buffer))
101	}
102
103	pub fn data(&self) -> &dyn ColumnData {
104		&*self.0
105	}
106
107	pub fn ty(&self) -> Type {
108		self.0.ty()
109	}
110
111	pub fn is_nullable(&self) -> bool {
112		self.0.is_nullable()
113	}
114
115	pub fn len(&self) -> usize {
116		self.0.len()
117	}
118
119	pub fn is_empty(&self) -> bool {
120		self.0.is_empty()
121	}
122
123	pub fn encoding(&self) -> EncodingId {
124		self.0.encoding()
125	}
126
127	pub fn stats(&self) -> &StatsSet {
128		self.0.stats()
129	}
130
131	pub fn nones(&self) -> Option<&NoneBitmap> {
132		self.0.nones()
133	}
134
135	pub fn is_defined(&self, idx: usize) -> bool {
136		self.0.is_defined(idx)
137	}
138
139	pub fn get_value(&self, idx: usize) -> Value {
140		self.0.get_value(idx)
141	}
142
143	pub fn iter(&self) -> Box<dyn Iterator<Item = Value> + '_> {
144		self.0.iter()
145	}
146
147	pub fn as_string(&self, idx: usize) -> String {
148		self.0.as_string(idx)
149	}
150
151	pub fn children(&self) -> &[Column] {
152		self.0.children()
153	}
154
155	pub fn metadata(&self) -> &dyn Any {
156		self.0.metadata()
157	}
158
159	pub fn to_canonical(&self) -> Result<Arc<Canonical>> {
160		self.0.to_canonical()
161	}
162
163	pub fn filter(&self, mask: &RowMask) -> Result<Column> {
164		self.0.filter(mask)
165	}
166
167	pub fn take(&self, indices: &Column) -> Result<Column> {
168		self.0.take(indices)
169	}
170
171	pub fn slice(&self, start: usize, end: usize) -> Result<Column> {
172		self.0.slice(start, end)
173	}
174
175	pub fn materialize(&mut self) -> Result<&mut Canonical> {
176		if Arc::get_mut(&mut self.0).map(|d| d.as_any().is::<Canonical>()).unwrap_or(false) {
177			let d = Arc::get_mut(&mut self.0).unwrap();
178			return Ok(d.as_any_mut().downcast_mut::<Canonical>().unwrap());
179		}
180		let canonical_arc = self.0.to_canonical()?;
181		let owned = Arc::try_unwrap(canonical_arc).unwrap_or_else(|arc| (*arc).clone());
182		self.0 = Arc::new(owned);
183		let d = Arc::get_mut(&mut self.0).unwrap();
184		Ok(d.as_any_mut().downcast_mut::<Canonical>().unwrap())
185	}
186}
187
188fn canonical_filter(canon: &Canonical, mask: &RowMask) -> Result<Canonical> {
189	assert_eq!(canon.len(), mask.len(), "filter: length mismatch");
190	let kept = mask.popcount();
191
192	let new_nones = canon.nones.as_ref().map(|n| {
193		let mut out = NoneBitmap::all_present(kept);
194		let mut j = 0usize;
195		for i in 0..n.len() {
196			if mask.get(i) {
197				if n.is_none(i) {
198					out.set_none(j);
199				}
200				j += 1;
201			}
202		}
203		out
204	});
205
206	let mut new_buffer = canon.buffer.clone();
207	new_buffer.filter(&row_mask_to_bitvec(mask))?;
208
209	Ok(Canonical::new(canon.ty.clone(), canon.nullable, new_nones, new_buffer))
210}
211
212fn canonical_take(canon: &Canonical, indices: &[usize]) -> Result<Canonical> {
213	let new_nones = canon.nones.as_ref().map(|n| {
214		let mut out = NoneBitmap::all_present(indices.len());
215		for (j, &i) in indices.iter().enumerate() {
216			if n.is_none(i) {
217				out.set_none(j);
218			}
219		}
220		out
221	});
222	let new_buffer = canon.buffer.gather(indices);
223	Ok(Canonical::new(canon.ty.clone(), canon.nullable, new_nones, new_buffer))
224}
225
226fn canonical_slice(canon: &Canonical, start: usize, end: usize) -> Result<Canonical> {
227	assert!(start <= end);
228	assert!(end <= canon.len());
229	let new_nones = canon.nones.as_ref().map(|n| {
230		let count = end - start;
231		let mut out = NoneBitmap::all_present(count);
232		for i in 0..count {
233			if n.is_none(start + i) {
234				out.set_none(i);
235			}
236		}
237		out
238	});
239	let new_buffer = canon.buffer.slice(start, end);
240	Ok(Canonical::new(canon.ty.clone(), canon.nullable, new_nones, new_buffer))
241}
242
243fn row_mask_to_bitvec(mask: &RowMask) -> BitVec {
244	let mut bits = Vec::with_capacity(mask.len());
245	for i in 0..mask.len() {
246		bits.push(mask.get(i));
247	}
248	BitVec::from(bits)
249}
250
251fn canon_indices(indices: &Column) -> Result<Vec<usize>> {
252	let canon = indices.to_canonical()?;
253	let len = canon.len();
254	let mut out = Vec::with_capacity(len);
255	for i in 0..len {
256		let v = canon.buffer.get_value(i);
257		let n: usize = match v {
258			Value::Uint1(n) => n as usize,
259			Value::Uint2(n) => n as usize,
260			Value::Uint4(n) => n as usize,
261			Value::Uint8(n) => n as usize,
262			Value::Int4(n) => n as usize,
263			Value::Int8(n) => n as usize,
264			_ => panic!("take: indices must be fixed-width unsigned/signed int"),
265		};
266		out.push(n);
267	}
268	Ok(out)
269}