Skip to main content

reifydb_core/value/column/array/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright (c) 2025 ReifyDB
3
4pub mod canonical;
5
6use std::{any::Any, sync::Arc};
7
8use canonical::Canonical;
9use reifydb_type::{
10	Result,
11	util::bitvec::BitVec,
12	value::{Value, r#type::Type},
13};
14
15use crate::value::column::{
16	buffer::ColumnBuffer, encoding::EncodingId, mask::RowMask, nones::NoneBitmap, stats::StatsSet,
17};
18
/// Comparison operator selector for column compare kernels.
///
/// Declared alongside [`ColumnData`] so it sits next to the read-side
/// operators of the column abstraction.
///
/// NOTE(review): the original comment refers to `ColumnData::compare`, but the
/// trait as declared in this file has no `compare` method - confirm where the
/// consumer of this enum lives.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompareOp {
	/// Equal (`==`).
	Eq,
	/// Not equal (`!=`).
	Ne,
	/// Strictly less than (`<`).
	Lt,
	/// Less than or equal (`<=`).
	LtEq,
	/// Strictly greater than (`>`).
	Gt,
	/// Greater than or equal (`>=`).
	GtEq,
}
30
/// Outcome of a search over a column, carrying a position in either case.
///
/// NOTE(review): no consumer is visible in this file. Presumably this mirrors
/// a binary-search result (match position vs. insertion point) - confirm
/// against the kernels that produce it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SearchResult {
	/// The search succeeded; the payload is the associated position.
	Found(usize),
	/// The search failed; the payload is the associated position.
	NotFound(usize),
}
36
37// Polymorphic read interface for any column representation. `Canonical` is the
38// identity-encoded impl (holds a `ColumnBuffer` directly); compressed encodings
39// defined in `reifydb-column` (`ColumnConstant`, `ColumnRle`, etc.) implement
40// this trait with encoding-specific specializations for the read operators.
41//
42// All read operators have default impls that materialize via `to_canonical`
43// then delegate to the canonical implementation on the inner `ColumnBuffer` -
44// compressed encodings override for fast paths that avoid materialization.
45pub trait ColumnData: Send + Sync + 'static {
46	fn ty(&self) -> Type;
47	fn len(&self) -> usize;
48	fn is_empty(&self) -> bool {
49		self.len() == 0
50	}
51	fn encoding(&self) -> EncodingId;
52
53	fn is_nullable(&self) -> bool;
54	fn nones(&self) -> Option<&NoneBitmap>;
55	fn is_defined(&self, idx: usize) -> bool {
56		!self.nones().map(|n| n.is_none(idx)).unwrap_or(false)
57	}
58
59	fn stats(&self) -> &StatsSet;
60
61	fn get_value(&self, idx: usize) -> Value;
62	fn iter(&self) -> Box<dyn Iterator<Item = Value> + '_> {
63		Box::new((0..self.len()).map(move |i| self.get_value(i)))
64	}
65	fn as_string(&self, idx: usize) -> String;
66
67	fn as_any(&self) -> &dyn Any;
68	fn as_any_mut(&mut self) -> &mut dyn Any;
69
70	fn children(&self) -> &[Column];
71	fn metadata(&self) -> &dyn Any;
72
73	fn to_canonical(&self) -> Result<Arc<Canonical>>;
74
75	// Default read operators: materialize then run the canonical algorithm over
76	// the inner `ColumnBuffer`. Compressed encodings override these for fast paths.
77	fn filter(&self, mask: &RowMask) -> Result<Column> {
78		let canon = self.to_canonical()?;
79		Ok(Column::from_canonical(canonical_filter(&canon, mask)?))
80	}
81
82	fn take(&self, indices: &Column) -> Result<Column> {
83		let canon = self.to_canonical()?;
84		let idx = canon_indices(indices)?;
85		Ok(Column::from_canonical(canonical_take(&canon, &idx)?))
86	}
87
88	fn slice(&self, start: usize, end: usize) -> Result<Column> {
89		let canon = self.to_canonical()?;
90		Ok(Column::from_canonical(canonical_slice(&canon, start, end)?))
91	}
92}
93
94#[derive(Clone)]
95pub struct Column(Arc<dyn ColumnData>);
96
97impl Column {
98	pub fn from_data(data: Arc<dyn ColumnData>) -> Self {
99		Self(data)
100	}
101
102	pub fn from_canonical(canon: Canonical) -> Self {
103		Self(Arc::new(canon))
104	}
105
106	pub fn from_column_buffer(buffer: ColumnBuffer) -> Self {
107		Self::from_canonical(Canonical::from_buffer(buffer))
108	}
109
110	pub fn data(&self) -> &dyn ColumnData {
111		&*self.0
112	}
113
114	pub fn ty(&self) -> Type {
115		self.0.ty()
116	}
117
118	pub fn is_nullable(&self) -> bool {
119		self.0.is_nullable()
120	}
121
122	pub fn len(&self) -> usize {
123		self.0.len()
124	}
125
126	pub fn is_empty(&self) -> bool {
127		self.0.is_empty()
128	}
129
130	pub fn encoding(&self) -> EncodingId {
131		self.0.encoding()
132	}
133
134	pub fn stats(&self) -> &StatsSet {
135		self.0.stats()
136	}
137
138	pub fn nones(&self) -> Option<&NoneBitmap> {
139		self.0.nones()
140	}
141
142	pub fn is_defined(&self, idx: usize) -> bool {
143		self.0.is_defined(idx)
144	}
145
146	pub fn get_value(&self, idx: usize) -> Value {
147		self.0.get_value(idx)
148	}
149
150	pub fn iter(&self) -> Box<dyn Iterator<Item = Value> + '_> {
151		self.0.iter()
152	}
153
154	pub fn as_string(&self, idx: usize) -> String {
155		self.0.as_string(idx)
156	}
157
158	pub fn children(&self) -> &[Column] {
159		self.0.children()
160	}
161
162	pub fn metadata(&self) -> &dyn Any {
163		self.0.metadata()
164	}
165
166	pub fn to_canonical(&self) -> Result<Arc<Canonical>> {
167		self.0.to_canonical()
168	}
169
170	pub fn filter(&self, mask: &RowMask) -> Result<Column> {
171		self.0.filter(mask)
172	}
173
174	pub fn take(&self, indices: &Column) -> Result<Column> {
175		self.0.take(indices)
176	}
177
178	pub fn slice(&self, start: usize, end: usize) -> Result<Column> {
179		self.0.slice(start, end)
180	}
181
182	// Return `&mut Canonical`, materializing from a compressed encoding if needed.
183	pub fn materialize(&mut self) -> Result<&mut Canonical> {
184		if Arc::get_mut(&mut self.0).map(|d| d.as_any().is::<Canonical>()).unwrap_or(false) {
185			let d = Arc::get_mut(&mut self.0).unwrap();
186			return Ok(d.as_any_mut().downcast_mut::<Canonical>().unwrap());
187		}
188		let canonical_arc = self.0.to_canonical()?;
189		let owned = Arc::try_unwrap(canonical_arc).unwrap_or_else(|arc| (*arc).clone());
190		self.0 = Arc::new(owned);
191		let d = Arc::get_mut(&mut self.0).unwrap();
192		Ok(d.as_any_mut().downcast_mut::<Canonical>().unwrap())
193	}
194}
195
196// Default compute primitives for canonical columns. These are used by the
197// default `filter`/`take`/`slice` impls on the `ColumnData` trait and are
198// self-contained within reifydb-core (no dependency on reifydb-column).
199
200fn canonical_filter(canon: &Canonical, mask: &RowMask) -> Result<Canonical> {
201	assert_eq!(canon.len(), mask.len(), "filter: length mismatch");
202	let kept = mask.popcount();
203
204	let new_nones = canon.nones.as_ref().map(|n| {
205		let mut out = NoneBitmap::all_present(kept);
206		let mut j = 0usize;
207		for i in 0..n.len() {
208			if mask.get(i) {
209				if n.is_none(i) {
210					out.set_none(j);
211				}
212				j += 1;
213			}
214		}
215		out
216	});
217
218	let mut new_buffer = canon.buffer.clone();
219	new_buffer.filter(&row_mask_to_bitvec(mask))?;
220
221	Ok(Canonical::new(canon.ty.clone(), canon.nullable, new_nones, new_buffer))
222}
223
224fn canonical_take(canon: &Canonical, indices: &[usize]) -> Result<Canonical> {
225	let new_nones = canon.nones.as_ref().map(|n| {
226		let mut out = NoneBitmap::all_present(indices.len());
227		for (j, &i) in indices.iter().enumerate() {
228			if n.is_none(i) {
229				out.set_none(j);
230			}
231		}
232		out
233	});
234	let new_buffer = canon.buffer.gather(indices);
235	Ok(Canonical::new(canon.ty.clone(), canon.nullable, new_nones, new_buffer))
236}
237
238fn canonical_slice(canon: &Canonical, start: usize, end: usize) -> Result<Canonical> {
239	assert!(start <= end);
240	assert!(end <= canon.len());
241	let new_nones = canon.nones.as_ref().map(|n| {
242		let count = end - start;
243		let mut out = NoneBitmap::all_present(count);
244		for i in 0..count {
245			if n.is_none(start + i) {
246				out.set_none(i);
247			}
248		}
249		out
250	});
251	let new_buffer = canon.buffer.slice(start, end);
252	Ok(Canonical::new(canon.ty.clone(), canon.nullable, new_nones, new_buffer))
253}
254
255fn row_mask_to_bitvec(mask: &RowMask) -> BitVec {
256	let mut bits = Vec::with_capacity(mask.len());
257	for i in 0..mask.len() {
258		bits.push(mask.get(i));
259	}
260	BitVec::from(bits)
261}
262
263fn canon_indices(indices: &Column) -> Result<Vec<usize>> {
264	let canon = indices.to_canonical()?;
265	let len = canon.len();
266	let mut out = Vec::with_capacity(len);
267	for i in 0..len {
268		let v = canon.buffer.get_value(i);
269		let n: usize = match v {
270			Value::Uint1(n) => n as usize,
271			Value::Uint2(n) => n as usize,
272			Value::Uint4(n) => n as usize,
273			Value::Uint8(n) => n as usize,
274			Value::Int4(n) => n as usize,
275			Value::Int8(n) => n as usize,
276			_ => panic!("take: indices must be fixed-width unsigned/signed int"),
277		};
278		out.push(n);
279	}
280	Ok(out)
281}