Skip to main content

reifydb_core/value/column/array/
canonical.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright (c) 2025 ReifyDB
3
4use std::{any::Any, sync::Arc};
5
6use reifydb_type::{
7	Result,
8	value::{Value, r#type::Type},
9};
10
11use crate::value::column::{
12	array::{Column, ColumnData},
13	buffer::ColumnBuffer,
14	encoding::EncodingId,
15	nones::NoneBitmap,
16	stats::StatsSet,
17};
18
19// Canonical (uncompressed) column representation. Wraps a `ColumnBuffer` with
20// lifted nullability: definedness lives in the outer `nones` bitmap, so the
21// inner `buffer` is never a `ColumnBuffer::Option` variant.
22//
23// The bridge between `ColumnBuffer` and `Canonical` is an Arc-bump clone on the
24// `CowVec`-backed containers plus (for nullable columns) an O(n/64) polarity
25// flip on the bitvec - `ColumnBuffer::Option.bitvec` uses set bit = defined
26// while `NoneBitmap` uses set bit = None.
27#[derive(Clone, Debug)]
28pub struct Canonical {
29	pub ty: Type,
30	pub nullable: bool,
31	pub nones: Option<NoneBitmap>,
32	pub buffer: ColumnBuffer,
33	stats: StatsSet,
34}
35
36impl Canonical {
37	pub fn new(ty: Type, nullable: bool, nones: Option<NoneBitmap>, buffer: ColumnBuffer) -> Self {
38		debug_assert!(
39			!matches!(buffer, ColumnBuffer::Option { .. }),
40			"Canonical.buffer must not be a ColumnBuffer::Option; nullability is lifted"
41		);
42		Self {
43			ty,
44			nullable,
45			nones,
46			buffer,
47			stats: StatsSet::new(),
48		}
49	}
50
51	// Owning constructor: move a `ColumnBuffer` into `Canonical`. If the buffer
52	// is `ColumnBuffer::Option`, the definedness bitvec is inverted into a
53	// `NoneBitmap` and the inner buffer is unwrapped.
54	pub fn from_buffer(b: ColumnBuffer) -> Self {
55		match b {
56			ColumnBuffer::Option {
57				inner,
58				bitvec,
59			} => {
60				let mut inner_c = Self::from_buffer(*inner);
61				inner_c.nullable = true;
62				inner_c.nones = Some(NoneBitmap::from_defined_bitvec(&bitvec));
63				inner_c
64			}
65			other => {
66				let ty = other.get_type();
67				Self {
68					ty,
69					nullable: false,
70					nones: None,
71					buffer: other,
72					stats: StatsSet::new(),
73				}
74			}
75		}
76	}
77
78	// Borrowing constructor: Arc-bump clones the inner `CowVec`s, zero data copy.
79	pub fn from_column_buffer(cd: &ColumnBuffer) -> Result<Self> {
80		Ok(Self::from_buffer(cd.clone()))
81	}
82
83	pub fn into_buffer(self) -> ColumnBuffer {
84		match self.nones {
85			None => self.buffer,
86			Some(nones) => ColumnBuffer::Option {
87				inner: Box::new(self.buffer),
88				bitvec: nones.to_defined_bitvec(),
89			},
90		}
91	}
92
93	pub fn to_buffer(&self) -> ColumnBuffer {
94		match &self.nones {
95			None => self.buffer.clone(),
96			Some(nones) => ColumnBuffer::Option {
97				inner: Box::new(self.buffer.clone()),
98				bitvec: nones.to_defined_bitvec(),
99			},
100		}
101	}
102
103	pub fn to_column_buffer(&self) -> Result<ColumnBuffer> {
104		Ok(self.to_buffer())
105	}
106
107	pub fn len(&self) -> usize {
108		self.buffer.len()
109	}
110
111	pub fn is_empty(&self) -> bool {
112		self.len() == 0
113	}
114
115	pub fn stats(&self) -> &StatsSet {
116		&self.stats
117	}
118}
119
120fn encoding_for_type(ty: &Type) -> EncodingId {
121	match ty {
122		Type::Boolean => EncodingId::CANONICAL_BOOL,
123		Type::Utf8 | Type::Blob => EncodingId::CANONICAL_VARLEN,
124		Type::Int | Type::Uint | Type::Decimal => EncodingId::CANONICAL_BIGNUM,
125		_ => EncodingId::CANONICAL_FIXED,
126	}
127}
128
129static UNIT_METADATA: () = ();
130static EMPTY_CHILDREN: Vec<Column> = Vec::new();
131
132impl ColumnData for Canonical {
133	fn ty(&self) -> Type {
134		self.ty.clone()
135	}
136
137	fn is_nullable(&self) -> bool {
138		self.nullable
139	}
140
141	fn len(&self) -> usize {
142		self.buffer.len()
143	}
144
145	fn encoding(&self) -> EncodingId {
146		encoding_for_type(&self.ty)
147	}
148
149	fn stats(&self) -> &StatsSet {
150		&self.stats
151	}
152
153	fn nones(&self) -> Option<&NoneBitmap> {
154		self.nones.as_ref()
155	}
156
157	fn get_value(&self, idx: usize) -> Value {
158		if self.nones.as_ref().map(|n| n.is_none(idx)).unwrap_or(false) {
159			Value::none_of(self.ty.clone())
160		} else {
161			self.buffer.get_value(idx)
162		}
163	}
164
165	fn as_string(&self, idx: usize) -> String {
166		self.buffer.as_string(idx)
167	}
168
169	fn as_any(&self) -> &dyn Any {
170		self
171	}
172
173	fn as_any_mut(&mut self) -> &mut dyn Any {
174		self
175	}
176
177	fn children(&self) -> &[Column] {
178		&EMPTY_CHILDREN
179	}
180
181	fn metadata(&self) -> &dyn Any {
182		&UNIT_METADATA
183	}
184
185	fn to_canonical(&self) -> Result<Arc<Canonical>> {
186		Ok(Arc::new(self.clone()))
187	}
188}