Skip to main content

reifydb_column/encoding/
canonical.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright (c) 2025 ReifyDB
3
4use reifydb_core::value::column::{
5	array::{Column, canonical::Canonical},
6	encoding::EncodingId,
7	stats::{Stat, StatsSet},
8};
9use reifydb_type::{Result, value::Value};
10
11use crate::{compress::CompressConfig, encoding::Encoding};
12
13pub struct CanonicalEncoding {
14	pub id: EncodingId,
15}
16
17impl CanonicalEncoding {
18	pub const BOOL: Self = Self {
19		id: EncodingId::CANONICAL_BOOL,
20	};
21	pub const FIXED: Self = Self {
22		id: EncodingId::CANONICAL_FIXED,
23	};
24	pub const VARLEN: Self = Self {
25		id: EncodingId::CANONICAL_VARLEN,
26	};
27	pub const BIGNUM: Self = Self {
28		id: EncodingId::CANONICAL_BIGNUM,
29	};
30}
31
32impl Encoding for CanonicalEncoding {
33	fn id(&self) -> EncodingId {
34		self.id
35	}
36
37	fn try_compress(&self, input: &Canonical, _cfg: &CompressConfig) -> Result<Option<Column>> {
38		Ok(Some(Column::from_canonical(input.clone())))
39	}
40
41	fn canonicalize(&self, array: &Column) -> Result<Canonical> {
42		let arc = array.to_canonical()?;
43		Ok((*arc).clone())
44	}
45
46	fn derive_stats(&self, array: &Column) -> StatsSet {
47		let mut s = StatsSet::new();
48		if let Some(nones) = array.nones() {
49			s.set(Stat::NoneCount, Value::Uint8(nones.none_count() as u64));
50		}
51		s
52	}
53}
54
#[cfg(test)]
mod tests {
	use reifydb_core::value::column::buffer::ColumnBuffer;

	use super::*;
	use crate::encoding::EncodingRegistry;

	/// Compressing with the FIXED canonical encoding and canonicalizing the
	/// result keeps both the encoding id and the number of values.
	#[test]
	fn canonical_fixed_round_trips_via_try_compress_then_canonicalize() {
		let encoding = CanonicalEncoding::FIXED;
		let buffer = ColumnBuffer::int4([1i32, 2, 3, 4]);
		let canonical = Canonical::from_column_buffer(&buffer).unwrap();

		let outcome = encoding.try_compress(&canonical, &CompressConfig::default()).unwrap();
		let column = outcome.expect("canonical try_compress always wraps");
		assert_eq!(column.encoding(), EncodingId::CANONICAL_FIXED);

		let restored = encoding.canonicalize(&column).unwrap();
		assert_eq!(restored.len(), 4);
	}

	/// A buffer holding two values and two nones reports a none count of 2.
	#[test]
	fn derive_stats_includes_none_count_when_nullable() {
		let mut buffer = ColumnBuffer::int4_with_capacity(4);
		buffer.push::<i32>(10);
		buffer.push_none();
		buffer.push::<i32>(30);
		buffer.push_none();

		let column = Column::from_canonical(Canonical::from_column_buffer(&buffer).unwrap());
		let stats = CanonicalEncoding::FIXED.derive_stats(&column);

		let expected = Value::Uint8(2);
		assert_eq!(stats.get(Stat::NoneCount), Some(&expected));
	}

	/// The built-in registry holds exactly the encodings listed below.
	#[test]
	fn registry_builtins_registers_all_canonical_and_compressed_encodings() {
		let expected_ids = [
			// canonical encodings
			EncodingId::CANONICAL_BOOL,
			EncodingId::CANONICAL_FIXED,
			EncodingId::CANONICAL_VARLEN,
			EncodingId::CANONICAL_BIGNUM,
			// compressed stubs
			EncodingId::CONSTANT,
			EncodingId::ALL_NONE,
			EncodingId::DICT,
			EncodingId::RLE,
			EncodingId::DELTA,
			EncodingId::DELTA_RLE,
			EncodingId::FOR,
			EncodingId::BITPACK,
			EncodingId::SPARSE,
		];

		let registry = EncodingRegistry::builtins();
		// 4 canonical + 9 compressed stubs = 13
		assert_eq!(registry.len(), 13);

		for id in expected_ids {
			assert!(registry.get(id).is_some(), "missing encoding {id:?}");
		}
	}
}