Skip to main content

reifydb_column/encoding/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright (c) 2025 ReifyDB
3
4pub mod canonical;
5pub mod compressed;
6
7use std::{
8	collections::HashMap,
9	sync::{Arc, OnceLock},
10};
11
12use canonical::CanonicalEncoding;
13use compressed::{
14	AllNoneEncoding, BitPackEncoding, ConstantEncoding, DeltaEncoding, DeltaRleEncoding, DictEncoding, ForEncoding,
15	RleEncoding, SparseEncoding,
16};
17use reifydb_core::value::column::{
18	array::{Column, canonical::Canonical},
19	encoding::EncodingId,
20	stats::StatsSet,
21};
22use reifydb_type::Result;
23
24use crate::{
25	compress::CompressConfig,
26	compute::{Compute, DefaultCompute},
27};
28
29// One `Encoding` per concrete encoding id. Compressed encodings will fill in
30// real `try_compress`/`canonicalize` bodies; canonical encodings perform an
31// identity wrap and return their input back.
32pub trait Encoding: Send + Sync + 'static {
33	fn id(&self) -> EncodingId;
34
35	// Try to compress the canonical input into this encoding. `Ok(None)` means
36	// "this encoding doesn't apply to this input" - the compressor will try
37	// the next candidate.
38	fn try_compress(&self, input: &Canonical, cfg: &CompressConfig) -> Result<Option<Column>>;
39
40	// Decode an array of this encoding back to its canonical form. Must be total.
41	fn canonicalize(&self, array: &Column) -> Result<Canonical>;
42
43	fn compute(&self) -> &dyn Compute {
44		&DefaultCompute
45	}
46
47	fn derive_stats(&self, _array: &Column) -> StatsSet {
48		StatsSet::new()
49	}
50}
51
52pub struct EncodingRegistry {
53	encodings: HashMap<EncodingId, Arc<dyn Encoding>>,
54}
55
56impl EncodingRegistry {
57	pub fn empty() -> Self {
58		Self {
59			encodings: HashMap::new(),
60		}
61	}
62
63	pub fn register(&mut self, encoding: Arc<dyn Encoding>) {
64		self.encodings.insert(encoding.id(), encoding);
65	}
66
67	pub fn get(&self, id: EncodingId) -> Option<&Arc<dyn Encoding>> {
68		self.encodings.get(&id)
69	}
70
71	pub fn len(&self) -> usize {
72		self.encodings.len()
73	}
74
75	pub fn is_empty(&self) -> bool {
76		self.encodings.is_empty()
77	}
78
79	pub fn builtins() -> Self {
80		let mut r = Self::empty();
81		r.register(Arc::new(CanonicalEncoding::BOOL));
82		r.register(Arc::new(CanonicalEncoding::FIXED));
83		r.register(Arc::new(CanonicalEncoding::VARLEN));
84		r.register(Arc::new(CanonicalEncoding::BIGNUM));
85		r.register(Arc::new(ConstantEncoding));
86		r.register(Arc::new(AllNoneEncoding));
87		r.register(Arc::new(DictEncoding));
88		r.register(Arc::new(RleEncoding));
89		r.register(Arc::new(DeltaEncoding));
90		r.register(Arc::new(DeltaRleEncoding));
91		r.register(Arc::new(ForEncoding));
92		r.register(Arc::new(BitPackEncoding));
93		r.register(Arc::new(SparseEncoding));
94		r
95	}
96}
97
98// Process-global registry. Built once on first access; subsequent calls
99// return the same reference, which compute dispatch and predicate evaluation
100// consult to route through encoding-specific specializations.
101static GLOBAL: OnceLock<EncodingRegistry> = OnceLock::new();
102
103pub fn global() -> &'static EncodingRegistry {
104	GLOBAL.get_or_init(EncodingRegistry::builtins)
105}