Skip to main content

reifydb_column/encoding/
mod.rs

// SPDX-License-Identifier: Apache-2.0
// Copyright (c) 2025 ReifyDB
3
//! Per-column encoding implementations. Canonical is the dense unencoded layout; the compressed family covers
//! all-none, bit-packed, constant, delta, delta-RLE, dictionary, frame-of-reference, run-length, and sparse forms.
//! Each encoding produces and consumes the same encoded-bytes contract so compute kernels can be written once and
//! work across encodings.
//!
//! Picking an encoding for a write batch is a heuristic decision driven by the column's statistics; choosing
//! poorly does not change correctness, only space and read-time cost.
11
12pub mod canonical;
13pub mod compressed;
14
15use std::{
16	collections::HashMap,
17	sync::{Arc, OnceLock},
18};
19
20use canonical::CanonicalEncoding;
21use compressed::{
22	AllNoneEncoding, BitPackEncoding, ConstantEncoding, DeltaEncoding, DeltaRleEncoding, DictEncoding, ForEncoding,
23	RleEncoding, SparseEncoding,
24};
25use reifydb_core::value::column::{
26	data::{Column, canonical::Canonical},
27	encoding::EncodingId,
28	stats::StatsSet,
29};
30use reifydb_type::Result;
31
32use crate::{
33	compress::CompressConfig,
34	compute::{Compute, DefaultCompute},
35};
36
37pub trait Encoding: Send + Sync + 'static {
38	fn id(&self) -> EncodingId;
39
40	fn try_compress(&self, input: &Canonical, cfg: &CompressConfig) -> Result<Option<Column>>;
41
42	fn canonicalize(&self, array: &Column) -> Result<Canonical>;
43
44	fn compute(&self) -> &dyn Compute {
45		&DefaultCompute
46	}
47
48	fn derive_stats(&self, _array: &Column) -> StatsSet {
49		StatsSet::new()
50	}
51}
52
53pub struct EncodingRegistry {
54	encodings: HashMap<EncodingId, Arc<dyn Encoding>>,
55}
56
57impl EncodingRegistry {
58	pub fn empty() -> Self {
59		Self {
60			encodings: HashMap::new(),
61		}
62	}
63
64	pub fn register(&mut self, encoding: Arc<dyn Encoding>) {
65		self.encodings.insert(encoding.id(), encoding);
66	}
67
68	pub fn get(&self, id: EncodingId) -> Option<&Arc<dyn Encoding>> {
69		self.encodings.get(&id)
70	}
71
72	pub fn len(&self) -> usize {
73		self.encodings.len()
74	}
75
76	pub fn is_empty(&self) -> bool {
77		self.encodings.is_empty()
78	}
79
80	pub fn builtins() -> Self {
81		let mut r = Self::empty();
82		r.register(Arc::new(CanonicalEncoding::BOOL));
83		r.register(Arc::new(CanonicalEncoding::FIXED));
84		r.register(Arc::new(CanonicalEncoding::VARLEN));
85		r.register(Arc::new(CanonicalEncoding::BIGNUM));
86		r.register(Arc::new(ConstantEncoding));
87		r.register(Arc::new(AllNoneEncoding));
88		r.register(Arc::new(DictEncoding));
89		r.register(Arc::new(RleEncoding));
90		r.register(Arc::new(DeltaEncoding));
91		r.register(Arc::new(DeltaRleEncoding));
92		r.register(Arc::new(ForEncoding));
93		r.register(Arc::new(BitPackEncoding));
94		r.register(Arc::new(SparseEncoding));
95		r
96	}
97}
98
99static GLOBAL: OnceLock<EncodingRegistry> = OnceLock::new();
100
101pub fn global() -> &'static EncodingRegistry {
102	GLOBAL.get_or_init(EncodingRegistry::builtins)
103}