Skip to main content

reifydb_engine/arena/
convert.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright (c) 2025 ReifyDB
3
4use std::fmt::Debug;
5
6use bumpalo::Bump as BumpAlloc;
7use reifydb_core::value::column::{ColumnWithName, buffer::ColumnBuffer};
8use reifydb_type::{
9	storage::{Cow, DataBitVec, DataVec, Storage},
10	util::{bitvec::BitVec, cowvec::CowVec},
11	value::{
12		Value,
13		container::{
14			any::AnyContainer, blob::BlobContainer, bool::BoolContainer, dictionary::DictionaryContainer,
15			identity_id::IdentityIdContainer, number::NumberContainer, temporal::TemporalContainer,
16			utf8::Utf8Container, uuid::UuidContainer,
17		},
18		dictionary::DictionaryEntryId,
19		identity::IdentityId,
20		is::{IsNumber, IsTemporal, IsUuid},
21	},
22};
23
24use super::{Bump, BumpBitVec, BumpVec};
25
26fn bitvec_to_cow<S: Storage>(src: &S::BitVec) -> BitVec {
27	let len = DataBitVec::len(src);
28	let mut dst = BitVec::with_capacity(len);
29	for i in 0..len {
30		dst.push(DataBitVec::get(src, i));
31	}
32	dst
33}
34
35fn bitvec_to_bump<'bump, S: Storage>(src: &S::BitVec, bump: &'bump BumpAlloc) -> BumpBitVec<'bump> {
36	let len = DataBitVec::len(src);
37	let mut dst = BumpBitVec::with_capacity_in(len, bump);
38	for i in 0..len {
39		DataBitVec::push(&mut dst, DataBitVec::get(src, i));
40	}
41	dst
42}
43
44fn vec_to_cow<T: Clone + PartialEq + 'static, S: Storage>(src: &S::Vec<T>) -> CowVec<T> {
45	let mut dst = CowVec::with_capacity(DataVec::len(src));
46	dst.extend_from_slice(DataVec::as_slice(src));
47	dst
48}
49
50fn vec_to_bump<'bump, T: Clone + PartialEq + 'static, S: Storage>(
51	src: &S::Vec<T>,
52	bump: &'bump BumpAlloc,
53) -> BumpVec<'bump, T> {
54	let mut dst = BumpVec::with_capacity_in(DataVec::len(src), bump);
55	DataVec::extend_from_slice(&mut dst, DataVec::as_slice(src));
56	dst
57}
58
59fn number_to_cow<T: IsNumber + Clone + Debug + Default, S: Storage>(
60	src: &NumberContainer<T, S>,
61) -> NumberContainer<T, Cow> {
62	NumberContainer::from_parts(vec_to_cow::<T, S>(src.data()))
63}
64
65fn number_to_bump<'bump, T: IsNumber + Clone + Debug + Default, S: Storage>(
66	src: &NumberContainer<T, S>,
67	bump: &'bump BumpAlloc,
68) -> NumberContainer<T, Bump<'bump>> {
69	NumberContainer::from_parts(vec_to_bump::<T, S>(src.data(), bump))
70}
71
72fn bool_to_cow<S: Storage>(src: &BoolContainer<S>) -> BoolContainer<Cow> {
73	BoolContainer::from_parts(bitvec_to_cow::<S>(src.data()))
74}
75
76fn bool_to_bump<'bump, S: Storage>(src: &BoolContainer<S>, bump: &'bump BumpAlloc) -> BoolContainer<Bump<'bump>> {
77	BoolContainer::from_parts(bitvec_to_bump::<S>(src.data(), bump))
78}
79
80fn temporal_to_cow<T: IsTemporal + Clone + Debug + Default, S: Storage>(
81	src: &TemporalContainer<T, S>,
82) -> TemporalContainer<T, Cow> {
83	TemporalContainer::from_parts(vec_to_cow::<T, S>(src.data()))
84}
85
86fn temporal_to_bump<'bump, T: IsTemporal + Clone + Debug + Default, S: Storage>(
87	src: &TemporalContainer<T, S>,
88	bump: &'bump BumpAlloc,
89) -> TemporalContainer<T, Bump<'bump>> {
90	TemporalContainer::from_parts(vec_to_bump::<T, S>(src.data(), bump))
91}
92
93fn uuid_to_cow<T: IsUuid + Clone + Debug + Default, S: Storage>(src: &UuidContainer<T, S>) -> UuidContainer<T, Cow> {
94	UuidContainer::from_parts(vec_to_cow::<T, S>(src.data()))
95}
96
97fn uuid_to_bump<'bump, T: IsUuid + Clone + Debug + Default, S: Storage>(
98	src: &UuidContainer<T, S>,
99	bump: &'bump BumpAlloc,
100) -> UuidContainer<T, Bump<'bump>> {
101	UuidContainer::from_parts(vec_to_bump::<T, S>(src.data(), bump))
102}
103
104fn utf8_to_cow<S: Storage>(src: &Utf8Container<S>) -> Utf8Container<Cow> {
105	// Copy bytes + offsets into Cow storage. Layout is preserved exactly,
106	// so reading the result via the FFI marshal path is still zero-copy
107	// against the Cow buffers.
108	let data = vec_to_cow::<u8, S>(src.data_storage());
109	let offsets = vec_to_cow::<u64, S>(src.offsets_storage());
110	Utf8Container::from_storage_parts(data, offsets)
111}
112
113fn utf8_to_bump<'bump, S: Storage>(src: &Utf8Container<S>, bump: &'bump BumpAlloc) -> Utf8Container<Bump<'bump>> {
114	// Bump-storage variant: copy bytes + offsets into bump-allocated vecs.
115	let data = vec_to_bump::<u8, S>(src.data_storage(), bump);
116	let offsets = vec_to_bump::<u64, S>(src.offsets_storage(), bump);
117	Utf8Container::from_storage_parts(data, offsets)
118}
119
120fn blob_to_cow<S: Storage>(src: &BlobContainer<S>) -> BlobContainer<Cow> {
121	let data = vec_to_cow::<u8, S>(src.data_storage());
122	let offsets = vec_to_cow::<u64, S>(src.offsets_storage());
123	BlobContainer::from_storage_parts(data, offsets)
124}
125
126fn blob_to_bump<'bump, S: Storage>(src: &BlobContainer<S>, bump: &'bump BumpAlloc) -> BlobContainer<Bump<'bump>> {
127	let data = vec_to_bump::<u8, S>(src.data_storage(), bump);
128	let offsets = vec_to_bump::<u64, S>(src.offsets_storage(), bump);
129	BlobContainer::from_storage_parts(data, offsets)
130}
131
132fn identity_id_to_cow<S: Storage>(src: &IdentityIdContainer<S>) -> IdentityIdContainer<Cow> {
133	IdentityIdContainer::from_parts(vec_to_cow::<IdentityId, S>(src.data()))
134}
135
136fn identity_id_to_bump<'bump, S: Storage>(
137	src: &IdentityIdContainer<S>,
138	bump: &'bump BumpAlloc,
139) -> IdentityIdContainer<Bump<'bump>> {
140	IdentityIdContainer::from_parts(vec_to_bump::<IdentityId, S>(src.data(), bump))
141}
142
143fn any_to_cow<S: Storage>(src: &AnyContainer<S>) -> AnyContainer<Cow> {
144	AnyContainer::from_parts(vec_to_cow::<Box<Value>, S>(src.data()))
145}
146
147fn any_to_bump<'bump, S: Storage>(src: &AnyContainer<S>, bump: &'bump BumpAlloc) -> AnyContainer<Bump<'bump>> {
148	AnyContainer::from_parts(vec_to_bump::<Box<Value>, S>(src.data(), bump))
149}
150
151fn dictionary_to_cow<S: Storage>(src: &DictionaryContainer<S>) -> DictionaryContainer<Cow> {
152	DictionaryContainer::from_parts(vec_to_cow::<DictionaryEntryId, S>(src.data()), src.dictionary_id())
153}
154
155fn dictionary_to_bump<'bump, S: Storage>(
156	src: &DictionaryContainer<S>,
157	bump: &'bump BumpAlloc,
158) -> DictionaryContainer<Bump<'bump>> {
159	DictionaryContainer::from_parts(vec_to_bump::<DictionaryEntryId, S>(src.data(), bump), src.dictionary_id())
160}
161
162pub fn column_data_to_cow<S: Storage>(src: &ColumnBuffer<S>) -> ColumnBuffer<Cow> {
163	match src {
164		ColumnBuffer::Bool(c) => ColumnBuffer::Bool(bool_to_cow(c)),
165		ColumnBuffer::Float4(c) => ColumnBuffer::Float4(number_to_cow(c)),
166		ColumnBuffer::Float8(c) => ColumnBuffer::Float8(number_to_cow(c)),
167		ColumnBuffer::Int1(c) => ColumnBuffer::Int1(number_to_cow(c)),
168		ColumnBuffer::Int2(c) => ColumnBuffer::Int2(number_to_cow(c)),
169		ColumnBuffer::Int4(c) => ColumnBuffer::Int4(number_to_cow(c)),
170		ColumnBuffer::Int8(c) => ColumnBuffer::Int8(number_to_cow(c)),
171		ColumnBuffer::Int16(c) => ColumnBuffer::Int16(number_to_cow(c)),
172		ColumnBuffer::Uint1(c) => ColumnBuffer::Uint1(number_to_cow(c)),
173		ColumnBuffer::Uint2(c) => ColumnBuffer::Uint2(number_to_cow(c)),
174		ColumnBuffer::Uint4(c) => ColumnBuffer::Uint4(number_to_cow(c)),
175		ColumnBuffer::Uint8(c) => ColumnBuffer::Uint8(number_to_cow(c)),
176		ColumnBuffer::Uint16(c) => ColumnBuffer::Uint16(number_to_cow(c)),
177		ColumnBuffer::Utf8 {
178			container,
179			max_bytes,
180		} => ColumnBuffer::Utf8 {
181			container: utf8_to_cow(container),
182			max_bytes: *max_bytes,
183		},
184		ColumnBuffer::Date(c) => ColumnBuffer::Date(temporal_to_cow(c)),
185		ColumnBuffer::DateTime(c) => ColumnBuffer::DateTime(temporal_to_cow(c)),
186		ColumnBuffer::Time(c) => ColumnBuffer::Time(temporal_to_cow(c)),
187		ColumnBuffer::Duration(c) => ColumnBuffer::Duration(temporal_to_cow(c)),
188		ColumnBuffer::IdentityId(c) => ColumnBuffer::IdentityId(identity_id_to_cow(c)),
189		ColumnBuffer::Uuid4(c) => ColumnBuffer::Uuid4(uuid_to_cow(c)),
190		ColumnBuffer::Uuid7(c) => ColumnBuffer::Uuid7(uuid_to_cow(c)),
191		ColumnBuffer::Blob {
192			container,
193			max_bytes,
194		} => ColumnBuffer::Blob {
195			container: blob_to_cow(container),
196			max_bytes: *max_bytes,
197		},
198		ColumnBuffer::Int {
199			container,
200			max_bytes,
201		} => ColumnBuffer::Int {
202			container: number_to_cow(container),
203			max_bytes: *max_bytes,
204		},
205		ColumnBuffer::Uint {
206			container,
207			max_bytes,
208		} => ColumnBuffer::Uint {
209			container: number_to_cow(container),
210			max_bytes: *max_bytes,
211		},
212		ColumnBuffer::Decimal {
213			container,
214			precision,
215			scale,
216		} => ColumnBuffer::Decimal {
217			container: number_to_cow(container),
218			precision: *precision,
219			scale: *scale,
220		},
221		ColumnBuffer::Any(c) => ColumnBuffer::Any(any_to_cow(c)),
222		ColumnBuffer::DictionaryId(c) => ColumnBuffer::DictionaryId(dictionary_to_cow(c)),
223		ColumnBuffer::Option {
224			inner,
225			bitvec,
226		} => ColumnBuffer::Option {
227			inner: Box::new(column_data_to_cow(inner)),
228			bitvec: bitvec_to_cow::<S>(bitvec),
229		},
230	}
231}
232
233pub fn column_data_to_bump<'bump, S: Storage>(
234	src: &ColumnBuffer<S>,
235	bump: &'bump BumpAlloc,
236) -> ColumnBuffer<Bump<'bump>> {
237	match src {
238		ColumnBuffer::Bool(c) => ColumnBuffer::Bool(bool_to_bump(c, bump)),
239		ColumnBuffer::Float4(c) => ColumnBuffer::Float4(number_to_bump(c, bump)),
240		ColumnBuffer::Float8(c) => ColumnBuffer::Float8(number_to_bump(c, bump)),
241		ColumnBuffer::Int1(c) => ColumnBuffer::Int1(number_to_bump(c, bump)),
242		ColumnBuffer::Int2(c) => ColumnBuffer::Int2(number_to_bump(c, bump)),
243		ColumnBuffer::Int4(c) => ColumnBuffer::Int4(number_to_bump(c, bump)),
244		ColumnBuffer::Int8(c) => ColumnBuffer::Int8(number_to_bump(c, bump)),
245		ColumnBuffer::Int16(c) => ColumnBuffer::Int16(number_to_bump(c, bump)),
246		ColumnBuffer::Uint1(c) => ColumnBuffer::Uint1(number_to_bump(c, bump)),
247		ColumnBuffer::Uint2(c) => ColumnBuffer::Uint2(number_to_bump(c, bump)),
248		ColumnBuffer::Uint4(c) => ColumnBuffer::Uint4(number_to_bump(c, bump)),
249		ColumnBuffer::Uint8(c) => ColumnBuffer::Uint8(number_to_bump(c, bump)),
250		ColumnBuffer::Uint16(c) => ColumnBuffer::Uint16(number_to_bump(c, bump)),
251		ColumnBuffer::Utf8 {
252			container,
253			max_bytes,
254		} => ColumnBuffer::Utf8 {
255			container: utf8_to_bump(container, bump),
256			max_bytes: *max_bytes,
257		},
258		ColumnBuffer::Date(c) => ColumnBuffer::Date(temporal_to_bump(c, bump)),
259		ColumnBuffer::DateTime(c) => ColumnBuffer::DateTime(temporal_to_bump(c, bump)),
260		ColumnBuffer::Time(c) => ColumnBuffer::Time(temporal_to_bump(c, bump)),
261		ColumnBuffer::Duration(c) => ColumnBuffer::Duration(temporal_to_bump(c, bump)),
262		ColumnBuffer::IdentityId(c) => ColumnBuffer::IdentityId(identity_id_to_bump(c, bump)),
263		ColumnBuffer::Uuid4(c) => ColumnBuffer::Uuid4(uuid_to_bump(c, bump)),
264		ColumnBuffer::Uuid7(c) => ColumnBuffer::Uuid7(uuid_to_bump(c, bump)),
265		ColumnBuffer::Blob {
266			container,
267			max_bytes,
268		} => ColumnBuffer::Blob {
269			container: blob_to_bump(container, bump),
270			max_bytes: *max_bytes,
271		},
272		ColumnBuffer::Int {
273			container,
274			max_bytes,
275		} => ColumnBuffer::Int {
276			container: number_to_bump(container, bump),
277			max_bytes: *max_bytes,
278		},
279		ColumnBuffer::Uint {
280			container,
281			max_bytes,
282		} => ColumnBuffer::Uint {
283			container: number_to_bump(container, bump),
284			max_bytes: *max_bytes,
285		},
286		ColumnBuffer::Decimal {
287			container,
288			precision,
289			scale,
290		} => ColumnBuffer::Decimal {
291			container: number_to_bump(container, bump),
292			precision: *precision,
293			scale: *scale,
294		},
295		ColumnBuffer::Any(c) => ColumnBuffer::Any(any_to_bump(c, bump)),
296		ColumnBuffer::DictionaryId(c) => ColumnBuffer::DictionaryId(dictionary_to_bump(c, bump)),
297		ColumnBuffer::Option {
298			inner,
299			bitvec,
300		} => ColumnBuffer::Option {
301			inner: Box::new(column_data_to_bump(inner, bump)),
302			bitvec: bitvec_to_bump::<S>(bitvec, bump),
303		},
304	}
305}
306
307pub fn column_to_cow(src: &ColumnWithName) -> ColumnWithName {
308	ColumnWithName::new(src.name().clone(), column_data_to_cow::<Cow>(src.data()))
309}
310
311pub fn column_to_bump(src: &ColumnWithName, _bump: &BumpAlloc) -> ColumnWithName {
312	// Column no longer carries a storage generic; this helper stays
313	// as a Cow-returning alias during the Phase 6 migration.
314	ColumnWithName::new(src.name().clone(), column_data_to_cow::<Cow>(src.data()))
315}
316
#[cfg(test)]
mod tests {
	use reifydb_core::value::column::ColumnWithName;
	use reifydb_type::value::r#type::Type;

	use super::*;

	/// Copies `original` into a fresh bump arena and then back to Cow
	/// storage, returning the Cow result.
	fn through_bump_and_back(original: &ColumnBuffer<Cow>) -> ColumnBuffer<Cow> {
		let arena = BumpAlloc::new();
		let in_arena = column_data_to_bump::<Cow>(original, &arena);
		let back = column_data_to_cow::<Bump>(&in_arena);
		back
	}

	#[test]
	fn test_column_data_cow_roundtrip() {
		let expected = ColumnBuffer::int4(vec![10, 20, 30]);
		let arena = BumpAlloc::new();

		// Cow -> Bump: the intermediate buffer must keep all three rows.
		let in_arena = column_data_to_bump::<Cow>(&expected, &arena);
		assert_eq!(in_arena.len(), 3);

		// Bump -> Cow: the result must equal what we started with.
		let back = column_data_to_cow::<Bump>(&in_arena);
		assert_eq!(back, expected);
	}

	#[test]
	fn test_column_data_bool_roundtrip() {
		let expected = ColumnBuffer::bool(vec![true, false, true]);
		assert_eq!(through_bump_and_back(&expected), expected);
	}

	#[test]
	fn test_column_data_utf8_roundtrip() {
		let expected = ColumnBuffer::utf8(vec![String::from("hello"), String::from("world")]);
		assert_eq!(through_bump_and_back(&expected), expected);
	}

	#[test]
	fn test_column_data_float8_roundtrip() {
		let expected = ColumnBuffer::float8(vec![1.5, 2.7, 3.9]);
		assert_eq!(through_bump_and_back(&expected), expected);
	}

	#[test]
	fn test_column_data_none_roundtrip() {
		let expected = ColumnBuffer::none_typed(Type::Boolean, 5);
		assert_eq!(through_bump_and_back(&expected), expected);
	}

	#[test]
	fn test_column_roundtrip() {
		let expected = ColumnWithName::int4("age", vec![25, 30, 35]);
		let arena = BumpAlloc::new();

		let via_bump = column_to_bump(&expected, &arena);
		let back = column_to_cow(&via_bump);
		assert_eq!(back, expected);
	}
}