reifydb_core/value/container/
utf8.rs

// Copyright (c) reifydb.com 2025
// This file is licensed under the AGPL-3.0-or-later, see license.md file
3
4use std::ops::Deref;
5
6use reifydb_type::Value;
7use serde::{Deserialize, Serialize};
8
9use crate::{BitVec, CowVec};
10
11#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
12pub struct Utf8Container {
13	data: CowVec<String>,
14	bitvec: BitVec,
15}
16
17impl Utf8Container {
18	pub fn new(data: Vec<String>, bitvec: BitVec) -> Self {
19		debug_assert_eq!(data.len(), bitvec.len());
20		Self {
21			data: CowVec::new(data),
22			bitvec,
23		}
24	}
25
26	pub fn with_capacity(capacity: usize) -> Self {
27		Self {
28			data: CowVec::with_capacity(capacity),
29			bitvec: BitVec::with_capacity(capacity),
30		}
31	}
32
33	pub fn from_vec(data: Vec<String>) -> Self {
34		let len = data.len();
35		Self {
36			data: CowVec::new(data),
37			bitvec: BitVec::repeat(len, true),
38		}
39	}
40
41	pub fn len(&self) -> usize {
42		debug_assert_eq!(self.data.len(), self.bitvec.len());
43		self.data.len()
44	}
45
46	pub fn capacity(&self) -> usize {
47		debug_assert!(self.data.capacity() >= self.bitvec.capacity());
48		self.data.capacity().min(self.bitvec.capacity())
49	}
50
51	pub fn is_empty(&self) -> bool {
52		self.data.is_empty()
53	}
54
55	pub fn push(&mut self, value: String) {
56		self.data.push(value);
57		self.bitvec.push(true);
58	}
59
60	pub fn push_undefined(&mut self) {
61		self.data.push(String::new());
62		self.bitvec.push(false);
63	}
64
65	pub fn get(&self, index: usize) -> Option<&String> {
66		if index < self.len() && self.is_defined(index) {
67			self.data.get(index)
68		} else {
69			None
70		}
71	}
72
73	pub fn bitvec(&self) -> &BitVec {
74		&self.bitvec
75	}
76
77	pub fn bitvec_mut(&mut self) -> &mut BitVec {
78		&mut self.bitvec
79	}
80
81	pub fn is_defined(&self, idx: usize) -> bool {
82		idx < self.len() && self.bitvec.get(idx)
83	}
84
85	pub fn is_fully_defined(&self) -> bool {
86		self.bitvec.count_ones() == self.len()
87	}
88
89	pub fn data(&self) -> &CowVec<String> {
90		&self.data
91	}
92
93	pub fn data_mut(&mut self) -> &mut CowVec<String> {
94		&mut self.data
95	}
96
97	pub fn as_string(&self, index: usize) -> String {
98		if index < self.len() && self.is_defined(index) {
99			self.data[index].clone()
100		} else {
101			"Undefined".to_string()
102		}
103	}
104
105	pub fn get_value(&self, index: usize) -> Value {
106		if index < self.len() && self.is_defined(index) {
107			Value::Utf8(self.data[index].clone())
108		} else {
109			Value::Undefined
110		}
111	}
112
113	pub fn extend(&mut self, other: &Self) -> crate::Result<()> {
114		self.data.extend(other.data.iter().cloned());
115		self.bitvec.extend(&other.bitvec);
116		Ok(())
117	}
118
119	pub fn extend_from_undefined(&mut self, len: usize) {
120		self.data.extend(std::iter::repeat(String::new()).take(len));
121		self.bitvec.extend(&BitVec::repeat(len, false));
122	}
123
124	pub fn iter(&self) -> impl Iterator<Item = Option<&String>> + '_ {
125		self.data.iter().zip(self.bitvec.iter()).map(|(v, defined)| {
126			if defined {
127				Some(v)
128			} else {
129				None
130			}
131		})
132	}
133
134	pub fn slice(&self, start: usize, end: usize) -> Self {
135		let new_data: Vec<String> = self.data.iter().skip(start).take(end - start).cloned().collect();
136		let new_bitvec: Vec<bool> = self.bitvec.iter().skip(start).take(end - start).collect();
137		Self {
138			data: CowVec::new(new_data),
139			bitvec: BitVec::from_slice(&new_bitvec),
140		}
141	}
142
143	pub fn filter(&mut self, mask: &BitVec) {
144		let mut new_data = Vec::with_capacity(mask.count_ones());
145		let mut new_bitvec = BitVec::with_capacity(mask.count_ones());
146
147		for (i, keep) in mask.iter().enumerate() {
148			if keep && i < self.len() {
149				new_data.push(self.data[i].clone());
150				new_bitvec.push(self.bitvec.get(i));
151			}
152		}
153
154		self.data = CowVec::new(new_data);
155		self.bitvec = new_bitvec;
156	}
157
158	pub fn reorder(&mut self, indices: &[usize]) {
159		let mut new_data = Vec::with_capacity(indices.len());
160		let mut new_bitvec = BitVec::with_capacity(indices.len());
161
162		for &idx in indices {
163			if idx < self.len() {
164				new_data.push(self.data[idx].clone());
165				new_bitvec.push(self.bitvec.get(idx));
166			} else {
167				new_data.push(String::new());
168				new_bitvec.push(false);
169			}
170		}
171
172		self.data = CowVec::new(new_data);
173		self.bitvec = new_bitvec;
174	}
175
176	pub fn take(&self, num: usize) -> Self {
177		Self {
178			data: self.data.take(num),
179			bitvec: self.bitvec.take(num),
180		}
181	}
182}
183
184impl Deref for Utf8Container {
185	type Target = [String];
186
187	fn deref(&self) -> &Self::Target {
188		self.data.as_slice()
189	}
190}
191
192impl Default for Utf8Container {
193	fn default() -> Self {
194		Self::with_capacity(0)
195	}
196}
197
#[cfg(test)]
mod tests {
	use super::*;
	use crate::BitVec;

	/// Builds an owned `String` from a literal.
	fn s(text: &str) -> String {
		String::from(text)
	}

	/// Builds a fully defined container from string literals.
	fn container_of(items: &[&str]) -> Utf8Container {
		Utf8Container::from_vec(items.iter().map(|item| s(item)).collect())
	}

	/// Asserts the container length and the result of `get` for every slot.
	fn assert_contents(container: &Utf8Container, expected: &[Option<&str>]) {
		assert_eq!(container.len(), expected.len());
		for (index, want) in expected.iter().enumerate() {
			assert_eq!(container.get(index).map(String::as_str), *want);
		}
	}

	#[test]
	fn test_new() {
		let values = vec![s("hello"), s("world"), s("test")];
		let defined = BitVec::from_slice(&[true, true, true]);
		let container = Utf8Container::new(values, defined);

		assert_contents(&container, &[Some("hello"), Some("world"), Some("test")]);
	}

	#[test]
	fn test_from_vec() {
		let container = container_of(&["foo", "bar", "baz"]);

		assert_contents(&container, &[Some("foo"), Some("bar"), Some("baz")]);

		// Every slot created by `from_vec` must be defined.
		assert!((0..3).all(|i| container.is_defined(i)));
	}

	#[test]
	fn test_with_capacity() {
		let container = Utf8Container::with_capacity(10);

		assert_eq!(container.len(), 0);
		assert!(container.is_empty());
		assert!(container.capacity() >= 10);
	}

	#[test]
	fn test_push() {
		let mut container = Utf8Container::with_capacity(3);
		container.push(s("first"));
		container.push(s("second"));
		container.push_undefined();

		// The last slot was pushed as undefined.
		assert_contents(&container, &[Some("first"), Some("second"), None]);

		let flags: Vec<bool> = (0..3).map(|i| container.is_defined(i)).collect();
		assert_eq!(flags, [true, true, false]);
	}

	#[test]
	fn test_extend() {
		let mut target = container_of(&["a", "b"]);
		let source = container_of(&["c", "d"]);

		target.extend(&source).unwrap();

		assert_contents(&target, &[Some("a"), Some("b"), Some("c"), Some("d")]);
	}

	#[test]
	fn test_extend_from_undefined() {
		let mut container = container_of(&["test"]);

		container.extend_from_undefined(2);

		// The two appended slots are undefined.
		assert_contents(&container, &[Some("test"), None, None]);
	}

	#[test]
	fn test_iter() {
		// The middle value is flagged as undefined.
		let container =
			Utf8Container::new(vec![s("x"), s("y"), s("z")], BitVec::from_slice(&[true, false, true]));

		let seen: Vec<Option<&str>> = container.iter().map(|slot| slot.map(String::as_str)).collect();

		assert_eq!(seen, [Some("x"), None, Some("z")]);
	}

	#[test]
	fn test_slice() {
		let container = container_of(&["one", "two", "three", "four"]);

		let sliced = container.slice(1, 3);

		assert_contents(&sliced, &[Some("two"), Some("three")]);
	}

	#[test]
	fn test_filter() {
		let mut container = container_of(&["keep", "drop", "keep", "drop"]);
		let mask = BitVec::from_slice(&[true, false, true, false]);

		container.filter(&mask);

		assert_contents(&container, &[Some("keep"), Some("keep")]);
	}

	#[test]
	fn test_reorder() {
		let mut container = container_of(&["first", "second", "third"]);

		container.reorder(&[2, 0, 1]);

		// New order is old indices 2, 0, 1.
		assert_contents(&container, &[Some("third"), Some("first"), Some("second")]);
	}

	#[test]
	fn test_reorder_with_out_of_bounds() {
		let mut container = container_of(&["a", "b"]);

		// Index 5 is out of bounds and must become an undefined slot.
		container.reorder(&[1, 5, 0]);

		assert_contents(&container, &[Some("b"), None, Some("a")]);
	}

	#[test]
	fn test_empty_strings() {
		let mut container = Utf8Container::with_capacity(2);
		container.push(String::new()); // a defined, empty string
		container.push_undefined();

		// An empty string is still a defined value, unlike the undefined slot.
		assert_contents(&container, &[Some(""), None]);

		assert!(container.is_defined(0));
		assert!(!container.is_defined(1));
	}

	#[test]
	fn test_default() {
		let container = Utf8Container::default();

		assert_eq!(container.len(), 0);
		assert!(container.is_empty());
	}
}