reifydb_type/value/container/
utf8.rs

// Copyright (c) reifydb.com 2025
// This file is licensed under the MIT, see license.md file
3
4use std::ops::Deref;
5
6use serde::{Deserialize, Serialize};
7
8use crate::{BitVec, CowVec, Value};
9
10#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
11pub struct Utf8Container {
12	data: CowVec<String>,
13	bitvec: BitVec,
14}
15
16impl Utf8Container {
17	pub fn new(data: Vec<String>, bitvec: BitVec) -> Self {
18		debug_assert_eq!(data.len(), bitvec.len());
19		Self {
20			data: CowVec::new(data),
21			bitvec,
22		}
23	}
24
25	pub fn with_capacity(capacity: usize) -> Self {
26		Self {
27			data: CowVec::with_capacity(capacity),
28			bitvec: BitVec::with_capacity(capacity),
29		}
30	}
31
32	pub fn from_vec(data: Vec<String>) -> Self {
33		let len = data.len();
34		Self {
35			data: CowVec::new(data),
36			bitvec: BitVec::repeat(len, true),
37		}
38	}
39
40	pub fn len(&self) -> usize {
41		debug_assert_eq!(self.data.len(), self.bitvec.len());
42		self.data.len()
43	}
44
45	pub fn capacity(&self) -> usize {
46		debug_assert!(self.data.capacity() >= self.bitvec.capacity());
47		self.data.capacity().min(self.bitvec.capacity())
48	}
49
50	pub fn is_empty(&self) -> bool {
51		self.data.is_empty()
52	}
53
54	pub fn push(&mut self, value: String) {
55		self.data.push(value);
56		self.bitvec.push(true);
57	}
58
59	pub fn push_undefined(&mut self) {
60		self.data.push(String::new());
61		self.bitvec.push(false);
62	}
63
64	pub fn get(&self, index: usize) -> Option<&String> {
65		if index < self.len() && self.is_defined(index) {
66			self.data.get(index)
67		} else {
68			None
69		}
70	}
71
72	pub fn bitvec(&self) -> &BitVec {
73		&self.bitvec
74	}
75
76	pub fn bitvec_mut(&mut self) -> &mut BitVec {
77		&mut self.bitvec
78	}
79
80	pub fn is_defined(&self, idx: usize) -> bool {
81		idx < self.len() && self.bitvec.get(idx)
82	}
83
84	pub fn is_fully_defined(&self) -> bool {
85		self.bitvec.count_ones() == self.len()
86	}
87
88	pub fn data(&self) -> &CowVec<String> {
89		&self.data
90	}
91
92	pub fn data_mut(&mut self) -> &mut CowVec<String> {
93		&mut self.data
94	}
95
96	pub fn as_string(&self, index: usize) -> String {
97		if index < self.len() && self.is_defined(index) {
98			self.data[index].clone()
99		} else {
100			"Undefined".to_string()
101		}
102	}
103
104	pub fn get_value(&self, index: usize) -> Value {
105		if index < self.len() && self.is_defined(index) {
106			Value::Utf8(self.data[index].clone())
107		} else {
108			Value::Undefined
109		}
110	}
111
112	pub fn extend(&mut self, other: &Self) -> crate::Result<()> {
113		self.data.extend(other.data.iter().cloned());
114		self.bitvec.extend(&other.bitvec);
115		Ok(())
116	}
117
118	pub fn extend_from_undefined(&mut self, len: usize) {
119		self.data.extend(std::iter::repeat(String::new()).take(len));
120		self.bitvec.extend(&BitVec::repeat(len, false));
121	}
122
123	pub fn iter(&self) -> impl Iterator<Item = Option<&String>> + '_ {
124		self.data.iter().zip(self.bitvec.iter()).map(|(v, defined)| {
125			if defined {
126				Some(v)
127			} else {
128				None
129			}
130		})
131	}
132
133	pub fn slice(&self, start: usize, end: usize) -> Self {
134		let new_data: Vec<String> = self.data.iter().skip(start).take(end - start).cloned().collect();
135		let new_bitvec: Vec<bool> = self.bitvec.iter().skip(start).take(end - start).collect();
136		Self {
137			data: CowVec::new(new_data),
138			bitvec: BitVec::from_slice(&new_bitvec),
139		}
140	}
141
142	pub fn filter(&mut self, mask: &BitVec) {
143		let mut new_data = Vec::with_capacity(mask.count_ones());
144		let mut new_bitvec = BitVec::with_capacity(mask.count_ones());
145
146		for (i, keep) in mask.iter().enumerate() {
147			if keep && i < self.len() {
148				new_data.push(self.data[i].clone());
149				new_bitvec.push(self.bitvec.get(i));
150			}
151		}
152
153		self.data = CowVec::new(new_data);
154		self.bitvec = new_bitvec;
155	}
156
157	pub fn reorder(&mut self, indices: &[usize]) {
158		let mut new_data = Vec::with_capacity(indices.len());
159		let mut new_bitvec = BitVec::with_capacity(indices.len());
160
161		for &idx in indices {
162			if idx < self.len() {
163				new_data.push(self.data[idx].clone());
164				new_bitvec.push(self.bitvec.get(idx));
165			} else {
166				new_data.push(String::new());
167				new_bitvec.push(false);
168			}
169		}
170
171		self.data = CowVec::new(new_data);
172		self.bitvec = new_bitvec;
173	}
174
175	pub fn take(&self, num: usize) -> Self {
176		Self {
177			data: self.data.take(num),
178			bitvec: self.bitvec.take(num),
179		}
180	}
181}
182
183impl Deref for Utf8Container {
184	type Target = [String];
185
186	fn deref(&self) -> &Self::Target {
187		self.data.as_slice()
188	}
189}
190
191impl Default for Utf8Container {
192	fn default() -> Self {
193		Self::with_capacity(0)
194	}
195}
196
#[cfg(test)]
mod tests {
	use super::*;
	use crate::BitVec;

	/// Turns string literals into the owned `String`s the container stores.
	fn owned(items: &[&str]) -> Vec<String> {
		items.iter().map(|item| item.to_string()).collect()
	}

	/// Reads slot `index` through `get`, borrowed as `&str` for terse comparisons.
	fn text(container: &Utf8Container, index: usize) -> Option<&str> {
		container.get(index).map(String::as_str)
	}

	#[test]
	fn test_new() {
		let flags = BitVec::from_slice(&[true, true, true]);
		let container = Utf8Container::new(owned(&["hello", "world", "test"]), flags);

		assert_eq!(container.len(), 3);
		assert_eq!(text(&container, 0), Some("hello"));
		assert_eq!(text(&container, 1), Some("world"));
		assert_eq!(text(&container, 2), Some("test"));
	}

	#[test]
	fn test_from_vec() {
		let container = Utf8Container::from_vec(owned(&["foo", "bar", "baz"]));

		assert_eq!(container.len(), 3);
		assert_eq!(text(&container, 0), Some("foo"));
		assert_eq!(text(&container, 1), Some("bar"));
		assert_eq!(text(&container, 2), Some("baz"));

		// `from_vec` marks every slot as defined.
		assert!((0..3).all(|i| container.is_defined(i)));
	}

	#[test]
	fn test_with_capacity() {
		let container = Utf8Container::with_capacity(10);

		assert!(container.is_empty());
		assert_eq!(container.len(), 0);
		assert!(container.capacity() >= 10);
	}

	#[test]
	fn test_push() {
		let mut container = Utf8Container::with_capacity(3);
		for word in ["first", "second"] {
			container.push(word.to_string());
		}
		container.push_undefined();

		assert_eq!(container.len(), 3);
		assert_eq!(text(&container, 0), Some("first"));
		assert_eq!(text(&container, 1), Some("second"));
		// The slot added by `push_undefined` reads back as `None`.
		assert_eq!(text(&container, 2), None);

		assert!(container.is_defined(0));
		assert!(container.is_defined(1));
		assert!(!container.is_defined(2));
	}

	#[test]
	fn test_extend() {
		let mut target = Utf8Container::from_vec(owned(&["a", "b"]));
		let addition = Utf8Container::from_vec(owned(&["c", "d"]));

		target.extend(&addition).unwrap();

		assert_eq!(target.len(), 4);
		for (index, expected) in ["a", "b", "c", "d"].into_iter().enumerate() {
			assert_eq!(text(&target, index), Some(expected));
		}
	}

	#[test]
	fn test_extend_from_undefined() {
		let mut container = Utf8Container::from_vec(owned(&["test"]));
		container.extend_from_undefined(2);

		assert_eq!(container.len(), 3);
		assert_eq!(text(&container, 0), Some("test"));
		// Both appended slots are undefined.
		assert_eq!(text(&container, 1), None);
		assert_eq!(text(&container, 2), None);
	}

	#[test]
	fn test_iter() {
		// The middle slot is flagged as undefined.
		let flags = BitVec::from_slice(&[true, false, true]);
		let container = Utf8Container::new(owned(&["x", "y", "z"]), flags);

		let seen: Vec<Option<&str>> = container.iter().map(|slot| slot.map(String::as_str)).collect();
		assert_eq!(seen, vec![Some("x"), None, Some("z")]);
	}

	#[test]
	fn test_slice() {
		let container = Utf8Container::from_vec(owned(&["one", "two", "three", "four"]));

		let sliced = container.slice(1, 3);

		assert_eq!(sliced.len(), 2);
		assert_eq!(text(&sliced, 0), Some("two"));
		assert_eq!(text(&sliced, 1), Some("three"));
	}

	#[test]
	fn test_filter() {
		let mut container = Utf8Container::from_vec(owned(&["keep", "drop", "keep", "drop"]));
		let mask = BitVec::from_slice(&[true, false, true, false]);

		container.filter(&mask);

		assert_eq!(container.len(), 2);
		assert_eq!(text(&container, 0), Some("keep"));
		assert_eq!(text(&container, 1), Some("keep"));
	}

	#[test]
	fn test_reorder() {
		let mut container = Utf8Container::from_vec(owned(&["first", "second", "third"]));

		container.reorder(&[2, 0, 1]);

		assert_eq!(container.len(), 3);
		assert_eq!(text(&container, 0), Some("third")); // formerly index 2
		assert_eq!(text(&container, 1), Some("first")); // formerly index 0
		assert_eq!(text(&container, 2), Some("second")); // formerly index 1
	}

	#[test]
	fn test_reorder_with_out_of_bounds() {
		let mut container = Utf8Container::from_vec(owned(&["a", "b"]));

		// Index 5 does not exist in a two-slot container.
		container.reorder(&[1, 5, 0]);

		assert_eq!(container.len(), 3);
		assert_eq!(text(&container, 0), Some("b")); // formerly index 1
		assert_eq!(text(&container, 1), None); // out of bounds becomes undefined
		assert_eq!(text(&container, 2), Some("a")); // formerly index 0
	}

	#[test]
	fn test_empty_strings() {
		let mut container = Utf8Container::with_capacity(2);
		// A pushed empty string is a defined value, unlike an undefined slot.
		container.push(String::new());
		container.push_undefined();

		assert_eq!(container.len(), 2);
		assert_eq!(text(&container, 0), Some(""));
		assert_eq!(text(&container, 1), None);

		assert!(container.is_defined(0));
		assert!(!container.is_defined(1));
	}

	#[test]
	fn test_default() {
		let container = Utf8Container::default();

		assert!(container.is_empty());
		assert_eq!(container.len(), 0);
	}
}