Skip to main content

reifydb_type/value/container/
utf8.rs

1// SPDX-License-Identifier: MIT
2// Copyright (c) 2025 ReifyDB
3
4use std::{
5	fmt::{self, Debug},
6	ops::Deref,
7};
8
9use serde::{Deserialize, Deserializer, Serialize, Serializer};
10
11use crate::{
12	storage::{Cow, DataBitVec, DataVec, Storage},
13	util::cowvec::CowVec,
14	value::{Value, r#type::Type},
15};
16
17pub struct Utf8Container<S: Storage = Cow> {
18	data: S::Vec<String>,
19}
20
21impl<S: Storage> Clone for Utf8Container<S> {
22	fn clone(&self) -> Self {
23		Self {
24			data: self.data.clone(),
25		}
26	}
27}
28
29impl<S: Storage> Debug for Utf8Container<S>
30where
31	S::Vec<String>: Debug,
32{
33	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
34		f.debug_struct("Utf8Container").field("data", &self.data).finish()
35	}
36}
37
38impl<S: Storage> PartialEq for Utf8Container<S>
39where
40	S::Vec<String>: PartialEq,
41{
42	fn eq(&self, other: &Self) -> bool {
43		self.data == other.data
44	}
45}
46
47impl Serialize for Utf8Container<Cow> {
48	fn serialize<Ser: Serializer>(&self, serializer: Ser) -> Result<Ser::Ok, Ser::Error> {
49		#[derive(Serialize)]
50		struct Helper<'a> {
51			data: &'a CowVec<String>,
52		}
53		Helper {
54			data: &self.data,
55		}
56		.serialize(serializer)
57	}
58}
59
60impl<'de> Deserialize<'de> for Utf8Container<Cow> {
61	fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
62		#[derive(Deserialize)]
63		struct Helper {
64			data: CowVec<String>,
65		}
66		let h = Helper::deserialize(deserializer)?;
67		Ok(Utf8Container {
68			data: h.data,
69		})
70	}
71}
72
73impl<S: Storage> Deref for Utf8Container<S> {
74	type Target = [String];
75
76	fn deref(&self) -> &Self::Target {
77		self.data.as_slice()
78	}
79}
80
81impl Utf8Container<Cow> {
82	pub fn new(data: Vec<String>) -> Self {
83		Self {
84			data: CowVec::new(data),
85		}
86	}
87
88	pub fn with_capacity(capacity: usize) -> Self {
89		Self {
90			data: CowVec::with_capacity(capacity),
91		}
92	}
93
94	/// Reconstruct from raw parts previously obtained via `try_into_raw_parts`.
95	pub fn from_raw_parts(data: Vec<String>) -> Self {
96		Self {
97			data: CowVec::new(data),
98		}
99	}
100
101	/// Try to decompose into raw Vec for recycling.
102	/// Returns `None` if the inner storage is shared.
103	pub fn try_into_raw_parts(self) -> Option<Vec<String>> {
104		match self.data.try_into_vec() {
105			Ok(v) => Some(v),
106			Err(_) => None,
107		}
108	}
109
110	pub fn from_vec(data: Vec<String>) -> Self {
111		Self {
112			data: CowVec::new(data),
113		}
114	}
115}
116
117impl<S: Storage> Utf8Container<S> {
118	pub fn from_parts(data: S::Vec<String>) -> Self {
119		Self {
120			data,
121		}
122	}
123
124	pub fn len(&self) -> usize {
125		DataVec::len(&self.data)
126	}
127
128	pub fn capacity(&self) -> usize {
129		DataVec::capacity(&self.data)
130	}
131
132	pub fn is_empty(&self) -> bool {
133		DataVec::is_empty(&self.data)
134	}
135
136	pub fn clear(&mut self) {
137		DataVec::clear(&mut self.data);
138	}
139
140	pub fn push(&mut self, value: String) {
141		DataVec::push(&mut self.data, value);
142	}
143
144	pub fn push_default(&mut self) {
145		DataVec::push(&mut self.data, String::new());
146	}
147
148	pub fn get(&self, index: usize) -> Option<&String> {
149		if index < self.len() {
150			DataVec::get(&self.data, index)
151		} else {
152			None
153		}
154	}
155
156	pub fn is_defined(&self, idx: usize) -> bool {
157		idx < self.len()
158	}
159
160	pub fn is_fully_defined(&self) -> bool {
161		true
162	}
163
164	pub fn data(&self) -> &S::Vec<String> {
165		&self.data
166	}
167
168	pub fn data_mut(&mut self) -> &mut S::Vec<String> {
169		&mut self.data
170	}
171
172	pub fn as_string(&self, index: usize) -> String {
173		if index < self.len() {
174			self.data[index].clone()
175		} else {
176			"none".to_string()
177		}
178	}
179
180	pub fn get_value(&self, index: usize) -> Value {
181		if index < self.len() {
182			Value::Utf8(self.data[index].clone())
183		} else {
184			Value::none_of(Type::Utf8)
185		}
186	}
187
188	pub fn extend(&mut self, other: &Self) -> crate::Result<()> {
189		DataVec::extend_iter(&mut self.data, other.data.iter().cloned());
190		Ok(())
191	}
192
193	pub fn iter(&self) -> impl Iterator<Item = Option<&String>> + '_ {
194		self.data.iter().map(|v| Some(v))
195	}
196
197	pub fn slice(&self, start: usize, end: usize) -> Self {
198		let count = (end - start).min(self.len().saturating_sub(start));
199		let mut new_data = DataVec::spawn(&self.data, count);
200		for i in start..(start + count) {
201			DataVec::push(&mut new_data, self.data[i].clone());
202		}
203		Self {
204			data: new_data,
205		}
206	}
207
208	pub fn filter(&mut self, mask: &S::BitVec) {
209		let mut new_data = DataVec::spawn(&self.data, DataBitVec::count_ones(mask));
210
211		for (i, keep) in DataBitVec::iter(mask).enumerate() {
212			if keep && i < self.len() {
213				DataVec::push(&mut new_data, self.data[i].clone());
214			}
215		}
216
217		self.data = new_data;
218	}
219
220	pub fn reorder(&mut self, indices: &[usize]) {
221		let mut new_data = DataVec::spawn(&self.data, indices.len());
222
223		for &idx in indices {
224			if idx < self.len() {
225				DataVec::push(&mut new_data, self.data[idx].clone());
226			} else {
227				DataVec::push(&mut new_data, String::new());
228			}
229		}
230
231		self.data = new_data;
232	}
233
234	pub fn take(&self, num: usize) -> Self {
235		Self {
236			data: DataVec::take(&self.data, num),
237		}
238	}
239}
240
241impl Default for Utf8Container<Cow> {
242	fn default() -> Self {
243		Self::with_capacity(0)
244	}
245}
246
#[cfg(test)]
pub mod tests {
	use super::*;

	/// Builds an owned `Vec<String>` from string literals.
	fn strings(items: &[&str]) -> Vec<String> {
		items.iter().map(|item| item.to_string()).collect()
	}

	/// Asserts that `container` holds exactly `expected`, in order.
	fn assert_contents(container: &Utf8Container, expected: &[&str]) {
		assert_eq!(container.len(), expected.len());
		for (idx, want) in expected.iter().enumerate() {
			assert_eq!(container.get(idx).map(String::as_str), Some(*want), "mismatch at index {}", idx);
		}
	}

	#[test]
	fn test_new() {
		let container = Utf8Container::new(strings(&["hello", "world", "test"]));

		assert_contents(&container, &["hello", "world", "test"]);
	}

	#[test]
	fn test_from_vec() {
		let container = Utf8Container::from_vec(strings(&["foo", "bar", "baz"]));

		assert_contents(&container, &["foo", "bar", "baz"]);

		// Every stored position must report as defined.
		assert!((0..3).all(|i| container.is_defined(i)));
	}

	#[test]
	fn test_with_capacity() {
		let container = Utf8Container::with_capacity(10);

		assert!(container.is_empty());
		assert_eq!(container.len(), 0);
		assert!(container.capacity() >= 10);
	}

	#[test]
	fn test_push() {
		let mut container = Utf8Container::with_capacity(3);

		container.push(String::from("first"));
		container.push(String::from("second"));
		container.push_default();

		// `push_default` appends an empty string.
		assert_contents(&container, &["first", "second", ""]);

		assert!((0..3).all(|i| container.is_defined(i)));
	}

	#[test]
	fn test_extend() {
		let mut target = Utf8Container::from_vec(strings(&["a", "b"]));
		let source = Utf8Container::from_vec(strings(&["c", "d"]));

		target.extend(&source).unwrap();

		assert_contents(&target, &["a", "b", "c", "d"]);
	}

	#[test]
	fn test_iter() {
		let container = Utf8Container::new(strings(&["x", "y", "z"]));

		let collected: Vec<Option<&str>> = container.iter().map(|item| item.map(String::as_str)).collect();
		assert_eq!(collected, vec![Some("x"), Some("y"), Some("z")]);
	}

	#[test]
	fn test_slice() {
		let container = Utf8Container::from_vec(strings(&["one", "two", "three", "four"]));

		let sliced = container.slice(1, 3);

		assert_contents(&sliced, &["two", "three"]);
	}

	#[test]
	fn test_filter() {
		use crate::util::bitvec::BitVec;

		let mut container = Utf8Container::from_vec(strings(&["keep", "drop", "keep", "drop"]));
		let mask = BitVec::from_slice(&[true, false, true, false]);

		container.filter(&mask);

		assert_contents(&container, &["keep", "keep"]);
	}

	#[test]
	fn test_reorder() {
		let mut container = Utf8Container::from_vec(strings(&["first", "second", "third"]));

		container.reorder(&[2, 0, 1]);

		// New order is old indices 2, 0, 1.
		assert_contents(&container, &["third", "first", "second"]);
	}

	#[test]
	fn test_reorder_with_out_of_bounds() {
		let mut container = Utf8Container::from_vec(strings(&["a", "b"]));

		// Index 5 is out of bounds and must come back as the default (empty) string.
		container.reorder(&[1, 5, 0]);

		assert_contents(&container, &["b", "", "a"]);
	}

	#[test]
	fn test_empty_strings() {
		let mut container = Utf8Container::with_capacity(2);
		container.push(String::new()); // explicit empty string
		container.push_default();

		assert_contents(&container, &["", ""]);

		assert!((0..2).all(|i| container.is_defined(i)));
	}

	#[test]
	fn test_default() {
		let container = Utf8Container::default();

		assert!(container.is_empty());
		assert_eq!(container.len(), 0);
	}
}
407}