Skip to main content

reifydb_type/value/container/
utf8.rs

1// SPDX-License-Identifier: MIT
2// Copyright (c) 2025 ReifyDB
3
4use std::{
5	fmt::{self, Debug},
6	ops::Deref,
7	result::Result as StdResult,
8};
9
10use serde::{Deserialize, Deserializer, Serialize, Serializer};
11
12use crate::{
13	Result,
14	storage::{Cow, DataBitVec, DataVec, Storage},
15	util::cowvec::CowVec,
16	value::{Value, r#type::Type},
17};
18
/// Container of `String` values.
///
/// The strings live in a single `S::Vec<String>`, whose concrete type is
/// selected by the `Storage` parameter (`Cow` when `S` is not spelled out).
pub struct Utf8Container<S: Storage = Cow> {
	/// Backing vector; one `String` per stored element.
	data: S::Vec<String>,
}
22
23impl<S: Storage> Clone for Utf8Container<S> {
24	fn clone(&self) -> Self {
25		Self {
26			data: self.data.clone(),
27		}
28	}
29}
30
31impl<S: Storage> Debug for Utf8Container<S>
32where
33	S::Vec<String>: Debug,
34{
35	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
36		f.debug_struct("Utf8Container").field("data", &self.data).finish()
37	}
38}
39
40impl<S: Storage> PartialEq for Utf8Container<S>
41where
42	S::Vec<String>: PartialEq,
43{
44	fn eq(&self, other: &Self) -> bool {
45		self.data == other.data
46	}
47}
48
49impl Serialize for Utf8Container<Cow> {
50	fn serialize<Ser: Serializer>(&self, serializer: Ser) -> StdResult<Ser::Ok, Ser::Error> {
51		#[derive(Serialize)]
52		struct Helper<'a> {
53			data: &'a CowVec<String>,
54		}
55		Helper {
56			data: &self.data,
57		}
58		.serialize(serializer)
59	}
60}
61
62impl<'de> Deserialize<'de> for Utf8Container<Cow> {
63	fn deserialize<D: Deserializer<'de>>(deserializer: D) -> StdResult<Self, D::Error> {
64		#[derive(Deserialize)]
65		struct Helper {
66			data: CowVec<String>,
67		}
68		let h = Helper::deserialize(deserializer)?;
69		Ok(Utf8Container {
70			data: h.data,
71		})
72	}
73}
74
75impl<S: Storage> Deref for Utf8Container<S> {
76	type Target = [String];
77
78	fn deref(&self) -> &Self::Target {
79		self.data.as_slice()
80	}
81}
82
83impl Utf8Container<Cow> {
84	pub fn new(data: Vec<String>) -> Self {
85		Self {
86			data: CowVec::new(data),
87		}
88	}
89
90	pub fn with_capacity(capacity: usize) -> Self {
91		Self {
92			data: CowVec::with_capacity(capacity),
93		}
94	}
95
96	/// Reconstruct from raw parts previously obtained via `try_into_raw_parts`.
97	pub fn from_raw_parts(data: Vec<String>) -> Self {
98		Self {
99			data: CowVec::new(data),
100		}
101	}
102
103	/// Try to decompose into raw Vec for recycling.
104	/// Returns `None` if the inner storage is shared.
105	pub fn try_into_raw_parts(self) -> Option<Vec<String>> {
106		match self.data.try_into_vec() {
107			Ok(v) => Some(v),
108			Err(_) => None,
109		}
110	}
111
112	pub fn from_vec(data: Vec<String>) -> Self {
113		Self {
114			data: CowVec::new(data),
115		}
116	}
117}
118
119impl<S: Storage> Utf8Container<S> {
120	pub fn from_parts(data: S::Vec<String>) -> Self {
121		Self {
122			data,
123		}
124	}
125
126	pub fn len(&self) -> usize {
127		DataVec::len(&self.data)
128	}
129
130	pub fn capacity(&self) -> usize {
131		DataVec::capacity(&self.data)
132	}
133
134	pub fn is_empty(&self) -> bool {
135		DataVec::is_empty(&self.data)
136	}
137
138	pub fn clear(&mut self) {
139		DataVec::clear(&mut self.data);
140	}
141
142	pub fn push(&mut self, value: String) {
143		DataVec::push(&mut self.data, value);
144	}
145
146	pub fn push_default(&mut self) {
147		DataVec::push(&mut self.data, String::new());
148	}
149
150	pub fn get(&self, index: usize) -> Option<&String> {
151		if index < self.len() {
152			DataVec::get(&self.data, index)
153		} else {
154			None
155		}
156	}
157
158	pub fn is_defined(&self, idx: usize) -> bool {
159		idx < self.len()
160	}
161
162	pub fn is_fully_defined(&self) -> bool {
163		true
164	}
165
166	pub fn data(&self) -> &S::Vec<String> {
167		&self.data
168	}
169
170	pub fn data_mut(&mut self) -> &mut S::Vec<String> {
171		&mut self.data
172	}
173
174	pub fn as_string(&self, index: usize) -> String {
175		if index < self.len() {
176			self.data[index].clone()
177		} else {
178			"none".to_string()
179		}
180	}
181
182	pub fn get_value(&self, index: usize) -> Value {
183		if index < self.len() {
184			Value::Utf8(self.data[index].clone())
185		} else {
186			Value::none_of(Type::Utf8)
187		}
188	}
189
190	pub fn extend(&mut self, other: &Self) -> Result<()> {
191		DataVec::extend_iter(&mut self.data, other.data.iter().cloned());
192		Ok(())
193	}
194
195	pub fn iter(&self) -> impl Iterator<Item = Option<&String>> + '_ {
196		self.data.iter().map(|v| Some(v))
197	}
198
199	pub fn slice(&self, start: usize, end: usize) -> Self {
200		let count = (end - start).min(self.len().saturating_sub(start));
201		let mut new_data = DataVec::spawn(&self.data, count);
202		for i in start..(start + count) {
203			DataVec::push(&mut new_data, self.data[i].clone());
204		}
205		Self {
206			data: new_data,
207		}
208	}
209
210	pub fn filter(&mut self, mask: &S::BitVec) {
211		let mut new_data = DataVec::spawn(&self.data, DataBitVec::count_ones(mask));
212
213		for (i, keep) in DataBitVec::iter(mask).enumerate() {
214			if keep && i < self.len() {
215				DataVec::push(&mut new_data, self.data[i].clone());
216			}
217		}
218
219		self.data = new_data;
220	}
221
222	pub fn reorder(&mut self, indices: &[usize]) {
223		let mut new_data = DataVec::spawn(&self.data, indices.len());
224
225		for &idx in indices {
226			if idx < self.len() {
227				DataVec::push(&mut new_data, self.data[idx].clone());
228			} else {
229				DataVec::push(&mut new_data, String::new());
230			}
231		}
232
233		self.data = new_data;
234	}
235
236	pub fn take(&self, num: usize) -> Self {
237		Self {
238			data: DataVec::take(&self.data, num),
239		}
240	}
241}
242
243impl Default for Utf8Container<Cow> {
244	fn default() -> Self {
245		Self::with_capacity(0)
246	}
247}
248
#[cfg(test)]
pub mod tests {
	use super::*;
	use crate::util::bitvec::BitVec;

	/// Turns string literals into the owned `Vec<String>` the constructors take.
	fn owned(items: &[&str]) -> Vec<String> {
		items.iter().map(|item| item.to_string()).collect()
	}

	/// Checks `len()` and then every position via `get()` against `expected`.
	fn assert_contents(container: &Utf8Container, expected: &[&str]) {
		assert_eq!(container.len(), expected.len());
		for (position, want) in expected.iter().enumerate() {
			assert_eq!(container.get(position).map(String::as_str), Some(*want));
		}
	}

	#[test]
	fn test_new() {
		let source = owned(&["hello", "world", "test"]);
		let container = Utf8Container::new(source.clone());

		assert_contents(&container, &["hello", "world", "test"]);
	}

	#[test]
	fn test_from_vec() {
		let container = Utf8Container::from_vec(owned(&["foo", "bar", "baz"]));

		assert_contents(&container, &["foo", "bar", "baz"]);

		// Every stored position must report as defined.
		assert!((0..3).all(|position| container.is_defined(position)));
	}

	#[test]
	fn test_with_capacity() {
		let container = Utf8Container::with_capacity(10);

		assert_eq!(container.len(), 0);
		assert!(container.is_empty());
		assert!(container.capacity() >= 10);
	}

	#[test]
	fn test_push() {
		let mut container = Utf8Container::with_capacity(3);

		for word in ["first", "second"] {
			container.push(word.to_string());
		}
		container.push_default();

		// `push_default` contributes an empty string.
		assert_contents(&container, &["first", "second", ""]);
		assert!((0..3).all(|position| container.is_defined(position)));
	}

	#[test]
	fn test_extend() {
		let mut target = Utf8Container::from_vec(owned(&["a", "b"]));
		let addition = Utf8Container::from_vec(owned(&["c", "d"]));

		target.extend(&addition).unwrap();

		assert_contents(&target, &["a", "b", "c", "d"]);
	}

	#[test]
	fn test_iter() {
		let container = Utf8Container::new(owned(&["x", "y", "z"]));

		let seen: Vec<Option<&str>> = container.iter().map(|item| item.map(String::as_str)).collect();
		assert_eq!(seen, vec![Some("x"), Some("y"), Some("z")]);
	}

	#[test]
	fn test_slice() {
		let container = Utf8Container::from_vec(owned(&["one", "two", "three", "four"]));

		let sliced = container.slice(1, 3);

		assert_contents(&sliced, &["two", "three"]);
	}

	#[test]
	fn test_filter() {
		let mut container = Utf8Container::from_vec(owned(&["keep", "drop", "keep", "drop"]));
		let mask = BitVec::from_slice(&[true, false, true, false]);

		container.filter(&mask);

		assert_contents(&container, &["keep", "keep"]);
	}

	#[test]
	fn test_reorder() {
		let mut container = Utf8Container::from_vec(owned(&["first", "second", "third"]));

		container.reorder(&[2, 0, 1]);

		// Old positions 2, 0, 1 in that order.
		assert_contents(&container, &["third", "first", "second"]);
	}

	#[test]
	fn test_reorder_with_out_of_bounds() {
		let mut container = Utf8Container::from_vec(owned(&["a", "b"]));

		// Index 5 does not exist and must come back as the default string.
		container.reorder(&[1, 5, 0]);

		assert_contents(&container, &["b", "", "a"]);
	}

	#[test]
	fn test_empty_strings() {
		let mut container = Utf8Container::with_capacity(2);
		container.push(String::new()); // explicit empty string
		container.push_default();

		assert_contents(&container, &["", ""]);
		assert!((0..2).all(|position| container.is_defined(position)));
	}

	#[test]
	fn test_default() {
		let container = Utf8Container::default();

		assert_eq!(container.len(), 0);
		assert!(container.is_empty());
	}
}