Skip to main content

reifydb_type/value/container/
utf8.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright (c) 2025 ReifyDB
3
4use std::{
5	fmt::{self, Debug},
6	ops::Deref,
7	result::Result as StdResult,
8};
9
10use serde::{Deserialize, Deserializer, Serialize, Serializer};
11
12use crate::{
13	Result,
14	storage::{Cow, DataBitVec, DataVec, Storage},
15	util::cowvec::CowVec,
16	value::{Value, r#type::Type},
17};
18
19pub struct Utf8Container<S: Storage = Cow> {
20	data: S::Vec<String>,
21}
22
23impl<S: Storage> Clone for Utf8Container<S> {
24	fn clone(&self) -> Self {
25		Self {
26			data: self.data.clone(),
27		}
28	}
29}
30
31impl<S: Storage> Debug for Utf8Container<S>
32where
33	S::Vec<String>: Debug,
34{
35	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
36		f.debug_struct("Utf8Container").field("data", &self.data).finish()
37	}
38}
39
40impl<S: Storage> PartialEq for Utf8Container<S>
41where
42	S::Vec<String>: PartialEq,
43{
44	fn eq(&self, other: &Self) -> bool {
45		self.data == other.data
46	}
47}
48
49impl Serialize for Utf8Container<Cow> {
50	fn serialize<Ser: Serializer>(&self, serializer: Ser) -> StdResult<Ser::Ok, Ser::Error> {
51		#[derive(Serialize)]
52		struct Helper<'a> {
53			data: &'a CowVec<String>,
54		}
55		Helper {
56			data: &self.data,
57		}
58		.serialize(serializer)
59	}
60}
61
62impl<'de> Deserialize<'de> for Utf8Container<Cow> {
63	fn deserialize<D: Deserializer<'de>>(deserializer: D) -> StdResult<Self, D::Error> {
64		#[derive(Deserialize)]
65		struct Helper {
66			data: CowVec<String>,
67		}
68		let h = Helper::deserialize(deserializer)?;
69		Ok(Utf8Container {
70			data: h.data,
71		})
72	}
73}
74
75impl<S: Storage> Deref for Utf8Container<S> {
76	type Target = [String];
77
78	fn deref(&self) -> &Self::Target {
79		self.data.as_slice()
80	}
81}
82
83impl Utf8Container<Cow> {
84	pub fn new(data: Vec<String>) -> Self {
85		Self {
86			data: CowVec::new(data),
87		}
88	}
89
90	pub fn with_capacity(capacity: usize) -> Self {
91		Self {
92			data: CowVec::with_capacity(capacity),
93		}
94	}
95
96	/// Reconstruct from raw parts previously obtained via `try_into_raw_parts`.
97	pub fn from_raw_parts(data: Vec<String>) -> Self {
98		Self {
99			data: CowVec::new(data),
100		}
101	}
102
103	/// Try to decompose into raw Vec for recycling.
104	/// Returns `None` if the inner storage is shared.
105	pub fn try_into_raw_parts(self) -> Option<Vec<String>> {
106		self.data.try_into_vec().ok()
107	}
108
109	pub fn from_vec(data: Vec<String>) -> Self {
110		Self {
111			data: CowVec::new(data),
112		}
113	}
114}
115
116impl<S: Storage> Utf8Container<S> {
117	pub fn from_parts(data: S::Vec<String>) -> Self {
118		Self {
119			data,
120		}
121	}
122
123	pub fn len(&self) -> usize {
124		DataVec::len(&self.data)
125	}
126
127	pub fn capacity(&self) -> usize {
128		DataVec::capacity(&self.data)
129	}
130
131	pub fn is_empty(&self) -> bool {
132		DataVec::is_empty(&self.data)
133	}
134
135	pub fn clear(&mut self) {
136		DataVec::clear(&mut self.data);
137	}
138
139	pub fn push(&mut self, value: String) {
140		DataVec::push(&mut self.data, value);
141	}
142
143	pub fn push_default(&mut self) {
144		DataVec::push(&mut self.data, String::new());
145	}
146
147	pub fn get(&self, index: usize) -> Option<&String> {
148		if index < self.len() {
149			DataVec::get(&self.data, index)
150		} else {
151			None
152		}
153	}
154
155	pub fn is_defined(&self, idx: usize) -> bool {
156		idx < self.len()
157	}
158
159	pub fn is_fully_defined(&self) -> bool {
160		true
161	}
162
163	pub fn data(&self) -> &S::Vec<String> {
164		&self.data
165	}
166
167	pub fn data_mut(&mut self) -> &mut S::Vec<String> {
168		&mut self.data
169	}
170
171	pub fn as_string(&self, index: usize) -> String {
172		if index < self.len() {
173			self.data[index].clone()
174		} else {
175			"none".to_string()
176		}
177	}
178
179	pub fn get_value(&self, index: usize) -> Value {
180		if index < self.len() {
181			Value::Utf8(self.data[index].clone())
182		} else {
183			Value::none_of(Type::Utf8)
184		}
185	}
186
187	pub fn extend(&mut self, other: &Self) -> Result<()> {
188		DataVec::extend_iter(&mut self.data, other.data.iter().cloned());
189		Ok(())
190	}
191
192	pub fn iter(&self) -> impl Iterator<Item = Option<&String>> + '_ {
193		self.data.iter().map(Some)
194	}
195
196	pub fn slice(&self, start: usize, end: usize) -> Self {
197		let count = (end - start).min(self.len().saturating_sub(start));
198		let mut new_data = DataVec::spawn(&self.data, count);
199		for i in start..(start + count) {
200			DataVec::push(&mut new_data, self.data[i].clone());
201		}
202		Self {
203			data: new_data,
204		}
205	}
206
207	pub fn filter(&mut self, mask: &S::BitVec) {
208		let mut new_data = DataVec::spawn(&self.data, DataBitVec::count_ones(mask));
209
210		for (i, keep) in DataBitVec::iter(mask).enumerate() {
211			if keep && i < self.len() {
212				DataVec::push(&mut new_data, self.data[i].clone());
213			}
214		}
215
216		self.data = new_data;
217	}
218
219	pub fn reorder(&mut self, indices: &[usize]) {
220		let mut new_data = DataVec::spawn(&self.data, indices.len());
221
222		for &idx in indices {
223			if idx < self.len() {
224				DataVec::push(&mut new_data, self.data[idx].clone());
225			} else {
226				DataVec::push(&mut new_data, String::new());
227			}
228		}
229
230		self.data = new_data;
231	}
232
233	pub fn take(&self, num: usize) -> Self {
234		Self {
235			data: DataVec::take(&self.data, num),
236		}
237	}
238}
239
240impl Default for Utf8Container<Cow> {
241	fn default() -> Self {
242		Self::with_capacity(0)
243	}
244}
245
#[cfg(test)]
pub mod tests {
	//! Unit tests for `Utf8Container` backed by the default `Cow` storage.

	use super::*;
	use crate::util::bitvec::BitVec;

	#[test]
	fn test_new() {
		let data = vec!["hello".to_string(), "world".to_string(), "test".to_string()];
		let container = Utf8Container::new(data.clone());

		assert_eq!(container.len(), 3);
		assert_eq!(container.get(0), Some(&"hello".to_string()));
		assert_eq!(container.get(1), Some(&"world".to_string()));
		assert_eq!(container.get(2), Some(&"test".to_string()));
	}

	#[test]
	fn test_from_vec() {
		let data = vec!["foo".to_string(), "bar".to_string(), "baz".to_string()];
		let container = Utf8Container::from_vec(data);

		assert_eq!(container.len(), 3);
		assert_eq!(container.get(0), Some(&"foo".to_string()));
		assert_eq!(container.get(1), Some(&"bar".to_string()));
		assert_eq!(container.get(2), Some(&"baz".to_string()));

		// All should be defined
		for i in 0..3 {
			assert!(container.is_defined(i));
		}
	}

	#[test]
	fn test_with_capacity() {
		let container = Utf8Container::with_capacity(10);
		assert_eq!(container.len(), 0);
		assert!(container.is_empty());
		assert!(container.capacity() >= 10);
	}

	#[test]
	fn test_push() {
		let mut container = Utf8Container::with_capacity(3);

		container.push("first".to_string());
		container.push("second".to_string());
		container.push_default();

		assert_eq!(container.len(), 3);
		assert_eq!(container.get(0), Some(&"first".to_string()));
		assert_eq!(container.get(1), Some(&"second".to_string()));
		assert_eq!(container.get(2), Some(&"".to_string())); // push_default pushes an empty string

		assert!(container.is_defined(0));
		assert!(container.is_defined(1));
		assert!(container.is_defined(2));
	}

	#[test]
	fn test_get_out_of_bounds() {
		let container = Utf8Container::from_vec(vec!["only".to_string()]);

		assert_eq!(container.get(1), None);
		assert!(!container.is_defined(1));
	}

	#[test]
	fn test_as_string() {
		let container = Utf8Container::from_vec(vec!["value".to_string()]);

		assert_eq!(container.as_string(0), "value");
		// Out-of-range indices render as the literal "none".
		assert_eq!(container.as_string(1), "none");
	}

	#[test]
	fn test_extend() {
		let mut container1 = Utf8Container::from_vec(vec!["a".to_string(), "b".to_string()]);
		let container2 = Utf8Container::from_vec(vec!["c".to_string(), "d".to_string()]);

		container1.extend(&container2).unwrap();

		assert_eq!(container1.len(), 4);
		assert_eq!(container1.get(0), Some(&"a".to_string()));
		assert_eq!(container1.get(1), Some(&"b".to_string()));
		assert_eq!(container1.get(2), Some(&"c".to_string()));
		assert_eq!(container1.get(3), Some(&"d".to_string()));
	}

	#[test]
	fn test_iter() {
		let data = vec!["x".to_string(), "y".to_string(), "z".to_string()];
		let container = Utf8Container::new(data);

		let collected: Vec<Option<&String>> = container.iter().collect();
		assert_eq!(collected, vec![Some(&"x".to_string()), Some(&"y".to_string()), Some(&"z".to_string())]);
	}

	#[test]
	fn test_slice() {
		let container = Utf8Container::from_vec(vec![
			"one".to_string(),
			"two".to_string(),
			"three".to_string(),
			"four".to_string(),
		]);
		let sliced = container.slice(1, 3);

		assert_eq!(sliced.len(), 2);
		assert_eq!(sliced.get(0), Some(&"two".to_string()));
		assert_eq!(sliced.get(1), Some(&"three".to_string()));
	}

	#[test]
	fn test_filter() {
		let mut container = Utf8Container::from_vec(vec![
			"keep".to_string(),
			"drop".to_string(),
			"keep".to_string(),
			"drop".to_string(),
		]);
		let mask = BitVec::from_slice(&[true, false, true, false]);

		container.filter(&mask);

		assert_eq!(container.len(), 2);
		assert_eq!(container.get(0), Some(&"keep".to_string()));
		assert_eq!(container.get(1), Some(&"keep".to_string()));
	}

	#[test]
	fn test_reorder() {
		let mut container =
			Utf8Container::from_vec(vec!["first".to_string(), "second".to_string(), "third".to_string()]);
		let indices = [2, 0, 1];

		container.reorder(&indices);

		assert_eq!(container.len(), 3);
		assert_eq!(container.get(0), Some(&"third".to_string())); // was index 2
		assert_eq!(container.get(1), Some(&"first".to_string())); // was index 0
		assert_eq!(container.get(2), Some(&"second".to_string())); // was index 1
	}

	#[test]
	fn test_reorder_with_out_of_bounds() {
		let mut container = Utf8Container::from_vec(vec!["a".to_string(), "b".to_string()]);
		let indices = [1, 5, 0]; // index 5 is out of bounds

		container.reorder(&indices);

		assert_eq!(container.len(), 3);
		assert_eq!(container.get(0), Some(&"b".to_string())); // was index 1
		assert_eq!(container.get(1), Some(&"".to_string())); // out of bounds -> default
		assert_eq!(container.get(2), Some(&"a".to_string())); // was index 0
	}

	#[test]
	fn test_empty_strings() {
		let mut container = Utf8Container::with_capacity(2);
		container.push("".to_string()); // empty string
		container.push_default();

		assert_eq!(container.len(), 2);
		assert_eq!(container.get(0), Some(&"".to_string()));
		assert_eq!(container.get(1), Some(&"".to_string()));

		assert!(container.is_defined(0));
		assert!(container.is_defined(1));
	}

	#[test]
	fn test_default() {
		let container = Utf8Container::default();
		assert_eq!(container.len(), 0);
		assert!(container.is_empty());
	}
}