reifydb_type/value/container/
utf8.rs1use std::ops::Deref;
5
6use serde::{Deserialize, Serialize};
7
8use crate::{BitVec, CowVec, Value};
9
10#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
11pub struct Utf8Container {
12 data: CowVec<String>,
13 bitvec: BitVec,
14}
15
16impl Utf8Container {
17 pub fn new(data: Vec<String>, bitvec: BitVec) -> Self {
18 debug_assert_eq!(data.len(), bitvec.len());
19 Self {
20 data: CowVec::new(data),
21 bitvec,
22 }
23 }
24
25 pub fn with_capacity(capacity: usize) -> Self {
26 Self {
27 data: CowVec::with_capacity(capacity),
28 bitvec: BitVec::with_capacity(capacity),
29 }
30 }
31
32 pub fn from_vec(data: Vec<String>) -> Self {
33 let len = data.len();
34 Self {
35 data: CowVec::new(data),
36 bitvec: BitVec::repeat(len, true),
37 }
38 }
39
40 pub fn len(&self) -> usize {
41 debug_assert_eq!(self.data.len(), self.bitvec.len());
42 self.data.len()
43 }
44
45 pub fn capacity(&self) -> usize {
46 debug_assert!(self.data.capacity() >= self.bitvec.capacity());
47 self.data.capacity().min(self.bitvec.capacity())
48 }
49
50 pub fn is_empty(&self) -> bool {
51 self.data.is_empty()
52 }
53
54 pub fn push(&mut self, value: String) {
55 self.data.push(value);
56 self.bitvec.push(true);
57 }
58
59 pub fn push_undefined(&mut self) {
60 self.data.push(String::new());
61 self.bitvec.push(false);
62 }
63
64 pub fn get(&self, index: usize) -> Option<&String> {
65 if index < self.len() && self.is_defined(index) {
66 self.data.get(index)
67 } else {
68 None
69 }
70 }
71
72 pub fn bitvec(&self) -> &BitVec {
73 &self.bitvec
74 }
75
76 pub fn bitvec_mut(&mut self) -> &mut BitVec {
77 &mut self.bitvec
78 }
79
80 pub fn is_defined(&self, idx: usize) -> bool {
81 idx < self.len() && self.bitvec.get(idx)
82 }
83
84 pub fn is_fully_defined(&self) -> bool {
85 self.bitvec.count_ones() == self.len()
86 }
87
88 pub fn data(&self) -> &CowVec<String> {
89 &self.data
90 }
91
92 pub fn data_mut(&mut self) -> &mut CowVec<String> {
93 &mut self.data
94 }
95
96 pub fn as_string(&self, index: usize) -> String {
97 if index < self.len() && self.is_defined(index) {
98 self.data[index].clone()
99 } else {
100 "Undefined".to_string()
101 }
102 }
103
104 pub fn get_value(&self, index: usize) -> Value {
105 if index < self.len() && self.is_defined(index) {
106 Value::Utf8(self.data[index].clone())
107 } else {
108 Value::Undefined
109 }
110 }
111
112 pub fn extend(&mut self, other: &Self) -> crate::Result<()> {
113 self.data.extend(other.data.iter().cloned());
114 self.bitvec.extend(&other.bitvec);
115 Ok(())
116 }
117
118 pub fn extend_from_undefined(&mut self, len: usize) {
119 self.data.extend(std::iter::repeat(String::new()).take(len));
120 self.bitvec.extend(&BitVec::repeat(len, false));
121 }
122
123 pub fn iter(&self) -> impl Iterator<Item = Option<&String>> + '_ {
124 self.data.iter().zip(self.bitvec.iter()).map(|(v, defined)| {
125 if defined {
126 Some(v)
127 } else {
128 None
129 }
130 })
131 }
132
133 pub fn slice(&self, start: usize, end: usize) -> Self {
134 let new_data: Vec<String> = self.data.iter().skip(start).take(end - start).cloned().collect();
135 let new_bitvec: Vec<bool> = self.bitvec.iter().skip(start).take(end - start).collect();
136 Self {
137 data: CowVec::new(new_data),
138 bitvec: BitVec::from_slice(&new_bitvec),
139 }
140 }
141
142 pub fn filter(&mut self, mask: &BitVec) {
143 let mut new_data = Vec::with_capacity(mask.count_ones());
144 let mut new_bitvec = BitVec::with_capacity(mask.count_ones());
145
146 for (i, keep) in mask.iter().enumerate() {
147 if keep && i < self.len() {
148 new_data.push(self.data[i].clone());
149 new_bitvec.push(self.bitvec.get(i));
150 }
151 }
152
153 self.data = CowVec::new(new_data);
154 self.bitvec = new_bitvec;
155 }
156
157 pub fn reorder(&mut self, indices: &[usize]) {
158 let mut new_data = Vec::with_capacity(indices.len());
159 let mut new_bitvec = BitVec::with_capacity(indices.len());
160
161 for &idx in indices {
162 if idx < self.len() {
163 new_data.push(self.data[idx].clone());
164 new_bitvec.push(self.bitvec.get(idx));
165 } else {
166 new_data.push(String::new());
167 new_bitvec.push(false);
168 }
169 }
170
171 self.data = CowVec::new(new_data);
172 self.bitvec = new_bitvec;
173 }
174
175 pub fn take(&self, num: usize) -> Self {
176 Self {
177 data: self.data.take(num),
178 bitvec: self.bitvec.take(num),
179 }
180 }
181}
182
183impl Deref for Utf8Container {
184 type Target = [String];
185
186 fn deref(&self) -> &Self::Target {
187 self.data.as_slice()
188 }
189}
190
191impl Default for Utf8Container {
192 fn default() -> Self {
193 Self::with_capacity(0)
194 }
195}
196
#[cfg(test)]
mod tests {
    use super::*;
    use crate::BitVec;

    /// Converts string literals into the owned `String`s the container stores.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    /// Reads position `index` as `Option<&str>` so expectations stay terse.
    fn text(container: &Utf8Container, index: usize) -> Option<&str> {
        container.get(index).map(String::as_str)
    }

    #[test]
    fn test_new() {
        let bitvec = BitVec::from_slice(&[true, true, true]);
        let container = Utf8Container::new(owned(&["hello", "world", "test"]), bitvec);

        assert_eq!(container.len(), 3);
        assert_eq!(text(&container, 0), Some("hello"));
        assert_eq!(text(&container, 1), Some("world"));
        assert_eq!(text(&container, 2), Some("test"));
    }

    #[test]
    fn test_from_vec() {
        let container = Utf8Container::from_vec(owned(&["foo", "bar", "baz"]));

        assert_eq!(container.len(), 3);
        assert_eq!(text(&container, 0), Some("foo"));
        assert_eq!(text(&container, 1), Some("bar"));
        assert_eq!(text(&container, 2), Some("baz"));

        // Every position built by `from_vec` must be defined.
        for i in 0..3 {
            assert!(container.is_defined(i));
        }
    }

    #[test]
    fn test_with_capacity() {
        let container = Utf8Container::with_capacity(10);

        assert_eq!(container.len(), 0);
        assert!(container.is_empty());
        assert!(container.capacity() >= 10);
    }

    #[test]
    fn test_push() {
        let mut container = Utf8Container::with_capacity(3);

        container.push("first".to_string());
        container.push("second".to_string());
        container.push_undefined();

        assert_eq!(container.len(), 3);
        assert_eq!(text(&container, 0), Some("first"));
        assert_eq!(text(&container, 1), Some("second"));
        // The undefined position reads back as `None`.
        assert_eq!(text(&container, 2), None);

        assert!(container.is_defined(0));
        assert!(container.is_defined(1));
        assert!(!container.is_defined(2));
    }

    #[test]
    fn test_extend() {
        let mut target = Utf8Container::from_vec(owned(&["a", "b"]));
        let addition = Utf8Container::from_vec(owned(&["c", "d"]));

        target.extend(&addition).unwrap();

        assert_eq!(target.len(), 4);
        assert_eq!(text(&target, 0), Some("a"));
        assert_eq!(text(&target, 1), Some("b"));
        assert_eq!(text(&target, 2), Some("c"));
        assert_eq!(text(&target, 3), Some("d"));
    }

    #[test]
    fn test_extend_from_undefined() {
        let mut container = Utf8Container::from_vec(owned(&["test"]));
        container.extend_from_undefined(2);

        assert_eq!(container.len(), 3);
        assert_eq!(text(&container, 0), Some("test"));
        // Both appended positions are undefined.
        assert_eq!(text(&container, 1), None);
        assert_eq!(text(&container, 2), None);
    }

    #[test]
    fn test_iter() {
        // The middle position is marked undefined.
        let bitvec = BitVec::from_slice(&[true, false, true]);
        let container = Utf8Container::new(owned(&["x", "y", "z"]), bitvec);

        let collected: Vec<Option<&str>> =
            container.iter().map(|slot| slot.map(String::as_str)).collect();
        assert_eq!(collected, vec![Some("x"), None, Some("z")]);
    }

    #[test]
    fn test_slice() {
        let container = Utf8Container::from_vec(owned(&["one", "two", "three", "four"]));
        let sliced = container.slice(1, 3);

        assert_eq!(sliced.len(), 2);
        assert_eq!(text(&sliced, 0), Some("two"));
        assert_eq!(text(&sliced, 1), Some("three"));
    }

    #[test]
    fn test_filter() {
        let mut container = Utf8Container::from_vec(owned(&["keep", "drop", "keep", "drop"]));
        let mask = BitVec::from_slice(&[true, false, true, false]);

        container.filter(&mask);

        assert_eq!(container.len(), 2);
        assert_eq!(text(&container, 0), Some("keep"));
        assert_eq!(text(&container, 1), Some("keep"));
    }

    #[test]
    fn test_reorder() {
        let mut container = Utf8Container::from_vec(owned(&["first", "second", "third"]));
        let indices = [2, 0, 1];

        container.reorder(&indices);

        assert_eq!(container.len(), 3);
        assert_eq!(text(&container, 0), Some("third"));
        assert_eq!(text(&container, 1), Some("first"));
        assert_eq!(text(&container, 2), Some("second"));
    }

    #[test]
    fn test_reorder_with_out_of_bounds() {
        let mut container = Utf8Container::from_vec(owned(&["a", "b"]));
        // Index 5 does not exist in a two-element container.
        let indices = [1, 5, 0];

        container.reorder(&indices);

        assert_eq!(container.len(), 3);
        assert_eq!(text(&container, 0), Some("b"));
        // The out-of-range index becomes an undefined position.
        assert_eq!(text(&container, 1), None);
        assert_eq!(text(&container, 2), Some("a"));
    }

    #[test]
    fn test_empty_strings() {
        let mut container = Utf8Container::with_capacity(2);
        // A defined empty string must stay distinguishable from undefined.
        container.push(String::new());
        container.push_undefined();

        assert_eq!(container.len(), 2);
        assert_eq!(text(&container, 0), Some(""));
        assert_eq!(text(&container, 1), None);

        assert!(container.is_defined(0));
        assert!(!container.is_defined(1));
    }

    #[test]
    fn test_default() {
        let container = Utf8Container::default();

        assert_eq!(container.len(), 0);
        assert!(container.is_empty());
    }
}