reifydb_core/value/container/
utf8.rs1use std::ops::Deref;
5
6use reifydb_type::Value;
7use serde::{Deserialize, Serialize};
8
9use crate::{BitVec, CowVec};
10
11#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
12pub struct Utf8Container {
13 data: CowVec<String>,
14 bitvec: BitVec,
15}
16
17impl Utf8Container {
18 pub fn new(data: Vec<String>, bitvec: BitVec) -> Self {
19 debug_assert_eq!(data.len(), bitvec.len());
20 Self {
21 data: CowVec::new(data),
22 bitvec,
23 }
24 }
25
26 pub fn with_capacity(capacity: usize) -> Self {
27 Self {
28 data: CowVec::with_capacity(capacity),
29 bitvec: BitVec::with_capacity(capacity),
30 }
31 }
32
33 pub fn from_vec(data: Vec<String>) -> Self {
34 let len = data.len();
35 Self {
36 data: CowVec::new(data),
37 bitvec: BitVec::repeat(len, true),
38 }
39 }
40
41 pub fn len(&self) -> usize {
42 debug_assert_eq!(self.data.len(), self.bitvec.len());
43 self.data.len()
44 }
45
46 pub fn capacity(&self) -> usize {
47 debug_assert!(self.data.capacity() >= self.bitvec.capacity());
48 self.data.capacity().min(self.bitvec.capacity())
49 }
50
51 pub fn is_empty(&self) -> bool {
52 self.data.is_empty()
53 }
54
55 pub fn push(&mut self, value: String) {
56 self.data.push(value);
57 self.bitvec.push(true);
58 }
59
60 pub fn push_undefined(&mut self) {
61 self.data.push(String::new());
62 self.bitvec.push(false);
63 }
64
65 pub fn get(&self, index: usize) -> Option<&String> {
66 if index < self.len() && self.is_defined(index) {
67 self.data.get(index)
68 } else {
69 None
70 }
71 }
72
73 pub fn bitvec(&self) -> &BitVec {
74 &self.bitvec
75 }
76
77 pub fn bitvec_mut(&mut self) -> &mut BitVec {
78 &mut self.bitvec
79 }
80
81 pub fn is_defined(&self, idx: usize) -> bool {
82 idx < self.len() && self.bitvec.get(idx)
83 }
84
85 pub fn is_fully_defined(&self) -> bool {
86 self.bitvec.count_ones() == self.len()
87 }
88
89 pub fn data(&self) -> &CowVec<String> {
90 &self.data
91 }
92
93 pub fn data_mut(&mut self) -> &mut CowVec<String> {
94 &mut self.data
95 }
96
97 pub fn as_string(&self, index: usize) -> String {
98 if index < self.len() && self.is_defined(index) {
99 self.data[index].clone()
100 } else {
101 "Undefined".to_string()
102 }
103 }
104
105 pub fn get_value(&self, index: usize) -> Value {
106 if index < self.len() && self.is_defined(index) {
107 Value::Utf8(self.data[index].clone())
108 } else {
109 Value::Undefined
110 }
111 }
112
113 pub fn extend(&mut self, other: &Self) -> crate::Result<()> {
114 self.data.extend(other.data.iter().cloned());
115 self.bitvec.extend(&other.bitvec);
116 Ok(())
117 }
118
119 pub fn extend_from_undefined(&mut self, len: usize) {
120 self.data.extend(std::iter::repeat(String::new()).take(len));
121 self.bitvec.extend(&BitVec::repeat(len, false));
122 }
123
124 pub fn iter(&self) -> impl Iterator<Item = Option<&String>> + '_ {
125 self.data.iter().zip(self.bitvec.iter()).map(|(v, defined)| {
126 if defined {
127 Some(v)
128 } else {
129 None
130 }
131 })
132 }
133
134 pub fn slice(&self, start: usize, end: usize) -> Self {
135 let new_data: Vec<String> = self.data.iter().skip(start).take(end - start).cloned().collect();
136 let new_bitvec: Vec<bool> = self.bitvec.iter().skip(start).take(end - start).collect();
137 Self {
138 data: CowVec::new(new_data),
139 bitvec: BitVec::from_slice(&new_bitvec),
140 }
141 }
142
143 pub fn filter(&mut self, mask: &BitVec) {
144 let mut new_data = Vec::with_capacity(mask.count_ones());
145 let mut new_bitvec = BitVec::with_capacity(mask.count_ones());
146
147 for (i, keep) in mask.iter().enumerate() {
148 if keep && i < self.len() {
149 new_data.push(self.data[i].clone());
150 new_bitvec.push(self.bitvec.get(i));
151 }
152 }
153
154 self.data = CowVec::new(new_data);
155 self.bitvec = new_bitvec;
156 }
157
158 pub fn reorder(&mut self, indices: &[usize]) {
159 let mut new_data = Vec::with_capacity(indices.len());
160 let mut new_bitvec = BitVec::with_capacity(indices.len());
161
162 for &idx in indices {
163 if idx < self.len() {
164 new_data.push(self.data[idx].clone());
165 new_bitvec.push(self.bitvec.get(idx));
166 } else {
167 new_data.push(String::new());
168 new_bitvec.push(false);
169 }
170 }
171
172 self.data = CowVec::new(new_data);
173 self.bitvec = new_bitvec;
174 }
175
176 pub fn take(&self, num: usize) -> Self {
177 Self {
178 data: self.data.take(num),
179 bitvec: self.bitvec.take(num),
180 }
181 }
182}
183
184impl Deref for Utf8Container {
185 type Target = [String];
186
187 fn deref(&self) -> &Self::Target {
188 self.data.as_slice()
189 }
190}
191
192impl Default for Utf8Container {
193 fn default() -> Self {
194 Self::with_capacity(0)
195 }
196}
197
#[cfg(test)]
mod tests {
    use super::*;
    use crate::BitVec;

    /// Turns string literals into the owned `String`s the container stores.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    /// Reads entry `index` as `Option<&str>` so expectations can be written with plain literals.
    fn entry(container: &Utf8Container, index: usize) -> Option<&str> {
        container.get(index).map(String::as_str)
    }

    #[test]
    fn test_new() {
        let bitvec = BitVec::from_slice(&[true, true, true]);
        let container = Utf8Container::new(owned(&["hello", "world", "test"]), bitvec);

        assert_eq!(container.len(), 3);
        assert_eq!(entry(&container, 0), Some("hello"));
        assert_eq!(entry(&container, 1), Some("world"));
        assert_eq!(entry(&container, 2), Some("test"));
    }

    #[test]
    fn test_from_vec() {
        let container = Utf8Container::from_vec(owned(&["foo", "bar", "baz"]));

        assert_eq!(container.len(), 3);
        assert_eq!(entry(&container, 0), Some("foo"));
        assert_eq!(entry(&container, 1), Some("bar"));
        assert_eq!(entry(&container, 2), Some("baz"));

        // `from_vec` marks every entry as defined.
        assert!((0..3).all(|i| container.is_defined(i)));
    }

    #[test]
    fn test_with_capacity() {
        let container = Utf8Container::with_capacity(10);

        assert_eq!(container.len(), 0);
        assert!(container.is_empty());
        assert!(container.capacity() >= 10);
    }

    #[test]
    fn test_push() {
        let mut container = Utf8Container::with_capacity(3);

        container.push("first".to_string());
        container.push("second".to_string());
        container.push_undefined();

        assert_eq!(container.len(), 3);
        assert_eq!(entry(&container, 0), Some("first"));
        assert_eq!(entry(&container, 1), Some("second"));
        // The undefined slot reads back as `None`.
        assert_eq!(entry(&container, 2), None);

        assert!(container.is_defined(0));
        assert!(container.is_defined(1));
        assert!(!container.is_defined(2));
    }

    #[test]
    fn test_extend() {
        let mut target = Utf8Container::from_vec(owned(&["a", "b"]));
        let addition = Utf8Container::from_vec(owned(&["c", "d"]));

        target.extend(&addition).unwrap();

        assert_eq!(target.len(), 4);
        assert_eq!(entry(&target, 0), Some("a"));
        assert_eq!(entry(&target, 1), Some("b"));
        assert_eq!(entry(&target, 2), Some("c"));
        assert_eq!(entry(&target, 3), Some("d"));
    }

    #[test]
    fn test_extend_from_undefined() {
        let mut container = Utf8Container::from_vec(owned(&["test"]));
        container.extend_from_undefined(2);

        assert_eq!(container.len(), 3);
        assert_eq!(entry(&container, 0), Some("test"));
        // Both appended entries are undefined.
        assert_eq!(entry(&container, 1), None);
        assert_eq!(entry(&container, 2), None);
    }

    #[test]
    fn test_iter() {
        // The middle entry is marked undefined.
        let bitvec = BitVec::from_slice(&[true, false, true]);
        let container = Utf8Container::new(owned(&["x", "y", "z"]), bitvec);

        let collected: Vec<Option<&str>> = container.iter().map(|item| item.map(String::as_str)).collect();
        assert_eq!(collected, vec![Some("x"), None, Some("z")]);
    }

    #[test]
    fn test_slice() {
        let container = Utf8Container::from_vec(owned(&["one", "two", "three", "four"]));
        let sliced = container.slice(1, 3);

        assert_eq!(sliced.len(), 2);
        assert_eq!(entry(&sliced, 0), Some("two"));
        assert_eq!(entry(&sliced, 1), Some("three"));
    }

    #[test]
    fn test_filter() {
        let mut container = Utf8Container::from_vec(owned(&["keep", "drop", "keep", "drop"]));
        let mask = BitVec::from_slice(&[true, false, true, false]);

        container.filter(&mask);

        assert_eq!(container.len(), 2);
        assert_eq!(entry(&container, 0), Some("keep"));
        assert_eq!(entry(&container, 1), Some("keep"));
    }

    #[test]
    fn test_reorder() {
        let mut container = Utf8Container::from_vec(owned(&["first", "second", "third"]));
        let indices = [2, 0, 1];

        container.reorder(&indices);

        assert_eq!(container.len(), 3);
        assert_eq!(entry(&container, 0), Some("third"));
        assert_eq!(entry(&container, 1), Some("first"));
        assert_eq!(entry(&container, 2), Some("second"));
    }

    #[test]
    fn test_reorder_with_out_of_bounds() {
        let mut container = Utf8Container::from_vec(owned(&["a", "b"]));
        // Index 5 does not exist in a two-element container.
        let indices = [1, 5, 0];

        container.reorder(&indices);

        assert_eq!(container.len(), 3);
        assert_eq!(entry(&container, 0), Some("b"));
        // The out-of-range index produced an undefined entry.
        assert_eq!(entry(&container, 1), None);
        assert_eq!(entry(&container, 2), Some("a"));
    }

    #[test]
    fn test_empty_strings() {
        let mut container = Utf8Container::with_capacity(2);
        // A defined empty string must stay distinguishable from an undefined entry.
        container.push(String::new());
        container.push_undefined();

        assert_eq!(container.len(), 2);
        assert_eq!(entry(&container, 0), Some(""));
        assert_eq!(entry(&container, 1), None);

        assert!(container.is_defined(0));
        assert!(!container.is_defined(1));
    }

    #[test]
    fn test_default() {
        let container = Utf8Container::default();

        assert_eq!(container.len(), 0);
        assert!(container.is_empty());
    }
}