reifydb_type/value/container/
utf8.rs1use std::{
5 fmt::{self, Debug},
6 result::Result as StdResult,
7 str,
8};
9
10use serde::{Deserialize, Deserializer, Serialize, Serializer};
11
12use crate::{
13 Result,
14 storage::{Cow, Storage},
15 value::{Value, container::varlen::VarlenContainer, r#type::Type},
16};
17
18pub struct Utf8Container<S: Storage = Cow> {
19 inner: VarlenContainer<S>,
20}
21
22impl<S: Storage> Clone for Utf8Container<S> {
23 fn clone(&self) -> Self {
24 Self {
25 inner: self.inner.clone(),
26 }
27 }
28}
29
30impl<S: Storage> Debug for Utf8Container<S>
31where
32 VarlenContainer<S>: Debug,
33{
34 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
35 f.debug_struct("Utf8Container").field("inner", &self.inner).finish()
36 }
37}
38
39impl<S: Storage> PartialEq for Utf8Container<S>
40where
41 VarlenContainer<S>: PartialEq,
42{
43 fn eq(&self, other: &Self) -> bool {
44 self.inner == other.inner
45 }
46}
47
48impl Serialize for Utf8Container<Cow> {
49 fn serialize<Ser: Serializer>(&self, serializer: Ser) -> StdResult<Ser::Ok, Ser::Error> {
50 self.inner.serialize(serializer)
55 }
56}
57
58impl<'de> Deserialize<'de> for Utf8Container<Cow> {
59 fn deserialize<D: Deserializer<'de>>(deserializer: D) -> StdResult<Self, D::Error> {
60 let inner = VarlenContainer::deserialize(deserializer)?;
61 Ok(Self {
62 inner,
63 })
64 }
65}
66
67impl Utf8Container<Cow> {
68 pub fn new(data: Vec<String>) -> Self {
69 Self::from_vec(data)
70 }
71
72 pub fn from_vec(data: Vec<String>) -> Self {
73 let inner = VarlenContainer::from_byte_slices(data.iter().map(|s| s.as_bytes()));
74 Self {
75 inner,
76 }
77 }
78
79 pub fn with_capacity(capacity: usize) -> Self {
80 Self {
84 inner: VarlenContainer::with_capacity(capacity, capacity * 16),
85 }
86 }
87
88 pub fn from_raw_parts(data: Vec<String>) -> Self {
91 Self::from_vec(data)
92 }
93
94 pub fn from_bytes_offsets(data: Vec<u8>, offsets: Vec<u64>) -> Self {
98 debug_assert!(str::from_utf8(&data).is_ok(), "Utf8Container data must be valid UTF-8");
99 Self {
100 inner: VarlenContainer::from_raw_parts(data, offsets),
101 }
102 }
103
104 pub fn try_into_raw_parts(self) -> Option<Vec<String>> {
107 Some(self.iter().map(|s| s.unwrap().to_string()).collect())
108 }
109}
110
111impl<S: Storage> Utf8Container<S> {
112 pub fn from_inner(inner: VarlenContainer<S>) -> Self {
113 Self {
114 inner,
115 }
116 }
117
118 pub fn from_storage_parts(data: S::Vec<u8>, offsets: S::Vec<u64>) -> Self {
123 Self {
124 inner: VarlenContainer::from_storage_parts(data, offsets),
125 }
126 }
127
128 pub fn data_storage(&self) -> &S::Vec<u8> {
130 self.inner.data()
131 }
132
133 pub fn offsets_storage(&self) -> &S::Vec<u64> {
134 self.inner.offsets_data()
135 }
136
137 pub fn len(&self) -> usize {
138 self.inner.len()
139 }
140
141 pub fn capacity(&self) -> usize {
142 self.inner.capacity()
143 }
144
145 pub fn is_empty(&self) -> bool {
146 self.inner.is_empty()
147 }
148
149 pub fn clear(&mut self) {
151 self.inner.clear_generic();
152 }
153
154 pub fn get(&self, index: usize) -> Option<&str> {
156 let bytes = self.inner.get_bytes(index)?;
157 Some(unsafe { str::from_utf8_unchecked(bytes) })
161 }
162
163 pub fn is_defined(&self, idx: usize) -> bool {
164 idx < self.len()
165 }
166
167 pub fn is_fully_defined(&self) -> bool {
168 true
169 }
170
171 pub fn data_bytes(&self) -> &[u8] {
174 self.inner.data_bytes()
175 }
176
177 pub fn offsets(&self) -> &[u64] {
179 self.inner.offsets()
180 }
181
182 pub fn inner(&self) -> &VarlenContainer<S> {
184 &self.inner
185 }
186
187 pub fn as_string(&self, index: usize) -> String {
188 self.get(index).map(str::to_string).unwrap_or_else(|| "none".to_string())
189 }
190
191 pub fn get_value(&self, index: usize) -> Value {
192 match self.get(index) {
193 Some(s) => Value::Utf8(s.to_string()),
194 None => Value::none_of(Type::Utf8),
195 }
196 }
197
198 pub fn iter(&self) -> impl Iterator<Item = Option<&str>> + '_ {
200 (0..self.len()).map(|i| self.get(i))
201 }
202
203 pub fn iter_str(&self) -> impl Iterator<Item = &str> + '_ {
205 (0..self.len()).map(|i| self.get(i).unwrap())
206 }
207}
208
209impl Utf8Container<Cow> {
210 pub fn push(&mut self, value: String) {
211 self.inner.push_bytes(value.as_bytes());
212 }
213
214 pub fn push_str(&mut self, value: &str) {
215 self.inner.push_bytes(value.as_bytes());
216 }
217
218 pub fn push_default(&mut self) {
219 self.inner.push_bytes(&[]);
220 }
221
222 pub fn extend(&mut self, other: &Self) -> Result<()> {
223 self.inner.extend_from(&other.inner);
224 Ok(())
225 }
226
227 pub fn slice(&self, start: usize, end: usize) -> Self {
228 Self {
229 inner: self.inner.slice(start, end),
230 }
231 }
232
233 pub fn filter(&mut self, mask: &<Cow as Storage>::BitVec) {
234 let bits: Vec<bool> = mask.iter().collect();
235 self.inner.filter_in_place(|i| bits.get(i).copied().unwrap_or(false));
236 }
237
238 pub fn reorder(&mut self, indices: &[usize]) {
239 self.inner.reorder_in_place(indices);
240 }
241
242 pub fn take(&self, num: usize) -> Self {
243 Self {
244 inner: self.inner.take_n(num),
245 }
246 }
247}
248
249impl Default for Utf8Container<Cow> {
250 fn default() -> Self {
251 Self::with_capacity(0)
252 }
253}
254
/// Unit tests for `Utf8Container` with the default `Cow` storage backend.
#[cfg(test)]
pub mod tests {
    use postcard::to_allocvec as postcard_to_allocvec;

    use super::*;
    use crate::util::bitvec::BitVec;

    #[test]
    fn test_new() {
        let data = vec!["hello".to_string(), "world".to_string(), "test".to_string()];
        let container = Utf8Container::new(data);

        assert_eq!(container.len(), 3);
        assert_eq!(container.get(0), Some("hello"));
        assert_eq!(container.get(1), Some("world"));
        assert_eq!(container.get(2), Some("test"));
    }

    #[test]
    fn test_from_vec() {
        let data = vec!["foo".to_string(), "bar".to_string(), "baz".to_string()];
        let container = Utf8Container::from_vec(data);

        assert_eq!(container.len(), 3);
        assert_eq!(container.get(0), Some("foo"));
        assert_eq!(container.get(1), Some("bar"));
        assert_eq!(container.get(2), Some("baz"));

        for i in 0..3 {
            assert!(container.is_defined(i));
        }
    }

    #[test]
    fn test_with_capacity() {
        let container = Utf8Container::with_capacity(10);
        assert_eq!(container.len(), 0);
        assert!(container.is_empty());
        assert!(container.capacity() >= 10);
    }

    #[test]
    fn test_push() {
        let mut container = Utf8Container::with_capacity(3);

        container.push("first".to_string());
        container.push("second".to_string());
        container.push_default();

        assert_eq!(container.len(), 3);
        assert_eq!(container.get(0), Some("first"));
        assert_eq!(container.get(1), Some("second"));
        assert_eq!(container.get(2), Some(""));

        assert!(container.is_defined(0));
        assert!(container.is_defined(1));
        assert!(container.is_defined(2));
    }

    #[test]
    fn test_extend() {
        let mut container1 = Utf8Container::from_vec(vec!["a".to_string(), "b".to_string()]);
        let container2 = Utf8Container::from_vec(vec!["c".to_string(), "d".to_string()]);

        container1.extend(&container2).unwrap();

        assert_eq!(container1.len(), 4);
        assert_eq!(container1.get(0), Some("a"));
        assert_eq!(container1.get(1), Some("b"));
        assert_eq!(container1.get(2), Some("c"));
        assert_eq!(container1.get(3), Some("d"));
    }

    #[test]
    fn test_iter() {
        let data = vec!["x".to_string(), "y".to_string(), "z".to_string()];
        let container = Utf8Container::new(data);

        let collected: Vec<Option<&str>> = container.iter().collect();
        assert_eq!(collected, vec![Some("x"), Some("y"), Some("z")]);
    }

    #[test]
    fn test_slice() {
        let container = Utf8Container::from_vec(vec![
            "one".to_string(),
            "two".to_string(),
            "three".to_string(),
            "four".to_string(),
        ]);
        let sliced = container.slice(1, 3);

        assert_eq!(sliced.len(), 2);
        assert_eq!(sliced.get(0), Some("two"));
        assert_eq!(sliced.get(1), Some("three"));
    }

    #[test]
    fn test_filter() {
        let mut container = Utf8Container::from_vec(vec![
            "keep".to_string(),
            "drop".to_string(),
            "keep".to_string(),
            "drop".to_string(),
        ]);
        let mask = BitVec::from_slice(&[true, false, true, false]);

        container.filter(&mask);

        assert_eq!(container.len(), 2);
        assert_eq!(container.get(0), Some("keep"));
        assert_eq!(container.get(1), Some("keep"));
    }

    #[test]
    fn test_reorder() {
        let mut container =
            Utf8Container::from_vec(vec!["first".to_string(), "second".to_string(), "third".to_string()]);
        let indices = [2, 0, 1];

        container.reorder(&indices);

        assert_eq!(container.len(), 3);
        assert_eq!(container.get(0), Some("third"));
        assert_eq!(container.get(1), Some("first"));
        assert_eq!(container.get(2), Some("second"));
    }

    #[test]
    fn test_reorder_with_out_of_bounds() {
        let mut container = Utf8Container::from_vec(vec!["a".to_string(), "b".to_string()]);
        let indices = [1, 5, 0];

        container.reorder(&indices);

        // The out-of-range index 5 is expected to produce an empty string.
        assert_eq!(container.len(), 3);
        assert_eq!(container.get(0), Some("b"));
        assert_eq!(container.get(1), Some(""));
        assert_eq!(container.get(2), Some("a"));
    }

    #[test]
    fn test_empty_strings() {
        let mut container = Utf8Container::with_capacity(2);
        container.push("".to_string());
        container.push_default();

        assert_eq!(container.len(), 2);
        assert_eq!(container.get(0), Some(""));
        assert_eq!(container.get(1), Some(""));

        assert!(container.is_defined(0));
        assert!(container.is_defined(1));
    }

    #[test]
    fn test_default() {
        let container = Utf8Container::default();
        assert_eq!(container.len(), 0);
        assert!(container.is_empty());
    }

    #[test]
    fn test_data_bytes_and_offsets_match_zero_copy_layout() {
        let container = Utf8Container::from_vec(vec!["aa".to_string(), "bb".to_string()]);
        assert_eq!(container.data_bytes(), b"aabb");
        assert_eq!(container.offsets(), &[0u64, 2, 4]);
    }

    #[test]
    fn test_postcard_wire_compat() {
        // The container must serialize to exactly the bytes a plain
        // `Vec<String>` with the same contents serializes to.
        let strings = vec!["hello".to_string(), "world".to_string()];
        let strings_bytes: Vec<u8> = postcard_to_allocvec(&strings).unwrap();

        let container = Utf8Container::from_vec(strings);
        let container_bytes: Vec<u8> = postcard_to_allocvec(&container).unwrap();

        assert_eq!(strings_bytes, container_bytes);
    }
}