// reifydb_type/value/container/utf8.rs
use std::{
5 fmt::{self, Debug},
6 result::Result as StdResult,
7 str,
8};
9
10use serde::{Deserialize, Deserializer, Serialize, Serializer};
11
12use crate::{
13 Result,
14 storage::{Cow, Storage},
15 value::{Value, container::varlen::VarlenContainer, r#type::Type},
16};
17
/// Container of UTF-8 strings backed by a [`VarlenContainer`].
///
/// The storage backend `S` defaults to [`Cow`]. The string accessor `get`
/// reinterprets the stored bytes as `str` without re-validating them, so every
/// element held by `inner` is expected to be valid UTF-8.
pub struct Utf8Container<S: Storage = Cow> {
    // Variable-length byte container that holds the string bytes and their offsets.
    inner: VarlenContainer<S>,
}
21
22impl<S: Storage> Clone for Utf8Container<S> {
23 fn clone(&self) -> Self {
24 Self {
25 inner: self.inner.clone(),
26 }
27 }
28}
29
30impl<S: Storage> Debug for Utf8Container<S>
31where
32 VarlenContainer<S>: Debug,
33{
34 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
35 f.debug_struct("Utf8Container").field("inner", &self.inner).finish()
36 }
37}
38
39impl<S: Storage> PartialEq for Utf8Container<S>
40where
41 VarlenContainer<S>: PartialEq,
42{
43 fn eq(&self, other: &Self) -> bool {
44 self.inner == other.inner
45 }
46}
47
48impl Serialize for Utf8Container<Cow> {
49 fn serialize<Ser: Serializer>(&self, serializer: Ser) -> StdResult<Ser::Ok, Ser::Error> {
50 self.inner.serialize(serializer)
51 }
52}
53
54impl<'de> Deserialize<'de> for Utf8Container<Cow> {
55 fn deserialize<D: Deserializer<'de>>(deserializer: D) -> StdResult<Self, D::Error> {
56 let inner = VarlenContainer::deserialize(deserializer)?;
57 Ok(Self {
58 inner,
59 })
60 }
61}
62
63impl Utf8Container<Cow> {
64 pub fn new(data: Vec<String>) -> Self {
65 Self::from_vec(data)
66 }
67
68 pub fn from_vec(data: Vec<String>) -> Self {
69 let inner = VarlenContainer::from_byte_slices(data.iter().map(|s| s.as_bytes()));
70 Self {
71 inner,
72 }
73 }
74
75 pub fn with_capacity(capacity: usize) -> Self {
76 Self {
77 inner: VarlenContainer::with_capacity(capacity, capacity * 16),
78 }
79 }
80
81 pub fn from_raw_parts(data: Vec<String>) -> Self {
82 Self::from_vec(data)
83 }
84
85 pub fn from_bytes_offsets(data: Vec<u8>, offsets: Vec<u64>) -> Self {
86 debug_assert!(str::from_utf8(&data).is_ok(), "Utf8Container data must be valid UTF-8");
87 Self {
88 inner: VarlenContainer::from_raw_parts(data, offsets),
89 }
90 }
91
92 pub fn try_into_raw_parts(self) -> Option<Vec<String>> {
93 Some(self.iter().map(|s| s.unwrap().to_string()).collect())
94 }
95}
96
97impl<S: Storage> Utf8Container<S> {
98 pub fn from_inner(inner: VarlenContainer<S>) -> Self {
99 Self {
100 inner,
101 }
102 }
103
104 pub fn from_storage_parts(data: S::Vec<u8>, offsets: S::Vec<u64>) -> Self {
105 Self {
106 inner: VarlenContainer::from_storage_parts(data, offsets),
107 }
108 }
109
110 pub fn data_storage(&self) -> &S::Vec<u8> {
111 self.inner.data()
112 }
113
114 pub fn offsets_storage(&self) -> &S::Vec<u64> {
115 self.inner.offsets_data()
116 }
117
118 pub fn len(&self) -> usize {
119 self.inner.len()
120 }
121
122 pub fn capacity(&self) -> usize {
123 self.inner.capacity()
124 }
125
126 pub fn is_empty(&self) -> bool {
127 self.inner.is_empty()
128 }
129
130 pub fn clear(&mut self) {
131 self.inner.clear_generic();
132 }
133
134 pub fn get(&self, index: usize) -> Option<&str> {
135 let bytes = self.inner.get_bytes(index)?;
136 Some(unsafe { str::from_utf8_unchecked(bytes) })
139 }
140
141 pub fn is_defined(&self, idx: usize) -> bool {
142 idx < self.len()
143 }
144
145 pub fn is_fully_defined(&self) -> bool {
146 true
147 }
148
149 pub fn data_bytes(&self) -> &[u8] {
150 self.inner.data_bytes()
151 }
152
153 pub fn offsets(&self) -> &[u64] {
154 self.inner.offsets()
155 }
156
157 pub fn inner(&self) -> &VarlenContainer<S> {
158 &self.inner
159 }
160
161 pub fn as_string(&self, index: usize) -> String {
162 self.get(index).map(str::to_string).unwrap_or_else(|| "none".to_string())
163 }
164
165 pub fn get_value(&self, index: usize) -> Value {
166 match self.get(index) {
167 Some(s) => Value::Utf8(s.to_string()),
168 None => Value::none_of(Type::Utf8),
169 }
170 }
171
172 pub fn iter(&self) -> impl Iterator<Item = Option<&str>> + '_ {
173 (0..self.len()).map(|i| self.get(i))
174 }
175
176 pub fn iter_str(&self) -> impl Iterator<Item = &str> + '_ {
177 (0..self.len()).map(|i| self.get(i).unwrap())
178 }
179}
180
181impl Utf8Container<Cow> {
182 pub fn push(&mut self, value: String) {
183 self.inner.push_bytes(value.as_bytes());
184 }
185
186 pub fn push_str(&mut self, value: &str) {
187 self.inner.push_bytes(value.as_bytes());
188 }
189
190 pub fn push_default(&mut self) {
191 self.inner.push_bytes(&[]);
192 }
193
194 pub fn extend(&mut self, other: &Self) -> Result<()> {
195 self.inner.extend_from(&other.inner);
196 Ok(())
197 }
198
199 pub fn slice(&self, start: usize, end: usize) -> Self {
200 Self {
201 inner: self.inner.slice(start, end),
202 }
203 }
204
205 pub fn filter(&mut self, mask: &<Cow as Storage>::BitVec) {
206 let bits: Vec<bool> = mask.iter().collect();
207 self.inner.filter_in_place(|i| bits.get(i).copied().unwrap_or(false));
208 }
209
210 pub fn reorder(&mut self, indices: &[usize]) {
211 self.inner.reorder_in_place(indices);
212 }
213
214 pub fn take(&self, num: usize) -> Self {
215 Self {
216 inner: self.inner.take_n(num),
217 }
218 }
219}
220
221impl Default for Utf8Container<Cow> {
222 fn default() -> Self {
223 Self::with_capacity(0)
224 }
225}
226
/// Unit tests for [`Utf8Container`] with the default `Cow` storage.
#[cfg(test)]
pub mod tests {
    use postcard::to_allocvec as postcard_to_allocvec;

    use super::*;
    use crate::util::bitvec::BitVec;

    #[test]
    fn test_new() {
        let data = vec!["hello".to_string(), "world".to_string(), "test".to_string()];
        let container = Utf8Container::new(data.clone());

        assert_eq!(container.len(), 3);
        assert_eq!(container.get(0), Some("hello"));
        assert_eq!(container.get(1), Some("world"));
        assert_eq!(container.get(2), Some("test"));
    }

    #[test]
    fn test_from_vec() {
        let data = vec!["foo".to_string(), "bar".to_string(), "baz".to_string()];
        let container = Utf8Container::from_vec(data);

        assert_eq!(container.len(), 3);
        assert_eq!(container.get(0), Some("foo"));
        assert_eq!(container.get(1), Some("bar"));
        assert_eq!(container.get(2), Some("baz"));

        for i in 0..3 {
            assert!(container.is_defined(i));
        }
    }

    #[test]
    fn test_with_capacity() {
        let container = Utf8Container::with_capacity(10);
        assert_eq!(container.len(), 0);
        assert!(container.is_empty());
        assert!(container.capacity() >= 10);
    }

    #[test]
    fn test_push() {
        let mut container = Utf8Container::with_capacity(3);

        container.push("first".to_string());
        container.push("second".to_string());
        container.push_default();

        assert_eq!(container.len(), 3);
        assert_eq!(container.get(0), Some("first"));
        assert_eq!(container.get(1), Some("second"));
        // The default element is the empty string.
        assert_eq!(container.get(2), Some(""));

        assert!(container.is_defined(0));
        assert!(container.is_defined(1));
        assert!(container.is_defined(2));
    }

    #[test]
    fn test_extend() {
        let mut container1 = Utf8Container::from_vec(vec!["a".to_string(), "b".to_string()]);
        let container2 = Utf8Container::from_vec(vec!["c".to_string(), "d".to_string()]);

        container1.extend(&container2).unwrap();

        assert_eq!(container1.len(), 4);
        assert_eq!(container1.get(0), Some("a"));
        assert_eq!(container1.get(1), Some("b"));
        assert_eq!(container1.get(2), Some("c"));
        assert_eq!(container1.get(3), Some("d"));
    }

    #[test]
    fn test_iter() {
        let data = vec!["x".to_string(), "y".to_string(), "z".to_string()];
        let container = Utf8Container::new(data);

        let collected: Vec<Option<&str>> = container.iter().collect();
        assert_eq!(collected, vec![Some("x"), Some("y"), Some("z")]);
    }

    #[test]
    fn test_slice() {
        let container = Utf8Container::from_vec(vec![
            "one".to_string(),
            "two".to_string(),
            "three".to_string(),
            "four".to_string(),
        ]);
        let sliced = container.slice(1, 3);

        // `slice(1, 3)` keeps positions 1 and 2 (end is exclusive).
        assert_eq!(sliced.len(), 2);
        assert_eq!(sliced.get(0), Some("two"));
        assert_eq!(sliced.get(1), Some("three"));
    }

    #[test]
    fn test_filter() {
        let mut container = Utf8Container::from_vec(vec![
            "keep".to_string(),
            "drop".to_string(),
            "keep".to_string(),
            "drop".to_string(),
        ]);
        let mask = BitVec::from_slice(&[true, false, true, false]);

        container.filter(&mask);

        assert_eq!(container.len(), 2);
        assert_eq!(container.get(0), Some("keep"));
        assert_eq!(container.get(1), Some("keep"));
    }

    #[test]
    fn test_reorder() {
        let mut container = Utf8Container::from_vec(vec![
            "first".to_string(),
            "second".to_string(),
            "third".to_string(),
        ]);
        let indices = [2, 0, 1];

        container.reorder(&indices);

        assert_eq!(container.len(), 3);
        assert_eq!(container.get(0), Some("third"));
        assert_eq!(container.get(1), Some("first"));
        assert_eq!(container.get(2), Some("second"));
    }

    #[test]
    fn test_reorder_with_out_of_bounds() {
        let mut container = Utf8Container::from_vec(vec!["a".to_string(), "b".to_string()]);
        let indices = [1, 5, 0];

        container.reorder(&indices);

        // An out-of-range source index produces an empty string in that slot.
        assert_eq!(container.len(), 3);
        assert_eq!(container.get(0), Some("b"));
        assert_eq!(container.get(1), Some(""));
        assert_eq!(container.get(2), Some("a"));
    }

    #[test]
    fn test_empty_strings() {
        let mut container = Utf8Container::with_capacity(2);
        container.push("".to_string());
        container.push_default();

        assert_eq!(container.len(), 2);
        assert_eq!(container.get(0), Some(""));
        assert_eq!(container.get(1), Some(""));

        assert!(container.is_defined(0));
        assert!(container.is_defined(1));
    }

    #[test]
    fn test_default() {
        let container = Utf8Container::default();
        assert_eq!(container.len(), 0);
        assert!(container.is_empty());
    }

    #[test]
    fn test_data_bytes_and_offsets_match_zero_copy_layout() {
        let container = Utf8Container::from_vec(vec!["aa".to_string(), "bb".to_string()]);
        assert_eq!(container.data_bytes(), b"aabb");
        assert_eq!(container.offsets(), &[0u64, 2, 4]);
    }

    #[test]
    fn test_postcard_wire_compat() {
        let strings = vec!["hello".to_string(), "world".to_string()];
        // The container must serialize to exactly the same postcard bytes as the
        // equivalent `Vec<String>`.
        let strings_bytes: Vec<u8> = postcard_to_allocvec(&strings).unwrap();

        let container = Utf8Container::from_vec(strings.clone());
        let container_bytes: Vec<u8> = postcard_to_allocvec(&container).unwrap();

        assert_eq!(strings_bytes, container_bytes);
    }
}