reifydb_value/value/container/
utf8.rs1use std::{
5 fmt::{self, Debug},
6 result::Result as StdResult,
7 str,
8};
9
10use serde::{Deserialize, Deserializer, Serialize, Serializer};
11
12use crate::{
13 Result,
14 storage::{Cow, Storage},
15 value::{Value, container::varlen::VarlenContainer, value_type::ValueType},
16};
17
18pub struct Utf8Container<S: Storage = Cow> {
19 inner: VarlenContainer<S>,
20}
21
22impl<S: Storage> Clone for Utf8Container<S> {
23 fn clone(&self) -> Self {
24 Self {
25 inner: self.inner.clone(),
26 }
27 }
28}
29
30impl<S: Storage> Debug for Utf8Container<S>
31where
32 VarlenContainer<S>: Debug,
33{
34 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
35 f.debug_struct("Utf8Container").field("inner", &self.inner).finish()
36 }
37}
38
39impl<S: Storage> PartialEq for Utf8Container<S>
40where
41 VarlenContainer<S>: PartialEq,
42{
43 fn eq(&self, other: &Self) -> bool {
44 self.inner == other.inner
45 }
46}
47
48impl Serialize for Utf8Container<Cow> {
49 fn serialize<Ser: Serializer>(&self, serializer: Ser) -> StdResult<Ser::Ok, Ser::Error> {
50 self.inner.serialize(serializer)
51 }
52}
53
54impl<'de> Deserialize<'de> for Utf8Container<Cow> {
55 fn deserialize<D: Deserializer<'de>>(deserializer: D) -> StdResult<Self, D::Error> {
56 let inner = VarlenContainer::deserialize(deserializer)?;
57 Ok(Self {
58 inner,
59 })
60 }
61}
62
63impl Utf8Container<Cow> {
64 pub fn new(data: Vec<String>) -> Self {
65 Self::from_vec(data)
66 }
67
68 pub fn from_vec(data: Vec<String>) -> Self {
69 let inner = VarlenContainer::from_byte_slices(data.iter().map(|s| s.as_bytes()));
70 Self {
71 inner,
72 }
73 }
74
75 pub fn from_repeated_str(value: &str, count: usize) -> Self {
76 Self {
77 inner: VarlenContainer::from_repeated_bytes(value.as_bytes(), count),
78 }
79 }
80
81 pub fn with_capacity(capacity: usize) -> Self {
82 Self {
83 inner: VarlenContainer::with_capacity(capacity, capacity * 16),
84 }
85 }
86
87 pub fn from_raw_parts(data: Vec<String>) -> Self {
88 Self::from_vec(data)
89 }
90
91 pub fn from_bytes_offsets(data: Vec<u8>, offsets: Vec<u64>) -> Self {
92 debug_assert!(str::from_utf8(&data).is_ok(), "Utf8Container data must be valid UTF-8");
93 Self {
94 inner: VarlenContainer::from_raw_parts(data, offsets),
95 }
96 }
97
98 pub fn try_into_raw_parts(self) -> Option<Vec<String>> {
99 Some(self.iter().map(|s| s.unwrap().to_string()).collect())
100 }
101}
102
103impl<S: Storage> Utf8Container<S> {
104 pub fn from_inner(inner: VarlenContainer<S>) -> Self {
105 Self {
106 inner,
107 }
108 }
109
110 pub fn from_storage_parts(data: S::Vec<u8>, offsets: S::Vec<u64>) -> Self {
111 Self {
112 inner: VarlenContainer::from_storage_parts(data, offsets),
113 }
114 }
115
116 pub fn data_storage(&self) -> &S::Vec<u8> {
117 self.inner.data()
118 }
119
120 pub fn offsets_storage(&self) -> &S::Vec<u64> {
121 self.inner.offsets_data()
122 }
123
124 pub fn len(&self) -> usize {
125 self.inner.len()
126 }
127
128 pub fn capacity(&self) -> usize {
129 self.inner.capacity()
130 }
131
132 pub fn is_empty(&self) -> bool {
133 self.inner.is_empty()
134 }
135
136 pub fn clear(&mut self) {
137 self.inner.clear_generic();
138 }
139
140 pub fn get(&self, index: usize) -> Option<&str> {
141 let bytes = self.inner.get_bytes(index)?;
142 Some(unsafe { str::from_utf8_unchecked(bytes) })
145 }
146
147 pub fn is_defined(&self, idx: usize) -> bool {
148 idx < self.len()
149 }
150
151 pub fn is_fully_defined(&self) -> bool {
152 true
153 }
154
155 pub fn data_bytes(&self) -> &[u8] {
156 self.inner.data_bytes()
157 }
158
159 pub fn offsets(&self) -> &[u64] {
160 self.inner.offsets()
161 }
162
163 pub fn inner(&self) -> &VarlenContainer<S> {
164 &self.inner
165 }
166
167 pub fn as_string(&self, index: usize) -> String {
168 self.get(index).map(str::to_string).unwrap_or_else(|| "none".to_string())
169 }
170
171 pub fn get_value(&self, index: usize) -> Value {
172 match self.get(index) {
173 Some(s) => Value::Utf8(s.to_string()),
174 None => Value::none_of(ValueType::Utf8),
175 }
176 }
177
178 pub fn iter(&self) -> impl Iterator<Item = Option<&str>> + '_ {
179 (0..self.len()).map(|i| self.get(i))
180 }
181
182 pub fn iter_str(&self) -> impl Iterator<Item = &str> + '_ {
183 (0..self.len()).map(|i| self.get(i).unwrap())
184 }
185}
186
187impl Utf8Container<Cow> {
188 pub fn push(&mut self, value: String) {
189 self.inner.push_bytes(value.as_bytes());
190 }
191
192 pub fn push_str(&mut self, value: &str) {
193 self.inner.push_bytes(value.as_bytes());
194 }
195
196 pub fn push_default(&mut self) {
197 self.inner.push_bytes(&[]);
198 }
199
200 pub fn extend(&mut self, other: &Self) -> Result<()> {
201 self.inner.extend_from(&other.inner);
202 Ok(())
203 }
204
205 pub fn slice(&self, start: usize, end: usize) -> Self {
206 Self {
207 inner: self.inner.slice(start, end),
208 }
209 }
210
211 pub fn filter(&mut self, mask: &<Cow as Storage>::BitVec) {
212 let bits: Vec<bool> = mask.iter().collect();
213 self.inner.filter_in_place(|i| bits.get(i).copied().unwrap_or(false));
214 }
215
216 pub fn reorder(&mut self, indices: &[usize]) {
217 self.inner.reorder_in_place(indices);
218 }
219
220 pub fn take(&self, num: usize) -> Self {
221 Self {
222 inner: self.inner.take_n(num),
223 }
224 }
225}
226
227impl Default for Utf8Container<Cow> {
228 fn default() -> Self {
229 Self::with_capacity(0)
230 }
231}
232
#[cfg(test)]
pub mod tests {
    use postcard::to_allocvec as postcard_to_allocvec;

    use super::*;
    use crate::util::bitvec::BitVec;

    #[test]
    fn test_new() {
        let data = vec!["hello".to_string(), "world".to_string(), "test".to_string()];
        let container = Utf8Container::new(data);

        assert_eq!(container.len(), 3);
        assert_eq!(container.get(0), Some("hello"));
        assert_eq!(container.get(1), Some("world"));
        assert_eq!(container.get(2), Some("test"));
    }

    #[test]
    fn test_from_vec() {
        let data = vec!["foo".to_string(), "bar".to_string(), "baz".to_string()];
        let container = Utf8Container::from_vec(data);

        assert_eq!(container.len(), 3);
        assert_eq!(container.get(0), Some("foo"));
        assert_eq!(container.get(1), Some("bar"));
        assert_eq!(container.get(2), Some("baz"));

        for i in 0..3 {
            assert!(container.is_defined(i));
        }
    }

    #[test]
    fn test_from_repeated_str() {
        let container = Utf8Container::from_repeated_str("mint", 3);
        let explicit =
            Utf8Container::from_vec(vec!["mint".to_string(), "mint".to_string(), "mint".to_string()]);
        assert_eq!(container, explicit);
        assert_eq!(container.len(), 3);
        assert_eq!(container.get(0), Some("mint"));
        assert_eq!(container.get(2), Some("mint"));
        for i in 0..3 {
            assert!(container.is_defined(i));
        }
    }

    #[test]
    fn test_with_capacity() {
        let container = Utf8Container::with_capacity(10);
        assert_eq!(container.len(), 0);
        assert!(container.is_empty());
        assert!(container.capacity() >= 10);
    }

    #[test]
    fn test_push() {
        let mut container = Utf8Container::with_capacity(3);

        container.push("first".to_string());
        container.push("second".to_string());
        container.push_default();

        assert_eq!(container.len(), 3);
        assert_eq!(container.get(0), Some("first"));
        assert_eq!(container.get(1), Some("second"));
        assert_eq!(container.get(2), Some(""));

        assert!(container.is_defined(0));
        assert!(container.is_defined(1));
        assert!(container.is_defined(2));
    }

    #[test]
    fn test_extend() {
        let mut container1 = Utf8Container::from_vec(vec!["a".to_string(), "b".to_string()]);
        let container2 = Utf8Container::from_vec(vec!["c".to_string(), "d".to_string()]);

        container1.extend(&container2).unwrap();

        assert_eq!(container1.len(), 4);
        assert_eq!(container1.get(0), Some("a"));
        assert_eq!(container1.get(1), Some("b"));
        assert_eq!(container1.get(2), Some("c"));
        assert_eq!(container1.get(3), Some("d"));
    }

    #[test]
    fn test_iter() {
        let data = vec!["x".to_string(), "y".to_string(), "z".to_string()];
        let container = Utf8Container::new(data);

        let collected: Vec<Option<&str>> = container.iter().collect();
        assert_eq!(collected, vec![Some("x"), Some("y"), Some("z")]);
    }

    #[test]
    fn test_slice() {
        let container = Utf8Container::from_vec(vec![
            "one".to_string(),
            "two".to_string(),
            "three".to_string(),
            "four".to_string(),
        ]);
        let sliced = container.slice(1, 3);

        assert_eq!(sliced.len(), 2);
        assert_eq!(sliced.get(0), Some("two"));
        assert_eq!(sliced.get(1), Some("three"));
    }

    #[test]
    fn test_filter() {
        let mut container = Utf8Container::from_vec(vec![
            "keep".to_string(),
            "drop".to_string(),
            "keep".to_string(),
            "drop".to_string(),
        ]);
        let mask = BitVec::from_slice(&[true, false, true, false]);

        container.filter(&mask);

        assert_eq!(container.len(), 2);
        assert_eq!(container.get(0), Some("keep"));
        assert_eq!(container.get(1), Some("keep"));
    }

    #[test]
    fn test_reorder() {
        let mut container =
            Utf8Container::from_vec(vec!["first".to_string(), "second".to_string(), "third".to_string()]);
        let indices = [2, 0, 1];

        container.reorder(&indices);

        assert_eq!(container.len(), 3);
        assert_eq!(container.get(0), Some("third"));
        assert_eq!(container.get(1), Some("first"));
        assert_eq!(container.get(2), Some("second"));
    }

    // An index past the end of the source produces an empty string in the
    // reordered output.
    #[test]
    fn test_reorder_with_out_of_bounds() {
        let mut container = Utf8Container::from_vec(vec!["a".to_string(), "b".to_string()]);
        let indices = [1, 5, 0];

        container.reorder(&indices);

        assert_eq!(container.len(), 3);
        assert_eq!(container.get(0), Some("b"));
        assert_eq!(container.get(1), Some(""));
        assert_eq!(container.get(2), Some("a"));
    }

    #[test]
    fn test_empty_strings() {
        let mut container = Utf8Container::with_capacity(2);
        container.push("".to_string());
        container.push_default();

        assert_eq!(container.len(), 2);
        assert_eq!(container.get(0), Some(""));
        assert_eq!(container.get(1), Some(""));

        assert!(container.is_defined(0));
        assert!(container.is_defined(1));
    }

    #[test]
    fn test_default() {
        let container = Utf8Container::default();
        assert_eq!(container.len(), 0);
        assert!(container.is_empty());
    }

    #[test]
    fn test_data_bytes_and_offsets_match_zero_copy_layout() {
        let container = Utf8Container::from_vec(vec!["aa".to_string(), "bb".to_string()]);
        assert_eq!(container.data_bytes(), b"aabb");
        assert_eq!(container.offsets(), &[0u64, 2, 4]);
    }

    // The container must encode to exactly the same postcard bytes as the
    // equivalent `Vec<String>`.
    #[test]
    fn test_postcard_wire_compat() {
        let strings = vec!["hello".to_string(), "world".to_string()];
        let strings_bytes: Vec<u8> = postcard_to_allocvec(&strings).unwrap();

        let container = Utf8Container::from_vec(strings);
        let container_bytes: Vec<u8> = postcard_to_allocvec(&container).unwrap();

        assert_eq!(strings_bytes, container_bytes);
    }
}