Skip to main content

smart_string/str_stack/
str_list_ref.rs

1use std::fmt;
2use std::str::from_utf8_unchecked;
3
4use super::str_list::StrListIter;
5use super::StrStack;
6
/// Error returned by [`StrListRef::new`] when the input data/ends are invalid.
///
/// Implements [`std::error::Error`], so it can be propagated with `?` into
/// `Box<dyn Error>` and similar error containers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum StrListValidationError {
    /// A boundary value exceeds the data length.
    BoundaryOutOfRange {
        /// Position of the offending entry in the boundary table.
        index: usize,
        /// The boundary value that was too large.
        value: u32,
        /// Length of the data buffer the boundary was checked against.
        data_len: usize,
    },
    /// Boundary values are not monotonically non-decreasing.
    BoundaryNotMonotonic {
        /// Position of the entry that is smaller than its predecessor.
        index: usize,
        /// The preceding boundary value.
        prev: u32,
        /// The boundary value found at `index`.
        current: u32,
    },
    /// A segment between two boundaries is not valid UTF-8.
    InvalidUtf8 {
        /// Index of the segment that failed validation.
        index: usize,
        /// Start byte offset of the segment.
        start: u32,
        /// End byte offset (exclusive) of the segment.
        end: u32,
    },
}

impl fmt::Display for StrListValidationError {
    /// Writes a one-line, human-readable description of the failure.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::BoundaryOutOfRange {
                index,
                value,
                data_len,
            } => write!(
                f,
                "boundary out of range at index {}: value {} exceeds data length {}",
                index, value, data_len
            ),
            Self::BoundaryNotMonotonic {
                index,
                prev,
                current,
            } => write!(
                f,
                "boundaries not monotonic at index {}: {} > {}",
                index, prev, current
            ),
            Self::InvalidUtf8 { index, start, end } => write!(
                f,
                "invalid UTF-8 in segment {} (bytes {}..{})",
                index, start, end
            ),
        }
    }
}

// The variants carry no underlying cause, so the default `source()` (None) is correct.
impl std::error::Error for StrListValidationError {}
61
/// A borrowed, read-only view over string list data.
///
/// `StrListRef` borrows `&[u8]` (data) and `&[u32]` (boundary table) and provides
/// the same read-only API as [`StrList`](super::str_list::StrList). Use it for
/// zero-copy views over external buffers (e.g., memory-mapped files).
///
/// The view is `Copy`: it holds only the two borrowed slices, so copying it
/// never copies string data. Equality (`==`) compares the contents of both
/// slices.
///
/// # Layout
///
/// Segment `i` occupies `data[ends[i - 1]..ends[i]]`, where the start of
/// segment `0` is byte offset `0`.
///
/// # Invariants
///
/// Same as `StrList`: data is valid UTF-8 and ends are valid boundaries.
/// When constructed via [`new`](Self::new), the boundary table and each
/// segment are validated.
/// When constructed via `From<&StrStack>` or `From<&StrList>`, they are inherited.
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct StrListRef<'a> {
    // Backing byte buffer holding the segments back to back.
    data: &'a [u8],
    // Boundary table: `ends[i]` is the exclusive end byte offset of segment `i`.
    ends: &'a [u32],
}
78
79impl<'a> StrListRef<'a> {
80    /// Creates a `StrListRef` from raw data and boundary slices, validating
81    /// UTF-8 and boundary consistency.
82    ///
83    /// Validation checks (O(total bytes)):
84    /// - `ends` values are monotonically non-decreasing
85    /// - Last value does not exceed `data.len()`
86    /// - Each segment `data[ends[i-1]..ends[i]]` (with `ends[-1] = 0`) is valid UTF-8
87    pub fn new(data: &'a [u8], ends: &'a [u32]) -> Result<Self, StrListValidationError> {
88        let mut prev: u32 = 0;
89        for (i, &end) in ends.iter().enumerate() {
90            if end < prev {
91                return Err(StrListValidationError::BoundaryNotMonotonic {
92                    index: i,
93                    prev,
94                    current: end,
95                });
96            }
97            if (end as usize) > data.len() {
98                return Err(StrListValidationError::BoundaryOutOfRange {
99                    index: i,
100                    value: end,
101                    data_len: data.len(),
102                });
103            }
104            if std::str::from_utf8(&data[prev as usize..end as usize]).is_err() {
105                return Err(StrListValidationError::InvalidUtf8 {
106                    index: i,
107                    start: prev,
108                    end,
109                });
110            }
111            prev = end;
112        }
113        Ok(Self { data, ends })
114    }
115
116    /// Creates a `StrListRef` from trusted internal data. No validation.
117    ///
118    /// Used by `From<&StrStack>`, `From<&StrList>`, and `StrList::as_ref()`.
119    #[inline]
120    pub(super) fn from_raw_parts(data: &'a [u8], ends: &'a [u32]) -> Self {
121        Self { data, ends }
122    }
123
124    /// Returns the number of string segments.
125    #[inline]
126    pub fn len(&self) -> usize {
127        self.ends.len()
128    }
129
130    /// Returns `true` if the list contains no segments.
131    #[inline]
132    pub fn is_empty(&self) -> bool {
133        self.ends.is_empty()
134    }
135
136    /// Returns the total byte length of the data buffer.
137    #[inline]
138    pub fn bytes_len(&self) -> u32 {
139        self.data.len() as u32
140    }
141
142    /// Returns the concatenation of all segments as a single `&str`.
143    #[inline]
144    pub fn as_str(&self) -> &'a str {
145        // SAFETY: data is always valid UTF-8 (validated in new() or inherited from StrStack/StrList).
146        unsafe { from_utf8_unchecked(self.data) }
147    }
148
149    /// Returns the segment at `index`, or `None` if out of bounds.
150    #[inline]
151    pub fn get(&self, index: usize) -> Option<&'a str> {
152        if index >= self.ends.len() {
153            return None;
154        }
155        let start = if index > 0 {
156            self.ends[index - 1] as usize
157        } else {
158            0
159        };
160        let end = self.ends[index] as usize;
161        // SAFETY: bounds validated by ends; data is valid UTF-8.
162        Some(unsafe { from_utf8_unchecked(self.data.get_unchecked(start..end)) })
163    }
164
165    /// Returns the byte offset bounds `(start, end)` for the segment at `index`.
166    #[inline]
167    pub fn get_bounds(&self, index: usize) -> Option<(u32, u32)> {
168        if index >= self.ends.len() {
169            return None;
170        }
171        let start = if index > 0 { self.ends[index - 1] } else { 0 };
172        Some((start, self.ends[index]))
173    }
174
175    /// Returns the last segment, or `None` if empty.
176    #[inline]
177    pub fn last(&self) -> Option<&'a str> {
178        if self.ends.is_empty() {
179            None
180        } else {
181            self.get(self.ends.len() - 1)
182        }
183    }
184
185    /// Returns an iterator over the string segments.
186    #[inline]
187    pub fn iter(&self) -> StrListIter<'a> {
188        StrListIter {
189            data: self.data,
190            ends: self.ends,
191            index: 0,
192            back_index: self.ends.len(),
193        }
194    }
195}
196
197impl<'a> From<&'a StrStack> for StrListRef<'a> {
198    #[inline]
199    fn from(stack: &'a StrStack) -> Self {
200        Self::from_raw_parts(stack.data_as_slice(), stack.ends_as_slice())
201    }
202}
203
/// Borrows a [`StrList`](super::str_list::StrList) as a read-only `StrListRef`.
impl<'a> From<&'a super::str_list::StrList> for StrListRef<'a> {
    /// Delegates to `as_ref()` on the list.
    #[inline]
    fn from(list: &'a super::str_list::StrList) -> Self {
        list.as_ref()
    }
}
210
211impl<'a> IntoIterator for StrListRef<'a> {
212    type Item = &'a str;
213    type IntoIter = StrListIter<'a>;
214
215    #[inline]
216    fn into_iter(self) -> Self::IntoIter {
217        self.iter()
218    }
219}
220
221impl<'a> fmt::Debug for StrListRef<'a> {
222    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
223        f.debug_list().entries(self.iter()).finish()
224    }
225}
226
#[cfg(test)]
mod tests {
    use super::*;

    /// A view converted from a `StrStack` exposes the pushed segments in order.
    #[test]
    fn test_from_stack() {
        let mut source = StrStack::new();
        source.push("hello");
        source.push("world");

        let list: StrListRef = (&source).into();

        assert_eq!(list.len(), 2);
        assert_eq!(list.get(0), Some("hello"));
        assert_eq!(list.get(1), Some("world"));
        assert_eq!(list.as_str(), "helloworld");
    }

    /// Well-formed ASCII input passes validation and splits at the boundaries.
    #[test]
    fn test_new_valid() {
        let bytes = b"helloworld";
        let boundaries = [5u32, 10u32];

        let list = StrListRef::new(bytes, &boundaries).unwrap();

        assert_eq!(list.len(), 2);
        assert_eq!(list.get(0), Some("hello"));
        assert_eq!(list.get(1), Some("world"));
    }

    /// Empty data with an empty boundary table is a valid, empty list.
    #[test]
    fn test_new_empty() {
        let list = StrListRef::new(b"", &[]).unwrap();

        assert!(list.is_empty());
        assert_eq!(list.as_str(), "");
    }

    /// A boundary past the end of the data is rejected.
    #[test]
    fn test_new_invalid_boundary_out_of_range() {
        let bytes = b"hello";
        let boundaries = [10u32]; // 10 > 5

        let outcome = StrListRef::new(bytes, &boundaries);

        assert!(matches!(
            outcome.unwrap_err(),
            StrListValidationError::BoundaryOutOfRange { .. }
        ));
    }

    /// A boundary smaller than its predecessor is rejected.
    #[test]
    fn test_new_invalid_not_monotonic() {
        let bytes = b"helloworld";
        let boundaries = [5u32, 3u32]; // 3 < 5

        let outcome = StrListRef::new(bytes, &boundaries);

        assert!(matches!(
            outcome.unwrap_err(),
            StrListValidationError::BoundaryNotMonotonic { .. }
        ));
    }

    /// A segment containing non-UTF-8 bytes is rejected.
    #[test]
    fn test_new_invalid_utf8() {
        let bytes: [u8; 3] = [0xff, 0xfe, 0xfd];
        let boundaries = [3u32];

        let outcome = StrListRef::new(&bytes, &boundaries);

        assert!(matches!(
            outcome.unwrap_err(),
            StrListValidationError::InvalidUtf8 { .. }
        ));
    }

    /// Multi-byte characters are handled when boundaries sit on char boundaries.
    #[test]
    fn test_new_unicode() {
        let text = "你好😊";
        let boundaries = [6u32, 10u32]; // 你好 = 6 bytes, 😊 = 4 bytes

        let list = StrListRef::new(text.as_bytes(), &boundaries).unwrap();

        assert_eq!(list.get(0), Some("你好"));
        assert_eq!(list.get(1), Some("😊"));
    }

    /// `get_bounds` reports `(start, end)` per segment and `None` past the end.
    #[test]
    fn test_get_bounds() {
        let bytes = b"abcdef";
        let boundaries = [3u32, 6u32];

        let list = StrListRef::new(bytes, &boundaries).unwrap();

        assert_eq!(list.get_bounds(0), Some((0, 3)));
        assert_eq!(list.get_bounds(1), Some((3, 6)));
        assert_eq!(list.get_bounds(2), None);
    }

    /// `last` returns the final segment.
    #[test]
    fn test_last() {
        let bytes = b"abc";
        let boundaries = [1u32, 2u32, 3u32];

        let list = StrListRef::new(bytes, &boundaries).unwrap();

        assert_eq!(list.last(), Some("c"));
    }

    /// Forward iteration yields segments front to back.
    #[test]
    fn test_iter() {
        let bytes = b"abcdef";
        let boundaries = [2u32, 4u32, 6u32];

        let list = StrListRef::new(bytes, &boundaries).unwrap();
        let segments = list.iter().collect::<Vec<&str>>();

        assert_eq!(segments, vec!["ab", "cd", "ef"]);
    }

    /// Reverse iteration yields segments back to front.
    #[test]
    fn test_iter_rev() {
        let bytes = b"abcdef";
        let boundaries = [2u32, 4u32, 6u32];

        let list = StrListRef::new(bytes, &boundaries).unwrap();
        let segments = list.iter().rev().collect::<Vec<&str>>();

        assert_eq!(segments, vec!["ef", "cd", "ab"]);
    }

    /// Repeated boundaries produce empty segments.
    #[test]
    fn test_empty_segments() {
        let bytes = b"abc";
        let boundaries = [0u32, 3u32, 3u32]; // "", "abc", ""

        let list = StrListRef::new(bytes, &boundaries).unwrap();

        assert_eq!(list.len(), 3);
        assert_eq!(list.get(0), Some(""));
        assert_eq!(list.get(1), Some("abc"));
        assert_eq!(list.get(2), Some(""));
    }

    /// The view is `Copy`, and a copy compares equal to the original.
    #[test]
    fn test_copy() {
        let bytes = b"hello";
        let boundaries = [5u32];

        let original = StrListRef::new(bytes, &boundaries).unwrap();
        let duplicate = original; // Copy

        assert_eq!(original, duplicate);
    }

    /// `Debug` output is the list of segments.
    #[test]
    fn test_debug() {
        let bytes = b"helloworld";
        let boundaries = [5u32, 10u32];

        let list = StrListRef::new(bytes, &boundaries).unwrap();
        let rendered = format!("{:?}", list);

        assert_eq!(rendered, r#"["hello", "world"]"#);
    }
}