// smart_string/str_stack/str_list.rs

1use std::str::from_utf8_unchecked;
2
3use super::str_list_ref::StrListRef;
4use super::StrStack;
5
/// An immutable, compact representation of a list of string slices.
///
/// `StrList` is the "frozen" counterpart to [`StrStack`]. It stores the same data
/// (a contiguous UTF-8 byte buffer plus a `u32` boundary table) but uses boxed slices
/// instead of `Vec`s, shedding excess capacity.
///
/// Typically created by converting a finished [`StrStack`] via `From<StrStack>` or
/// by collecting an iterator of `&str`.
///
/// # Invariants
///
/// - `data` is always valid UTF-8.
/// - `ends` values are monotonically non-decreasing and the last value does not
///   exceed `data.len()`.
/// - Every value in `ends` falls on a UTF-8 character boundary of `data`. The
///   accessors slice `data` at these offsets without re-validating, so this is
///   required for soundness, not just correctness.
#[derive(Clone, PartialEq, Eq)]
pub struct StrList {
    /// Concatenated bytes of all segments, in order.
    data: Box<[u8]>,
    /// `ends[i]` is the exclusive end offset of segment `i` within `data`;
    /// segment `i` starts at `ends[i - 1]`, or at `0` for the first segment.
    ends: Box<[u32]>,
}
25
26impl StrList {
27    /// Returns the number of string segments.
28    #[inline]
29    pub fn len(&self) -> usize {
30        self.ends.len()
31    }
32
33    /// Returns `true` if the list contains no segments.
34    #[inline]
35    pub fn is_empty(&self) -> bool {
36        self.ends.is_empty()
37    }
38
39    /// Returns the total byte length of the data buffer.
40    #[inline]
41    pub fn bytes_len(&self) -> u32 {
42        self.data.len() as u32
43    }
44
45    /// Returns the concatenation of all segments as a single `&str`.
46    #[inline]
47    pub fn as_str(&self) -> &str {
48        // SAFETY: `data` is always valid UTF-8 (invariant from StrStack or FromIterator).
49        unsafe { from_utf8_unchecked(&self.data) }
50    }
51
52    /// Returns the segment at `index`, or `None` if out of bounds.
53    #[inline]
54    pub fn get(&self, index: usize) -> Option<&str> {
55        let (start, end) = self.get_bounds_usize(index)?;
56        // SAFETY: bounds validated by `get_bounds_usize`; data is valid UTF-8.
57        Some(unsafe { from_utf8_unchecked(self.data.get_unchecked(start..end)) })
58    }
59
60    /// Returns the byte offset bounds `(start, end)` for the segment at `index`.
61    #[inline]
62    pub fn get_bounds(&self, index: usize) -> Option<(u32, u32)> {
63        if index >= self.ends.len() {
64            return None;
65        }
66        let start = if index > 0 { self.ends[index - 1] } else { 0 };
67        let end = self.ends[index];
68        debug_assert!(start <= end);
69        debug_assert!((end as usize) <= self.data.len());
70        Some((start, end))
71    }
72
73    /// Returns the last segment, or `None` if empty.
74    #[inline]
75    pub fn last(&self) -> Option<&str> {
76        if self.ends.is_empty() {
77            None
78        } else {
79            self.get(self.ends.len() - 1)
80        }
81    }
82
83    /// Returns a borrowed view over this list.
84    #[inline]
85    pub fn as_ref(&self) -> StrListRef<'_> {
86        StrListRef::from_raw_parts(&self.data, &self.ends)
87    }
88
89    /// Returns an iterator over the string segments.
90    #[inline]
91    pub fn iter(&self) -> StrListIter<'_> {
92        StrListIter {
93            data: &self.data,
94            ends: &self.ends,
95            index: 0,
96            back_index: self.ends.len(),
97        }
98    }
99
100    #[inline]
101    fn get_bounds_usize(&self, index: usize) -> Option<(usize, usize)> {
102        if index >= self.ends.len() {
103            return None;
104        }
105        let start = if index > 0 {
106            self.ends[index - 1] as usize
107        } else {
108            0
109        };
110        let end = self.ends[index] as usize;
111        Some((start, end))
112    }
113}
114
115impl From<StrStack> for StrList {
116    #[inline]
117    fn from(stack: StrStack) -> Self {
118        Self {
119            data: stack.data_as_slice().to_vec().into_boxed_slice(),
120            ends: stack.ends_as_slice().to_vec().into_boxed_slice(),
121        }
122    }
123}
124
125impl<'a> FromIterator<&'a str> for StrList {
126    fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
127        let mut data = Vec::new();
128        let mut ends = Vec::new();
129        for s in iter {
130            data.extend_from_slice(s.as_bytes());
131            let end: u32 = data
132                .len()
133                .try_into()
134                .expect("StrList: total byte length exceeds u32::MAX");
135            ends.push(end);
136        }
137        Self {
138            data: data.into_boxed_slice(),
139            ends: ends.into_boxed_slice(),
140        }
141    }
142}
143
144impl<'a> IntoIterator for &'a StrList {
145    type Item = &'a str;
146    type IntoIter = StrListIter<'a>;
147
148    #[inline]
149    fn into_iter(self) -> Self::IntoIter {
150        self.iter()
151    }
152}
153
154impl std::fmt::Debug for StrList {
155    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
156        f.debug_list().entries(self.iter()).finish()
157    }
158}
159
/// Serializes the list as a plain sequence of strings, one element per segment.
#[cfg(feature = "serde")]
impl serde::Serialize for StrList {
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        // `collect_seq` drives the sequence from the iterator and takes the
        // length from its exact `size_hint`.
        serializer.collect_seq(self.iter())
    }
}
171
/// Deserializes a sequence of strings into a `StrList`.
///
/// # Errors
///
/// Returns a deserialization error if the input is not a sequence of strings, or
/// if the combined byte length of the strings does not fit in `u32`.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for StrList {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        // Deserialize as Vec<String>, then collect into StrList.
        let strings: Vec<String> = serde::Deserialize::deserialize(deserializer)?;

        // Collecting panics once the running length passes `u32::MAX`. Input is
        // caller-controlled here, so report oversized data as an error instead.
        let total_len: usize = strings.iter().map(String::len).sum();
        if u32::try_from(total_len).is_err() {
            return Err(<D::Error as serde::de::Error>::custom(
                "StrList: total byte length exceeds u32::MAX",
            ));
        }

        Ok(strings.iter().map(String::as_str).collect())
    }
}
183
184/// Iterator over `StrList` or `StrListRef` segments.
185pub struct StrListIter<'a> {
186    pub(super) data: &'a [u8],
187    pub(super) ends: &'a [u32],
188    pub(super) index: usize,
189    pub(super) back_index: usize,
190}
191
192impl<'a> Iterator for StrListIter<'a> {
193    type Item = &'a str;
194
195    #[inline]
196    fn next(&mut self) -> Option<Self::Item> {
197        if self.index >= self.back_index {
198            return None;
199        }
200        let start = if self.index > 0 {
201            self.ends[self.index - 1] as usize
202        } else {
203            0
204        };
205        let end = self.ends[self.index] as usize;
206        self.index += 1;
207        // SAFETY: bounds come from `ends` which are valid boundaries; data is valid UTF-8.
208        Some(unsafe { from_utf8_unchecked(self.data.get_unchecked(start..end)) })
209    }
210
211    #[inline]
212    fn size_hint(&self) -> (usize, Option<usize>) {
213        let len = self.back_index - self.index;
214        (len, Some(len))
215    }
216}
217
218impl<'a> DoubleEndedIterator for StrListIter<'a> {
219    #[inline]
220    fn next_back(&mut self) -> Option<Self::Item> {
221        if self.back_index <= self.index {
222            return None;
223        }
224        self.back_index -= 1;
225        let start = if self.back_index > 0 {
226            self.ends[self.back_index - 1] as usize
227        } else {
228            0
229        };
230        let end = self.ends[self.back_index] as usize;
231        // SAFETY: bounds come from `ends` which are valid boundaries; data is valid UTF-8.
232        Some(unsafe { from_utf8_unchecked(self.data.get_unchecked(start..end)) })
233    }
234}
235
236impl<'a> ExactSizeIterator for StrListIter<'a> {
237    #[inline]
238    fn len(&self) -> usize {
239        self.back_index - self.index
240    }
241}
242
243impl<'a> std::iter::FusedIterator for StrListIter<'a> {}
244
#[cfg(test)]
mod tests {
    use super::*;

    // Conversion from a finished `StrStack` keeps segments and their order.
    #[test]
    fn test_from_stack() {
        let mut stack = StrStack::new();
        stack.push("hello");
        stack.push("world");
        let list = StrList::from(stack);
        assert_eq!(list.len(), 2);
        assert_eq!(list.get(0), Some("hello"));
        assert_eq!(list.get(1), Some("world"));
        assert_eq!(list.as_str(), "helloworld");
    }

    #[test]
    fn test_from_iterator() {
        let list: StrList = ["aaa", "bbb", "ccc"].iter().copied().collect();
        assert_eq!(list.len(), 3);
        assert_eq!(list.get(0), Some("aaa"));
        assert_eq!(list.get(1), Some("bbb"));
        assert_eq!(list.get(2), Some("ccc"));
    }

    #[test]
    fn test_empty() {
        let list: StrList = std::iter::empty::<&str>().collect();
        assert!(list.is_empty());
        assert_eq!(list.len(), 0);
        assert_eq!(list.as_str(), "");
        assert_eq!(list.last(), None);
        assert_eq!(list.get(0), None);
    }

    #[test]
    fn test_last() {
        let list: StrList = ["a", "b", "c"].iter().copied().collect();
        assert_eq!(list.last(), Some("c"));
    }

    // Bounds are byte offsets, so multi-byte characters widen the ranges.
    #[test]
    fn test_get_bounds() {
        let list: StrList = ["abc", "€", "😊"].iter().copied().collect();
        assert_eq!(list.get_bounds(0), Some((0, 3)));
        assert_eq!(list.get_bounds(1), Some((3, 6)));
        assert_eq!(list.get_bounds(2), Some((6, 10)));
        assert_eq!(list.get_bounds(3), None);
    }

    #[test]
    fn test_bytes_len() {
        let list: StrList = ["abc", "€"].iter().copied().collect();
        assert_eq!(list.bytes_len(), 6);
    }

    #[test]
    fn test_iter_forward() {
        let list: StrList = ["a", "b", "c"].iter().copied().collect();
        let collected: Vec<&str> = list.iter().collect();
        assert_eq!(collected, vec!["a", "b", "c"]);
    }

    #[test]
    fn test_iter_reverse() {
        let list: StrList = ["a", "b", "c"].iter().copied().collect();
        let collected: Vec<&str> = list.iter().rev().collect();
        assert_eq!(collected, vec!["c", "b", "a"]);
    }

    #[test]
    fn test_iter_exact_size() {
        let list: StrList = ["a", "b", "c"].iter().copied().collect();
        let mut it = list.iter();
        assert_eq!(it.len(), 3);
        it.next();
        assert_eq!(it.len(), 2);
        it.next_back();
        assert_eq!(it.len(), 1);
    }

    // Alternating front/back consumption must stop cleanly when the cursors meet.
    #[test]
    fn test_iter_double_ended_meet_in_middle() {
        let list: StrList = ["a", "b", "c", "d"].iter().copied().collect();
        let mut it = list.iter();
        assert_eq!(it.next(), Some("a"));
        assert_eq!(it.next_back(), Some("d"));
        assert_eq!(it.next(), Some("b"));
        assert_eq!(it.next_back(), Some("c"));
        assert_eq!(it.next(), None);
        assert_eq!(it.next_back(), None);
    }

    #[test]
    fn test_as_ref() {
        let list: StrList = ["hello", "world"].iter().copied().collect();
        let view = list.as_ref();
        assert_eq!(view.len(), 2);
        assert_eq!(view.get(0), Some("hello"));
        assert_eq!(view.get(1), Some("world"));
    }

    #[test]
    fn test_clone_eq() {
        let list: StrList = ["a", "b"].iter().copied().collect();
        let list2 = list.clone();
        assert_eq!(list, list2);
    }

    #[test]
    fn test_debug() {
        let list: StrList = ["hello", "world"].iter().copied().collect();
        let debug = format!("{:?}", list);
        assert_eq!(debug, r#"["hello", "world"]"#);
    }

    // Empty strings still occupy a slot in the list.
    #[test]
    fn test_empty_segments() {
        let list: StrList = ["", "abc", ""].iter().copied().collect();
        assert_eq!(list.len(), 3);
        assert_eq!(list.get(0), Some(""));
        assert_eq!(list.get(1), Some("abc"));
        assert_eq!(list.get(2), Some(""));
    }

    #[cfg(feature = "serde")]
    #[test]
    fn test_serde_roundtrip() {
        let list: StrList = ["hello", "world", "€"].iter().copied().collect();
        let json = serde_json::to_string(&list).unwrap();
        assert_eq!(json, r#"["hello","world","€"]"#);
        let deserialized: StrList = serde_json::from_str(&json).unwrap();
        assert_eq!(list, deserialized);
    }

    // A `StrList` must serialize to the same JSON as the `StrStack` it came from.
    #[cfg(feature = "serde")]
    #[test]
    fn test_serde_matches_strstack() {
        let mut stack = StrStack::new();
        stack.push("aaa");
        stack.push("bbb");
        stack.push("ccc");

        let stack_json = serde_json::to_string(&stack).unwrap();
        let list = StrList::from(stack);
        let list_json = serde_json::to_string(&list).unwrap();
        assert_eq!(stack_json, list_json);
    }
}