aimcal_ical/
string_storage.rs

1// SPDX-FileCopyrightText: 2025-2026 Zexin Yuan <aim@yzx9.xyz>
2//
3// SPDX-License-Identifier: Apache-2.0
4
5//! String storage abstraction for zero-copy and owned string representations.
6
7use std::borrow::Cow;
8use std::fmt::{self, Debug, Display};
9use std::hash::Hash;
10use std::iter::Peekable;
11use std::ops::Range;
12use std::str::CharIndices;
13
14use chumsky::span::SimpleSpan;
15
/// Abstraction over the string representation carried by parsed values.
///
/// Code written against this trait works unchanged with borrowed, zero-copy
/// text as well as with fully owned text.
///
/// # Implementors
///
/// - `String` - owned text without position information
/// - `Segments<'src>` - borrowed pieces of the source, each with its span
/// - `&T` where `T: StringStorage` - shares the span type of `T`
pub trait StringStorage: Clone + Display {
    /// Position type that accompanies values held in this storage.
    ///
    /// `Segments` uses `Span` (source positions); `String` uses `()` because
    /// owned text keeps no span information.
    type Span: Copy + Eq + PartialEq + Hash + Debug;
}
33
34// Implement StringStorage for references to types that implement it
35impl<T: StringStorage> StringStorage for &T {
36    type Span = T::Span;
37}
38
39impl StringStorage for String {
40    type Span = (); // No span information for owned strings
41}
42
/// A `start`/`end` pair of positions locating a region of the source code
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct Span {
    /// Position at which the span begins
    pub start: usize,
    /// Position at which the span ends
    pub end: usize,
}
51
52impl Span {
53    /// Create a new span from start and end positions
54    #[must_use]
55    pub const fn new(start: usize, end: usize) -> Self {
56        Self { start, end }
57    }
58
59    /// Convert to a standard range
60    #[must_use]
61    pub const fn into_range(self) -> Range<usize> {
62        self.start..self.end
63    }
64}
65
66impl From<Range<usize>> for Span {
67    fn from(range: Range<usize>) -> Self {
68        Self {
69            start: range.start,
70            end: range.end,
71        }
72    }
73}
74
75impl From<SimpleSpan<usize>> for Span {
76    fn from(span: SimpleSpan<usize>) -> Self {
77        Self {
78            start: span.start,
79            end: span.end,
80        }
81    }
82}
83
84impl From<Span> for SimpleSpan<usize> {
85    fn from(span: Span) -> Self {
86        use chumsky::span::Span as _;
87        SimpleSpan::new((), span.start..span.end)
88    }
89}
90
91impl Display for Span {
92    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
93        write!(f, "{}..{}", self.start, self.end)
94    }
95}
96
97/// A spanned text segment (text with its position in the source)
98pub type Segment<'src> = (&'src str, Span);
99
100/// A collection of spanned text segments (multi-segment value with positions)
101#[derive(Default, Clone, Debug)]
102pub struct Segments<'src> {
103    pub(crate) segments: Vec<Segment<'src>>,
104    len: usize,
105}
106
107impl<'src> Segments<'src> {
108    /// Create a new `Segments` from a vector of segments
109    #[must_use]
110    pub(crate) fn new(segments: Vec<Segment<'src>>) -> Self {
111        let len = segments.iter().map(|(s, _)| s.len()).sum();
112        Self { segments, len }
113    }
114
115    /// Get the total length in bytes of all segments
116    #[must_use]
117    pub const fn len(&self) -> usize {
118        self.len
119    }
120
121    /// Returns `true` if the segments contain no elements
122    #[must_use]
123    pub const fn is_empty(&self) -> bool {
124        self.len == 0
125    }
126
127    /// Get the full span from first to last segment
128    #[must_use]
129    pub fn span(&self) -> Span {
130        match (self.segments.first(), self.segments.last()) {
131            (Some((_, first_span)), Some((_, last_span))) => Span {
132                start: first_span.start,
133                end: last_span.end,
134            },
135            _ => Span { start: 0, end: 0 },
136        }
137    }
138
139    /// Resolve segments into a single string (borrowed if single segment, owned otherwise)
140    ///
141    /// # Panics
142    ///
143    /// Panics if there are no segments. This should never happen in practice
144    /// as `Segments` is always created with at least one segment.
145    #[must_use]
146    pub fn resolve(&self) -> Cow<'src, str> {
147        if self.segments.len() == 1 {
148            let s = self.segments.first().unwrap().0; // SAFETY: due to len() == 1
149            Cow::Borrowed(s)
150        } else {
151            let mut s = String::with_capacity(self.len);
152            for (seg, _) in &self.segments {
153                s.push_str(seg);
154            }
155            Cow::Owned(s)
156        }
157    }
158
159    /// Convert to owned String efficiently
160    ///
161    /// This is more explicit and slightly more efficient than using the
162    /// `Display` trait's `to_string()` method, as it uses the known capacity.
163    #[must_use]
164    pub fn to_owned(&self) -> String {
165        let mut s = String::with_capacity(self.len);
166        for (seg, _) in &self.segments {
167            s.push_str(seg);
168        }
169        s
170    }
171
172    /// Check if segments start with the given prefix, ignoring ASCII case
173    #[must_use]
174    pub(crate) fn starts_with_str_ignore_ascii_case(&self, prefix: &str) -> bool {
175        if prefix.is_empty() {
176            return true;
177        } else if prefix.len() > self.len {
178            return false;
179        }
180
181        let mut remaining = prefix;
182        for (seg, _) in &self.segments {
183            if remaining.is_empty() {
184                return true;
185            } else if seg.len() >= remaining.len() {
186                // This segment is long enough to contain the rest of the prefix
187                return seg[..remaining.len()].eq_ignore_ascii_case(remaining);
188            } else if !seg.eq_ignore_ascii_case(&remaining[..seg.len()]) {
189                return false;
190            }
191            // This segment is shorter than the remaining prefix
192            remaining = &remaining[seg.len()..];
193        }
194
195        remaining.is_empty()
196    }
197
198    /// Compare segments to a string ignoring ASCII case
199    #[must_use]
200    pub fn eq_str_ignore_ascii_case(&self, mut other: &str) -> bool {
201        if other.len() != self.len {
202            return false;
203        }
204
205        for (seg, _) in &self.segments {
206            let Some((head, tail)) = other.split_at_checked(seg.len()) else {
207                return false;
208            };
209            if !head.eq_ignore_ascii_case(seg) {
210                return false;
211            }
212            other = tail;
213        }
214
215        true
216    }
217
218    pub(crate) fn into_spanned_chars(self) -> SegmentedSpannedChars<'src> {
219        SegmentedSpannedChars {
220            segments: self.segments,
221            seg_idx: 0,
222            chars: None,
223        }
224    }
225}
226
227impl Display for Segments<'_> {
228    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
229        for (seg, _) in &self.segments {
230            Display::fmt(seg, f)?;
231        }
232        Ok(())
233    }
234}
235
236impl StringStorage for Segments<'_> {
237    type Span = Span;
238}
239
240/// Iterator over characters in spanned segments
241#[derive(Debug, Clone)]
242pub struct SegmentedSpannedChars<'src> {
243    segments: Vec<Segment<'src>>,
244    seg_idx: usize,
245    chars: Option<(Span, Peekable<CharIndices<'src>>)>,
246}
247
248impl Iterator for SegmentedSpannedChars<'_> {
249    type Item = (char, Span);
250
251    fn next(&mut self) -> Option<Self::Item> {
252        while self.seg_idx < self.segments.len() {
253            match self.chars {
254                Some((ref span, ref mut chars)) => match chars.next() {
255                    Some((start, c)) => {
256                        let char_span = match chars.peek() {
257                            Some((end, _)) => Span::new(span.start + start, span.start + end),
258                            None => Span::new(span.start + start, span.end),
259                        };
260                        return Some((c, char_span));
261                    }
262                    None => {
263                        self.seg_idx += 1;
264                        self.chars = None;
265                    }
266                },
267                None => {
268                    let (s, span) = self.segments.get(self.seg_idx).unwrap(); // SAFETY: due to while condition
269                    self.chars = Some((*span, s.char_indices().peekable()));
270                }
271            }
272        }
273
274        None
275    }
276}
277
#[cfg(test)]
mod tests {
    use super::*;

    /// Build `Segments` from text pieces laid out back-to-back from offset 0,
    /// so each span's length always equals the byte length of its text.
    fn make_segments<'a>(parts: &[&'a str]) -> Segments<'a> {
        let mut offset = 0;
        let segments = parts
            .iter()
            .map(|&text| {
                let span = Span::new(offset, offset + text.len());
                offset = span.end;
                (text, span)
            })
            .collect();
        Segments::new(segments)
    }

    #[test]
    fn spanned_segments_starts_with_str_ignore_ascii_case() {
        // Test X- properties (case-insensitive)
        let segments = make_segments(&["X-CUSTOM-PROP"]);
        assert!(segments.starts_with_str_ignore_ascii_case("X-"));
        assert!(segments.starts_with_str_ignore_ascii_case("x-"));

        // Test non-X- properties
        let segments = make_segments(&["NONSTANDARD-PROP"]);
        assert!(!segments.starts_with_str_ignore_ascii_case("X-"));
        assert!(!segments.starts_with_str_ignore_ascii_case("x-"));

        // Test mixed case
        let segments = make_segments(&["x-custom"]);
        assert!(segments.starts_with_str_ignore_ascii_case("X-"));
        assert!(segments.starts_with_str_ignore_ascii_case("x-"));

        // Test multi-segment
        let segments = make_segments(&["X-", "CUSTOM"]);
        assert!(segments.starts_with_str_ignore_ascii_case("x-"));
        assert!(segments.starts_with_str_ignore_ascii_case("X-C"));
    }
}