Skip to main content

substr/
lib.rs

//! Encapsulates the concept of a substring of a certain length found at a
//! given offset, which can be useful when this information cannot be directly
//! coupled to the lifetime of the original string it was derived from. For
//! example, the section of a string which caused a parsing error may need to
//! be reported in a manner which survives the lifetime of the original parsed
//! string.
7//! 
8//! This may sound like an odd set of requirements, but it notably occurs when
9//! implementing an external trait (which cannot be modified to take a lifetime
10//! parameter) which contains a function which takes a `&str` -- like, say,
11//! `std::str::FromStr`.
12
13use std::{cmp::Ordering, ops::Range};
14
/// The position of a child string within a parent string, stored as an offset
/// and a length instead of a borrowed slice.
///
/// Since no reference to the parent is kept, a `Substr` is free of the
/// parent's lifetime. It can be converted back to its original `&str` form as
/// long as the parent `&str` it was derived from is still available.
#[derive(Clone, Copy, Debug)]
pub struct Substr {
    /// Byte offset of the child from the start of the parent.
    offset: usize,
    /// Length of the child in bytes.
    length: usize,
}
23
24impl Default for Substr {
25    fn default() -> Self {
26        Self::EMPTY
27    }
28}
29
30impl From<Range<usize>> for Substr {
31    fn from(r: Range<usize>) -> Self {
32        if r.start < r.end {
33            Self {
34                offset: r.start,
35                length: (r.end - r.start),
36            }
37        } else {
38            Self::EMPTY
39        }
40    }
41}
42
43impl PartialEq for Substr {
44    fn eq(&self, other: &Self) -> bool {
45        self.partial_cmp(other) == Some(Ordering::Equal)
46    }
47}
48
49/// Two Substr instances are considered equal if they are both empty or both refer to the same section of a string.
50impl PartialOrd for Substr {
51    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
52        let c = self.length.cmp(&other.length);
53        if c != Ordering::Equal {
54            Some(c)
55        } else {
56            if self.length == 0 || self.offset == other.offset {
57                Some(Ordering::Equal)
58            } else {
59                None
60            }
61        }
62    }
63}
64
65impl Substr {
66    const EMPTY: Self = Self {
67        offset: 0,
68        length: 0,
69    };
70
71    pub fn len(&self) -> usize {
72        self.length
73    }
74
75    pub fn is_empty(&self) -> bool {
76        self.length == 0
77    }
78
79    /// Creates a Substr by finding the child within the parent. If the child
80    /// str points to a range wholly contained within the parent, this will use
81    /// pointer arithmetic and be O(1); if not, `str::find` will be used, which
82    /// is linear in the size of the parent.
83    pub fn make(parent: &str, child: &str) -> Result<Self, ()> {
84        if child.is_empty() {
85            return Ok(Self::EMPTY);
86        }
87        let p = parent.as_ptr() as usize;
88        let c = child.as_ptr() as usize;
89        if c >= p && c + child.len() < p + parent.len() {
90            Ok(Self {
91                offset: c - p,
92                length: child.len(),
93            })
94        } else {
95            if let Some(i) = parent.find(child) {
96                Ok(Self {
97                    offset: i,
98                    length: child.len(),
99                })
100            } else {
101                Err(())
102            }
103        }
104    }
105
106    /// Recovers a `&str` from a Substr. The provided parent must be at least
107    /// as long as `offset + length`.
108    /// 
109    /// The recovered `&str` is guaranteed to point to the same slice of memory
110    /// as the orignal if the same parent is provided. The recovered `&str` is
111    /// guaranteed to compare equal to the original if the substring
112    /// `[offset, offset + length)` of the provided parent compares equal to the
113    /// same substring of the original parent.
114    /// 
115    /// # Panics
116    /// 
117    /// Panics if `length` or `length + offset` within the provided parent is
118    /// not on a UTF-8 code point boundary.
119    pub fn recover<'a>(&self, parent: &'a str) -> Result<&'a str, ()> {
120        if self.offset + self.length <= parent.len() {
121            Ok(parent.split_at(self.offset).1.split_at(self.length).0)
122        } else {
123            Err(())
124        }
125    }
126}
127
128impl From<Substr> for Range<usize> {
129    fn from(s: Substr) -> Self {
130        Self{
131            start: s.offset,
132            end: s.offset + s.length,
133        }
134    }
135}
136
137/// A helper trait to make converting a `&str` to a `Substr` easier. Returns an
138/// `Option` instead of a `Result` to match the behavior of `str::find()`.
139pub trait FindSubstr: {
140    fn find_substr(&self, s: &str) -> Option<Substr>;
141}
142
143impl FindSubstr for str {
144    fn find_substr(&self, s: &str) -> Option<Substr> {
145        Substr::make(self, s).ok()
146    }
147}
148
/// Handy default `Substr` whose length and offset are both zero. This is the
/// same value that `Substr::default()` returns.
pub const EMPTY: Substr = Substr::EMPTY;
152
#[cfg(test)]
mod tests {
    use super::*;

    /// Any zero-length `Substr` equals `EMPTY` whatever its offset; a non-zero
    /// length never does.
    #[test]
    fn empty_equality() {
        let zero_at_start = Substr { offset: 0, length: 0 };
        let zero_elsewhere = Substr { offset: 42, length: 0 };
        let non_empty = Substr { offset: 0, length: 42 };

        assert_eq!(EMPTY, zero_at_start);
        assert_eq!(EMPTY, zero_elsewhere);
        assert_ne!(EMPTY, non_empty);
    }

    /// Children sliced out of the parent survive a make/recover round trip.
    #[test]
    fn roundtrip_with_contained_child() -> Result<(), ()> {
        let parent = "foobar";
        let (head, tail) = parent.split_at(3);
        let nothing = &parent[..0];

        for &(child, expected) in &[(head, "foo"), (tail, "bar"), (nothing, "")] {
            assert_eq!(Substr::make(parent, child)?.recover(parent)?, expected);
        }
        Ok(())
    }

    /// Recovering against the same parent yields the very same memory.
    #[test]
    fn roundtrip_preserves_ptr() -> Result<(), ()> {
        let parent = "foobar";
        let (head, tail) = parent.split_at(3);

        for &child in &[head, tail] {
            let recovered = Substr::make(parent, child)?.recover(parent)?;
            assert_eq!(recovered.as_ptr(), child.as_ptr());
        }
        Ok(())
    }

    /// Children taken from other string literals (so not slices of the
    /// parent) are still located by their contents.
    #[test]
    fn roundtrip_without_contained_child() -> Result<(), ()> {
        let parent = "foobar";
        let foo = &"foo1"[..3];
        let bar = &"bar1"[..3];

        let foo_sub = Substr::make(parent, foo)?;
        let bar_sub = Substr::make(parent, bar)?;

        assert_eq!(foo_sub.recover(parent)?, "foo");
        assert_eq!(bar_sub.recover(parent)?, "bar");
        Ok(())
    }

    /// A `Substr` made against one parent can be recovered from another
    /// parent that has the same text at that position.
    #[test]
    fn roundtrip_with_different_parent() -> Result<(), ()> {
        let made_from = "foobar1";
        let recovered_from = "foobar2";

        for &needle in &["foo", "bar"] {
            let sub = Substr::make(made_from, needle)?;
            assert_eq!(sub.recover(recovered_from)?, needle);
        }
        Ok(())
    }

    /// Each of three distinct `Substr`s equals itself and none of the others.
    #[test]
    fn equality() -> Result<(), ()> {
        let parent = "foobar";
        let (head, tail) = parent.split_at(3);
        let nothing = &parent[..0];

        let subs = [
            Substr::make(parent, head)?,
            Substr::make(parent, tail)?,
            Substr::make(parent, nothing)?,
        ];

        for (i, lhs) in subs.iter().enumerate() {
            for (j, rhs) in subs.iter().enumerate() {
                if i == j {
                    assert_eq!(lhs, rhs);
                } else {
                    assert_ne!(lhs, rhs);
                }
            }
        }

        Ok(())
    }

    /// Equality depends only on position, not on which parent was used.
    #[test]
    fn equality_with_different_parents() -> Result<(), ()> {
        let first_parent = "foo1bar1";
        let second_parent = "foo2bar2";
        let (foo1, bar1) = first_parent.split_at(4);
        let (foo2, bar2) = second_parent.split_at(4);

        assert_eq!(
            Substr::make(first_parent, foo1)?,
            Substr::make(second_parent, foo2)?
        );
        assert_eq!(
            Substr::make(first_parent, bar1)?,
            Substr::make(second_parent, bar2)?
        );
        Ok(())
    }

    /// The `FindSubstr` helper locates a needle and round-trips it.
    #[test]
    fn find_substr() -> Result<(), ()> {
        let parent = "foobar";

        for &needle in &["foo", "bar"] {
            let sub = parent.find_substr(needle).ok_or(())?;
            assert_eq!(sub.recover(parent)?, needle);
        }
        Ok(())
    }

    /// `Substr` -> `Range<usize>` yields `offset..offset + length`.
    #[test]
    fn to_range() -> Result<(), ()> {
        let parent = "foobar";
        let (head, tail) = parent.split_at(3);

        let head_range = Range::<usize>::from(Substr::make(parent, head)?);
        let tail_range = Range::<usize>::from(Substr::make(parent, tail)?);

        assert_eq!((head_range.start, head_range.end), (0, 3));
        assert_eq!((tail_range.start, tail_range.end), (3, 6));
        Ok(())
    }

    /// `Range<usize>` -> `Substr` can be recovered against a parent.
    #[test]
    fn from_range() -> Result<(), ()> {
        let head_sub = Substr::from(0_usize..3);
        let tail_sub = Substr::from(3_usize..6);

        let parent = "foobar";
        assert_eq!(head_sub.recover(parent)?, "foo");
        assert_eq!(tail_sub.recover(parent)?, "bar");
        Ok(())
    }
}