stry_common/utils/fenn/
slice.rs

1//! Types allowing for the 'lazy' slicing of `&str`s while keeping a single lifetime.
2
3use std::{
4    fmt::{Display, Error, Formatter},
5    ops::{Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive},
6};
7
8/// A 'lazy' [`str`] slice using [`Range`]s.
9#[derive(Debug, Hash, PartialEq, Eq)]
10pub struct Slice<'s> {
11    source: &'s str,
12    range: Range<usize>,
13}
14
15impl<'s> Slice<'s> {
16    pub const fn new(source: &'s str) -> Slice<'s> {
17        Slice {
18            range: 0..source.len(),
19            source,
20        }
21    }
22
23    /// Returns the length of `self`.
24    ///
25    /// This length is in bytes, not [`char`]s or graphemes. In other words,
26    /// it may not be what a human considers the length of the string.
27    ///
28    /// [`char`]: prim@char
29    ///
30    /// # Examples
31    ///
32    /// Basic usage:
33    ///
34    /// ```
35    /// # use lazy_slice::Slice;
36    /// let len = Slice::new("foo").len();
37    /// assert_eq!(3, len);
38    ///
39    /// assert_eq!(Slice::new("ƒoo").len(), 4); // fancy f!
40    /// # // assert_eq!(Slice::new("ƒoo").chars().count(), 3);
41    /// ```
42    pub const fn len(&self) -> usize {
43        self.range.end - self.range.start
44    }
45
46    /// Returns `true` if `self` has a length of zero bytes.
47    ///
48    /// # Examples
49    ///
50    /// Basic usage:
51    ///
52    /// ```
53    /// # use lazy_slice::Slice;
54    /// let s = Slice::new("");
55    /// assert!(s.is_empty());
56    ///
57    /// let s = Slice::new("not empty");
58    /// assert!(!s.is_empty());
59    /// ```
60    pub const fn is_empty(&self) -> bool {
61        self.range.start == self.range.end
62    }
63
64    //
65
66    /// Returns `true` if the given pattern matches a prefix of this
67    /// string slice.
68    ///
69    /// Returns `false` if it does not.
70    ///
71    /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
72    /// function or closure that determines if a character matches.
73    ///
74    /// [`char`]: prim@char
75    /// [pattern]: std::str::pattern::Pattern
76    ///
77    /// # Examples
78    ///
79    /// Basic usage:
80    ///
81    /// ```
82    /// # use lazy_slice::Slice;
83    /// let bananas = Slice::new("bananas");
84    ///
85    /// assert!(bananas.starts_with("bana"));
86    /// assert!(!bananas.starts_with("nana"));
87    /// ```
88    pub fn starts_with<'r, P, F>(&self, pat: P) -> bool
89    where
90        P: Into<Pattern<'r, F>>,
91        F: FnMut(char) -> bool,
92    {
93        let pat: Pattern<'r, F> = pat.into();
94
95        let slice = &self.source[self.range.start..self.range.end];
96
97        match pat {
98            Pattern::Char(pat) => slice.starts_with(pat),
99            Pattern::CharArrayRef(pat) => slice.starts_with(pat),
100            Pattern::Function(pat) => slice.starts_with(pat),
101            Pattern::Str(pat) => slice.starts_with(pat),
102            Pattern::StrRef(pat) => slice.starts_with(pat),
103            Pattern::StringRef(pat) => slice.starts_with(pat),
104        }
105    }
106
107    /// Returns `true` if the given pattern matches a suffix of this
108    /// string slice.
109    ///
110    /// Returns `false` if it does not.
111    ///
112    /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
113    /// function or closure that determines if a character matches.
114    ///
115    /// [`char`]: prim@char
116    /// [pattern]: std::str::pattern::Pattern
117    ///
118    /// # Examples
119    ///
120    /// Basic usage:
121    ///
122    /// ```
123    /// # use lazy_slice::Slice;
124    /// let bananas = Slice::new("bananas");
125    ///
126    /// assert!(bananas.ends_with("anas"));
127    /// assert!(!bananas.ends_with("nana"));
128    /// ```
129    pub fn ends_with<'r, P, F>(&self, pat: P) -> bool
130    where
131        P: Into<Pattern<'r, F>>,
132        F: FnMut(char) -> bool,
133    {
134        let pat: Pattern<'r, F> = pat.into();
135
136        let slice = &self.source[self.range.start..self.range.end];
137
138        match pat {
139            Pattern::Char(pat) => slice.ends_with(pat),
140            Pattern::CharArrayRef(pat) => slice.ends_with(pat),
141            Pattern::Function(pat) => slice.ends_with(pat),
142            Pattern::Str(pat) => slice.ends_with(pat),
143            Pattern::StrRef(pat) => slice.ends_with(pat),
144            Pattern::StringRef(pat) => slice.ends_with(pat),
145        }
146    }
147
148    /// Returns `true` if the given pattern matches a sub-slice of
149    /// this string slice.
150    ///
151    /// Returns `false` if it does not.
152    ///
153    /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
154    /// function or closure that determines if a character matches.
155    ///
156    /// [`char`]: prim@char
157    /// [pattern]: std::str::pattern::Pattern
158    ///
159    /// # Examples
160    ///
161    /// Basic usage:
162    ///
163    /// ```
164    /// # use lazy_slice::Slice;
165    /// let bananas = Slice::new("bananas");
166    ///
167    /// assert!(bananas.contains("nana"));
168    /// assert!(!bananas.contains("apples"));
169    /// ```
170    pub fn contains<'r, P, F>(&self, pat: P) -> bool
171    where
172        P: Into<Pattern<'r, F>>,
173        F: FnMut(char) -> bool,
174    {
175        let pat: Pattern<'r, F> = pat.into();
176
177        let slice = &self.source[self.range.start..self.range.end];
178
179        match pat {
180            Pattern::Char(pat) => slice.contains(pat),
181            Pattern::CharArrayRef(pat) => slice.contains(pat),
182            Pattern::Function(pat) => slice.contains(pat),
183            Pattern::Str(pat) => slice.contains(pat),
184            Pattern::StrRef(pat) => slice.contains(pat),
185            Pattern::StringRef(pat) => slice.contains(pat),
186        }
187    }
188
189    //
190
191    // pub fn lines(&self) -> Lines<'s> {
192    //     self.split('\n');
193
194    //     todo!()
195    // }
196
197    //
198
199    // pub fn split<'r, P, F>(&self, pat: P)
200    // where
201    //     P: Into<Pattern<'r, F>>,
202    //     F: FnMut(char) -> bool,
203    // {
204    // }
205
206    //
207
208    /// Returns a string slice with leading and trailing whitespace removed.
209    ///
210    /// 'Whitespace' is defined according to the terms of the Unicode Derived
211    /// Core Property `White_Space`.
212    ///
213    /// # Examples
214    ///
215    /// Basic usage:
216    ///
217    /// ```
218    /// # use lazy_slice::Slice;
219    /// let s = Slice::new(" Hello\tworld\t");
220    ///
221    /// assert_eq!("Hello\tworld", s.trim().slice());
222    /// ```
223    pub fn trim(&self) -> Slice<'s> {
224        self.trim_start().trim_end()
225    }
226
227    /// Returns a string slice with leading whitespace removed.
228    ///
229    /// 'Whitespace' is defined according to the terms of the Unicode Derived
230    /// Core Property `White_Space`.
231    ///
232    /// # Text directionality
233    ///
234    /// A string is a sequence of bytes. `start` in this context means the first
235    /// position of that byte string; for a left-to-right language like English or
236    /// Russian, this will be left side, and for right-to-left languages like
237    /// Arabic or Hebrew, this will be the right side.
238    ///
239    /// # Examples
240    ///
241    /// Basic usage:
242    ///
243    /// ```
244    /// # use lazy_slice::Slice;
245    /// let s = Slice::new(" Hello\tworld\t");
246    /// assert_eq!("Hello\tworld\t", s.trim_start().slice());
247    /// ```
248    ///
249    /// Directionality:
250    ///
251    /// ```
252    /// # use lazy_slice::Slice;
253    /// let s = Slice::new("  English  ");
254    /// assert!(Some('E') == s.trim_start().slice().chars().next());
255    ///
256    /// let s = Slice::new("  עברית  ");
257    /// assert!(Some('ע') == s.trim_start().slice().chars().next());
258    /// ```
259    pub fn trim_start(&self) -> Slice<'s> {
260        let slice = &self.source[self.range.start..self.range.end];
261
262        if !slice.starts_with(|c: char| c.is_whitespace()) {
263            // return early if no whitespace
264            return Slice {
265                source: self.source,
266                range: self.range.start..self.range.end,
267            };
268        }
269
270        let mut up_to = 0;
271
272        for (i, c) in slice.char_indices() {
273            if !c.is_whitespace() {
274                break;
275            }
276
277            up_to = i;
278        }
279
280        // last index points to the start of the last whitespace
281        // we need to remove it
282        up_to += 1;
283
284        Slice {
285            source: self.source,
286            range: (self.range.start + up_to)..self.range.end,
287        }
288    }
289
290    /// Returns a string slice with trailing whitespace removed.
291    ///
292    /// 'Whitespace' is defined according to the terms of the Unicode Derived
293    /// Core Property `White_Space`.
294    ///
295    /// # Text directionality
296    ///
297    /// A string is a sequence of bytes. `end` in this context means the last
298    /// position of that byte string; for a left-to-right language like English or
299    /// Russian, this will be right side, and for right-to-left languages like
300    /// Arabic or Hebrew, this will be the left side.
301    ///
302    /// # Examples
303    ///
304    /// Basic usage:
305    ///
306    /// ```
307    /// # use lazy_slice::Slice;
308    /// let s = Slice::new(" Hello\tworld\t");
309    /// assert_eq!(" Hello\tworld", s.trim_end().slice());
310    /// ```
311    ///
312    /// Directionality:
313    ///
314    /// ```
315    /// # use lazy_slice::Slice;
316    /// let s = Slice::new("  English  ");
317    /// assert!(Some('h') == s.trim_end().slice().chars().rev().next());
318    ///
319    /// let s = Slice::new("  עברית  ");
320    /// assert!(Some('ת') == s.trim_end().slice().chars().rev().next());
321    /// ```
322    pub fn trim_end(&self) -> Slice<'s> {
323        let slice = &self.source[self.range.start..self.range.end];
324
325        if !slice.ends_with(|c: char| c.is_whitespace()) {
326            // return early if no whitespace
327            return Slice {
328                source: self.source,
329                range: self.range.start..self.range.end,
330            };
331        }
332
333        let mut down_to = 0;
334
335        for (i, c) in slice.char_indices().rev() {
336            if !c.is_whitespace() {
337                break;
338            }
339
340            down_to = i;
341        }
342
343        if down_to == 0 {
344            // there were no whitespace
345            down_to = self.range.end;
346        }
347
348        Slice {
349            source: self.source,
350            range: self.range.start..(down_to + self.range.start),
351        }
352    }
353
354    //
355
356    /// It isn't possible to return a owned [`Slice`] from a [`Index`], so you have to use this function.
357    ///
358    /// [`Index`]: std::ops::Index
359    pub fn index<R>(&self, range: R) -> Slice<'s>
360    where
361        R: Into<Ranges>,
362    {
363        Slice {
364            source: self.source,
365            range: Self::normalize_ranges((self.range.start)..(self.range.end), range.into()),
366        }
367    }
368
369    #[inline]
370    const fn normalize_ranges(base: Range<usize>, range: Ranges) -> Range<usize> {
371        match range {
372            // ..
373            Ranges::RangeFull(_) => (base.start)..(base.end),
374            // <num>..<num>
375            Ranges::Range(range) => Self::convert_range((base.start)..(base.end), range),
376            // <num>..=<num>
377            Ranges::RangeInclusive(range) => Self::convert_range(
378                (base.start)..(base.end),
379                (*range.start())..((*range.end()) + 1),
380            ),
381            // ..<num>
382            Ranges::RangeTo(range) => Self::convert_range_to((base.start)..(base.end), range),
383            // ..=<num>
384            Ranges::RangeToInclusive(range) => {
385                Self::convert_range_to((base.start)..(base.end), ..(range.end + 1))
386            }
387            // <num>..
388            Ranges::RangeFrom(range) => (base.start + range.start)..(base.end),
389        }
390    }
391
392    #[inline]
393    const fn convert_range(base: Range<usize>, other: Range<usize>) -> Range<usize> {
394        (base.start + other.start)..(if other.end == 0 {
395            base.end
396        } else {
397            base.start + other.end
398        })
399    }
400
401    #[inline]
402    const fn convert_range_to(base: Range<usize>, other: RangeTo<usize>) -> Range<usize> {
403        (base.start)..(if other.end == 0 {
404            base.end
405        } else {
406            base.start + other.end
407        })
408    }
409
410    //
411
412    /// Consume and 'run' the slice, returning the given range of the source [`str`].
413    pub fn slice(mut self) -> &'s str {
414        self.source = &self.source[self.range.start..self.range.end];
415
416        self.source
417    }
418}
419
420impl<'s> Clone for Slice<'s> {
421    fn clone(&self) -> Self {
422        Slice {
423            source: self.source,
424            range: self.range.start..self.range.end,
425        }
426    }
427}
428
429impl<'s> Display for Slice<'s> {
430    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
431        write!(f, "{}", &self.source[self.range.start..self.range.end])
432    }
433}
434
435// pub struct Split<'s> {
436//     source: &'s str,
437// }
438
439// impl<'s> Iterator for Split<'s> {
440//     type Item = Slice<'s>;
441
442//     fn next(&mut self) -> Option<Self::Item> {
443//         todo!()
444//     }
445// }
446
447// pub struct Lines<'s> {
448//     source: &'s str,
449// }
450
451// impl<'s> Iterator for Lines<'s> {
452//     type Item = Slice<'s>;
453
454//     fn next(&mut self) -> Option<Self::Item> {
455//         todo!()
456//     }
457// }
458
/// Every range form that can be used to index a [`Slice`].
///
/// Each variant wraps the matching `std::ops` range type over `usize`; the
/// `From` impls below let callers pass the range literal directly.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Ranges {
    /// `start..end`
    Range(Range<usize>),
    /// `start..`
    RangeFrom(RangeFrom<usize>),
    /// `..`
    RangeFull(RangeFull),
    /// `start..=end`
    RangeInclusive(RangeInclusive<usize>),
    /// `..end`
    RangeTo(RangeTo<usize>),
    /// `..=end`
    RangeToInclusive(RangeToInclusive<usize>),
}

impl From<Range<usize>> for Ranges {
    /// Wraps a `start..end` range.
    fn from(range: Range<usize>) -> Ranges {
        Ranges::Range(range)
    }
}

impl From<RangeFrom<usize>> for Ranges {
    /// Wraps a `start..` range.
    fn from(range: RangeFrom<usize>) -> Ranges {
        Ranges::RangeFrom(range)
    }
}

impl From<RangeFull> for Ranges {
    /// Wraps the full `..` range.
    fn from(range: RangeFull) -> Ranges {
        Ranges::RangeFull(range)
    }
}

impl From<RangeInclusive<usize>> for Ranges {
    /// Wraps a `start..=end` range.
    fn from(range: RangeInclusive<usize>) -> Ranges {
        Ranges::RangeInclusive(range)
    }
}

impl From<RangeTo<usize>> for Ranges {
    /// Wraps a `..end` range.
    fn from(range: RangeTo<usize>) -> Ranges {
        Ranges::RangeTo(range)
    }
}

impl From<RangeToInclusive<usize>> for Ranges {
    /// Wraps a `..=end` range.
    fn from(range: RangeToInclusive<usize>) -> Ranges {
        Ranges::RangeToInclusive(range)
    }
}
503
/// A horrible wrapper around the unstable [Pattern API](https://github.com/rust-lang/rust/issues/56345).
///
/// Each variant carries one of the concrete pattern kinds that the string
/// search methods accept. `F` is only meaningful for [`Pattern::Function`];
/// the non-closure conversions below fix it to `fn(char) -> bool`.
pub enum Pattern<'r, F>
where
    F: FnMut(char) -> bool,
{
    /// A single character.
    Char(char),
    /// A borrowed slice of characters.
    CharArrayRef(&'r [char]),
    /// A predicate deciding whether a character matches.
    Function(F),
    /// A string slice.
    Str(&'r str),
    /// A reference to a string slice.
    StrRef(&'r &'r str),
    /// A reference to an owned string.
    StringRef(&'r String),
}

impl From<char> for Pattern<'_, fn(char) -> bool> {
    /// Wraps a single character.
    fn from(ch: char) -> Self {
        Self::Char(ch)
    }
}

impl<'r> From<&'r [char]> for Pattern<'r, fn(char) -> bool> {
    /// Wraps a slice of characters.
    fn from(chars: &'r [char]) -> Self {
        Self::CharArrayRef(chars)
    }
}

impl<F> From<F> for Pattern<'_, F>
where
    F: FnMut(char) -> bool,
{
    /// Wraps a character predicate.
    fn from(predicate: F) -> Self {
        Self::Function(predicate)
    }
}

impl<'r> From<&'r str> for Pattern<'r, fn(char) -> bool> {
    /// Wraps a string slice.
    fn from(text: &'r str) -> Self {
        Self::Str(text)
    }
}

impl<'r> From<&'r &'r str> for Pattern<'r, fn(char) -> bool> {
    /// Wraps a reference to a string slice.
    fn from(text: &'r &'r str) -> Self {
        Self::StrRef(text)
    }
}

impl<'r> From<&'r String> for Pattern<'r, fn(char) -> bool> {
    /// Wraps a reference to an owned string.
    fn from(text: &'r String) -> Self {
        Self::StringRef(text)
    }
}
555
#[cfg(test)]
mod test {
    use super::*;

    /// A literal prefix is recognised.
    #[test]
    fn test_starts_with() {
        let greeting = Slice::new("Hello World!");

        assert!(greeting.starts_with("Hello"), "`starts_with` is true");
    }

    /// Trimming both ends leaves clean input alone and strips padded input.
    #[test]
    fn test_trim() {
        let clean = Slice::new("Hello World!").trim();
        assert_eq!("Hello World!", clean.slice(), "`trim` without any whitespace");

        let padded = Slice::new("   Hello World!   ").trim();
        assert_eq!("Hello World!", padded.slice(), "`trim` with whitespace");
    }

    /// Only leading whitespace is removed.
    #[test]
    fn test_trim_start() {
        let clean = Slice::new("Hello World!").trim_start();
        assert_eq!(
            "Hello World!",
            clean.slice(),
            "`trim_start` without any whitespace",
        );

        let padded = Slice::new("   Hello World!").trim_start();
        assert_eq!(
            "Hello World!",
            padded.slice(),
            "`trim_start` with whitespace",
        );
    }

    /// Only trailing whitespace is removed.
    #[test]
    fn test_trim_end() {
        let clean = Slice::new("Hello World!").trim_end();
        assert_eq!(
            "Hello World!",
            clean.slice(),
            "`trim_end` without any whitespace",
        );

        let padded = Slice::new("Hello World!   ").trim_end();
        assert_eq!(
            "Hello World!",
            padded.slice(),
            "`trim_end` with whitespace",
        );
    }

    /// Mixed range kinds compose, each relative to the previous view.
    #[test]
    fn test_index() {
        let hello_world = Slice::new("Hello World!").index(..11); // "Hello World"
        let world = hello_world.index(6..); // "World"
        let still_world = world.index(..); // "World"
        let or = still_world.index(1..=2); // "or"

        assert_eq!("or", or.index(..).slice());
    }

    /// Repeated full ranges never change the view.
    #[test]
    fn test_index_range_full() {
        let mut slice = Slice::new("Hello World!");

        for _ in 0..5 {
            slice = slice.index(..);
        }

        assert_eq!("Hello World!", slice.slice());
    }

    /// Half-open ranges compose.
    #[test]
    fn test_index_range() {
        let hello_world = Slice::new("Hello World!").index(0..11); // "Hello World"
        let ello_world = hello_world.index(1..11); // "ello World"
        let lo_wor = ello_world.index(2..8); // "lo Wor"

        assert_eq!("lo Wor", lo_wor.slice());
    }

    /// Inclusive ranges compose.
    #[test]
    fn test_index_range_inclusive() {
        let whole = Slice::new("Hello World!").index(0..=11); // "Hello World!"
        let ello_worl = whole.index(1..=8); // "ello Worl"
        let ello = ello_worl.index(0..=4); // "ello "

        assert_eq!("ello ", ello.slice());
    }
}
652        assert_eq!("ello ", slice.slice());
653    }
654}