yz_string_utils/
lib.rs

1#![no_std]
2
3extern crate alloc;
4pub use alloc::borrow::Cow;
5
6mod shard;
7pub use shard::Shard;
8
9mod shwsplit;
10pub use shwsplit::{ShellwordSplitter, SyntaxError as SimpleSyntaxError};
11
/// Returns the distance in bytes from the start of `whole_buffer` to the
/// start of `part`, computed from the two start addresses.
///
/// The result is only meaningful if `part` starts at or after the start of
/// `whole_buffer`; otherwise the subtraction underflows.
fn get_offset_of<T>(whole_buffer: &T, part: &T) -> usize
where
    T: AsRef<[u8]> + ?Sized,
{
    // NOTE: plain address arithmetic is used on purpose. `offset_from()` was
    // the original candidate, but according to
    // https://github.com/rust-lang/rust/issues/41079#issuecomment-657163887
    // it would be UB in cases where this integer subtraction isn't.
    let part_addr = part.as_ref().as_ptr() as usize;
    let base_addr = whole_buffer.as_ref().as_ptr() as usize;
    part_addr - base_addr
}
21
22/// Assuming that `post_part` is a true (in regards to memory allocations)
23/// subslice of `whole_buffer_start`, returns everything which comes before `post_part`.
24pub fn slice_between<'a>(whole_buffer_start: &'a [u8], post_part: &'a [u8]) -> &'a [u8] {
25    debug_assert!(post_part.len() < whole_buffer_start.len());
26    &whole_buffer_start[..get_offset_of(whole_buffer_start, post_part)]
27}
28
/// Returns the length in bytes of the leading run of characters of `inp`
/// that are accepted by `f`.
///
/// `f` is called for each character in order, up to and including the first
/// one it rejects. If every character is accepted, the result is `inp.len()`.
pub fn count_str_bytes<F>(inp: &str, mut f: F) -> usize
where
    F: FnMut(char) -> bool,
{
    // The byte index of the first rejected character equals the summed
    // UTF-8 length of all accepted characters before it.
    inp.find(|ch: char| !f(ch)).unwrap_or(inp.len())
}
39
/// Predicate-driven splitting of a sequence into a leading run and the rest.
pub trait SplitAtWhile {
    /// The element type handed to the predicate.
    type Item;

    /// Splits `self` in two: the first part is the leading run of items for
    /// which `f` returns `true`, the second part is everything from the first
    /// rejected item onwards.
    ///
    /// Usually used to segment input according to character categories.
    fn split_at_while<F: FnMut(&Self::Item) -> bool>(&self, f: F) -> (&Self, &Self);
}
51
52impl<T> SplitAtWhile for [T] {
53    type Item = T;
54
55    fn split_at_while<F>(&self, mut f: F) -> (&Self, &Self)
56    where
57        F: FnMut(&T) -> bool,
58    {
59        self.split_at(self.iter().take_while(move |&i| f(i)).count())
60    }
61}
62
63impl SplitAtWhile for str {
64    type Item = char;
65
66    fn split_at_while<F>(&self, f: F) -> (&Self, &Self)
67    where
68        F: FnMut(&char) -> bool,
69    {
70        self.split_at(self.chars().take_while(f).map(|i| i.len_utf8()).sum())
71    }
72}
73
74#[derive(Copy, Clone)]
75pub struct StrLexerBase<'a> {
76    pub inp: &'a str,
77    pub offset: usize,
78}
79
80impl<'a> StrLexerBase<'a> {
81    #[inline]
82    pub fn consume(&mut self, l: usize) -> &'a str {
83        let (a, b) = self.inp.split_at(l);
84        self.inp = b;
85        self.offset += l;
86        a
87    }
88
89    pub fn consume_select<F>(&mut self, f: F) -> &'a str
90    where
91        F: FnMut(char) -> bool,
92    {
93        self.consume(count_str_bytes(self.inp, f))
94    }
95
96    /// # Panics
97    /// This panics if the string does not start with a character in XID
98    #[cfg(feature = "consume-ident")]
99    pub fn consume_ident(&mut self) -> alloc::string::String {
100        use alloc::string::ToString;
101        use unicode_normalization::UnicodeNormalization;
102        let s = self
103            .consume_select(unicode_ident::is_xid_continue)
104            .nfkc()
105            .to_string();
106        assert!(!s.is_empty());
107        s
108    }
109
110    #[cfg(feature = "consume-ident")]
111    pub fn try_consume_ident(&mut self) -> Option<alloc::string::String> {
112        if self.inp.chars().next().map(unicode_ident::is_xid_start) == Some(true) {
113            Some(self.consume_ident())
114        } else {
115            None
116        }
117    }
118}
119
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `try_consume_ident` against ASCII input, non-ASCII input and
    /// input which does not start with an identifier.
    #[test]
    #[cfg(feature = "consume-ident")]
    fn test_consume_ident() {
        use alloc::string::String;

        // (input, expected identifier, expected offset afterwards)
        let cases: [(&str, Option<&str>, usize); 3] = [
            ("hello ", Some("hello"), 5),
            ("ö ", Some("ö"), 2),
            (".ö ", None, 0),
        ];

        for &(input, expected_ident, expected_offset) in cases.iter() {
            let mut slb = StrLexerBase {
                inp: input,
                offset: 0,
            };
            assert_eq!(slb.try_consume_ident(), expected_ident.map(String::from));
            assert_eq!(slb.offset, expected_offset);
        }
    }
}