simple_cursor/
lib.rs

1//! A super simple `#[no_std]`-compatible character cursor implementation geared towards
2//! lexers/tokenizers. The implementation is inspired by the one used in `rustc` and should be
3//! performant enough to handle pretty much anything you could throw at it.
4//!
5//! # Basic use
//! The following example showcases the basic features of `simple_cursor`. Please refer to
//! [`Cursor`] for more info.
8//!
9//! ```rust
10//! use simple_cursor::Cursor;
11//!
12//! // Create the input string and the cursor.
13//! let input = "123 foobar竜<!>";
14//! let mut cursor = Cursor::new(input);
15//!
16//! // "123"
17//! let number_start = cursor.byte_pos();
18//! cursor.skip_while(|c| c.is_ascii_digit());
19//! let number_end = cursor.byte_pos();
20//!
21//! // Some(' ')
22//! let whitespace = cursor.bump();
23//!
24//! // "foobar"
25//! let ident_start = cursor.byte_pos();
26//! cursor.skip_while(|c| c.is_ascii_alphabetic());
27//! let ident_end = cursor.byte_pos();
28//!
29//! // "竜<!>"
30//! let rest_start = ident_end;
31//! let rest_end = input.len();
32//!
33//! assert_eq!("123", &input[number_start..number_end]);
34//! assert_eq!(Some(' '), whitespace);
35//! assert_eq!("foobar", &input[ident_start..ident_end]);
36//! assert_eq!("竜<!>", &input[rest_start..rest_end]);
37//! ```
38
39#![no_std]
40
41use core::str::Chars;
42
/// Abstraction over a character iterator.
///
/// The cursor wraps a [`Chars`] iterator and additionally tracks how many bytes of the input
/// have been consumed so far, so that the values returned by [`Cursor::byte_pos`] can be used
/// to slice the original input string.
///
/// Cloning a cursor is cheap and yields an independent snapshot of the current position.
#[derive(Clone, Debug)]
pub struct Cursor<'a> {
    /// Raw character iterator over the not-yet-consumed part of the input.
    chars: Chars<'a>,
    /// Current byte position of the cursor.
    byte_pos: usize,
}

impl<'a> Cursor<'a> {
    /// Creates a new [`Cursor`] positioned at the start of `input`.
    pub fn new(input: &'a str) -> Self {
        Self {
            chars: input.chars(),
            byte_pos: 0,
        }
    }

    /// Immutable reference to the internal character iterator.
    ///
    /// The iterator only covers the input that has not been consumed yet.
    pub fn chars(&self) -> &Chars<'a> {
        &self.chars
    }

    /// The current byte position of the cursor into the input string.
    ///
    /// This is the total UTF-8 length of all characters consumed so far.
    pub fn byte_pos(&self) -> usize {
        self.byte_pos
    }

    /// Peeks the next character without advancing the cursor.
    ///
    /// Returns `None` when the input is exhausted.
    pub fn peek(&self) -> Option<char> {
        // Cloning a [`Chars`] iterator is cheap.
        self.chars.clone().next()
    }

    /// Peeks the next two characters without advancing the cursor.
    ///
    /// Each element is `None` if the input ends before that character.
    pub fn peek_two(&self) -> (Option<char>, Option<char>) {
        // Cloning a [`Chars`] iterator is cheap.
        let mut lookahead = self.chars.clone();
        (lookahead.next(), lookahead.next())
    }

    /// Bumps the cursor and returns the next character.
    ///
    /// Returns `None` and leaves the byte position untouched when the input is exhausted.
    pub fn bump(&mut self) -> Option<char> {
        let c = self.chars.next()?;
        // Keep the byte position in sync with the character iterator.
        self.byte_pos += c.len_utf8();
        Some(c)
    }

    /// Bumps the cursor and returns the next two characters.
    ///
    /// Each element is `None` if the input ends before that character; only the characters
    /// that were actually consumed contribute to the byte position.
    pub fn bump_two(&mut self) -> (Option<char>, Option<char>) {
        // Tuple fields are evaluated left to right, so this consumes the characters in order
        // and shares the byte accounting with [`Cursor::bump`].
        (self.bump(), self.bump())
    }

    /// Bumps the cursor while `predicate` is true for the current character.
    ///
    /// Notably, this method will **not** consume the first non-matching character. This is in
    /// contrast with methods like [`Iterator::take_while`].
    ///
    /// `predicate` may be any callable taking a `char`, including closures that capture (and
    /// mutate) their environment. It is invoked once per character, in order, up to and
    /// including the first character for which it returns `false`.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use simple_cursor::Cursor;
    ///
    /// let terminator = ';';
    /// let mut cursor = Cursor::new("abc;def");
    /// cursor.skip_while(|c| c != terminator);
    ///
    /// assert_eq!(cursor.byte_pos(), 3);
    /// assert_eq!(cursor.peek(), Some(';'));
    /// ```
    pub fn skip_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
        // Work on the remaining input directly so every character is decoded only once.
        let rest = self.chars.as_str();
        // Byte offset of the first non-matching character, or the whole remainder if every
        // character matches. `char_indices` only yields character boundaries, so slicing at
        // this offset cannot panic.
        let skipped = rest
            .char_indices()
            .find(|&(_, c)| !predicate(c))
            .map_or(rest.len(), |(offset, _)| offset);
        // Advance the iterator and the byte position in one batch.
        self.chars = rest[skipped..].chars();
        self.byte_pos += skipped;
    }
}
124
/// Unit tests for [`Cursor`].
///
/// Besides the basic ASCII cases, these tests cover end-of-input handling and multi-byte
/// characters, since the byte position must advance by the UTF-8 length of each character.
#[cfg(test)]
mod tests {
    use super::Cursor;

    #[test]
    fn accessors() {
        let input = "竜a";
        let mut cursor = Cursor::new(input);
        assert_eq!(cursor.byte_pos(), 0);
        assert_eq!(cursor.chars().as_str(), input);

        // The accessors reflect the state after consuming a multi-byte character.
        assert_eq!(cursor.bump(), Some('竜'));
        assert_eq!(cursor.byte_pos(), 3);
        assert_eq!(cursor.chars().as_str(), "a");
    }

    #[test]
    fn peek() {
        let input = "s";
        let cursor = Cursor::new(input);
        assert_eq!(cursor.peek(), Some('s'));
        assert_eq!(cursor.byte_pos, 0);
        assert_eq!(cursor.chars.as_str(), input);

        let input = "";
        let cursor = Cursor::new(input);
        assert_eq!(cursor.peek(), None);
        assert_eq!(cursor.byte_pos, 0);
        assert_eq!(cursor.chars.as_str(), input);

        // Peeking a multi-byte character must not advance the cursor either.
        let input = "竜";
        let cursor = Cursor::new(input);
        assert_eq!(cursor.peek(), Some('竜'));
        assert_eq!(cursor.byte_pos, 0);
        assert_eq!(cursor.chars.as_str(), input);
    }

    #[test]
    fn peek_two() {
        let input = "ab";
        let cursor = Cursor::new(input);
        assert_eq!(cursor.peek_two(), (Some('a'), Some('b')));
        assert_eq!(cursor.byte_pos, 0);
        assert_eq!(cursor.chars.as_str(), input);

        let input = "a";
        let cursor = Cursor::new(input);
        assert_eq!(cursor.peek_two(), (Some('a'), None));
        assert_eq!(cursor.byte_pos, 0);
        assert_eq!(cursor.chars.as_str(), input);

        let input = "";
        let cursor = Cursor::new(input);
        assert_eq!(cursor.peek_two(), (None, None));
        assert_eq!(cursor.byte_pos, 0);
        assert_eq!(cursor.chars.as_str(), input);
    }

    #[test]
    fn bump() {
        let input = "a";
        let mut cursor = Cursor::new(input);
        assert_eq!(cursor.bump(), Some('a'));
        assert_eq!(cursor.byte_pos, 1);
        assert_eq!(cursor.chars.as_str(), "");

        // Bumping past the end yields `None` and leaves the position untouched.
        assert_eq!(cursor.bump(), None);
        assert_eq!(cursor.byte_pos, 1);
        assert_eq!(cursor.chars.as_str(), "");
    }

    #[test]
    fn bump_multibyte() {
        let input = "竜<";
        let mut cursor = Cursor::new(input);
        assert_eq!(cursor.bump(), Some('竜'));
        assert_eq!(cursor.byte_pos, '竜'.len_utf8());
        assert_eq!(cursor.chars.as_str(), "<");
        // The byte position is a valid slicing index into the original input.
        assert_eq!(&input[cursor.byte_pos..], "<");
    }

    #[test]
    fn bump_two() {
        let input = "abc";
        let mut cursor = Cursor::new(input);
        assert_eq!(cursor.bump_two(), (Some('a'), Some('b')));
        assert_eq!(cursor.byte_pos, 2);
        assert_eq!(cursor.chars.as_str(), "c");

        assert_eq!(cursor.bump_two(), (Some('c'), None));
        assert_eq!(cursor.byte_pos, 3);
        assert_eq!(cursor.chars.as_str(), "");

        assert_eq!(cursor.bump_two(), (None, None));
        assert_eq!(cursor.byte_pos, 3);
        assert_eq!(cursor.chars.as_str(), "");
    }

    #[test]
    fn bump_two_multibyte() {
        let input = "竜a!";
        let mut cursor = Cursor::new(input);
        assert_eq!(cursor.bump_two(), (Some('竜'), Some('a')));
        assert_eq!(cursor.byte_pos, 4);
        assert_eq!(cursor.chars.as_str(), "!");
    }

    #[test]
    fn skip_while() {
        let input = "aaaab";
        let mut cursor = Cursor::new(input);
        cursor.skip_while(|c| c == 'a');
        assert_eq!(cursor.byte_pos, 4);
        assert_eq!(cursor.chars.as_str(), "b");
    }

    #[test]
    fn skip_while_no_match() {
        let input = "baaa";
        let mut cursor = Cursor::new(input);
        cursor.skip_while(|c| c == 'a');
        assert_eq!(cursor.byte_pos, 0);
        assert_eq!(cursor.chars.as_str(), input);
    }

    #[test]
    fn skip_while_all_match() {
        let input = "aaaa";
        let mut cursor = Cursor::new(input);
        cursor.skip_while(|c| c == 'a');
        assert_eq!(cursor.byte_pos, input.len());
        assert_eq!(cursor.chars.as_str(), "");
        assert_eq!(cursor.peek(), None);
    }

    #[test]
    fn skip_while_empty() {
        let mut cursor = Cursor::new("");
        cursor.skip_while(|_| true);
        assert_eq!(cursor.byte_pos, 0);
        assert_eq!(cursor.chars.as_str(), "");
    }

    #[test]
    fn skip_while_multibyte() {
        let input = "竜竜a";
        let mut cursor = Cursor::new(input);
        cursor.skip_while(|c| c == '竜');
        assert_eq!(cursor.byte_pos, 6);
        assert_eq!(cursor.chars.as_str(), "a");

        // Skipping continues from the current position, not from the start of the input.
        cursor.skip_while(|c| c.is_ascii_alphabetic());
        assert_eq!(cursor.byte_pos, input.len());
        assert_eq!(cursor.chars.as_str(), "");
    }
}