Skip to main content

ruff_python_trivia/
cursor.rs

1use std::str::Chars;
2
3use ruff_text_size::{TextLen, TextSize};
4
/// Sentinel character handed out by the peeking helpers (for example `Cursor::first`)
/// in place of a real character once no input remains.
///
/// A NUL character can also occur literally in the input, so seeing this value is not
/// proof that the end was reached; use `Cursor::is_eof` to tell the two cases apart.
pub const EOF_CHAR: char = '\0';
6
7/// A [`Cursor`] over a string.
8///
9/// Based on [`rustc`'s `Cursor`](https://github.com/rust-lang/rust/blob/d1b7355d3d7b4ead564dbecb1d240fcc74fff21b/compiler/rustc_lexer/src/cursor.rs)
10#[derive(Debug, Clone)]
11pub struct Cursor<'a> {
12    chars: Chars<'a>,
13    source_length: TextSize,
14}
15
16impl<'a> Cursor<'a> {
17    pub fn new(source: &'a str) -> Self {
18        Self {
19            source_length: source.text_len(),
20            chars: source.chars(),
21        }
22    }
23
24    /// Retrieves the current offset of the cursor within the source code.
25    pub fn offset(&self) -> TextSize {
26        self.source_length - self.text_len()
27    }
28
29    /// Return the remaining input as a string slice.
30    pub fn chars(&self) -> Chars<'a> {
31        self.chars.clone()
32    }
33
34    /// Returns the remaining input as byte slice.
35    pub fn as_bytes(&self) -> &'a [u8] {
36        self.as_str().as_bytes()
37    }
38
39    /// Returns the remaining input as string slice.
40    pub fn as_str(&self) -> &'a str {
41        self.chars.as_str()
42    }
43
44    /// Peeks the next character from the input stream without consuming it.
45    /// Returns [`EOF_CHAR`] if the file is at the end of the file.
46    pub fn first(&self) -> char {
47        self.chars.clone().next().unwrap_or(EOF_CHAR)
48    }
49
50    /// Peeks the second character from the input stream without consuming it.
51    /// Returns [`EOF_CHAR`] if the position is past the end of the file.
52    pub fn second(&self) -> char {
53        let mut chars = self.chars.clone();
54        chars.next();
55        chars.next().unwrap_or(EOF_CHAR)
56    }
57
58    /// Peeks the next character from the input stream without consuming it.
59    /// Returns [`EOF_CHAR`] if the file is at the end of the file.
60    pub fn last(&self) -> char {
61        self.chars.clone().next_back().unwrap_or(EOF_CHAR)
62    }
63
64    pub fn text_len(&self) -> TextSize {
65        self.chars.as_str().text_len()
66    }
67
68    pub fn token_len(&self) -> TextSize {
69        self.source_length - self.text_len()
70    }
71
72    pub fn start_token(&mut self) {
73        self.source_length = self.text_len();
74    }
75
76    /// Returns `true` if the file is at the end of the file.
77    pub fn is_eof(&self) -> bool {
78        self.chars.as_str().is_empty()
79    }
80
81    /// Consumes the next character
82    pub fn bump(&mut self) -> Option<char> {
83        self.chars.next()
84    }
85
86    /// Consumes the next character from the back
87    pub fn bump_back(&mut self) -> Option<char> {
88        self.chars.next_back()
89    }
90
91    pub fn eat_char(&mut self, c: char) -> bool {
92        if self.first() == c {
93            self.bump();
94            true
95        } else {
96            false
97        }
98    }
99
100    /// Eats the next two characters if they are `c1` and `c2`. Does not
101    /// consume any input otherwise, even if the first character matches.
102    pub fn eat_char2(&mut self, c1: char, c2: char) -> bool {
103        let mut chars = self.chars.clone();
104        if chars.next() == Some(c1) && chars.next() == Some(c2) {
105            self.bump();
106            self.bump();
107            true
108        } else {
109            false
110        }
111    }
112
113    /// Eats the next three characters if they are `c1`, `c2` and `c3`
114    /// Does not consume any input otherwise, even if the first character matches.
115    pub fn eat_char3(&mut self, c1: char, c2: char, c3: char) -> bool {
116        let mut chars = self.chars.clone();
117        if chars.next() == Some(c1) && chars.next() == Some(c2) && chars.next() == Some(c3) {
118            self.bump();
119            self.bump();
120            self.bump();
121            true
122        } else {
123            false
124        }
125    }
126
127    pub fn eat_char_back(&mut self, c: char) -> bool {
128        if self.last() == c {
129            self.bump_back();
130            true
131        } else {
132            false
133        }
134    }
135
136    /// Eats the next character if `predicate` returns `true`.
137    pub fn eat_if(&mut self, mut predicate: impl FnMut(char) -> bool) -> bool {
138        if predicate(self.first()) && !self.is_eof() {
139            self.bump();
140            true
141        } else {
142            false
143        }
144    }
145
146    /// Eats symbols while predicate returns true or until the end of file is reached.
147    pub fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
148        // It was tried making optimized version of this for eg. line comments, but
149        // LLVM can inline all of this and compile it down to fast iteration over bytes.
150        while predicate(self.first()) && !self.is_eof() {
151            self.bump();
152        }
153    }
154
155    /// Eats symbols from the back while predicate returns true or until the beginning of file is reached.
156    pub fn eat_back_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
157        // It was tried making optimized version of this for eg. line comments, but
158        // LLVM can inline all of this and compile it down to fast iteration over bytes.
159        while predicate(self.last()) && !self.is_eof() {
160            self.bump_back();
161        }
162    }
163
164    /// Skips the next `count` bytes.
165    ///
166    /// ## Panics
167    ///  - If `count` is larger than the remaining bytes in the input stream.
168    ///  - If `count` indexes into a multi-byte character.
169    pub fn skip_bytes(&mut self, count: usize) {
170        self.chars = self.chars.as_str()[count..].chars();
171    }
172}