// rustpython_ruff_python_ast/identifier.rs

//! Extract [`TextRange`] information from AST nodes.
//!
//! For example, given:
//! ```python
//! try:
//!     ...
//! except Exception as e:
//!     ...
//! ```
//!
//! This module can be used to identify the [`TextRange`] of the `except` token.

use crate::{self as ast, Alias, ExceptHandler, Parameter, ParameterWithDefault, Stmt};
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};

use ruff_python_trivia::{Cursor, is_python_whitespace};

18pub trait Identifier {
19    /// Return the [`TextRange`] of the identifier in the given AST node.
20    fn identifier(&self) -> TextRange;
21}
22
23impl Identifier for ast::StmtFunctionDef {
24    /// Return the [`TextRange`] of the identifier in the given function definition.
25    ///
26    /// For example, return the range of `f` in:
27    /// ```python
28    /// def f():
29    ///     ...
30    /// ```
31    fn identifier(&self) -> TextRange {
32        self.name.range()
33    }
34}
35
36impl Identifier for ast::StmtClassDef {
37    /// Return the [`TextRange`] of the identifier in the given class definition.
38    ///
39    /// For example, return the range of `C` in:
40    /// ```python
41    /// class C():
42    ///     ...
43    /// ```
44    fn identifier(&self) -> TextRange {
45        self.name.range()
46    }
47}
48
49impl Identifier for Stmt {
50    /// Return the [`TextRange`] of the identifier in the given statement.
51    ///
52    /// For example, return the range of `f` in:
53    /// ```python
54    /// def f():
55    ///     ...
56    /// ```
57    fn identifier(&self) -> TextRange {
58        match self {
59            Stmt::ClassDef(class) => class.identifier(),
60            Stmt::FunctionDef(function) => function.identifier(),
61            _ => self.range(),
62        }
63    }
64}
65
66impl Identifier for Parameter {
67    /// Return the [`TextRange`] for the identifier defining an [`Parameter`].
68    ///
69    /// For example, return the range of `x` in:
70    /// ```python
71    /// def f(x: int):
72    ///     ...
73    /// ```
74    fn identifier(&self) -> TextRange {
75        self.name.range()
76    }
77}
78
79impl Identifier for ParameterWithDefault {
80    /// Return the [`TextRange`] for the identifier defining an [`ParameterWithDefault`].
81    ///
82    /// For example, return the range of `x` in:
83    /// ```python
84    /// def f(x: int = 0):
85    ///     ...
86    /// ```
87    fn identifier(&self) -> TextRange {
88        self.parameter.identifier()
89    }
90}
91
92impl Identifier for Alias {
93    /// Return the [`TextRange`] for the identifier defining an [`Alias`].
94    ///
95    /// For example, return the range of `x` in:
96    /// ```python
97    /// from foo import bar as x
98    /// ```
99    fn identifier(&self) -> TextRange {
100        self.asname
101            .as_ref()
102            .map_or_else(|| self.name.range(), Ranged::range)
103    }
104}
105
106/// Return the [`TextRange`] of the `except` token in an [`ExceptHandler`].
107pub fn except(handler: &ExceptHandler, source: &str) -> TextRange {
108    IdentifierTokenizer::new(source, handler.range())
109        .next()
110        .expect("Failed to find `except` token in `ExceptHandler`")
111}
112
113/// Return the [`TextRange`] of the `else` token in a `For` or `While` statement.
114pub fn else_(stmt: &Stmt, source: &str) -> Option<TextRange> {
115    let (Stmt::For(ast::StmtFor { body, orelse, .. })
116    | Stmt::While(ast::StmtWhile { body, orelse, .. })) = stmt
117    else {
118        return None;
119    };
120
121    if orelse.is_empty() {
122        return None;
123    }
124
125    IdentifierTokenizer::starts_at(
126        body.last().expect("Expected body to be non-empty").end(),
127        source,
128    )
129    .next()
130}
131
/// Return `true` if the given character starts a valid Python identifier.
///
/// Python identifiers must start with an alphabetic character or an underscore.
fn is_python_identifier_start(c: char) -> bool {
    c == '_' || c.is_alphabetic()
}

/// Return `true` if the given character is a valid Python identifier continuation character.
///
/// Python identifiers can contain alphanumeric characters and underscores, but cannot start with a
/// number.
fn is_python_identifier_continue(c: char) -> bool {
    c == '_' || c.is_alphanumeric()
}

147/// Simple zero allocation tokenizer for Python identifiers.
148///
149/// The tokenizer must operate over a range that can only contain identifiers, keywords, and
150/// comments (along with whitespace and continuation characters). It does not support other tokens,
151/// like operators, literals, or delimiters. It also does not differentiate between keywords and
152/// identifiers, treating every valid token as an "identifier".
153///
154/// This is useful for cases like, e.g., identifying the alias name in an aliased import (`bar` in
155/// `import foo as bar`), where we're guaranteed to only have identifiers and keywords in the
156/// relevant range.
157pub(crate) struct IdentifierTokenizer<'a> {
158    cursor: Cursor<'a>,
159    offset: TextSize,
160}
161
162impl<'a> IdentifierTokenizer<'a> {
163    pub(crate) fn new(source: &'a str, range: TextRange) -> Self {
164        Self {
165            cursor: Cursor::new(&source[range]),
166            offset: range.start(),
167        }
168    }
169
170    pub(crate) fn starts_at(offset: TextSize, source: &'a str) -> Self {
171        let range = TextRange::new(offset, source.text_len());
172        Self::new(source, range)
173    }
174
175    fn next_token(&mut self) -> Option<TextRange> {
176        while let Some(c) = {
177            self.offset += self.cursor.token_len();
178            self.cursor.start_token();
179            self.cursor.bump()
180        } {
181            match c {
182                c if is_python_identifier_start(c) => {
183                    self.cursor.eat_while(is_python_identifier_continue);
184                    return Some(TextRange::at(self.offset, self.cursor.token_len()));
185                }
186
187                c if is_python_whitespace(c) => {
188                    self.cursor.eat_while(is_python_whitespace);
189                }
190
191                '#' => {
192                    self.cursor.eat_while(|c| !matches!(c, '\n' | '\r'));
193                }
194
195                '\r' => {
196                    self.cursor.eat_char('\n');
197                }
198
199                '\n' => {
200                    // Nothing to do.
201                }
202
203                '\\' => {
204                    // Nothing to do.
205                }
206
207                _ => {
208                    // Nothing to do.
209                }
210            }
211        }
212
213        None
214    }
215}
216
217impl Iterator for IdentifierTokenizer<'_> {
218    type Item = TextRange;
219
220    fn next(&mut self) -> Option<Self::Item> {
221        self.next_token()
222    }
223}
224
#[cfg(test)]
mod tests {
    use super::IdentifierTokenizer;
    use ruff_text_size::{TextLen, TextRange, TextSize};

    /// Identifier-like tokens are yielded in order with their ranges, while the commas and
    /// spaces between them are skipped.
    #[test]
    fn extract_global_names() {
        let contents = r"global X,Y, Z".trim();

        let mut names = IdentifierTokenizer::new(
            contents,
            TextRange::new(TextSize::new(0), contents.text_len()),
        );

        // (token text, start offset, end offset)
        let expected: [(&str, u32, u32); 4] =
            [("global", 0, 6), ("X", 7, 8), ("Y", 9, 10), ("Z", 12, 13)];

        for (text, start, end) in expected {
            let range = names.next_token().unwrap();
            assert_eq!(&contents[range], text);
            assert_eq!(range, TextRange::new(TextSize::new(start), TextSize::new(end)));
        }
    }
}