use crate::{self as ast, Alias, ExceptHandler, Parameter, ParameterWithDefault, Stmt};
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
use ruff_python_trivia::{Cursor, is_python_whitespace};
pub trait Identifier {
fn identifier(&self) -> TextRange;
}
impl Identifier for ast::StmtFunctionDef {
fn identifier(&self) -> TextRange {
self.name.range()
}
}
impl Identifier for ast::StmtClassDef {
fn identifier(&self) -> TextRange {
self.name.range()
}
}
impl Identifier for Stmt {
fn identifier(&self) -> TextRange {
match self {
Stmt::ClassDef(class) => class.identifier(),
Stmt::FunctionDef(function) => function.identifier(),
_ => self.range(),
}
}
}
impl Identifier for Parameter {
fn identifier(&self) -> TextRange {
self.name.range()
}
}
impl Identifier for ParameterWithDefault {
fn identifier(&self) -> TextRange {
self.parameter.identifier()
}
}
impl Identifier for Alias {
fn identifier(&self) -> TextRange {
self.asname
.as_ref()
.map_or_else(|| self.name.range(), Ranged::range)
}
}
pub fn except(handler: &ExceptHandler, source: &str) -> TextRange {
IdentifierTokenizer::new(source, handler.range())
.next()
.expect("Failed to find `except` token in `ExceptHandler`")
}
pub fn else_(stmt: &Stmt, source: &str) -> Option<TextRange> {
let (Stmt::For(ast::StmtFor { body, orelse, .. })
| Stmt::While(ast::StmtWhile { body, orelse, .. })) = stmt
else {
return None;
};
if orelse.is_empty() {
return None;
}
IdentifierTokenizer::starts_at(
body.last().expect("Expected body to be non-empty").end(),
source,
)
.next()
}
fn is_python_identifier_start(c: char) -> bool {
c.is_alphabetic() || c == '_'
}
fn is_python_identifier_continue(c: char) -> bool {
c.is_alphanumeric() || c == '_'
}
pub(crate) struct IdentifierTokenizer<'a> {
cursor: Cursor<'a>,
offset: TextSize,
}
impl<'a> IdentifierTokenizer<'a> {
pub(crate) fn new(source: &'a str, range: TextRange) -> Self {
Self {
cursor: Cursor::new(&source[range]),
offset: range.start(),
}
}
pub(crate) fn starts_at(offset: TextSize, source: &'a str) -> Self {
let range = TextRange::new(offset, source.text_len());
Self::new(source, range)
}
fn next_token(&mut self) -> Option<TextRange> {
while let Some(c) = {
self.offset += self.cursor.token_len();
self.cursor.start_token();
self.cursor.bump()
} {
match c {
c if is_python_identifier_start(c) => {
self.cursor.eat_while(is_python_identifier_continue);
return Some(TextRange::at(self.offset, self.cursor.token_len()));
}
c if is_python_whitespace(c) => {
self.cursor.eat_while(is_python_whitespace);
}
'#' => {
self.cursor.eat_while(|c| !matches!(c, '\n' | '\r'));
}
'\r' => {
self.cursor.eat_char('\n');
}
'\n' => {
}
'\\' => {
}
_ => {
}
}
}
None
}
}
impl Iterator for IdentifierTokenizer<'_> {
type Item = TextRange;
fn next(&mut self) -> Option<Self::Item> {
self.next_token()
}
}
#[cfg(test)]
mod tests {
use super::IdentifierTokenizer;
use ruff_text_size::{TextLen, TextRange, TextSize};
#[test]
fn extract_global_names() {
let contents = r"global X,Y, Z".trim();
let mut names = IdentifierTokenizer::new(
contents,
TextRange::new(TextSize::new(0), contents.text_len()),
);
let range = names.next_token().unwrap();
assert_eq!(&contents[range], "global");
assert_eq!(range, TextRange::new(TextSize::from(0), TextSize::from(6)));
let range = names.next_token().unwrap();
assert_eq!(&contents[range], "X");
assert_eq!(range, TextRange::new(TextSize::from(7), TextSize::from(8)));
let range = names.next_token().unwrap();
assert_eq!(&contents[range], "Y");
assert_eq!(range, TextRange::new(TextSize::from(9), TextSize::from(10)));
let range = names.next_token().unwrap();
assert_eq!(&contents[range], "Z");
assert_eq!(
range,
TextRange::new(TextSize::from(12), TextSize::from(13))
);
}
}