any_lexer/lexers/
python.rs1use text_scanner::ext::{PythonScannerExt, PythonStrExt};
2use text_scanner::Scanner;
3
4use crate::{impl_lexer_from_scanner, ScanToken, ScannerExt, TokenSpan};
5
/// Kind of token produced when lexing Python source text.
///
/// `Hash` is derived alongside `Eq` so token kinds can be used as
/// `HashMap`/`HashSet` keys (e.g. for per-kind styling tables).
#[derive(PartialEq, Eq, Hash, Clone, Copy, Debug)]
pub enum PythonToken {
    /// A run of whitespace.
    Space,
    /// A line comment.
    LineComment,
    /// An explicit line joiner (in Python's grammar, a backslash line continuation).
    ExplicitLineJoiner,
    /// An identifier that is neither a keyword nor a soft keyword.
    Ident,
    /// An identifier for which `is_python_keyword()` is true.
    Keyword,
    /// An identifier for which `is_python_soft_keyword()` is true.
    SoftKeyword,
    /// A short string literal.
    ShortString,
    /// A long string literal.
    LongString,
    /// A short bytes literal.
    ShortBytes,
    /// A long bytes literal.
    LongBytes,
    /// An integer literal.
    Int,
    /// A floating point literal.
    Float,
    /// A delimiter.
    Delim,
    /// An operator.
    Punct,
    /// Input that no other token kind matched.
    Unknown,
}
29
30impl ScanToken for PythonToken {
31 fn scan_token<'text>(scanner: &mut Scanner<'text>) -> Option<(Self, TokenSpan<'text>)> {
32 let (r, _s) = scanner.skip_whitespace();
33 if !r.is_empty() {
34 return Some((Self::Space, scanner.span(r)));
35 }
36
37 if let Ok((r, _s)) = scanner.scan_python_line_comment() {
38 return Some((Self::LineComment, scanner.span(r)));
39 }
40
41 if let Ok((r, ident)) = scanner.scan_python_identifier() {
42 let tok = if ident.is_python_keyword() {
43 Self::Keyword
44 } else if ident.is_python_soft_keyword() {
45 Self::SoftKeyword
46 } else {
47 Self::Ident
48 };
49 return Some((tok, scanner.span(r)));
50 }
51
52 if let Ok((r, _s)) = scanner.scan_python_long_string() {
53 return Some((Self::LongString, scanner.span(r)));
54 } else if let Ok((r, _s)) = scanner.scan_python_short_string() {
55 return Some((Self::ShortString, scanner.span(r)));
56 } else if let Ok((r, _s)) = scanner.scan_python_long_bytes() {
57 return Some((Self::LongBytes, scanner.span(r)));
58 } else if let Ok((r, _s)) = scanner.scan_python_short_bytes() {
59 return Some((Self::ShortBytes, scanner.span(r)));
60 }
61
62 if let Ok((r, _s)) = scanner.scan_python_float() {
63 return Some((Self::Float, scanner.span(r)));
64 } else if let Ok((r, _s)) = scanner
65 .scan_python_int_hex()
66 .or_else(|_| scanner.scan_python_int_oct())
67 .or_else(|_| scanner.scan_python_int_dec())
68 {
69 return Some((Self::Int, scanner.span(r)));
70 }
71
72 if let Ok((r, _c)) = scanner.scan_python_delimiter() {
73 return Some((Self::Delim, scanner.span(r)));
74 } else if let Ok((r, _c)) = scanner.scan_python_operator() {
75 return Some((Self::Punct, scanner.span(r)));
76 }
77
78 if let Ok((r, _s)) = scanner.scan_python_explicit_line_joiner() {
79 return Some((Self::ExplicitLineJoiner, scanner.span(r)));
80 }
81
82 let (r, _c) = scanner.next().ok()?;
83 Some((Self::Unknown, scanner.span(r)))
84 }
85}
86
87#[derive(Clone, Debug)]
93pub struct PythonLexer<'text> {
94 scanner: Scanner<'text>,
95}
96
97impl<'text> PythonLexer<'text> {
98 #[inline]
99 pub fn new(text: &'text str) -> Self {
100 Self {
101 scanner: Scanner::new(text),
102 }
103 }
104}
105
106impl_lexer_from_scanner!('text, PythonLexer<'text>, PythonToken, scanner);
107
#[cfg(test)]
mod tests {
    use super::*;

    /// Concatenating every span the lexer yields must reproduce the input exactly.
    #[test]
    fn test_python_lexer_spans() {
        // NOTE(review): the fixture is a Rust source file, not Python. This looks
        // deliberate, since only span round-tripping is checked — confirm.
        let input = include_str!("../../../text-scanner/src/ext/rust.rs");

        let output = PythonLexer::new(input)
            .into_iter()
            .fold(String::new(), |mut joined, (_tok, span)| {
                joined.push_str(span.as_str());
                joined
            });

        assert_eq!(input, output);
    }
}