1use text_scanner::{ext::RustScannerExt, Scanner};
2
3use crate::{impl_lexer_from_scanner, ScanToken, ScannerExt, TokenSpan};
4
/// Every word that the lexer reports as [`RustToken::Keyword`] instead of
/// [`RustToken::Ident`].
///
/// The table is grouped by the keyword categories of the Rust Reference; the
/// grouping is purely for readability, lookups only test for membership.
#[rustfmt::skip]
const KEYWORDS: [&str; 53] = [
    // Strict keywords.
    "as", "break", "const", "continue", "crate", "else", "enum", "extern", "false", "fn",
    "for", "if", "impl", "in", "let", "loop", "match", "mod", "move", "mut",
    "pub", "ref", "return", "self", "Self", "static", "struct", "super", "trait", "true",
    "type", "unsafe", "use", "where", "while",
    // Strict keywords since the 2018 edition.
    "async", "await", "dyn",
    // Weak (contextual) keywords.
    "macro_rules", "union",
    // Reserved for future use.
    "abstract", "become", "box", "do", "final", "macro", "override", "priv", "typeof", "unsized",
    "virtual", "yield",
    // Reserved since the 2018 edition.
    "try",
];
15
/// The kind of a single token produced when lexing Rust source text.
///
/// A token kind carries no text of its own; the matching text is delivered
/// alongside it as a span.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum RustToken {
    /// A non-empty run of whitespace.
    Space,
    /// A line comment.
    LineComment,
    /// A block comment.
    BlockComment,
    /// An identifier (plain or raw) that is not a keyword.
    Ident,
    /// An identifier whose text is one of the known Rust keywords.
    Keyword,
    /// A lifetime: a `'` immediately followed by an identifier.
    Lifetime,
    /// A character literal.
    Char,
    /// A string literal.
    String,
    /// A raw string literal.
    RawString,
    /// An integer literal (hexadecimal, octal, binary or decimal).
    Int,
    /// A floating-point literal.
    Float,
    /// One of the delimiters `{`, `}`, `[`, `]`, `(`, `)`.
    Delim,
    /// An operator or other punctuation, e.g. `=>`, `::`, `..=`, `;`.
    Punct,
    /// A single character that no other token kind accepts.
    Unknown,
}
38
39impl ScanToken for RustToken {
40 fn scan_token<'text>(scanner: &mut Scanner<'text>) -> Option<(Self, TokenSpan<'text>)> {
41 let (r, _s) = scanner.skip_whitespace();
42 if !r.is_empty() {
43 return Some((Self::Space, scanner.span(r)));
44 }
45
46 if let Ok((r, _s)) = scanner.scan_rust_line_comment() {
47 return Some((Self::LineComment, scanner.span(r)));
48 } else if let Ok((r, _s)) = scanner.scan_rust_block_comment() {
49 return Some((Self::BlockComment, scanner.span(r)));
50 }
51
52 if let Ok((r, _s)) = scanner
53 .scan_rust_raw_string()
54 .or_else(|_| scanner.scan_rust_string())
55 {
56 return Some((Self::String, scanner.span(r)));
57 }
58
59 if let Ok((r, ident)) = scanner
60 .scan_rust_raw_identifier()
61 .or_else(|_| scanner.scan_rust_identifier())
62 {
63 let tok = if KEYWORDS.contains(&ident) {
64 Self::Keyword
65 } else {
66 Self::Ident
67 };
68 return Some((tok, scanner.span(r)));
69 }
70
71 if let Ok((_r, '\'')) = scanner.peek() {
72 if let Ok((r, _s)) = scanner.scan_rust_char() {
73 return Some((Self::Char, scanner.span(r)));
74 }
75
76 let res = scanner.scan_with(|scanner| {
77 scanner.accept_char('\'')?;
78 scanner.scan_rust_identifier()?;
79 Ok(())
80 });
81 if let Ok((r, _s)) = res {
82 return Some((Self::Lifetime, scanner.span(r)));
83 }
84
85 let (r, _c) = scanner.next().ok()?;
86 return Some((Self::Unknown, scanner.span(r)));
87 }
88
89 if let Ok((r, _s)) = scanner.scan_rust_float() {
90 return Some((Self::Float, scanner.span(r)));
91 } else if let Ok((r, _s)) = scanner
92 .scan_rust_int_hex()
93 .or_else(|_| scanner.scan_rust_int_oct())
94 .or_else(|_| scanner.scan_rust_int_bin())
95 .or_else(|_| scanner.scan_rust_int_dec())
96 {
97 return Some((Self::Int, scanner.span(r)));
98 }
99
100 if let Ok((r, _c)) = scanner.accept_char_any(&['{', '}', '[', ']', '(', ')']) {
101 return Some((Self::Delim, scanner.span(r)));
102 }
103
104 let res = scanner.scan_with(|scanner| {
105 let (r, c) = scanner.next()?;
106 match c {
107 '=' => {
108 _ = scanner.accept_char_any(&['=', '>']);
109 }
110 '-' => {
111 _ = scanner.accept_char_any(&['=', '>']);
112 }
113 '+' | '*' | '/' | '%' | '^' | '!' => {
114 _ = scanner.accept_char('=');
115 }
116 '&' => {
117 _ = scanner.accept_char_any(&['&', '=']);
118 }
119 '|' => {
120 _ = scanner.accept_char_any(&['|', '=']);
121 }
122 '<' => {
123 _ = scanner.accept_char('<');
124 _ = scanner.accept_char('=');
125 }
126 '>' => {
127 _ = scanner.accept_char('>');
128 _ = scanner.accept_char('=');
129 }
130 '.' => {
131 if scanner.accept_char('.').is_ok() {
132 _ = scanner.accept_char_any(&['.', '=']);
133 }
134 }
135 ':' => {
136 _ = scanner.accept_char(':');
137 }
138 '@' | '_' | ',' | ';' | '#' | '$' | '?' | '~' => {}
139 _ => return Err(scanner.ranged_text(r)),
140 }
141 Ok(())
142 });
143 if let Ok((r, _s)) = res {
144 return Some((Self::Punct, scanner.span(r)));
145 }
146
147 let (r, _c) = scanner.next().ok()?;
148 Some((Self::Unknown, scanner.span(r)))
149 }
150}
151
152#[derive(Clone, Debug)]
158pub struct RustLexer<'text> {
159 scanner: Scanner<'text>,
160}
161
162impl<'text> RustLexer<'text> {
163 #[inline]
164 pub fn new(text: &'text str) -> Self {
165 Self {
166 scanner: Scanner::new(text),
167 }
168 }
169}
170
// Generates the lexer plumbing for `RustLexer`, producing `RustToken`s from
// its `scanner` field.
// NOTE(review): presumably this is what makes `RustLexer` iterable as
// `(RustToken, TokenSpan)` pairs, which the test module relies on — confirm
// against the macro definition.
impl_lexer_from_scanner!('text, RustLexer<'text>, RustToken, scanner);
172
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexing must be lossless: gluing the text of every emitted span back
    /// together, in order, has to reproduce the input exactly.
    #[test]
    fn test_rust_lexer_spans() {
        let input = include_str!("../../../text-scanner/src/ext/rust.rs");

        let mut output = String::new();
        for (_kind, token_span) in RustLexer::new(input) {
            output.push_str(token_span.as_str());
        }

        assert_eq!(input, output);
    }
}
189}