1use text_scanner::{ext::CScannerExt, Scanner};
2
3use crate::{impl_lexer_from_scanner, ScanToken, ScannerExt, TokenSpan};
4
/// C keywords recognized by the lexer.
///
/// An identifier that exactly matches one of these entries is reported as
/// `CToken::Keyword` instead of `CToken::Ident`. The table holds both the
/// underscore-prefixed spellings (e.g. `_Alignas`) and the lowercase
/// `alignas` / `alignof` spellings.
#[rustfmt::skip]
const KEYWORDS: [&str; 46] = [
    "alignas", "alignof", "auto", "break", "case", "char", "const", "continue",
    "default", "do", "double", "else", "enum", "extern", "float", "for", "goto",
    "if", "inline", "int", "long", "register", "restrict", "return", "short",
    "signed", "sizeof", "static", "struct", "switch", "typedef", "union",
    "unsigned", "void", "volatile", "while", "_Alignas", "_Alignof", "_Atomic",
    "_Bool", "_Complex", "_Generic", "_Imaginary", "_Noreturn", "_Static_assert",
    "_Thread_local",
];
16
/// Additional keywords, presumably the Microsoft-specific C extensions
/// (going by the `_MS` suffix and entries such as `__declspec`) -- TODO confirm.
///
/// This table is consulted unconditionally alongside `KEYWORDS`, so plain
/// words such as `thread`, `naked` or `dllexport` are classified as
/// `CToken::Keyword` wherever they appear as identifiers.
#[rustfmt::skip]
const KEYWORDS_MS: [&str; 21] = [
    "__asm", "__based", "__cdecl", "__declspec", "__except", "__fastcall", "__finally",
    "__inline", "__int16", "__int32", "__int64", "__int8", "__leave", "__restrict",
    "__stdcall", "__try", "dllexport", "dllimport", "naked", "static_assert", "thread",
];
24
/// Kind of a token produced when lexing C source text.
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
pub enum CToken {
    /// A non-empty run of whitespace.
    Space,
    /// A line comment, as recognized by `scan_c_line_comment`.
    LineComment,
    /// A block comment, as recognized by `scan_c_block_comment`.
    BlockComment,
    /// An identifier that is not listed in either keyword table.
    Ident,
    /// An identifier that matches an entry of `KEYWORDS` or `KEYWORDS_MS`.
    Keyword,
    /// A character literal, as recognized by `scan_c_char`.
    Char,
    /// A string literal, as recognized by `scan_c_string`.
    String,
    /// An integer literal (hexadecimal, octal or decimal scanner).
    Int,
    /// A floating-point literal, as recognized by `scan_c_float`.
    Float,
    /// One of the bracket characters `{`, `}`, `[`, `]`, `(`, `)`.
    Delim,
    /// An operator or separator such as `+=`, `<<=`, `...`, `,` or `;`.
    Punct,
    /// A single character that none of the other token kinds matched.
    Unknown,
}
45
46impl ScanToken for CToken {
47 fn scan_token<'text>(scanner: &mut Scanner<'text>) -> Option<(Self, TokenSpan<'text>)> {
48 let (r, _s) = scanner.skip_whitespace();
49 if !r.is_empty() {
50 return Some((Self::Space, scanner.span(r)));
51 }
52
53 if let Ok((r, _s)) = scanner.scan_c_line_comment() {
54 return Some((Self::LineComment, scanner.span(r)));
55 } else if let Ok((r, _s)) = scanner.scan_c_block_comment() {
56 return Some((Self::BlockComment, scanner.span(r)));
57 }
58
59 if let Ok((r, ident)) = scanner.scan_c_identifier() {
60 let tok = if KEYWORDS.contains(&ident) || KEYWORDS_MS.contains(&ident) {
61 Self::Keyword
62 } else {
63 Self::Ident
64 };
65 return Some((tok, scanner.span(r)));
66 }
67
68 if let Ok((r, _s)) = scanner.scan_c_char() {
69 return Some((Self::Char, scanner.span(r)));
70 } else if let Ok((r, _s)) = scanner.scan_c_string() {
71 return Some((Self::String, scanner.span(r)));
72 }
73
74 if let Ok((r, _s)) = scanner.scan_c_float() {
75 return Some((Self::Float, scanner.span(r)));
76 } else if let Ok((r, _s)) = scanner
77 .scan_c_int_hex()
78 .or_else(|_| scanner.scan_c_int_oct())
79 .or_else(|_| scanner.scan_c_int_dec())
80 {
81 return Some((Self::Int, scanner.span(r)));
82 }
83
84 if let Ok((r, _c)) = scanner.accept_char_any(&['{', '}', '[', ']', '(', ')']) {
85 return Some((Self::Delim, scanner.span(r)));
86 }
87
88 let res = scanner.scan_with(|scanner| {
90 let (r, c) = scanner.next()?;
91 match c {
92 '=' => {
93 _ = scanner.accept_char_any(&['=', '>']);
94 }
95 '+' => {
96 _ = scanner.accept_char_any(&['+', '=']);
97 }
98 '-' => {
99 _ = scanner.accept_char_any(&['-', '=']);
100 }
101 '*' | '/' | '%' | '^' | '!' => {
102 _ = scanner.accept_char('=');
103 }
104 '&' => {
105 _ = scanner.accept_char_any(&['&', '=']);
106 }
107 '|' => {
108 _ = scanner.accept_char_any(&['|', '=']);
109 }
110 '<' => {
111 _ = scanner.accept_char('<');
112 _ = scanner.accept_char('=');
113 }
114 '>' => {
115 _ = scanner.accept_char('>');
116 _ = scanner.accept_char('=');
117 }
118 '.' => {
119 _ = scanner.scan_with(|scanner| {
120 scanner.accept_char('.')?;
121 scanner.accept_char('.')?;
122 Ok(())
123 });
124 }
125 '#' => {
126 _ = scanner.accept_char('#');
127 }
128 ',' | ';' | ':' | '?' | '~' => {}
129 _ => return Err(scanner.ranged_text(r)),
130 }
131 Ok(())
132 });
133 if let Ok((r, _s)) = res {
134 return Some((Self::Punct, scanner.span(r)));
135 }
136
137 let (r, _c) = scanner.next().ok()?;
138 Some((Self::Unknown, scanner.span(r)))
139 }
140}
141
/// Lexer over C source text.
///
/// Wraps a [`Scanner`] positioned over the input. The tests in this file
/// iterate a `CLexer` to obtain `(token, span)` pairs.
#[derive(Clone, Debug)]
pub struct CLexer<'text> {
    // Scanner over the text being tokenized.
    scanner: Scanner<'text>,
}
151
152impl<'text> CLexer<'text> {
153 #[inline]
154 pub fn new(text: &'text str) -> Self {
155 Self {
156 scanner: Scanner::new(text),
157 }
158 }
159}
160
// Generates the lexer plumbing for `CLexer` from its `scanner` field and the
// `CToken` scanner; presumably this is what makes `CLexer` iterable as
// `(CToken, TokenSpan)` pairs -- TODO confirm against the macro definition.
impl_lexer_from_scanner!('text, CLexer<'text>, CToken, scanner);
162
#[cfg(test)]
mod tests {
    use super::*;

    /// Concatenating the spans of every emitted token must reproduce the
    /// input exactly: the lexer may neither drop nor duplicate any text.
    #[test]
    fn test_c_lexer_spans() {
        // Only span coverage is checked here, not token kinds; note that the
        // included file is Rust source rather than C.
        let input = include_str!("../../../text-scanner/src/ext/rust.rs");
        let mut output = String::with_capacity(input.len());

        for (_kind, piece) in CLexer::new(input) {
            output += piece.as_str();
        }

        assert_eq!(input, output);
    }
}
181}