1use cfront_definition::{token::{Token, TokenType}, Keyword};
2use cfront_definition_keyword::automaton;
3
4pub use cfront_definition_keyword::keyword as keyword;
5pub use cfront_definition::token as token;
6
7pub fn analyze(input: &str) -> Vec<Token<'_>> {
8 let char_indices: Vec<_> = input.char_indices().collect();
9 let char_indices = &char_indices[..];
10 let mut idx = 0;
11 let (mut last_line, mut last_column) = (0, 0);
12 let (mut line, mut column) = (0, 0);
13 let mut just_ignore: usize = 0;
14 enum CommentState {
15 None,
16 Line,
17 Block,
18 }
19 let mut comment_state = CommentState::None;
20 enum QuotingState {
21 None,
22 Single(usize),
23 Double(usize),
24 }
25 let mut quoting_state = QuotingState::None;
26 let mut lidx: Option<usize> = None;
27 enum NormalState<'a> {
28 Identifier,
29 Number(Option<&'a str>),
30 }
31 let mut normal_state = NormalState::Identifier;
32 let mut ans = Vec::new();
33 while let Some((i, c)) = char_indices.get(idx) {
34 'scope : {
35 match just_ignore {
36 ref mut x if *x > 0 => {
37 *x -= 1;
38 break 'scope;
39 }
40 _ => (),
41 }
42 match quoting_state {
43 QuotingState::None => {
44 if *c == '\'' {
45 quoting_state = QuotingState::Single(*i);
46 break 'scope;
47 } else if *c == '"' {
48 quoting_state = QuotingState::Double(*i);
49 break 'scope;
50 }
51 }
52 QuotingState::Single(l) => {
53 if *c == '\'' {
54 ans.push(Token { token_type: TokenType::CharLiteral(&input[l+1..*i], true), line, column, });
55 quoting_state = QuotingState::None;
56 } else if *c == '\n' {
57 ans.push(Token { token_type: TokenType::CharLiteral(&input[l+1..*i], false), line, column, });
58 quoting_state = QuotingState::None;
59 }
60 break 'scope;
61 }
62 QuotingState::Double(l) => {
63 if *c == '"' {
64 ans.push(Token { token_type: TokenType::StringLiteral(&input[l+1..*i], true), line, column, });
65 quoting_state = QuotingState::None;
66 } else if *c == '\n' {
67 ans.push(Token { token_type: TokenType::StringLiteral(&input[l+1..*i], false), line, column, });
68 quoting_state = QuotingState::None;
69 }
70 break 'scope;
71 }
72 }
73 match comment_state {
74 CommentState::None => {
75 if *c == '/' {
76 if let Some((_, c2)) = char_indices.get(idx + 1) {
77 if *c2 == '/' {
78 comment_state = CommentState::Line;
79 just_ignore = 1;
80 break 'scope;
81 } else if *c2 == '*' {
82 comment_state = CommentState::Block;
83 just_ignore = 1;
84 break 'scope;
85 }
86 }
87 }
88 }
89 CommentState::Line => {
90 if *c == '\n' {
91 comment_state = CommentState::None;
92 }
93 break 'scope;
94 }
95 CommentState::Block => {
96 if *c == '*' {
97 if let Some((_, c2)) = char_indices.get(idx + 1) {
98 if *c2 == '/' {
99 comment_state = CommentState::None;
100 just_ignore = 1;
101 break 'scope;
102 }
103 }
104 }
105 break 'scope;
106 }
107 }
108 let mut punt = false;
109 match lidx {
110 Some(l) => {
111 let mut should_put = false;
112 if c.is_ascii_whitespace() {
113 should_put = true;
114 }
115 if c.is_ascii_punctuation() {
116 let mut i = true;
117 if *c == '.' {
118 if let NormalState::Number(_) = normal_state {
119 i = false;
120 }
121 }
122 if *c == '$' || *c == '@' || *c == '_' {
123 i = false;
124 }
125 if i {
126 should_put = true;
127 punt = true;
128 }
129 }
130 if should_put {
131 match normal_state {
132 NormalState::Identifier => {
133 let s = &input[l..*i];
134 if let Some(k) = try_into_keyword(s) {
135 ans.push(Token { token_type: TokenType::Keyword(k), line: last_line, column: last_column, });
136 } else {
137 ans.push(Token { token_type: TokenType::Identifier(s), line: last_line, column: last_column, });
138 }
139 }
140 NormalState::Number(prefix) => {
141 let s = &input[l..*i];
142 ans.push(Token { token_type: TokenType::NumberLiteral(s, prefix), line: last_line, column: last_column });
143 }
144 }
145 lidx = None;
146 }
147 },
148 None => {
149 if c.is_whitespace() {
150 break 'scope;
151 }
152 if c.is_ascii_punctuation() {
153 punt = true;
154 if *c == '$' || *c == '@' || *c == '_' {
155 punt = false;
156 }
157 }
158 if !punt {
159 lidx = Some(*i);
160 if c.is_digit(10) {
161 normal_state = NormalState::Number(None);
162 if *c == '0' {
163 let p = char_indices.get(idx + 1);
164 match p {
165 Some((_, 'x')) | Some((_, 'X')) | Some((_, 'b')) | Some((_, 'B')) => {
166 normal_state = NormalState::Number(Some(&input[*i..i+2]));
167 just_ignore = 1;
168 break 'scope;
169 }
170 _ => {
171 normal_state = NormalState::Number(Some(&input[*i..i+1]));
172 }
173 }
174 }
175 } else {
176 normal_state = NormalState::Identifier;
177 }
178 }
179 },
180 }
181 if punt {
182 use TokenType::*;
183 match c {
184 '(' => {
185 ans.push(Token { token_type: Parenthesis { is_left: true }, line, column, });
186 }
187 ')' => {
188 ans.push(Token { token_type: Parenthesis { is_left: false }, line, column, });
189 }
190 '[' => {
191 ans.push(Token { token_type: Bracket { is_left: true }, line, column, });
192 }
193 ']' => {
194 ans.push(Token { token_type: Bracket { is_left: false }, line, column, });
195 }
196 '{' => {
197 ans.push(Token { token_type: Brace { is_left: true }, line, column, });
198 }
199 '}' => {
200 ans.push(Token { token_type: Brace { is_left: false }, line, column, });
201 }
202 | '.' | ',' | ';' | '~' | ':' => {
203 ans.push(Token { token_type: Operator(&input[*i..i+1]), line, column, });
204 }
205 | '<' | '=' | '>' | '+' | '-' | '*' | '/' | '%' | '&' | '^' | '|' | '!' => {
207 if *c == '<' {
208 let p = (char_indices.get(idx + 1), char_indices.get(idx + 2));
209 match p {
210 (Some((_, '<')), Some((_, '='))) => {
211 ans.push(Token { token_type: Operator(&input[*i..i+3]), line, column: column + 2 });
212 just_ignore = 2;
213 break 'scope;
214 }
215 _ => (),
216 }
217 }
218 if *c == '>' {
219 let p = (char_indices.get(idx + 1), char_indices.get(idx + 2));
220 match p {
221 (Some((_, '>')), Some((_, '='))) => {
222 ans.push(Token { token_type: Operator(&input[*i..i+3]), line, column: column + 2 });
223 just_ignore = 2;
224 break 'scope;
225 }
226 _ => (),
227 }
228 }
229 let p = char_indices.get(idx + 1);
230 match p {
231 Some((_, '=')) => {
232 ans.push(Token { token_type: Operator(&input[*i..i+2]), line, column: column + 1 });
233 just_ignore = 1;
234 break 'scope;
235 }
236 _ => (),
237 }
238 if *c == '+' || *c == '-' || *c == '&' || *c == '|' {
239 match p {
240 Some((_, b)) if *b == *c => {
241 ans.push(Token { token_type: Operator(&input[*i..i+2]), line, column: column + 1});
242 just_ignore = 1;
243 break 'scope;
244 }
245 _ => (),
246 }
247 }
248 if *c == '-' {
249 match p {
250 Some((_, '>')) => {
251 ans.push(Token { token_type: Operator(&input[*i..i+2]), line, column: column + 1});
252 just_ignore = 1;
253 break 'scope;
254 }
255 _ => (),
256 }
257 }
258 ans.push(Token { token_type: Operator(&input[*i..i+1]), line, column });
259 }
260 _ => {
261 ans.push(Token { token_type: Operator(&input[*i..i+1]), line, column });
262 }
263 }
264 }
265 }
266 (last_line, last_column) = (line, column);
267 if *c == '\n' {
268 line += 1;
269 column = 0;
270 } else {
271 column += 1;
272 }
273 idx += 1;
274 }
275 return ans;
276}
277
278#[deprecated]
279pub fn try_into_keyword_directly(input: &str) -> Option<Keyword> {
280 use Keyword::*;
281 let ans = match input {
282 "alignas" => AlignAs,
283 "alignof" => AlignOf,
284 "auto" => Auto,
285 "bool" => Bool,
286 "break" => Break,
287 "case" => Case,
288 "char" => Char,
289 "const" => Const,
290 "constexpr" => Constexpr,
291 "continue" => Continue,
292 "default" => Default,
293 "do" => Do,
294 "double" => Double,
295 "else" => Else,
296 "enum" => Enum,
297 "extern" => Extern,
298 "false" => False,
299 "float" => Float,
300 "for" => For,
301 "goto" => Goto,
302 "if" => If,
303 "inline" => Inline,
304 "int" => Int,
305 "long" => Long,
306 "nullptr" => Nullptr,
307 "register" => Register,
308 "restrict" => Restrict,
309 "return" => Return,
310 "short" => Short,
311 "signed" => Signed,
312 "sizeof" => Sizeof,
313 "static" => Static,
314 "static_assert" => StaticAssert,
315 "struct" => Struct,
316 "switch" => Switch,
317 "thread_local" => ThreadLocal,
318 "true" => True,
319 "typedef" => Typedef,
320 "typeof" => TypeOf,
321 "typeof_unqual" => TypeOfUnqual,
322 "union" => Union,
323 "unsigned" => Unsigned,
324 "void" => Void,
325 "volatile" => Volatile,
326 "while" => While,
327 "_Alignas" => _AlignAs,
328 "_Alignof" => _AlignOf,
329 "_Atomic" => _Atomic,
330 "_Bool" => _Bool,
331 "_Complex" => _Complex,
332 "_Decimal128" => _Decimal128,
333 "_Decimal32" => _Decimal32,
334 "_Decimal64" => _Decimal64,
335 "_Generic" => _Generic,
336 "_Imaginary" => _Imaginary,
337 "_Noreturn" => _Noreturn,
338 "_Static_assert" => _StaticAssert,
339 "_Thread_local" => _ThreadLocal,
340 "asm" => Asm,
341 _ => return None,
342 };
343 Some(ans)
344}
345
346pub fn try_into_keyword_automaton(input: &str) -> Option<Keyword> {
347 let input = input.chars();
348 let mut state = automaton::State::default();
349 let mut rst = None;
350 for i in input {
351 let s = state.read(i);
352 match s {
353 Ok((s, k)) => {
354 state = s;
355 rst = k;
356 }
357 Err(_) => return None,
358 }
359 }
360 return rst;
361}
362
363pub fn try_into_keyword(input: &str) -> Option<Keyword> {
364 try_into_keyword_automaton(input)
365}