// scribe_analysis/parser.rs
use crate::ast::AstNode;
6use scribe_core::Result;
7
/// Minimal character-level tokenizer backing `Parser`.
///
/// Stores the full input up front as a `Vec<char>` so that lookahead
/// (`peek_ahead`, `peek_word`) is O(1) indexed access.
struct SimpleTokenizer {
    /// Source text, one entry per Unicode scalar value.
    input: Vec<char>,
    /// Index into `input` of the next character to examine.
    position: usize,
}
13
14impl SimpleTokenizer {
15 fn new(input: &str) -> Self {
16 Self {
17 input: input.chars().collect(),
18 position: 0,
19 }
20 }
21
22 fn is_at_end(&self) -> bool {
23 self.position >= self.input.len()
24 }
25
26 fn advance(&mut self) {
27 if !self.is_at_end() {
28 self.position += 1;
29 }
30 }
31
32 fn peek_char(&self) -> Option<char> {
33 self.input.get(self.position).copied()
34 }
35
36 fn peek_ahead(&self, offset: usize) -> Option<char> {
37 self.input.get(self.position + offset).copied()
38 }
39
40 fn current_char(&self) -> Option<char> {
41 self.input.get(self.position).copied()
42 }
43
44 fn skip_whitespace(&mut self) {
45 while !self.is_at_end() {
46 match self.current_char() {
47 Some(' ') | Some('\t') | Some('\r') => self.advance(),
48 _ => break,
49 }
50 }
51 }
52
53 fn skip_line(&mut self) {
54 while !self.is_at_end() {
55 if self.current_char() == Some('\n') {
56 self.advance();
57 break;
58 }
59 self.advance();
60 }
61 }
62
63 fn peek_word(&self, word: &str) -> bool {
64 let word_chars: Vec<char> = word.chars().collect();
65
66 if self.position + word_chars.len() > self.input.len() {
67 return false;
68 }
69
70 for (i, &expected_char) in word_chars.iter().enumerate() {
72 if let Some(actual_char) = self.input.get(self.position + i) {
73 if *actual_char != expected_char {
74 return false;
75 }
76 } else {
77 return false;
78 }
79 }
80
81 if let Some(next_char) = self.input.get(self.position + word_chars.len()) {
83 if next_char.is_alphanumeric() || *next_char == '_' {
84 return false;
85 }
86 }
87
88 true
89 }
90
91 fn consume_word(&mut self, word: &str) -> Result<()> {
92 if self.peek_word(word) {
93 self.position += word.chars().count();
94 Ok(())
95 } else {
96 Err(scribe_core::ScribeError::parse(&format!(
97 "Expected '{}'",
98 word
99 )))
100 }
101 }
102
103 fn next(&mut self) -> Option<String> {
104 self.skip_whitespace();
105
106 if self.is_at_end() {
107 return None;
108 }
109
110 let mut token = String::new();
111
112 while !self.is_at_end() {
114 let ch = self.current_char().unwrap();
115 if ch.is_alphanumeric() || ch == '_' {
116 token.push(ch);
117 self.advance();
118 } else {
119 break;
120 }
121 }
122
123 if token.is_empty() {
124 if let Some(ch) = self.current_char() {
126 token.push(ch);
127 self.advance();
128 }
129 }
130
131 if token.is_empty() {
132 None
133 } else {
134 Some(token)
135 }
136 }
137}
138
/// Outcome of a parse: the resulting AST plus any non-fatal errors
/// collected along the way.
#[derive(Debug, Clone)]
pub struct ParseResult {
    /// Root node of the parsed tree.
    pub ast: AstNode,
    /// Human-readable error messages; empty on a clean parse.
    pub errors: Vec<String>,
}
144
145impl ParseResult {
146 pub fn new(ast: AstNode) -> Self {
147 Self {
148 ast,
149 errors: Vec::new(),
150 }
151 }
152
153 pub fn with_errors(mut self, errors: Vec<String>) -> Self {
154 self.errors = errors;
155 self
156 }
157}
158
/// Lightweight heuristic parser that produces a coarse, structure-only
/// AST for several languages (Rust, Python, JavaScript/TypeScript).
pub struct Parser;
160
161impl Parser {
162 pub fn new() -> Result<Self> {
163 Ok(Self)
164 }
165
166 fn create_node_with_children(node_type: &str, children: Vec<AstNode>) -> AstNode {
168 let mut node = AstNode::new(node_type.to_string());
169 for child in children {
170 node = node.add_child(child);
171 }
172 node
173 }
174
175 pub fn parse(&self, code: &str, language: &str) -> Result<AstNode> {
176 let mut tokenizer = SimpleTokenizer::new(code);
177
178 match language.to_lowercase().as_str() {
179 "rust" | "rs" => self.parse_rust(&mut tokenizer),
180 "python" | "py" => self.parse_python(&mut tokenizer),
181 "javascript" | "js" | "typescript" | "ts" => self.parse_javascript(&mut tokenizer),
182 _ => self.parse_generic(&mut tokenizer),
183 }
184 }
185
186 fn parse_rust(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
187 let mut statements = Vec::new();
188
189 while !tokenizer.is_at_end() {
190 if let Some(stmt) = self.parse_statement(tokenizer)? {
191 statements.push(stmt);
192 }
193 }
194
195 Ok(Self::create_node_with_children("block", statements))
196 }
197
198 fn parse_python(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
199 let mut statements = Vec::new();
201
202 while !tokenizer.is_at_end() {
203 if let Some(stmt) = self.parse_python_statement(tokenizer)? {
204 statements.push(stmt);
205 }
206 }
207
208 Ok(Self::create_node_with_children("block", statements))
209 }
210
211 fn parse_javascript(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
212 let mut statements = Vec::new();
214
215 while !tokenizer.is_at_end() {
216 if let Some(stmt) = self.parse_js_statement(tokenizer)? {
217 statements.push(stmt);
218 }
219 }
220
221 Ok(Self::create_node_with_children("block", statements))
222 }
223
224 fn parse_generic(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
225 let mut statements = Vec::new();
227
228 while !tokenizer.is_at_end() {
229 if let Some(token) = tokenizer.next() {
230 statements.push(AstNode::new(token));
231 }
232 }
233
234 Ok(Self::create_node_with_children("block", statements))
235 }
236
237 fn parse_statement(&self, tokenizer: &mut SimpleTokenizer) -> Result<Option<AstNode>> {
238 tokenizer.skip_whitespace();
239
240 if tokenizer.is_at_end() {
241 return Ok(None);
242 }
243
244 if tokenizer.peek_word("if") {
246 return Ok(Some(self.parse_if_statement(tokenizer)?));
247 }
248
249 if tokenizer.peek_word("while") {
250 return Ok(Some(self.parse_while_statement(tokenizer)?));
251 }
252
253 if tokenizer.peek_word("for") {
254 return Ok(Some(self.parse_for_statement(tokenizer)?));
255 }
256
257 if tokenizer.peek_word("match") {
258 return Ok(Some(self.parse_match_statement(tokenizer)?));
259 }
260
261 tokenizer.skip_line();
263 Ok(Some(AstNode::new("statement".to_string())))
264 }
265
266 fn parse_python_statement(&self, tokenizer: &mut SimpleTokenizer) -> Result<Option<AstNode>> {
267 tokenizer.skip_whitespace();
268
269 if tokenizer.is_at_end() {
270 return Ok(None);
271 }
272
273 if tokenizer.peek_word("if") {
274 return Ok(Some(self.parse_python_if(tokenizer)?));
275 }
276
277 if tokenizer.peek_word("while") {
278 return Ok(Some(self.parse_python_while(tokenizer)?));
279 }
280
281 if tokenizer.peek_word("for") {
282 return Ok(Some(self.parse_python_for(tokenizer)?));
283 }
284
285 tokenizer.skip_line();
286 Ok(Some(AstNode::new("statement".to_string())))
287 }
288
289 fn parse_js_statement(&self, tokenizer: &mut SimpleTokenizer) -> Result<Option<AstNode>> {
290 tokenizer.skip_whitespace();
291
292 if tokenizer.is_at_end() {
293 return Ok(None);
294 }
295
296 if tokenizer.peek_word("if") {
297 return Ok(Some(self.parse_js_if(tokenizer)?));
298 }
299
300 if tokenizer.peek_word("while") {
301 return Ok(Some(self.parse_js_while(tokenizer)?));
302 }
303
304 if tokenizer.peek_word("for") {
305 return Ok(Some(self.parse_js_for(tokenizer)?));
306 }
307
308 if tokenizer.peek_word("switch") {
309 return Ok(Some(self.parse_js_switch(tokenizer)?));
310 }
311
312 tokenizer.skip_line();
313 Ok(Some(AstNode::new("statement".to_string())))
314 }
315
316 fn parse_if_statement(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
317 tokenizer.consume_word("if")?;
318 let _condition = self.parse_condition(tokenizer)?;
319 let then_branch = self.parse_block(tokenizer)?;
320
321 let mut children = vec![then_branch];
322
323 if tokenizer.peek_word("else") {
324 tokenizer.consume_word("else")?;
325 let else_branch = self.parse_block(tokenizer)?;
326 children.push(else_branch);
327 }
328
329 Ok(Self::create_node_with_children("if", children))
330 }
331
332 fn parse_while_statement(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
333 tokenizer.consume_word("while")?;
334 let _condition = self.parse_condition(tokenizer)?;
335 let body = self.parse_block(tokenizer)?;
336
337 Ok(Self::create_node_with_children("while", vec![body]))
338 }
339
340 fn parse_for_statement(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
341 tokenizer.consume_word("for")?;
342
343 let _init = "init".to_string();
345 let _condition = "condition".to_string();
346 let _update = "update".to_string();
347 let body = self.parse_block(tokenizer)?;
348
349 Ok(Self::create_node_with_children("for", vec![body]))
350 }
351
352 fn parse_match_statement(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
353 tokenizer.consume_word("match")?;
354 let _condition = self.parse_condition(tokenizer)?;
355
356 let mut cases = Vec::new();
358
359 while !tokenizer.is_at_end() && tokenizer.current_char() != Some('}') {
361 if tokenizer.current_char() == Some('=') && tokenizer.peek_ahead(1) == Some('>') {
362 cases.push(AstNode::new("match_arm".to_string()));
363 }
364 tokenizer.advance();
365 }
366
367 Ok(Self::create_node_with_children("match", cases))
368 }
369
370 fn parse_python_if(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
371 tokenizer.consume_word("if")?;
372 let condition = self.parse_condition(tokenizer)?;
373 let then_branch = Box::new(self.parse_python_block(tokenizer)?);
374
375 let else_branch = if tokenizer.peek_word("else") {
376 tokenizer.consume_word("else")?;
377 Some(Box::new(self.parse_python_block(tokenizer)?))
378 } else {
379 None
380 };
381
382 Ok(Self::create_node_with_children("if", vec![*then_branch]))
383 }
384
385 fn parse_python_while(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
386 tokenizer.consume_word("while")?;
387 let condition = self.parse_condition(tokenizer)?;
388 let body = Box::new(self.parse_python_block(tokenizer)?);
389
390 Ok(Self::create_node_with_children("while", vec![*body]))
391 }
392
393 fn parse_python_for(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
394 tokenizer.consume_word("for")?;
395
396 let init = "for_init".to_string();
397 let condition = "for_condition".to_string();
398 let update = "for_update".to_string();
399 let body = Box::new(self.parse_python_block(tokenizer)?);
400
401 Ok(Self::create_node_with_children("for", vec![*body]))
402 }
403
404 fn parse_js_if(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
405 tokenizer.consume_word("if")?;
406 let condition = self.parse_condition(tokenizer)?;
407 let then_branch = Box::new(self.parse_js_block(tokenizer)?);
408
409 let else_branch = if tokenizer.peek_word("else") {
410 tokenizer.consume_word("else")?;
411 Some(Box::new(self.parse_js_block(tokenizer)?))
412 } else {
413 None
414 };
415
416 Ok(Self::create_node_with_children("if", vec![*then_branch]))
417 }
418
419 fn parse_js_while(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
420 tokenizer.consume_word("while")?;
421 let condition = self.parse_condition(tokenizer)?;
422 let body = Box::new(self.parse_js_block(tokenizer)?);
423
424 Ok(Self::create_node_with_children("while", vec![*body]))
425 }
426
427 fn parse_js_for(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
428 tokenizer.consume_word("for")?;
429
430 let init = "for_init".to_string();
431 let condition = "for_condition".to_string();
432 let update = "for_update".to_string();
433 let body = Box::new(self.parse_js_block(tokenizer)?);
434
435 Ok(Self::create_node_with_children("for", vec![*body]))
436 }
437
438 fn parse_js_switch(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
439 tokenizer.consume_word("switch")?;
440 let condition = self.parse_condition(tokenizer)?;
441
442 let mut cases = Vec::new();
443
444 while !tokenizer.is_at_end() {
446 if tokenizer.peek_word("case") || tokenizer.peek_word("default") {
447 cases.push(AstNode::new("case".to_string()));
448 }
449 tokenizer.advance();
450 }
451
452 Ok(Self::create_node_with_children("switch", cases))
453 }
454
455 fn parse_condition(&self, tokenizer: &mut SimpleTokenizer) -> Result<String> {
456 let mut condition = String::new();
458
459 tokenizer.skip_whitespace();
460
461 while !tokenizer.is_at_end() {
462 let ch = tokenizer.peek_char().unwrap_or(' ');
463 if ch == '{' || ch == ':' || ch == '\n' {
464 break;
465 }
466 condition.push(ch);
467 tokenizer.advance();
468 }
469
470 Ok(condition.trim().to_string())
471 }
472
473 fn parse_block(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
474 let mut statements = Vec::new();
475
476 tokenizer.skip_whitespace();
478 if tokenizer.peek_char() == Some('{') {
479 tokenizer.advance(); let mut brace_count = 1;
482 while !tokenizer.is_at_end() && brace_count > 0 {
483 if tokenizer.peek_char() == Some('{') {
484 brace_count += 1;
485 } else if tokenizer.peek_char() == Some('}') {
486 brace_count -= 1;
487 }
488
489 if brace_count > 0 {
490 if let Some(stmt) = self.parse_statement(tokenizer)? {
491 statements.push(stmt);
492 }
493 } else {
494 tokenizer.advance(); }
496 }
497 }
498
499 Ok(Self::create_node_with_children("block", statements))
500 }
501
502 fn parse_python_block(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
503 let mut statements = Vec::new();
504
505 tokenizer.skip_line(); for _ in 0..5 {
510 if tokenizer.is_at_end() {
511 break;
512 }
513 if let Some(stmt) = self.parse_python_statement(tokenizer)? {
514 statements.push(stmt);
515 }
516 }
517
518 Ok(Self::create_node_with_children("block", statements))
519 }
520
521 fn parse_js_block(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
522 self.parse_block(tokenizer)
524 }
525}
526
impl Default for Parser {
    /// Delegates to `Parser::new`, which is currently infallible, so
    /// the `expect` cannot fire today; it exists only because `new`
    /// returns `Result`.
    fn default() -> Self {
        Self::new().expect("Failed to create Parser")
    }
}