// scribe_analysis/parser.rs

//! # Code Parsing Infrastructure
//!
//! Placeholder module for language-specific parsers.

use scribe_core::Result;

use crate::ast::AstNode;
7
8/// Simple character-based tokenizer without regex
9struct SimpleTokenizer {
10    input: Vec<char>,
11    position: usize,
12}
13
14impl SimpleTokenizer {
15    fn new(input: &str) -> Self {
16        Self {
17            input: input.chars().collect(),
18            position: 0,
19        }
20    }
21
22    fn is_at_end(&self) -> bool {
23        self.position >= self.input.len()
24    }
25
26    fn advance(&mut self) {
27        if !self.is_at_end() {
28            self.position += 1;
29        }
30    }
31
32    fn peek_char(&self) -> Option<char> {
33        self.input.get(self.position).copied()
34    }
35
36    fn peek_ahead(&self, offset: usize) -> Option<char> {
37        self.input.get(self.position + offset).copied()
38    }
39
40    fn current_char(&self) -> Option<char> {
41        self.input.get(self.position).copied()
42    }
43
44    fn skip_whitespace(&mut self) {
45        while !self.is_at_end() {
46            match self.current_char() {
47                Some(' ') | Some('\t') | Some('\r') => self.advance(),
48                _ => break,
49            }
50        }
51    }
52
53    fn skip_line(&mut self) {
54        while !self.is_at_end() {
55            if self.current_char() == Some('\n') {
56                self.advance();
57                break;
58            }
59            self.advance();
60        }
61    }
62
63    fn peek_word(&self, word: &str) -> bool {
64        let word_chars: Vec<char> = word.chars().collect();
65
66        if self.position + word_chars.len() > self.input.len() {
67            return false;
68        }
69
70        // Check if characters match
71        for (i, &expected_char) in word_chars.iter().enumerate() {
72            if let Some(actual_char) = self.input.get(self.position + i) {
73                if *actual_char != expected_char {
74                    return false;
75                }
76            } else {
77                return false;
78            }
79        }
80
81        // Check that it's a complete word (not part of a larger identifier)
82        if let Some(next_char) = self.input.get(self.position + word_chars.len()) {
83            if next_char.is_alphanumeric() || *next_char == '_' {
84                return false;
85            }
86        }
87
88        true
89    }
90
91    fn consume_word(&mut self, word: &str) -> Result<()> {
92        if self.peek_word(word) {
93            self.position += word.chars().count();
94            Ok(())
95        } else {
96            Err(scribe_core::ScribeError::parse(&format!(
97                "Expected '{}'",
98                word
99            )))
100        }
101    }
102
103    fn next(&mut self) -> Option<String> {
104        self.skip_whitespace();
105
106        if self.is_at_end() {
107            return None;
108        }
109
110        let mut token = String::new();
111
112        // Collect alphanumeric characters and underscores
113        while !self.is_at_end() {
114            let ch = self.current_char().unwrap();
115            if ch.is_alphanumeric() || ch == '_' {
116                token.push(ch);
117                self.advance();
118            } else {
119                break;
120            }
121        }
122
123        if token.is_empty() {
124            // Single character token
125            if let Some(ch) = self.current_char() {
126                token.push(ch);
127                self.advance();
128            }
129        }
130
131        if token.is_empty() {
132            None
133        } else {
134            Some(token)
135        }
136    }
137}
138
/// Outcome of a parse run: the produced AST plus any non-fatal errors
/// collected along the way.
#[derive(Debug, Clone)]
pub struct ParseResult {
    /// Root of the parsed syntax tree.
    pub ast: AstNode,
    /// Human-readable messages for recoverable parse problems.
    pub errors: Vec<String>,
}
144
145impl ParseResult {
146    pub fn new(ast: AstNode) -> Self {
147        Self {
148            ast,
149            errors: Vec::new(),
150        }
151    }
152
153    pub fn with_errors(mut self, errors: Vec<String>) -> Self {
154        self.errors = errors;
155        self
156    }
157}
158
159pub struct Parser;
160
161impl Parser {
162    pub fn new() -> Result<Self> {
163        Ok(Self)
164    }
165
166    // Helper function to create nodes with children
167    fn create_node_with_children(node_type: &str, children: Vec<AstNode>) -> AstNode {
168        let mut node = AstNode::new(node_type.to_string());
169        for child in children {
170            node = node.add_child(child);
171        }
172        node
173    }
174
175    pub fn parse(&self, code: &str, language: &str) -> Result<AstNode> {
176        let mut tokenizer = SimpleTokenizer::new(code);
177
178        match language.to_lowercase().as_str() {
179            "rust" | "rs" => self.parse_rust(&mut tokenizer),
180            "python" | "py" => self.parse_python(&mut tokenizer),
181            "javascript" | "js" | "typescript" | "ts" => self.parse_javascript(&mut tokenizer),
182            _ => self.parse_generic(&mut tokenizer),
183        }
184    }
185
186    fn parse_rust(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
187        let mut statements = Vec::new();
188
189        while !tokenizer.is_at_end() {
190            if let Some(stmt) = self.parse_statement(tokenizer)? {
191                statements.push(stmt);
192            }
193        }
194
195        Ok(Self::create_node_with_children("block", statements))
196    }
197
198    fn parse_python(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
199        // Simple Python parsing - detect basic structures
200        let mut statements = Vec::new();
201
202        while !tokenizer.is_at_end() {
203            if let Some(stmt) = self.parse_python_statement(tokenizer)? {
204                statements.push(stmt);
205            }
206        }
207
208        Ok(Self::create_node_with_children("block", statements))
209    }
210
211    fn parse_javascript(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
212        // Simple JavaScript parsing
213        let mut statements = Vec::new();
214
215        while !tokenizer.is_at_end() {
216            if let Some(stmt) = self.parse_js_statement(tokenizer)? {
217                statements.push(stmt);
218            }
219        }
220
221        Ok(Self::create_node_with_children("block", statements))
222    }
223
224    fn parse_generic(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
225        // Generic parsing - just count basic structures
226        let mut statements = Vec::new();
227
228        while !tokenizer.is_at_end() {
229            if let Some(token) = tokenizer.next() {
230                statements.push(AstNode::new(token));
231            }
232        }
233
234        Ok(Self::create_node_with_children("block", statements))
235    }
236
237    fn parse_statement(&self, tokenizer: &mut SimpleTokenizer) -> Result<Option<AstNode>> {
238        tokenizer.skip_whitespace();
239
240        if tokenizer.is_at_end() {
241            return Ok(None);
242        }
243
244        // Look for common keywords
245        if tokenizer.peek_word("if") {
246            return Ok(Some(self.parse_if_statement(tokenizer)?));
247        }
248
249        if tokenizer.peek_word("while") {
250            return Ok(Some(self.parse_while_statement(tokenizer)?));
251        }
252
253        if tokenizer.peek_word("for") {
254            return Ok(Some(self.parse_for_statement(tokenizer)?));
255        }
256
257        if tokenizer.peek_word("match") {
258            return Ok(Some(self.parse_match_statement(tokenizer)?));
259        }
260
261        // Skip to next line for other statements
262        tokenizer.skip_line();
263        Ok(Some(AstNode::new("statement".to_string())))
264    }
265
266    fn parse_python_statement(&self, tokenizer: &mut SimpleTokenizer) -> Result<Option<AstNode>> {
267        tokenizer.skip_whitespace();
268
269        if tokenizer.is_at_end() {
270            return Ok(None);
271        }
272
273        if tokenizer.peek_word("if") {
274            return Ok(Some(self.parse_python_if(tokenizer)?));
275        }
276
277        if tokenizer.peek_word("while") {
278            return Ok(Some(self.parse_python_while(tokenizer)?));
279        }
280
281        if tokenizer.peek_word("for") {
282            return Ok(Some(self.parse_python_for(tokenizer)?));
283        }
284
285        tokenizer.skip_line();
286        Ok(Some(AstNode::new("statement".to_string())))
287    }
288
289    fn parse_js_statement(&self, tokenizer: &mut SimpleTokenizer) -> Result<Option<AstNode>> {
290        tokenizer.skip_whitespace();
291
292        if tokenizer.is_at_end() {
293            return Ok(None);
294        }
295
296        if tokenizer.peek_word("if") {
297            return Ok(Some(self.parse_js_if(tokenizer)?));
298        }
299
300        if tokenizer.peek_word("while") {
301            return Ok(Some(self.parse_js_while(tokenizer)?));
302        }
303
304        if tokenizer.peek_word("for") {
305            return Ok(Some(self.parse_js_for(tokenizer)?));
306        }
307
308        if tokenizer.peek_word("switch") {
309            return Ok(Some(self.parse_js_switch(tokenizer)?));
310        }
311
312        tokenizer.skip_line();
313        Ok(Some(AstNode::new("statement".to_string())))
314    }
315
316    fn parse_if_statement(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
317        tokenizer.consume_word("if")?;
318        let _condition = self.parse_condition(tokenizer)?;
319        let then_branch = self.parse_block(tokenizer)?;
320
321        let mut children = vec![then_branch];
322
323        if tokenizer.peek_word("else") {
324            tokenizer.consume_word("else")?;
325            let else_branch = self.parse_block(tokenizer)?;
326            children.push(else_branch);
327        }
328
329        Ok(Self::create_node_with_children("if", children))
330    }
331
332    fn parse_while_statement(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
333        tokenizer.consume_word("while")?;
334        let _condition = self.parse_condition(tokenizer)?;
335        let body = self.parse_block(tokenizer)?;
336
337        Ok(Self::create_node_with_children("while", vec![body]))
338    }
339
340    fn parse_for_statement(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
341        tokenizer.consume_word("for")?;
342
343        // Simplified for loop parsing
344        let _init = "init".to_string();
345        let _condition = "condition".to_string();
346        let _update = "update".to_string();
347        let body = self.parse_block(tokenizer)?;
348
349        Ok(Self::create_node_with_children("for", vec![body]))
350    }
351
352    fn parse_match_statement(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
353        tokenizer.consume_word("match")?;
354        let _condition = self.parse_condition(tokenizer)?;
355
356        // Simplified match parsing - just count arms
357        let mut cases = Vec::new();
358
359        // Skip to opening brace and count patterns
360        while !tokenizer.is_at_end() && tokenizer.current_char() != Some('}') {
361            if tokenizer.current_char() == Some('=') && tokenizer.peek_ahead(1) == Some('>') {
362                cases.push(AstNode::new("match_arm".to_string()));
363            }
364            tokenizer.advance();
365        }
366
367        Ok(Self::create_node_with_children("match", cases))
368    }
369
370    fn parse_python_if(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
371        tokenizer.consume_word("if")?;
372        let condition = self.parse_condition(tokenizer)?;
373        let then_branch = Box::new(self.parse_python_block(tokenizer)?);
374
375        let else_branch = if tokenizer.peek_word("else") {
376            tokenizer.consume_word("else")?;
377            Some(Box::new(self.parse_python_block(tokenizer)?))
378        } else {
379            None
380        };
381
382        Ok(Self::create_node_with_children("if", vec![*then_branch]))
383    }
384
385    fn parse_python_while(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
386        tokenizer.consume_word("while")?;
387        let condition = self.parse_condition(tokenizer)?;
388        let body = Box::new(self.parse_python_block(tokenizer)?);
389
390        Ok(Self::create_node_with_children("while", vec![*body]))
391    }
392
393    fn parse_python_for(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
394        tokenizer.consume_word("for")?;
395
396        let init = "for_init".to_string();
397        let condition = "for_condition".to_string();
398        let update = "for_update".to_string();
399        let body = Box::new(self.parse_python_block(tokenizer)?);
400
401        Ok(Self::create_node_with_children("for", vec![*body]))
402    }
403
404    fn parse_js_if(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
405        tokenizer.consume_word("if")?;
406        let condition = self.parse_condition(tokenizer)?;
407        let then_branch = Box::new(self.parse_js_block(tokenizer)?);
408
409        let else_branch = if tokenizer.peek_word("else") {
410            tokenizer.consume_word("else")?;
411            Some(Box::new(self.parse_js_block(tokenizer)?))
412        } else {
413            None
414        };
415
416        Ok(Self::create_node_with_children("if", vec![*then_branch]))
417    }
418
419    fn parse_js_while(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
420        tokenizer.consume_word("while")?;
421        let condition = self.parse_condition(tokenizer)?;
422        let body = Box::new(self.parse_js_block(tokenizer)?);
423
424        Ok(Self::create_node_with_children("while", vec![*body]))
425    }
426
427    fn parse_js_for(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
428        tokenizer.consume_word("for")?;
429
430        let init = "for_init".to_string();
431        let condition = "for_condition".to_string();
432        let update = "for_update".to_string();
433        let body = Box::new(self.parse_js_block(tokenizer)?);
434
435        Ok(Self::create_node_with_children("for", vec![*body]))
436    }
437
438    fn parse_js_switch(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
439        tokenizer.consume_word("switch")?;
440        let condition = self.parse_condition(tokenizer)?;
441
442        let mut cases = Vec::new();
443
444        // Count case statements
445        while !tokenizer.is_at_end() {
446            if tokenizer.peek_word("case") || tokenizer.peek_word("default") {
447                cases.push(AstNode::new("case".to_string()));
448            }
449            tokenizer.advance();
450        }
451
452        Ok(Self::create_node_with_children("switch", cases))
453    }
454
455    fn parse_condition(&self, tokenizer: &mut SimpleTokenizer) -> Result<String> {
456        // Simple condition parsing - just collect until we hit a delimiter
457        let mut condition = String::new();
458
459        tokenizer.skip_whitespace();
460
461        while !tokenizer.is_at_end() {
462            let ch = tokenizer.peek_char().unwrap_or(' ');
463            if ch == '{' || ch == ':' || ch == '\n' {
464                break;
465            }
466            condition.push(ch);
467            tokenizer.advance();
468        }
469
470        Ok(condition.trim().to_string())
471    }
472
473    fn parse_block(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
474        let mut statements = Vec::new();
475
476        // Look for opening brace
477        tokenizer.skip_whitespace();
478        if tokenizer.peek_char() == Some('{') {
479            tokenizer.advance(); // consume '{'
480
481            let mut brace_count = 1;
482            while !tokenizer.is_at_end() && brace_count > 0 {
483                if tokenizer.peek_char() == Some('{') {
484                    brace_count += 1;
485                } else if tokenizer.peek_char() == Some('}') {
486                    brace_count -= 1;
487                }
488
489                if brace_count > 0 {
490                    if let Some(stmt) = self.parse_statement(tokenizer)? {
491                        statements.push(stmt);
492                    }
493                } else {
494                    tokenizer.advance(); // consume '}'
495                }
496            }
497        }
498
499        Ok(Self::create_node_with_children("block", statements))
500    }
501
502    fn parse_python_block(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
503        let mut statements = Vec::new();
504
505        // Python uses indentation
506        tokenizer.skip_line(); // Skip to next line
507
508        // For simplicity, just parse a few lines
509        for _ in 0..5 {
510            if tokenizer.is_at_end() {
511                break;
512            }
513            if let Some(stmt) = self.parse_python_statement(tokenizer)? {
514                statements.push(stmt);
515            }
516        }
517
518        Ok(Self::create_node_with_children("block", statements))
519    }
520
521    fn parse_js_block(&self, tokenizer: &mut SimpleTokenizer) -> Result<AstNode> {
522        // JavaScript blocks are similar to Rust
523        self.parse_block(tokenizer)
524    }
525}
526
impl Default for Parser {
    /// Delegates to [`Parser::new`]; the `expect` cannot currently
    /// fire because `new` always returns `Ok`.
    fn default() -> Self {
        Self::new().expect("Failed to create Parser")
    }
}