oxur_lang/
parser.rs

1//! Stage 1: Parse
2//!
3//! Converts raw Oxur source text into Surface Forms (S-expression AST).
4//! Handles tokenization, reader, and reader macros.
5
6use crate::Result;
7use oxur_smap::Span;
8
/// Parser converts Oxur source text into Surface Forms
///
/// A parser owns the complete source text together with a cursor into it.
/// The cursor is tracked both as a byte offset (for slicing the source) and
/// as a 1-indexed line/column pair (for building spans).
#[derive(Debug, Clone)]
pub struct Parser {
    /// Complete source text being parsed.
    source: String,
    /// Byte offset of the cursor in `source`.
    position: usize,
    /// Current line (1-indexed).
    line: usize,
    /// Current column (1-indexed).
    column: usize,
    /// Source filename (or `<repl>`).
    filename: String,
}
17
18impl Parser {
19    pub fn new(source: String) -> Self {
20        Self {
21            source,
22            position: 0,
23            line: 1,   // 1-indexed
24            column: 1, // 1-indexed
25            filename: "<repl>".to_string(),
26        }
27    }
28
29    /// Create a parser for a named file
30    pub fn new_file(source: String, filename: String) -> Self {
31        Self { source, position: 0, line: 1, column: 1, filename }
32    }
33
34    /// Parse the source into Surface Forms
35    pub fn parse(&mut self) -> Result<Vec<SurfaceForm>> {
36        let mut forms = Vec::new();
37
38        while !self.is_at_end() {
39            self.skip_whitespace();
40            if self.is_at_end() {
41                break;
42            }
43            forms.push(self.parse_form()?);
44        }
45
46        Ok(forms)
47    }
48
49    fn parse_form(&mut self) -> Result<SurfaceForm> {
50        self.skip_whitespace();
51
52        if self.is_at_end() {
53            return Err(crate::Error::Syntax("Unexpected end of input".to_string()));
54        }
55
56        let ch = self.current_char();
57
58        match ch {
59            '(' => self.parse_list(),
60            '"' => self.parse_string(),
61            '0'..='9' | '-' => self.parse_number(),
62            _ => self.parse_symbol(),
63        }
64    }
65
66    fn parse_list(&mut self) -> Result<SurfaceForm> {
67        let (start_line, start_column) = self.mark_position();
68
69        self.advance(); // consume '('
70        let mut elements = Vec::new();
71
72        loop {
73            self.skip_whitespace();
74
75            if self.is_at_end() {
76                return Err(crate::Error::Syntax("Unclosed list".to_string()));
77            }
78
79            if self.current_char() == ')' {
80                self.advance(); // consume ')'
81                break;
82            }
83
84            elements.push(self.parse_form()?);
85        }
86
87        let span = self.make_span(start_line, start_column);
88        Ok(SurfaceForm::List { span, elements })
89    }
90
91    fn parse_string(&mut self) -> Result<SurfaceForm> {
92        let (start_line, start_column) = self.mark_position();
93
94        self.advance(); // consume opening '"'
95        let start = self.position;
96
97        while !self.is_at_end() && self.current_char() != '"' {
98            self.advance();
99        }
100
101        if self.is_at_end() {
102            return Err(crate::Error::Syntax("Unclosed string".to_string()));
103        }
104
105        let value = self.source[start..self.position].to_string();
106        self.advance(); // consume closing '"'
107
108        let span = self.make_span(start_line, start_column);
109        Ok(SurfaceForm::String { span, value })
110    }
111
112    fn parse_number(&mut self) -> Result<SurfaceForm> {
113        let (start_line, start_column) = self.mark_position();
114        let start = self.position;
115
116        if self.current_char() == '-' {
117            self.advance();
118        }
119
120        while !self.is_at_end() && self.current_char().is_ascii_digit() {
121            self.advance();
122        }
123
124        let num_str = &self.source[start..self.position];
125        let value = num_str
126            .parse::<i64>()
127            .map_err(|_| crate::Error::Syntax(format!("Invalid number: {}", num_str)))?;
128
129        let span = self.make_span(start_line, start_column);
130        Ok(SurfaceForm::Number { span, value })
131    }
132
133    fn parse_symbol(&mut self) -> Result<SurfaceForm> {
134        let (start_line, start_column) = self.mark_position();
135        let start = self.position;
136
137        while !self.is_at_end() && self.is_symbol_char(self.current_char()) {
138            self.advance();
139        }
140
141        let name = self.source[start..self.position].to_string();
142        let span = self.make_span(start_line, start_column);
143        Ok(SurfaceForm::Symbol { span, name })
144    }
145
146    fn is_symbol_char(&self, ch: char) -> bool {
147        !ch.is_whitespace() && ch != '(' && ch != ')' && ch != '"'
148    }
149
150    fn current_char(&self) -> char {
151        self.source.chars().nth(self.position).unwrap()
152    }
153
154    fn advance(&mut self) {
155        if self.position < self.source.len() {
156            let ch = self.current_char();
157            self.position += 1;
158
159            if ch == '\n' {
160                self.line += 1;
161                self.column = 1;
162            } else {
163                self.column += 1;
164            }
165        }
166    }
167
168    fn skip_whitespace(&mut self) {
169        while !self.is_at_end() && self.current_char().is_whitespace() {
170            self.advance();
171        }
172    }
173
174    /// Get current position as (line, column) tuple
175    fn current_pos(&self) -> (u32, u32) {
176        (self.line as u32, self.column as u32)
177    }
178
179    /// Mark current position for span tracking
180    fn mark_position(&self) -> (u32, u32) {
181        self.current_pos()
182    }
183
184    /// Create a span from start position to current position
185    fn make_span(&self, start_line: u32, start_column: u32) -> Span {
186        let (end_line, end_column) = self.current_pos();
187        Span::new(self.filename.clone(), start_line, start_column, end_line, end_column)
188    }
189
190    fn is_at_end(&self) -> bool {
191        self.position >= self.source.len()
192    }
193}
194
195/// Surface Forms - parsed S-expressions before macro expansion
196///
197/// Each variant includes a Span tracking its source location for
198/// error reporting and debugging.
199#[derive(Debug, Clone)]
200pub enum SurfaceForm {
201    /// A symbol (identifier, operator, etc.)
202    Symbol { span: Span, name: String },
203
204    /// A numeric literal
205    Number { span: Span, value: i64 },
206
207    /// A string literal
208    String { span: Span, value: String },
209
210    /// A list (parenthesized expression)
211    List { span: Span, elements: Vec<SurfaceForm> },
212}
213
214impl SurfaceForm {
215    /// Get the span of this surface form
216    pub fn span(&self) -> &Span {
217        match self {
218            SurfaceForm::Symbol { span, .. } => span,
219            SurfaceForm::Number { span, .. } => span,
220            SurfaceForm::String { span, .. } => span,
221            SurfaceForm::List { span, .. } => span,
222        }
223    }
224}
225
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `text` as REPL input, panicking if the parser reports an error.
    fn parse_all(text: &str) -> Vec<SurfaceForm> {
        Parser::new(text.to_string()).parse().unwrap()
    }

    // ---- construction -------------------------------------------------

    #[test]
    fn test_parser_creation() {
        assert_eq!(Parser::new("(+ 1 2)".to_string()).position, 0);
    }

    #[test]
    fn test_parse_empty() {
        let outcome = Parser::new("".to_string()).parse();
        assert!(outcome.is_ok());
        assert_eq!(outcome.unwrap().len(), 0);
    }

    // ---- SurfaceForm variants built by hand ---------------------------

    #[test]
    fn test_surface_form_symbol() {
        let form = SurfaceForm::Symbol { span: Span::repl(1, 1, 1, 5), name: "test".to_string() };
        match form {
            SurfaceForm::Symbol { name, .. } => assert_eq!(name, "test"),
            _ => panic!("Wrong variant"),
        }
    }

    #[test]
    fn test_surface_form_number() {
        let form = SurfaceForm::Number { span: Span::repl(1, 1, 1, 3), value: 42 };
        match form {
            SurfaceForm::Number { value, .. } => assert_eq!(value, 42),
            _ => panic!("Wrong variant"),
        }
    }

    #[test]
    fn test_surface_form_string() {
        let form = SurfaceForm::String { span: Span::repl(1, 1, 1, 7), value: "hello".to_string() };
        match form {
            SurfaceForm::String { value, .. } => assert_eq!(value, "hello"),
            _ => panic!("Wrong variant"),
        }
    }

    #[test]
    fn test_surface_form_list() {
        let form = SurfaceForm::List { span: Span::repl(1, 1, 1, 3), elements: vec![] };
        match form {
            SurfaceForm::List { elements, .. } => assert_eq!(elements.len(), 0),
            _ => panic!("Wrong variant"),
        }
    }

    // ---- parsed values ------------------------------------------------

    #[test]
    fn test_parse_hello_world() {
        let forms = parse_all("(deffn main ()\n  (println! \"Hello, world!\"))");
        assert_eq!(forms.len(), 1);

        // The single top-level form is a list whose head is `deffn`.
        let elements = match &forms[0] {
            SurfaceForm::List { elements, .. } => elements,
            _ => panic!("Expected List"),
        };
        assert!(elements.len() >= 3);
        match &elements[0] {
            SurfaceForm::Symbol { name, .. } => assert_eq!(name, "deffn"),
            _ => panic!("Expected Symbol(deffn)"),
        }
    }

    #[test]
    fn test_parse_simple_list() {
        let forms = parse_all("(+ 1 2)");
        assert_eq!(forms.len(), 1);

        match &forms[0] {
            SurfaceForm::List { elements, .. } => assert_eq!(elements.len(), 3),
            _ => panic!("Expected List"),
        }
    }

    #[test]
    fn test_parse_string() {
        let forms = parse_all("\"hello\"");
        assert_eq!(forms.len(), 1);

        match &forms[0] {
            SurfaceForm::String { value, .. } => assert_eq!(value, "hello"),
            _ => panic!("Expected String"),
        }
    }

    #[test]
    fn test_parse_number() {
        let forms = parse_all("42");
        assert_eq!(forms.len(), 1);

        match &forms[0] {
            SurfaceForm::Number { value, .. } => assert_eq!(*value, 42),
            _ => panic!("Expected Number"),
        }
    }

    #[test]
    fn test_parse_symbol() {
        let forms = parse_all("println!");
        assert_eq!(forms.len(), 1);

        match &forms[0] {
            SurfaceForm::Symbol { name, .. } => assert_eq!(name, "println!"),
            _ => panic!("Expected Symbol"),
        }
    }

    // ---- span tracking ------------------------------------------------
    // Span ends are exclusive: the end column is one past the last character.

    #[test]
    fn test_span_tracking_symbol() {
        let forms = parse_all("hello");

        match &forms[0] {
            SurfaceForm::Symbol { span, name } => {
                assert_eq!(name, "hello");
                // Ends after 'o'.
                assert_eq!(
                    (span.start_line, span.start_column, span.end_line, span.end_column),
                    (1, 1, 1, 6)
                );
            }
            _ => panic!("Expected Symbol"),
        }
    }

    #[test]
    fn test_span_tracking_list() {
        let forms = parse_all("(+ 1 2)");

        match &forms[0] {
            SurfaceForm::List { span, elements } => {
                assert_eq!(elements.len(), 3);
                // Ends after ')'.
                assert_eq!(
                    (span.start_line, span.start_column, span.end_line, span.end_column),
                    (1, 1, 1, 8)
                );
            }
            _ => panic!("Expected List"),
        }
    }

    #[test]
    fn test_span_tracking_multiline() {
        let forms = parse_all("(deffn main ()\n  (println! \"test\"))");

        match &forms[0] {
            SurfaceForm::List { span, .. } => {
                assert_eq!((span.start_line, span.start_column), (1, 1));
                assert_eq!(span.end_line, 2);
                // The list closes on the second line.
                assert!(span.end_line > span.start_line);
            }
            _ => panic!("Expected List"),
        }
    }

    #[test]
    fn test_span_tracking_number() {
        let forms = parse_all("42");

        match &forms[0] {
            SurfaceForm::Number { span, value } => {
                assert_eq!(*value, 42);
                // Ends after '2'.
                assert_eq!(
                    (span.start_line, span.start_column, span.end_line, span.end_column),
                    (1, 1, 1, 3)
                );
            }
            _ => panic!("Expected Number"),
        }
    }

    #[test]
    fn test_span_tracking_string() {
        let forms = parse_all("\"hello\"");

        match &forms[0] {
            SurfaceForm::String { span, value } => {
                assert_eq!(value, "hello");
                // Ends after the closing '"'.
                assert_eq!(
                    (span.start_line, span.start_column, span.end_line, span.end_column),
                    (1, 1, 1, 8)
                );
            }
            _ => panic!("Expected String"),
        }
    }

    #[test]
    fn test_span_tracking_negative_number() {
        let forms = parse_all("-42");

        match &forms[0] {
            SurfaceForm::Number { span, value } => {
                assert_eq!(*value, -42);
                // Ends after '2'; the sign is part of the span.
                assert_eq!(
                    (span.start_line, span.start_column, span.end_line, span.end_column),
                    (1, 1, 1, 4)
                );
            }
            _ => panic!("Expected Number"),
        }
    }

    #[test]
    fn test_span_tracking_nested_elements() {
        let forms = parse_all("(+ 1 2)");

        let elements = match &forms[0] {
            SurfaceForm::List { elements, .. } => elements,
            _ => panic!("Expected List"),
        };

        // Operator symbol.
        match &elements[0] {
            SurfaceForm::Symbol { span, name } => {
                assert_eq!(name, "+");
                assert_eq!((span.start_line, span.start_column, span.end_column), (1, 2, 3));
            }
            _ => panic!("Expected Symbol"),
        }

        // First operand.
        match &elements[1] {
            SurfaceForm::Number { span, value } => {
                assert_eq!(*value, 1);
                assert_eq!((span.start_line, span.start_column, span.end_column), (1, 4, 5));
            }
            _ => panic!("Expected Number"),
        }

        // Second operand.
        match &elements[2] {
            SurfaceForm::Number { span, value } => {
                assert_eq!(*value, 2);
                assert_eq!((span.start_line, span.start_column, span.end_column), (1, 6, 7));
            }
            _ => panic!("Expected Number"),
        }
    }

    #[test]
    fn test_span_tracking_empty_list() {
        let forms = parse_all("()");

        match &forms[0] {
            SurfaceForm::List { span, elements } => {
                assert_eq!(elements.len(), 0);
                // Ends after ')'.
                assert_eq!(
                    (span.start_line, span.start_column, span.end_line, span.end_column),
                    (1, 1, 1, 3)
                );
            }
            _ => panic!("Expected List"),
        }
    }

    #[test]
    fn test_span_tracking_multiple_forms() {
        let forms = parse_all("42 \"test\" symbol");
        assert_eq!(forms.len(), 3);

        // Number, then string, then symbol, each with its own column range.
        match &forms[0] {
            SurfaceForm::Number { span, value } => {
                assert_eq!(*value, 42);
                assert_eq!((span.start_column, span.end_column), (1, 3));
            }
            _ => panic!("Expected Number"),
        }

        match &forms[1] {
            SurfaceForm::String { span, value } => {
                assert_eq!(value, "test");
                assert_eq!((span.start_column, span.end_column), (4, 10));
            }
            _ => panic!("Expected String"),
        }

        match &forms[2] {
            SurfaceForm::Symbol { span, name } => {
                assert_eq!(name, "symbol");
                assert_eq!((span.start_column, span.end_column), (11, 17));
            }
            _ => panic!("Expected Symbol"),
        }
    }

    // ---- parser state -------------------------------------------------

    #[test]
    fn test_parser_new_file() {
        let parser = Parser::new_file("(+ 1 2)".to_string(), "test.oxur".to_string());
        assert_eq!(parser.filename, "test.oxur");
        assert_eq!((parser.line, parser.column), (1, 1));
    }

    #[test]
    fn test_current_position() {
        let (line, col) = Parser::new("hello".to_string()).current_pos();
        assert_eq!(line, 1);
        assert_eq!(col, 1);
    }

    // ---- error reporting ----------------------------------------------

    #[test]
    fn test_error_unclosed_list() {
        let outcome = Parser::new("(+ 1 2".to_string()).parse();
        assert!(outcome.is_err());

        match outcome {
            Err(crate::Error::Syntax(msg)) => assert_eq!(msg, "Unclosed list"),
            _ => panic!("Expected Syntax error for unclosed list"),
        }
    }

    #[test]
    fn test_error_unclosed_string() {
        let outcome = Parser::new("\"hello".to_string()).parse();
        assert!(outcome.is_err());

        match outcome {
            Err(crate::Error::Syntax(msg)) => assert_eq!(msg, "Unclosed string"),
            _ => panic!("Expected Syntax error for unclosed string"),
        }
    }

    #[test]
    fn test_parse_invalid_number() {
        let outcome = Parser::new("-".to_string()).parse();
        assert!(outcome.is_err());

        match outcome {
            Err(crate::Error::Syntax(msg)) => {
                assert!(msg.contains("Invalid number") || msg.contains("Unexpected end"))
            }
            _ => panic!("Expected Syntax error for invalid number"),
        }
    }
}