1use crate::Result;
7use oxur_smap::Span;
8
/// Cursor-based parser state for surface syntax.
///
/// Holds the full input text together with the bookkeeping needed to report
/// where each parsed form came from (file label, line, and column).
pub struct Parser {
    /// The complete text being parsed.
    source: String,
    /// Offset of the next unread character within `source`; starts at 0.
    position: usize,
    /// Line of the next unread character; the constructors start it at 1.
    line: usize,
    /// Column of the next unread character; the constructors start it at 1.
    column: usize,
    /// Label attached to every span produced while parsing.
    filename: String,
}
17
18impl Parser {
19 pub fn new(source: String) -> Self {
20 Self {
21 source,
22 position: 0,
23 line: 1, column: 1, filename: "<repl>".to_string(),
26 }
27 }
28
29 pub fn new_file(source: String, filename: String) -> Self {
31 Self { source, position: 0, line: 1, column: 1, filename }
32 }
33
34 pub fn parse(&mut self) -> Result<Vec<SurfaceForm>> {
36 let mut forms = Vec::new();
37
38 while !self.is_at_end() {
39 self.skip_whitespace();
40 if self.is_at_end() {
41 break;
42 }
43 forms.push(self.parse_form()?);
44 }
45
46 Ok(forms)
47 }
48
49 fn parse_form(&mut self) -> Result<SurfaceForm> {
50 self.skip_whitespace();
51
52 if self.is_at_end() {
53 return Err(crate::Error::Syntax("Unexpected end of input".to_string()));
54 }
55
56 let ch = self.current_char();
57
58 match ch {
59 '(' => self.parse_list(),
60 '"' => self.parse_string(),
61 '0'..='9' | '-' => self.parse_number(),
62 _ => self.parse_symbol(),
63 }
64 }
65
66 fn parse_list(&mut self) -> Result<SurfaceForm> {
67 let (start_line, start_column) = self.mark_position();
68
69 self.advance(); let mut elements = Vec::new();
71
72 loop {
73 self.skip_whitespace();
74
75 if self.is_at_end() {
76 return Err(crate::Error::Syntax("Unclosed list".to_string()));
77 }
78
79 if self.current_char() == ')' {
80 self.advance(); break;
82 }
83
84 elements.push(self.parse_form()?);
85 }
86
87 let span = self.make_span(start_line, start_column);
88 Ok(SurfaceForm::List { span, elements })
89 }
90
91 fn parse_string(&mut self) -> Result<SurfaceForm> {
92 let (start_line, start_column) = self.mark_position();
93
94 self.advance(); let start = self.position;
96
97 while !self.is_at_end() && self.current_char() != '"' {
98 self.advance();
99 }
100
101 if self.is_at_end() {
102 return Err(crate::Error::Syntax("Unclosed string".to_string()));
103 }
104
105 let value = self.source[start..self.position].to_string();
106 self.advance(); let span = self.make_span(start_line, start_column);
109 Ok(SurfaceForm::String { span, value })
110 }
111
112 fn parse_number(&mut self) -> Result<SurfaceForm> {
113 let (start_line, start_column) = self.mark_position();
114 let start = self.position;
115
116 if self.current_char() == '-' {
117 self.advance();
118 }
119
120 while !self.is_at_end() && self.current_char().is_ascii_digit() {
121 self.advance();
122 }
123
124 let num_str = &self.source[start..self.position];
125 let value = num_str
126 .parse::<i64>()
127 .map_err(|_| crate::Error::Syntax(format!("Invalid number: {}", num_str)))?;
128
129 let span = self.make_span(start_line, start_column);
130 Ok(SurfaceForm::Number { span, value })
131 }
132
133 fn parse_symbol(&mut self) -> Result<SurfaceForm> {
134 let (start_line, start_column) = self.mark_position();
135 let start = self.position;
136
137 while !self.is_at_end() && self.is_symbol_char(self.current_char()) {
138 self.advance();
139 }
140
141 let name = self.source[start..self.position].to_string();
142 let span = self.make_span(start_line, start_column);
143 Ok(SurfaceForm::Symbol { span, name })
144 }
145
146 fn is_symbol_char(&self, ch: char) -> bool {
147 !ch.is_whitespace() && ch != '(' && ch != ')' && ch != '"'
148 }
149
150 fn current_char(&self) -> char {
151 self.source.chars().nth(self.position).unwrap()
152 }
153
154 fn advance(&mut self) {
155 if self.position < self.source.len() {
156 let ch = self.current_char();
157 self.position += 1;
158
159 if ch == '\n' {
160 self.line += 1;
161 self.column = 1;
162 } else {
163 self.column += 1;
164 }
165 }
166 }
167
168 fn skip_whitespace(&mut self) {
169 while !self.is_at_end() && self.current_char().is_whitespace() {
170 self.advance();
171 }
172 }
173
174 fn current_pos(&self) -> (u32, u32) {
176 (self.line as u32, self.column as u32)
177 }
178
179 fn mark_position(&self) -> (u32, u32) {
181 self.current_pos()
182 }
183
184 fn make_span(&self, start_line: u32, start_column: u32) -> Span {
186 let (end_line, end_column) = self.current_pos();
187 Span::new(self.filename.clone(), start_line, start_column, end_line, end_column)
188 }
189
190 fn is_at_end(&self) -> bool {
191 self.position >= self.source.len()
192 }
193}
194
195#[derive(Debug, Clone)]
200pub enum SurfaceForm {
201 Symbol { span: Span, name: String },
203
204 Number { span: Span, value: i64 },
206
207 String { span: Span, value: String },
209
210 List { span: Span, elements: Vec<SurfaceForm> },
212}
213
214impl SurfaceForm {
215 pub fn span(&self) -> &Span {
217 match self {
218 SurfaceForm::Symbol { span, .. } => span,
219 SurfaceForm::Number { span, .. } => span,
220 SurfaceForm::String { span, .. } => span,
221 SurfaceForm::List { span, .. } => span,
222 }
223 }
224}
225
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `source` as REPL input and returns its forms, panicking if the
    /// parser reports an error.
    fn parse_all(source: &str) -> Vec<SurfaceForm> {
        Parser::new(source.to_string()).parse().unwrap()
    }

    // ---- construction -------------------------------------------------

    #[test]
    fn test_parser_creation() {
        let parser = Parser::new(String::from("(+ 1 2)"));
        assert_eq!(parser.position, 0);
    }

    #[test]
    fn test_parse_empty() {
        let forms = parse_all("");
        assert_eq!(forms.len(), 0);
    }

    // ---- SurfaceForm variants -----------------------------------------

    #[test]
    fn test_surface_form_symbol() {
        let form = SurfaceForm::Symbol { span: Span::repl(1, 1, 1, 5), name: "test".to_string() };
        if let SurfaceForm::Symbol { name, .. } = form {
            assert_eq!(name, "test");
        } else {
            panic!("Wrong variant");
        }
    }

    #[test]
    fn test_surface_form_number() {
        let form = SurfaceForm::Number { span: Span::repl(1, 1, 1, 3), value: 42 };
        if let SurfaceForm::Number { value, .. } = form {
            assert_eq!(value, 42);
        } else {
            panic!("Wrong variant");
        }
    }

    #[test]
    fn test_surface_form_string() {
        let form =
            SurfaceForm::String { span: Span::repl(1, 1, 1, 7), value: "hello".to_string() };
        if let SurfaceForm::String { value, .. } = form {
            assert_eq!(value, "hello");
        } else {
            panic!("Wrong variant");
        }
    }

    #[test]
    fn test_surface_form_list() {
        let form = SurfaceForm::List { span: Span::repl(1, 1, 1, 3), elements: vec![] };
        if let SurfaceForm::List { elements, .. } = form {
            assert_eq!(elements.len(), 0);
        } else {
            panic!("Wrong variant");
        }
    }

    // ---- parsing individual forms -------------------------------------

    #[test]
    fn test_parse_hello_world() {
        let source = r#"(deffn main ()
    (println! "Hello, world!"))"#;
        let forms = parse_all(source);
        assert_eq!(forms.len(), 1);

        match &forms[0] {
            SurfaceForm::List { elements, .. } => {
                // Expect at least the head symbol, the name, and the parameter list.
                assert!(elements.len() >= 3);
                match &elements[0] {
                    SurfaceForm::Symbol { name, .. } => assert_eq!(name, "deffn"),
                    _ => panic!("Expected Symbol(deffn)"),
                }
            }
            _ => panic!("Expected List"),
        }
    }

    #[test]
    fn test_parse_simple_list() {
        let forms = parse_all("(+ 1 2)");
        assert_eq!(forms.len(), 1);

        match &forms[0] {
            SurfaceForm::List { elements, .. } => assert_eq!(elements.len(), 3),
            _ => panic!("Expected List"),
        }
    }

    #[test]
    fn test_parse_string() {
        let forms = parse_all(r#""hello""#);
        assert_eq!(forms.len(), 1);

        match &forms[0] {
            SurfaceForm::String { value, .. } => assert_eq!(value, "hello"),
            _ => panic!("Expected String"),
        }
    }

    #[test]
    fn test_parse_number() {
        let forms = parse_all("42");
        assert_eq!(forms.len(), 1);

        match &forms[0] {
            SurfaceForm::Number { value, .. } => assert_eq!(*value, 42),
            _ => panic!("Expected Number"),
        }
    }

    #[test]
    fn test_parse_symbol() {
        let forms = parse_all("println!");
        assert_eq!(forms.len(), 1);

        match &forms[0] {
            SurfaceForm::Symbol { name, .. } => assert_eq!(name, "println!"),
            _ => panic!("Expected Symbol"),
        }
    }

    // ---- span tracking ------------------------------------------------
    // End columns are exclusive: they point one past the last character.

    #[test]
    fn test_span_tracking_symbol() {
        let forms = parse_all("hello");

        match &forms[0] {
            SurfaceForm::Symbol { span, name } => {
                assert_eq!(name, "hello");
                assert_eq!(span.start_line, 1);
                assert_eq!(span.start_column, 1);
                assert_eq!(span.end_line, 1);
                assert_eq!(span.end_column, 6);
            }
            _ => panic!("Expected Symbol"),
        }
    }

    #[test]
    fn test_span_tracking_list() {
        let forms = parse_all("(+ 1 2)");

        match &forms[0] {
            SurfaceForm::List { span, elements } => {
                assert_eq!(elements.len(), 3);
                assert_eq!(span.start_line, 1);
                assert_eq!(span.start_column, 1);
                assert_eq!(span.end_line, 1);
                assert_eq!(span.end_column, 8);
            }
            _ => panic!("Expected List"),
        }
    }

    #[test]
    fn test_span_tracking_multiline() {
        let source = r#"(deffn main ()
    (println! "test"))"#;
        let forms = parse_all(source);

        match &forms[0] {
            SurfaceForm::List { span, .. } => {
                assert_eq!(span.start_line, 1);
                assert_eq!(span.start_column, 1);
                assert_eq!(span.end_line, 2);
                assert!(span.end_line > span.start_line);
            }
            _ => panic!("Expected List"),
        }
    }

    #[test]
    fn test_span_tracking_number() {
        let forms = parse_all("42");

        match &forms[0] {
            SurfaceForm::Number { span, value } => {
                assert_eq!(*value, 42);
                assert_eq!(span.start_line, 1);
                assert_eq!(span.start_column, 1);
                assert_eq!(span.end_line, 1);
                assert_eq!(span.end_column, 3);
            }
            _ => panic!("Expected Number"),
        }
    }

    #[test]
    fn test_span_tracking_string() {
        let forms = parse_all(r#""hello""#);

        match &forms[0] {
            SurfaceForm::String { span, value } => {
                assert_eq!(value, "hello");
                assert_eq!(span.start_line, 1);
                assert_eq!(span.start_column, 1);
                assert_eq!(span.end_line, 1);
                // Five characters plus both quotes.
                assert_eq!(span.end_column, 8);
            }
            _ => panic!("Expected String"),
        }
    }

    #[test]
    fn test_span_tracking_negative_number() {
        let forms = parse_all("-42");

        match &forms[0] {
            SurfaceForm::Number { span, value } => {
                assert_eq!(*value, -42);
                assert_eq!(span.start_line, 1);
                assert_eq!(span.start_column, 1);
                assert_eq!(span.end_line, 1);
                assert_eq!(span.end_column, 4);
            }
            _ => panic!("Expected Number"),
        }
    }

    #[test]
    fn test_span_tracking_nested_elements() {
        let forms = parse_all("(+ 1 2)");

        let elements = match &forms[0] {
            SurfaceForm::List { elements, .. } => elements,
            _ => panic!("Expected List"),
        };

        match &elements[0] {
            SurfaceForm::Symbol { span, name } => {
                assert_eq!(name, "+");
                assert_eq!(span.start_line, 1);
                assert_eq!(span.start_column, 2);
                assert_eq!(span.end_column, 3);
            }
            _ => panic!("Expected Symbol"),
        }

        match &elements[1] {
            SurfaceForm::Number { span, value } => {
                assert_eq!(*value, 1);
                assert_eq!(span.start_line, 1);
                assert_eq!(span.start_column, 4);
                assert_eq!(span.end_column, 5);
            }
            _ => panic!("Expected Number"),
        }

        match &elements[2] {
            SurfaceForm::Number { span, value } => {
                assert_eq!(*value, 2);
                assert_eq!(span.start_line, 1);
                assert_eq!(span.start_column, 6);
                assert_eq!(span.end_column, 7);
            }
            _ => panic!("Expected Number"),
        }
    }

    #[test]
    fn test_span_tracking_empty_list() {
        let forms = parse_all("()");

        match &forms[0] {
            SurfaceForm::List { span, elements } => {
                assert_eq!(elements.len(), 0);
                assert_eq!(span.start_line, 1);
                assert_eq!(span.start_column, 1);
                assert_eq!(span.end_line, 1);
                assert_eq!(span.end_column, 3);
            }
            _ => panic!("Expected List"),
        }
    }

    #[test]
    fn test_span_tracking_multiple_forms() {
        let forms = parse_all("42 \"test\" symbol");

        assert_eq!(forms.len(), 3);

        match &forms[0] {
            SurfaceForm::Number { span, value } => {
                assert_eq!(*value, 42);
                assert_eq!(span.start_column, 1);
                assert_eq!(span.end_column, 3);
            }
            _ => panic!("Expected Number"),
        }

        match &forms[1] {
            SurfaceForm::String { span, value } => {
                assert_eq!(value, "test");
                assert_eq!(span.start_column, 4);
                assert_eq!(span.end_column, 10);
            }
            _ => panic!("Expected String"),
        }

        match &forms[2] {
            SurfaceForm::Symbol { span, name } => {
                assert_eq!(name, "symbol");
                assert_eq!(span.start_column, 11);
                assert_eq!(span.end_column, 17);
            }
            _ => panic!("Expected Symbol"),
        }
    }

    // ---- parser state -------------------------------------------------

    #[test]
    fn test_parser_new_file() {
        let parser = Parser::new_file(String::from("(+ 1 2)"), String::from("test.oxur"));
        assert_eq!(parser.filename, "test.oxur");
        assert_eq!(parser.line, 1);
        assert_eq!(parser.column, 1);
    }

    #[test]
    fn test_current_position() {
        let parser = Parser::new(String::from("hello"));
        let (line, col) = parser.current_pos();
        assert_eq!(line, 1);
        assert_eq!(col, 1);
    }

    // ---- error reporting ----------------------------------------------

    #[test]
    fn test_error_unclosed_list() {
        let result = Parser::new(String::from("(+ 1 2")).parse();

        assert!(result.is_err());
        match result {
            Err(crate::Error::Syntax(msg)) => assert_eq!(msg, "Unclosed list"),
            _ => panic!("Expected Syntax error for unclosed list"),
        }
    }

    #[test]
    fn test_error_unclosed_string() {
        let result = Parser::new(String::from(r#""hello"#)).parse();

        assert!(result.is_err());
        match result {
            Err(crate::Error::Syntax(msg)) => assert_eq!(msg, "Unclosed string"),
            _ => panic!("Expected Syntax error for unclosed string"),
        }
    }

    #[test]
    fn test_parse_invalid_number() {
        // A lone minus sign is routed to the number parser and has no digits.
        let result = Parser::new(String::from("-")).parse();

        assert!(result.is_err());
        match result {
            Err(crate::Error::Syntax(msg)) => {
                assert!(msg.contains("Invalid number") || msg.contains("Unexpected end"));
            }
            _ => panic!("Expected Syntax error for invalid number"),
        }
    }
}