1use crate::ast::{
12 Include, MatchArm, Pattern, Program, SourceLocation, Statement, UnionDef, UnionField,
13 UnionVariant, WordDef,
14};
15use crate::types::{Effect, SideEffect, StackType, Type};
16
/// A single lexical token plus the position at which it starts.
///
/// `line` and `column` are 0-based as produced by `tokenize`; user-facing
/// messages add 1 when printing them.
#[derive(Debug, Clone)]
pub struct Token {
    /// Token text exactly as the tokenizer emitted it.
    pub text: String,
    /// 0-based line on which the token starts.
    pub line: usize,
    /// 0-based column (counted in characters) at which the token starts.
    pub column: usize,
}

impl Token {
    /// Bundles a token text with its starting position.
    fn new(text: String, line: usize, column: usize) -> Self {
        Self { text, line, column }
    }
}

/// Lets a token be compared directly against a string slice reference.
impl PartialEq<&str> for Token {
    fn eq(&self, rhs: &&str) -> bool {
        self.text.as_str() == *rhs
    }
}

/// Lets a token be compared directly against an unsized `str`.
impl PartialEq<str> for Token {
    fn eq(&self, rhs: &str) -> bool {
        self.text.as_str() == rhs
    }
}
44
/// Parser state: the token stream of one source text plus a read cursor.
pub struct Parser {
    /// Every token of the source, produced once by `tokenize` in `Parser::new`.
    tokens: Vec<Token>,
    /// Index into `tokens` of the next token to be consumed.
    pos: usize,
    /// Id assigned to the next parsed quotation; bumped after each one.
    next_quotation_id: usize,
}
52
53impl Parser {
54 pub fn new(source: &str) -> Self {
55 let tokens = tokenize(source);
56 Parser {
57 tokens,
58 pos: 0,
59 next_quotation_id: 0,
60 }
61 }
62
63 pub fn parse(&mut self) -> Result<Program, String> {
64 let mut program = Program::new();
65
66 if let Some(error_token) = self.tokens.iter().find(|t| *t == "<<<UNCLOSED_STRING>>>") {
68 return Err(format!(
69 "Unclosed string literal at line {}, column {} - missing closing quote",
70 error_token.line + 1, error_token.column + 1
72 ));
73 }
74
75 while !self.is_at_end() {
76 self.skip_comments();
77 if self.is_at_end() {
78 break;
79 }
80
81 if self.check("include") {
83 let include = self.parse_include()?;
84 program.includes.push(include);
85 continue;
86 }
87
88 if self.check("union") {
90 let union_def = self.parse_union_def()?;
91 program.unions.push(union_def);
92 continue;
93 }
94
95 let word = self.parse_word_def()?;
96 program.words.push(word);
97 }
98
99 Ok(program)
100 }
101
102 fn parse_include(&mut self) -> Result<Include, String> {
107 self.consume("include");
108
109 let token = self
110 .advance()
111 .ok_or("Expected module name after 'include'")?
112 .clone();
113
114 if token == "std" {
116 if !self.consume(":") {
118 return Err("Expected ':' after 'std' in include statement".to_string());
119 }
120 let name = self
122 .advance()
123 .ok_or("Expected module name after 'std:'")?
124 .clone();
125 return Ok(Include::Std(name));
126 }
127
128 if token == "ffi" {
130 if !self.consume(":") {
132 return Err("Expected ':' after 'ffi' in include statement".to_string());
133 }
134 let name = self
136 .advance()
137 .ok_or("Expected library name after 'ffi:'")?
138 .clone();
139 return Ok(Include::Ffi(name));
140 }
141
142 if token.starts_with('"') && token.ends_with('"') {
144 let path = token.trim_start_matches('"').trim_end_matches('"');
145 return Ok(Include::Relative(path.to_string()));
146 }
147
148 Err(format!(
149 "Invalid include syntax '{}'. Use 'include std:name', 'include ffi:lib', or 'include \"path\"'",
150 token
151 ))
152 }
153
154 fn parse_union_def(&mut self) -> Result<UnionDef, String> {
161 let start_line = self.current_token().map(|t| t.line).unwrap_or(0);
163
164 self.consume("union");
166
167 let name = self
169 .advance()
170 .ok_or("Expected union name after 'union'")?
171 .clone();
172
173 if !name
174 .chars()
175 .next()
176 .map(|c| c.is_uppercase())
177 .unwrap_or(false)
178 {
179 return Err(format!(
180 "Union name '{}' must start with an uppercase letter",
181 name
182 ));
183 }
184
185 self.skip_comments();
187
188 if !self.consume("{") {
190 return Err(format!(
191 "Expected '{{' after union name '{}', got '{}'",
192 name,
193 self.current()
194 ));
195 }
196
197 let mut variants = Vec::new();
199 loop {
200 self.skip_comments();
201
202 if self.check("}") {
203 break;
204 }
205
206 if self.is_at_end() {
207 return Err(format!("Unexpected end of file in union '{}'", name));
208 }
209
210 variants.push(self.parse_union_variant()?);
211 }
212
213 let end_line = self.current_token().map(|t| t.line).unwrap_or(start_line);
215
216 self.consume("}");
218
219 if variants.is_empty() {
220 return Err(format!("Union '{}' must have at least one variant", name));
221 }
222
223 let mut seen_variants = std::collections::HashSet::new();
225 for variant in &variants {
226 if !seen_variants.insert(&variant.name) {
227 return Err(format!(
228 "Duplicate variant name '{}' in union '{}'",
229 variant.name, name
230 ));
231 }
232 }
233
234 Ok(UnionDef {
235 name,
236 variants,
237 source: Some(SourceLocation::span(
238 std::path::PathBuf::new(),
239 start_line,
240 end_line,
241 )),
242 })
243 }
244
245 fn parse_union_variant(&mut self) -> Result<UnionVariant, String> {
249 let start_line = self.current_token().map(|t| t.line).unwrap_or(0);
250
251 let name = self.advance().ok_or("Expected variant name")?.clone();
253
254 if !name
255 .chars()
256 .next()
257 .map(|c| c.is_uppercase())
258 .unwrap_or(false)
259 {
260 return Err(format!(
261 "Variant name '{}' must start with an uppercase letter",
262 name
263 ));
264 }
265
266 self.skip_comments();
267
268 let fields = if self.check("{") {
270 self.consume("{");
271 let fields = self.parse_union_fields()?;
272 if !self.consume("}") {
273 return Err(format!("Expected '}}' after variant '{}' fields", name));
274 }
275 fields
276 } else {
277 Vec::new()
278 };
279
280 Ok(UnionVariant {
281 name,
282 fields,
283 source: Some(SourceLocation::new(std::path::PathBuf::new(), start_line)),
284 })
285 }
286
287 fn parse_union_fields(&mut self) -> Result<Vec<UnionField>, String> {
289 let mut fields = Vec::new();
290
291 loop {
292 self.skip_comments();
293
294 if self.check("}") {
295 break;
296 }
297
298 let field_name = self.advance().ok_or("Expected field name")?.clone();
300
301 if !self.consume(":") {
303 return Err(format!(
304 "Expected ':' after field name '{}', got '{}'",
305 field_name,
306 self.current()
307 ));
308 }
309
310 let type_name = self
312 .advance()
313 .ok_or("Expected type name after ':'")?
314 .clone();
315
316 fields.push(UnionField {
317 name: field_name,
318 type_name,
319 });
320
321 self.skip_comments();
323 self.consume(",");
324 }
325
326 let mut seen_fields = std::collections::HashSet::new();
328 for field in &fields {
329 if !seen_fields.insert(&field.name) {
330 return Err(format!("Duplicate field name '{}' in variant", field.name));
331 }
332 }
333
334 Ok(fields)
335 }
336
337 fn parse_word_def(&mut self) -> Result<WordDef, String> {
338 let start_line = self.current_token().map(|t| t.line).unwrap_or(0);
340
341 if !self.consume(":") {
343 return Err(format!(
344 "Expected ':' to start word definition, got '{}'",
345 self.current()
346 ));
347 }
348
349 let name = self
351 .advance()
352 .ok_or("Expected word name after ':'")?
353 .clone();
354
355 let effect = if self.check("(") {
357 Some(self.parse_stack_effect()?)
358 } else {
359 None
360 };
361
362 let mut body = Vec::new();
364 while !self.check(";") {
365 if self.is_at_end() {
366 return Err(format!("Unexpected end of file in word '{}'", name));
367 }
368
369 self.skip_comments();
371 if self.check(";") {
372 break;
373 }
374
375 body.push(self.parse_statement()?);
376 }
377
378 let end_line = self.current_token().map(|t| t.line).unwrap_or(start_line);
380
381 self.consume(";");
383
384 Ok(WordDef {
385 name,
386 effect,
387 body,
388 source: Some(crate::ast::SourceLocation::span(
389 std::path::PathBuf::new(),
390 start_line,
391 end_line,
392 )),
393 })
394 }
395
396 fn parse_statement(&mut self) -> Result<Statement, String> {
397 use crate::ast::Span;
398 let tok = self.advance_token().ok_or("Unexpected end of file")?;
399 let token = &tok.text;
400 let tok_line = tok.line;
401 let tok_column = tok.column;
402 let tok_len = tok.text.len();
403
404 if let Some(f) = is_float_literal(token)
407 .then(|| token.parse::<f64>().ok())
408 .flatten()
409 {
410 return Ok(Statement::FloatLiteral(f));
411 }
412
413 if let Some(hex) = token
415 .strip_prefix("0x")
416 .or_else(|| token.strip_prefix("0X"))
417 {
418 return i64::from_str_radix(hex, 16)
419 .map(Statement::IntLiteral)
420 .map_err(|_| format!("Invalid hex literal: {}", token));
421 }
422
423 if let Some(bin) = token
425 .strip_prefix("0b")
426 .or_else(|| token.strip_prefix("0B"))
427 {
428 return i64::from_str_radix(bin, 2)
429 .map(Statement::IntLiteral)
430 .map_err(|_| format!("Invalid binary literal: {}", token));
431 }
432
433 if let Ok(n) = token.parse::<i64>() {
435 return Ok(Statement::IntLiteral(n));
436 }
437
438 if token == "true" {
440 return Ok(Statement::BoolLiteral(true));
441 }
442 if token == "false" {
443 return Ok(Statement::BoolLiteral(false));
444 }
445
446 if token == ":" {
448 let name_tok = self
450 .advance_token()
451 .ok_or("Expected symbol name after ':', got end of input")?;
452 let name = &name_tok.text;
453 if name.is_empty() {
455 return Err("Symbol name cannot be empty".to_string());
456 }
457 if name.starts_with(|c: char| c.is_ascii_digit()) {
458 return Err(format!(
459 "Symbol name cannot start with a digit: ':{}'\n Hint: Symbol names must start with a letter",
460 name
461 ));
462 }
463 if let Some(bad_char) = name.chars().find(|c| {
464 !c.is_alphanumeric()
465 && *c != '-'
466 && *c != '_'
467 && *c != '.'
468 && *c != '?'
469 && *c != '!'
470 }) {
471 return Err(format!(
472 "Symbol name contains invalid character '{}': ':{}'\n Hint: Allowed: letters, digits, - _ . ? !",
473 bad_char, name
474 ));
475 }
476 return Ok(Statement::Symbol(name.clone()));
477 }
478
479 if token.starts_with('"') {
481 if token.len() < 2 || !token.ends_with('"') {
483 return Err(format!("Malformed string literal: {}", token));
484 }
485 let raw = &token[1..token.len() - 1];
488 let unescaped = unescape_string(raw)?;
489 return Ok(Statement::StringLiteral(unescaped));
490 }
491
492 if token == "if" {
494 return self.parse_if();
495 }
496
497 if token == "[" {
499 return self.parse_quotation(tok_line, tok_column);
500 }
501
502 if token == "match" {
504 return self.parse_match();
505 }
506
507 Ok(Statement::WordCall {
509 name: token.to_string(),
510 span: Some(Span::new(tok_line, tok_column, tok_len)),
511 })
512 }
513
514 fn parse_if(&mut self) -> Result<Statement, String> {
515 let mut then_branch = Vec::new();
516
517 loop {
519 if self.is_at_end() {
520 return Err("Unexpected end of file in 'if' statement".to_string());
521 }
522
523 self.skip_comments();
525
526 if self.check("else") {
527 self.advance();
528 break;
530 }
531
532 if self.check("then") {
533 self.advance();
534 return Ok(Statement::If {
536 then_branch,
537 else_branch: None,
538 });
539 }
540
541 then_branch.push(self.parse_statement()?);
542 }
543
544 let mut else_branch = Vec::new();
546 loop {
547 if self.is_at_end() {
548 return Err("Unexpected end of file in 'else' branch".to_string());
549 }
550
551 self.skip_comments();
553
554 if self.check("then") {
555 self.advance();
556 return Ok(Statement::If {
557 then_branch,
558 else_branch: Some(else_branch),
559 });
560 }
561
562 else_branch.push(self.parse_statement()?);
563 }
564 }
565
566 fn parse_quotation(
567 &mut self,
568 start_line: usize,
569 start_column: usize,
570 ) -> Result<Statement, String> {
571 use crate::ast::QuotationSpan;
572 let mut body = Vec::new();
573
574 loop {
576 if self.is_at_end() {
577 return Err("Unexpected end of file in quotation".to_string());
578 }
579
580 self.skip_comments();
582
583 if self.check("]") {
584 let end_tok = self.advance_token().unwrap();
585 let end_line = end_tok.line;
586 let end_column = end_tok.column + 1; let id = self.next_quotation_id;
588 self.next_quotation_id += 1;
589 let span = QuotationSpan::new(start_line, start_column, end_line, end_column);
591 return Ok(Statement::Quotation {
592 id,
593 body,
594 span: Some(span),
595 });
596 }
597
598 body.push(self.parse_statement()?);
599 }
600 }
601
602 fn parse_match(&mut self) -> Result<Statement, String> {
609 let mut arms = Vec::new();
610
611 loop {
612 self.skip_comments();
613
614 if self.check("end") {
616 self.advance();
617 break;
618 }
619
620 if self.is_at_end() {
621 return Err("Unexpected end of file in match expression".to_string());
622 }
623
624 arms.push(self.parse_match_arm()?);
625 }
626
627 if arms.is_empty() {
628 return Err("Match expression must have at least one arm".to_string());
629 }
630
631 Ok(Statement::Match { arms })
632 }
633
634 fn parse_match_arm(&mut self) -> Result<MatchArm, String> {
639 let variant_name = self
641 .advance()
642 .ok_or("Expected variant name in match arm")?
643 .clone();
644
645 self.skip_comments();
646
647 let pattern = if self.check("{") {
649 self.consume("{");
650 let mut bindings = Vec::new();
651
652 loop {
653 self.skip_comments();
654
655 if self.check("}") {
656 break;
657 }
658
659 if self.is_at_end() {
660 return Err(format!(
661 "Unexpected end of file in match arm bindings for '{}'",
662 variant_name
663 ));
664 }
665
666 let token = self.advance().ok_or("Expected binding name")?.clone();
667
668 if let Some(field_name) = token.strip_prefix('>') {
670 if field_name.is_empty() {
671 return Err(format!(
672 "Expected field name after '>' in match bindings for '{}'",
673 variant_name
674 ));
675 }
676 bindings.push(field_name.to_string());
677 } else {
678 return Err(format!(
679 "Match bindings must use '>' prefix to indicate stack extraction. \
680 Use '>{}' instead of '{}' in pattern for '{}'",
681 token, token, variant_name
682 ));
683 }
684 }
685
686 self.consume("}");
687 Pattern::VariantWithBindings {
688 name: variant_name,
689 bindings,
690 }
691 } else {
692 Pattern::Variant(variant_name.clone())
693 };
694
695 self.skip_comments();
696
697 if !self.consume("->") {
699 return Err(format!(
700 "Expected '->' after pattern '{}', got '{}'",
701 match &pattern {
702 Pattern::Variant(n) => n.clone(),
703 Pattern::VariantWithBindings { name, .. } => name.clone(),
704 },
705 self.current()
706 ));
707 }
708
709 let mut body = Vec::new();
711 loop {
712 self.skip_comments();
713
714 if self.check("end") {
716 break;
717 }
718
719 if let Some(token) = self.current_token()
723 && let Some(first_char) = token.text.chars().next()
724 && first_char.is_uppercase()
725 {
726 if let Some(next) = self.peek_at(1)
728 && (next == "->" || next == "{")
729 {
730 break;
732 }
733 }
735
736 if self.is_at_end() {
737 return Err("Unexpected end of file in match arm body".to_string());
738 }
739
740 body.push(self.parse_statement()?);
741 }
742
743 Ok(MatchArm { pattern, body })
744 }
745
746 fn parse_stack_effect(&mut self) -> Result<Effect, String> {
749 if !self.consume("(") {
751 return Err("Expected '(' to start stack effect".to_string());
752 }
753
754 let (input_row_var, input_types) =
756 self.parse_type_list_until(&["--", ")"], "stack effect inputs", 0)?;
757
758 if !self.consume("--") {
760 return Err("Expected '--' separator in stack effect".to_string());
761 }
762
763 let (output_row_var, output_types) =
765 self.parse_type_list_until(&[")", "|"], "stack effect outputs", 0)?;
766
767 let effects = if self.consume("|") {
769 self.parse_effect_annotations()?
770 } else {
771 Vec::new()
772 };
773
774 if !self.consume(")") {
776 return Err("Expected ')' to end stack effect".to_string());
777 }
778
779 let inputs = self.build_stack_type(input_row_var, input_types);
781 let outputs = self.build_stack_type(output_row_var, output_types);
782
783 Ok(Effect::with_effects(inputs, outputs, effects))
784 }
785
786 fn parse_effect_annotations(&mut self) -> Result<Vec<SideEffect>, String> {
789 let mut effects = Vec::new();
790
791 while let Some(token) = self.peek_at(0) {
793 if token == ")" {
794 break;
795 }
796
797 match token {
798 "Yield" => {
799 self.advance(); if let Some(type_token) = self.current_token() {
802 if type_token.text == ")" {
803 return Err("Expected type after 'Yield'".to_string());
804 }
805 let type_token = type_token.clone();
806 self.advance();
807 let yield_type = self.parse_type(&type_token)?;
808 effects.push(SideEffect::Yield(Box::new(yield_type)));
809 } else {
810 return Err("Expected type after 'Yield'".to_string());
811 }
812 }
813 _ => {
814 return Err(format!("Unknown effect '{}'. Expected 'Yield'", token));
815 }
816 }
817 }
818
819 if effects.is_empty() {
820 return Err("Expected at least one effect after '|'".to_string());
821 }
822
823 Ok(effects)
824 }
825
826 fn parse_type(&self, token: &Token) -> Result<Type, String> {
828 match token.text.as_str() {
829 "Int" => Ok(Type::Int),
830 "Float" => Ok(Type::Float),
831 "Bool" => Ok(Type::Bool),
832 "String" => Ok(Type::String),
833 "Quotation" => Err(format!(
836 "'Quotation' is not a valid type at line {}, column {}. Use explicit quotation syntax like [Int -- Int] or [ -- ] instead.",
837 token.line + 1,
838 token.column + 1
839 )),
840 _ => {
841 if let Some(first_char) = token.text.chars().next() {
843 if first_char.is_uppercase() {
844 Ok(Type::Var(token.text.to_string()))
845 } else {
846 Err(format!(
847 "Unknown type: '{}' at line {}, column {}. Expected Int, Bool, String, Closure, or a type variable (uppercase)",
848 token.text.escape_default(),
849 token.line + 1, token.column + 1
851 ))
852 }
853 } else {
854 Err(format!(
855 "Invalid type: '{}' at line {}, column {}",
856 token.text.escape_default(),
857 token.line + 1,
858 token.column + 1
859 ))
860 }
861 }
862 }
863 }
864
865 fn validate_row_var_name(&self, name: &str) -> Result<(), String> {
868 if name.is_empty() {
869 return Err("Row variable must have a name after '..'".to_string());
870 }
871
872 let first_char = name.chars().next().unwrap();
874 if !first_char.is_ascii_lowercase() {
875 return Err(format!(
876 "Row variable '..{}' must start with a lowercase letter (a-z)",
877 name
878 ));
879 }
880
881 for ch in name.chars() {
883 if !ch.is_alphanumeric() && ch != '_' {
884 return Err(format!(
885 "Row variable '..{}' can only contain letters, numbers, and underscores",
886 name
887 ));
888 }
889 }
890
891 match name {
893 "Int" | "Bool" | "String" => {
894 return Err(format!(
895 "Row variable '..{}' cannot use type name as identifier",
896 name
897 ));
898 }
899 _ => {}
900 }
901
902 Ok(())
903 }
904
905 fn parse_type_list_until(
911 &mut self,
912 terminators: &[&str],
913 context: &str,
914 depth: usize,
915 ) -> Result<(Option<String>, Vec<Type>), String> {
916 const MAX_QUOTATION_DEPTH: usize = 32;
917
918 if depth > MAX_QUOTATION_DEPTH {
919 return Err(format!(
920 "Quotation type nesting exceeds maximum depth of {} (possible deeply nested types or DOS attack)",
921 MAX_QUOTATION_DEPTH
922 ));
923 }
924
925 let mut types = Vec::new();
926 let mut row_var = None;
927
928 while !terminators.iter().any(|t| self.check(t)) {
929 self.skip_comments();
931
932 if terminators.iter().any(|t| self.check(t)) {
934 break;
935 }
936
937 if self.is_at_end() {
938 return Err(format!(
939 "Unexpected end while parsing {} - expected one of: {}",
940 context,
941 terminators.join(", ")
942 ));
943 }
944
945 let token = self
946 .advance_token()
947 .ok_or_else(|| format!("Unexpected end in {}", context))?
948 .clone();
949
950 if token.text.starts_with("..") {
952 let var_name = token.text.trim_start_matches("..").to_string();
953 self.validate_row_var_name(&var_name)?;
954 row_var = Some(var_name);
955 } else if token.text == "Closure" {
956 if !self.consume("[") {
958 return Err("Expected '[' after 'Closure' in type signature".to_string());
959 }
960 let effect_type = self.parse_quotation_type(depth)?;
961 match effect_type {
962 Type::Quotation(effect) => {
963 types.push(Type::Closure {
964 effect,
965 captures: Vec::new(), });
967 }
968 _ => unreachable!("parse_quotation_type should return Quotation"),
969 }
970 } else if token.text == "[" {
971 types.push(self.parse_quotation_type(depth)?);
973 } else {
974 types.push(self.parse_type(&token)?);
976 }
977 }
978
979 Ok((row_var, types))
980 }
981
982 fn parse_quotation_type(&mut self, depth: usize) -> Result<Type, String> {
987 let (input_row_var, input_types) =
989 self.parse_type_list_until(&["--", "]"], "quotation type inputs", depth + 1)?;
990
991 if !self.consume("--") {
993 if self.check("]") {
995 return Err(
996 "Quotation types require '--' separator. Did you mean '[Int -- ]' or '[ -- Int]'?"
997 .to_string(),
998 );
999 }
1000 return Err("Expected '--' separator in quotation type".to_string());
1001 }
1002
1003 let (output_row_var, output_types) =
1005 self.parse_type_list_until(&["]"], "quotation type outputs", depth + 1)?;
1006
1007 if !self.consume("]") {
1009 return Err("Expected ']' to end quotation type".to_string());
1010 }
1011
1012 let inputs = self.build_stack_type(input_row_var, input_types);
1014 let outputs = self.build_stack_type(output_row_var, output_types);
1015
1016 Ok(Type::Quotation(Box::new(Effect::new(inputs, outputs))))
1017 }
1018
1019 fn build_stack_type(&self, row_var: Option<String>, types: Vec<Type>) -> StackType {
1029 let base = match row_var {
1031 Some(name) => StackType::RowVar(name),
1032 None => StackType::RowVar("rest".to_string()),
1033 };
1034
1035 types.into_iter().fold(base, |stack, ty| stack.push(ty))
1037 }
1038
1039 fn skip_comments(&mut self) {
1040 loop {
1041 if self.check("#") {
1042 while !self.is_at_end() && self.current() != "\n" {
1044 self.advance();
1045 }
1046 if !self.is_at_end() {
1047 self.advance(); }
1049 } else if self.check("\n") {
1050 self.advance();
1052 } else {
1053 break;
1054 }
1055 }
1056 }
1057
1058 fn check(&self, expected: &str) -> bool {
1059 if self.is_at_end() {
1060 return false;
1061 }
1062 self.current() == expected
1063 }
1064
1065 fn consume(&mut self, expected: &str) -> bool {
1066 if self.check(expected) {
1067 self.advance();
1068 true
1069 } else {
1070 false
1071 }
1072 }
1073
1074 fn current(&self) -> &str {
1076 if self.is_at_end() {
1077 ""
1078 } else {
1079 &self.tokens[self.pos].text
1080 }
1081 }
1082
1083 fn current_token(&self) -> Option<&Token> {
1085 if self.is_at_end() {
1086 None
1087 } else {
1088 Some(&self.tokens[self.pos])
1089 }
1090 }
1091
1092 fn peek_at(&self, n: usize) -> Option<&str> {
1094 let idx = self.pos + n;
1095 if idx < self.tokens.len() {
1096 Some(&self.tokens[idx].text)
1097 } else {
1098 None
1099 }
1100 }
1101
1102 fn advance(&mut self) -> Option<&String> {
1104 if self.is_at_end() {
1105 None
1106 } else {
1107 let token = &self.tokens[self.pos];
1108 self.pos += 1;
1109 Some(&token.text)
1110 }
1111 }
1112
1113 fn advance_token(&mut self) -> Option<&Token> {
1115 if self.is_at_end() {
1116 None
1117 } else {
1118 let token = &self.tokens[self.pos];
1119 self.pos += 1;
1120 Some(token)
1121 }
1122 }
1123
1124 fn is_at_end(&self) -> bool {
1125 self.pos >= self.tokens.len()
1126 }
1127}
1128
/// Cheap pre-filter for float literals.
///
/// Returns `true` when `token`, after dropping one optional leading `-`, is
/// non-empty and contains a `.`, `e` or `E`. This does not guarantee the
/// token parses as a float; the caller still has to try.
fn is_float_literal(token: &str) -> bool {
    let unsigned = match token.strip_prefix('-') {
        Some(rest) => rest,
        None => token,
    };

    !unsigned.is_empty() && unsigned.chars().any(|c| matches!(c, '.' | 'e' | 'E'))
}
1149
/// Resolves backslash escapes in the contents of a string literal.
///
/// Supported escapes are `\"`, `\\`, `\n`, `\r` and `\t`; every other
/// character is copied through unchanged.
///
/// # Errors
/// Returns a message for an unknown escape or for a trailing lone backslash.
fn unescape_string(s: &str) -> Result<String, String> {
    let mut out = String::new();
    let mut rest = s.chars();

    loop {
        let ch = match rest.next() {
            Some(c) => c,
            None => return Ok(out),
        };

        if ch != '\\' {
            out.push(ch);
            continue;
        }

        // `ch` is a backslash: the next character selects the escape.
        let decoded = match rest.next() {
            Some('"') => '"',
            Some('\\') => '\\',
            Some('n') => '\n',
            Some('r') => '\r',
            Some('t') => '\t',
            Some(other) => {
                return Err(format!(
                    "Unknown escape sequence '\\{}' in string literal. \
                     Supported: \\\" \\\\ \\n \\r \\t",
                    other
                ));
            }
            None => {
                return Err("String ends with incomplete escape sequence '\\'".to_string());
            }
        };
        out.push(decoded);
    }
}
1191
1192fn tokenize(source: &str) -> Vec<Token> {
1193 let mut tokens = Vec::new();
1194 let mut current = String::new();
1195 let mut current_start_line = 0;
1196 let mut current_start_col = 0;
1197 let mut in_string = false;
1198 let mut prev_was_backslash = false;
1199
1200 let mut line = 0;
1202 let mut col = 0;
1203
1204 for ch in source.chars() {
1205 if in_string {
1206 current.push(ch);
1207 if ch == '"' && !prev_was_backslash {
1208 in_string = false;
1210 tokens.push(Token::new(
1211 current.clone(),
1212 current_start_line,
1213 current_start_col,
1214 ));
1215 current.clear();
1216 prev_was_backslash = false;
1217 } else if ch == '\\' && !prev_was_backslash {
1218 prev_was_backslash = true;
1220 } else {
1221 prev_was_backslash = false;
1223 }
1224 if ch == '\n' {
1226 line += 1;
1227 col = 0;
1228 } else {
1229 col += 1;
1230 }
1231 } else if ch == '"' {
1232 if !current.is_empty() {
1233 tokens.push(Token::new(
1234 current.clone(),
1235 current_start_line,
1236 current_start_col,
1237 ));
1238 current.clear();
1239 }
1240 in_string = true;
1241 current_start_line = line;
1242 current_start_col = col;
1243 current.push(ch);
1244 prev_was_backslash = false;
1245 col += 1;
1246 } else if ch.is_whitespace() {
1247 if !current.is_empty() {
1248 tokens.push(Token::new(
1249 current.clone(),
1250 current_start_line,
1251 current_start_col,
1252 ));
1253 current.clear();
1254 }
1255 if ch == '\n' {
1257 tokens.push(Token::new("\n".to_string(), line, col));
1258 line += 1;
1259 col = 0;
1260 } else {
1261 col += 1;
1262 }
1263 } else if "():;[]{},".contains(ch) {
1264 if !current.is_empty() {
1265 tokens.push(Token::new(
1266 current.clone(),
1267 current_start_line,
1268 current_start_col,
1269 ));
1270 current.clear();
1271 }
1272 tokens.push(Token::new(ch.to_string(), line, col));
1273 col += 1;
1274 } else {
1275 if current.is_empty() {
1276 current_start_line = line;
1277 current_start_col = col;
1278 }
1279 current.push(ch);
1280 col += 1;
1281 }
1282 }
1283
1284 if in_string {
1286 tokens.push(Token::new(
1289 "<<<UNCLOSED_STRING>>>".to_string(),
1290 current_start_line,
1291 current_start_col,
1292 ));
1293 } else if !current.is_empty() {
1294 tokens.push(Token::new(current, current_start_line, current_start_col));
1295 }
1296
1297 tokens
1298}
1299
1300#[cfg(test)]
1301mod tests {
1302 use super::*;
1303
1304 #[test]
1305 fn test_parse_hello_world() {
1306 let source = r#"
1307: main ( -- )
1308 "Hello, World!" write_line ;
1309"#;
1310
1311 let mut parser = Parser::new(source);
1312 let program = parser.parse().unwrap();
1313
1314 assert_eq!(program.words.len(), 1);
1315 assert_eq!(program.words[0].name, "main");
1316 assert_eq!(program.words[0].body.len(), 2);
1317
1318 match &program.words[0].body[0] {
1319 Statement::StringLiteral(s) => assert_eq!(s, "Hello, World!"),
1320 _ => panic!("Expected StringLiteral"),
1321 }
1322
1323 match &program.words[0].body[1] {
1324 Statement::WordCall { name, .. } => assert_eq!(name, "write_line"),
1325 _ => panic!("Expected WordCall"),
1326 }
1327 }
1328
1329 #[test]
1330 fn test_parse_with_numbers() {
1331 let source = ": add-example ( -- ) 2 3 add ;";
1332
1333 let mut parser = Parser::new(source);
1334 let program = parser.parse().unwrap();
1335
1336 assert_eq!(program.words[0].body.len(), 3);
1337 assert_eq!(program.words[0].body[0], Statement::IntLiteral(2));
1338 assert_eq!(program.words[0].body[1], Statement::IntLiteral(3));
1339 assert!(matches!(
1340 &program.words[0].body[2],
1341 Statement::WordCall { name, .. } if name == "add"
1342 ));
1343 }
1344
1345 #[test]
1346 fn test_parse_hex_literals() {
1347 let source = ": test ( -- ) 0xFF 0x10 0X1A ;";
1348 let mut parser = Parser::new(source);
1349 let program = parser.parse().unwrap();
1350
1351 assert_eq!(program.words[0].body[0], Statement::IntLiteral(255));
1352 assert_eq!(program.words[0].body[1], Statement::IntLiteral(16));
1353 assert_eq!(program.words[0].body[2], Statement::IntLiteral(26));
1354 }
1355
1356 #[test]
1357 fn test_parse_binary_literals() {
1358 let source = ": test ( -- ) 0b1010 0B1111 0b0 ;";
1359 let mut parser = Parser::new(source);
1360 let program = parser.parse().unwrap();
1361
1362 assert_eq!(program.words[0].body[0], Statement::IntLiteral(10));
1363 assert_eq!(program.words[0].body[1], Statement::IntLiteral(15));
1364 assert_eq!(program.words[0].body[2], Statement::IntLiteral(0));
1365 }
1366
1367 #[test]
1368 fn test_parse_invalid_hex_literal() {
1369 let source = ": test ( -- ) 0xGG ;";
1370 let mut parser = Parser::new(source);
1371 let err = parser.parse().unwrap_err();
1372 assert!(err.contains("Invalid hex literal"));
1373 }
1374
1375 #[test]
1376 fn test_parse_invalid_binary_literal() {
1377 let source = ": test ( -- ) 0b123 ;";
1378 let mut parser = Parser::new(source);
1379 let err = parser.parse().unwrap_err();
1380 assert!(err.contains("Invalid binary literal"));
1381 }
1382
1383 #[test]
1384 fn test_parse_escaped_quotes() {
1385 let source = r#": main ( -- ) "Say \"hello\" there" write_line ;"#;
1386
1387 let mut parser = Parser::new(source);
1388 let program = parser.parse().unwrap();
1389
1390 assert_eq!(program.words.len(), 1);
1391 assert_eq!(program.words[0].body.len(), 2);
1392
1393 match &program.words[0].body[0] {
1394 Statement::StringLiteral(s) => assert_eq!(s, "Say \"hello\" there"),
1396 _ => panic!("Expected StringLiteral with escaped quotes"),
1397 }
1398 }
1399
1400 #[test]
1403 fn test_escaped_quote_at_end_of_string() {
1404 let source = r#": main ( -- ) "hello\"" io.write-line ;"#;
1405
1406 let mut parser = Parser::new(source);
1407 let program = parser.parse().unwrap();
1408
1409 assert_eq!(program.words.len(), 1);
1410 match &program.words[0].body[0] {
1411 Statement::StringLiteral(s) => assert_eq!(s, "hello\""),
1412 _ => panic!("Expected StringLiteral ending with escaped quote"),
1413 }
1414 }
1415
1416 #[test]
1418 fn test_escaped_quote_at_start_of_string() {
1419 let source = r#": main ( -- ) "\"hello" io.write-line ;"#;
1420
1421 let mut parser = Parser::new(source);
1422 let program = parser.parse().unwrap();
1423
1424 match &program.words[0].body[0] {
1425 Statement::StringLiteral(s) => assert_eq!(s, "\"hello"),
1426 _ => panic!("Expected StringLiteral starting with escaped quote"),
1427 }
1428 }
1429
1430 #[test]
1431 fn test_escape_sequences() {
1432 let source = r#": main ( -- ) "Line 1\nLine 2\tTabbed" write_line ;"#;
1433
1434 let mut parser = Parser::new(source);
1435 let program = parser.parse().unwrap();
1436
1437 match &program.words[0].body[0] {
1438 Statement::StringLiteral(s) => assert_eq!(s, "Line 1\nLine 2\tTabbed"),
1439 _ => panic!("Expected StringLiteral"),
1440 }
1441 }
1442
1443 #[test]
1444 fn test_unknown_escape_sequence() {
1445 let source = r#": main ( -- ) "Bad \x sequence" write_line ;"#;
1446
1447 let mut parser = Parser::new(source);
1448 let result = parser.parse();
1449
1450 assert!(result.is_err());
1451 assert!(result.unwrap_err().contains("Unknown escape sequence"));
1452 }
1453
1454 #[test]
1455 fn test_unclosed_string_literal() {
1456 let source = r#": main ( -- ) "unclosed string ;"#;
1457
1458 let mut parser = Parser::new(source);
1459 let result = parser.parse();
1460
1461 assert!(result.is_err());
1462 let err_msg = result.unwrap_err();
1463 assert!(err_msg.contains("Unclosed string literal"));
1464 assert!(
1466 err_msg.contains("line 1"),
1467 "Expected line number in error: {}",
1468 err_msg
1469 );
1470 assert!(
1471 err_msg.contains("column 15"),
1472 "Expected column number in error: {}",
1473 err_msg
1474 );
1475 }
1476
1477 #[test]
1478 fn test_multiple_word_definitions() {
1479 let source = r#"
1480: double ( Int -- Int )
1481 2 multiply ;
1482
1483: quadruple ( Int -- Int )
1484 double double ;
1485"#;
1486
1487 let mut parser = Parser::new(source);
1488 let program = parser.parse().unwrap();
1489
1490 assert_eq!(program.words.len(), 2);
1491 assert_eq!(program.words[0].name, "double");
1492 assert_eq!(program.words[1].name, "quadruple");
1493
1494 assert!(program.words[0].effect.is_some());
1496 assert!(program.words[1].effect.is_some());
1497 }
1498
1499 #[test]
1500 fn test_user_word_calling_user_word() {
1501 let source = r#"
1502: helper ( -- )
1503 "helper called" write_line ;
1504
1505: main ( -- )
1506 helper ;
1507"#;
1508
1509 let mut parser = Parser::new(source);
1510 let program = parser.parse().unwrap();
1511
1512 assert_eq!(program.words.len(), 2);
1513
1514 match &program.words[1].body[0] {
1516 Statement::WordCall { name, .. } => assert_eq!(name, "helper"),
1517 _ => panic!("Expected WordCall to helper"),
1518 }
1519 }
1520
1521 #[test]
1522 fn test_parse_simple_stack_effect() {
1523 let source = ": test ( Int -- Bool ) 1 ;";
1526 let mut parser = Parser::new(source);
1527 let program = parser.parse().unwrap();
1528
1529 assert_eq!(program.words.len(), 1);
1530 let word = &program.words[0];
1531 assert!(word.effect.is_some());
1532
1533 let effect = word.effect.as_ref().unwrap();
1534
1535 assert_eq!(
1537 effect.inputs,
1538 StackType::Cons {
1539 rest: Box::new(StackType::RowVar("rest".to_string())),
1540 top: Type::Int
1541 }
1542 );
1543
1544 assert_eq!(
1546 effect.outputs,
1547 StackType::Cons {
1548 rest: Box::new(StackType::RowVar("rest".to_string())),
1549 top: Type::Bool
1550 }
1551 );
1552 }
1553
1554 #[test]
1555 fn test_parse_row_polymorphic_stack_effect() {
1556 let source = ": test ( ..a Int -- ..a Bool ) 1 ;";
1558 let mut parser = Parser::new(source);
1559 let program = parser.parse().unwrap();
1560
1561 assert_eq!(program.words.len(), 1);
1562 let word = &program.words[0];
1563 assert!(word.effect.is_some());
1564
1565 let effect = word.effect.as_ref().unwrap();
1566
1567 assert_eq!(
1569 effect.inputs,
1570 StackType::Cons {
1571 rest: Box::new(StackType::RowVar("a".to_string())),
1572 top: Type::Int
1573 }
1574 );
1575
1576 assert_eq!(
1578 effect.outputs,
1579 StackType::Cons {
1580 rest: Box::new(StackType::RowVar("a".to_string())),
1581 top: Type::Bool
1582 }
1583 );
1584 }
1585
1586 #[test]
1587 fn test_parse_invalid_row_var_starts_with_digit() {
1588 let source = ": test ( ..123 Int -- ) ;";
1590 let mut parser = Parser::new(source);
1591 let result = parser.parse();
1592
1593 assert!(result.is_err());
1594 let err_msg = result.unwrap_err();
1595 assert!(
1596 err_msg.contains("lowercase letter"),
1597 "Expected error about lowercase letter, got: {}",
1598 err_msg
1599 );
1600 }
1601
1602 #[test]
1603 fn test_parse_invalid_row_var_starts_with_uppercase() {
1604 let source = ": test ( ..Int Int -- ) ;";
1606 let mut parser = Parser::new(source);
1607 let result = parser.parse();
1608
1609 assert!(result.is_err());
1610 let err_msg = result.unwrap_err();
1611 assert!(
1612 err_msg.contains("lowercase letter") || err_msg.contains("type name"),
1613 "Expected error about lowercase letter or type name, got: {}",
1614 err_msg
1615 );
1616 }
1617
1618 #[test]
1619 fn test_parse_invalid_row_var_with_special_chars() {
1620 let source = ": test ( ..a-b Int -- ) ;";
1622 let mut parser = Parser::new(source);
1623 let result = parser.parse();
1624
1625 assert!(result.is_err());
1626 let err_msg = result.unwrap_err();
1627 assert!(
1628 err_msg.contains("letters, numbers, and underscores")
1629 || err_msg.contains("Unknown type"),
1630 "Expected error about valid characters, got: {}",
1631 err_msg
1632 );
1633 }
1634
1635 #[test]
1636 fn test_parse_valid_row_var_with_underscore() {
1637 let source = ": test ( ..my_row Int -- ..my_row Bool ) ;";
1639 let mut parser = Parser::new(source);
1640 let result = parser.parse();
1641
1642 assert!(result.is_ok(), "Should accept row variable with underscore");
1643 }
1644
1645 #[test]
1646 fn test_parse_multiple_types_stack_effect() {
1647 let source = ": test ( Int String -- Bool ) 1 ;";
1650 let mut parser = Parser::new(source);
1651 let program = parser.parse().unwrap();
1652
1653 let effect = program.words[0].effect.as_ref().unwrap();
1654
1655 let (rest, top) = effect.inputs.clone().pop().unwrap();
1657 assert_eq!(top, Type::String);
1658 let (rest2, top2) = rest.pop().unwrap();
1659 assert_eq!(top2, Type::Int);
1660 assert_eq!(rest2, StackType::RowVar("rest".to_string()));
1661
1662 assert_eq!(
1664 effect.outputs,
1665 StackType::Cons {
1666 rest: Box::new(StackType::RowVar("rest".to_string())),
1667 top: Type::Bool
1668 }
1669 );
1670 }
1671
1672 #[test]
1673 fn test_parse_type_variable() {
1674 let source = ": dup ( ..a T -- ..a T T ) ;";
1676 let mut parser = Parser::new(source);
1677 let program = parser.parse().unwrap();
1678
1679 let effect = program.words[0].effect.as_ref().unwrap();
1680
1681 assert_eq!(
1683 effect.inputs,
1684 StackType::Cons {
1685 rest: Box::new(StackType::RowVar("a".to_string())),
1686 top: Type::Var("T".to_string())
1687 }
1688 );
1689
1690 let (rest, top) = effect.outputs.clone().pop().unwrap();
1692 assert_eq!(top, Type::Var("T".to_string()));
1693 let (rest2, top2) = rest.pop().unwrap();
1694 assert_eq!(top2, Type::Var("T".to_string()));
1695 assert_eq!(rest2, StackType::RowVar("a".to_string()));
1696 }
1697
1698 #[test]
1699 fn test_parse_empty_stack_effect() {
1700 let source = ": test ( -- ) ;";
1704 let mut parser = Parser::new(source);
1705 let program = parser.parse().unwrap();
1706
1707 let effect = program.words[0].effect.as_ref().unwrap();
1708
1709 assert_eq!(effect.inputs, StackType::RowVar("rest".to_string()));
1711 assert_eq!(effect.outputs, StackType::RowVar("rest".to_string()));
1712 }
1713
1714 #[test]
1715 fn test_parse_invalid_type() {
1716 let source = ": test ( invalid -- Bool ) ;";
1718 let mut parser = Parser::new(source);
1719 let result = parser.parse();
1720
1721 assert!(result.is_err());
1722 assert!(result.unwrap_err().contains("Unknown type"));
1723 }
1724
1725 #[test]
1726 fn test_parse_unclosed_stack_effect() {
1727 let source = ": test ( Int -- Bool body ;";
1730 let mut parser = Parser::new(source);
1731 let result = parser.parse();
1732
1733 assert!(result.is_err());
1734 let err_msg = result.unwrap_err();
1735 assert!(err_msg.contains("Unknown type"));
1737 }
1738
1739 #[test]
1740 fn test_parse_simple_quotation_type() {
1741 let source = ": apply ( [Int -- Int] -- ) ;";
1743 let mut parser = Parser::new(source);
1744 let program = parser.parse().unwrap();
1745
1746 let effect = program.words[0].effect.as_ref().unwrap();
1747
1748 let (rest, top) = effect.inputs.clone().pop().unwrap();
1750 match top {
1751 Type::Quotation(quot_effect) => {
1752 assert_eq!(
1754 quot_effect.inputs,
1755 StackType::Cons {
1756 rest: Box::new(StackType::RowVar("rest".to_string())),
1757 top: Type::Int
1758 }
1759 );
1760 assert_eq!(
1762 quot_effect.outputs,
1763 StackType::Cons {
1764 rest: Box::new(StackType::RowVar("rest".to_string())),
1765 top: Type::Int
1766 }
1767 );
1768 }
1769 _ => panic!("Expected Quotation type, got {:?}", top),
1770 }
1771 assert_eq!(rest, StackType::RowVar("rest".to_string()));
1772 }
1773
1774 #[test]
1775 fn test_parse_quotation_type_with_row_vars() {
1776 let source = ": test ( ..a [..a T -- ..a Bool] -- ..a ) ;";
1778 let mut parser = Parser::new(source);
1779 let program = parser.parse().unwrap();
1780
1781 let effect = program.words[0].effect.as_ref().unwrap();
1782
1783 let (rest, top) = effect.inputs.clone().pop().unwrap();
1785 match top {
1786 Type::Quotation(quot_effect) => {
1787 let (q_in_rest, q_in_top) = quot_effect.inputs.clone().pop().unwrap();
1789 assert_eq!(q_in_top, Type::Var("T".to_string()));
1790 assert_eq!(q_in_rest, StackType::RowVar("a".to_string()));
1791
1792 let (q_out_rest, q_out_top) = quot_effect.outputs.clone().pop().unwrap();
1794 assert_eq!(q_out_top, Type::Bool);
1795 assert_eq!(q_out_rest, StackType::RowVar("a".to_string()));
1796 }
1797 _ => panic!("Expected Quotation type, got {:?}", top),
1798 }
1799 assert_eq!(rest, StackType::RowVar("a".to_string()));
1800 }
1801
1802 #[test]
1803 fn test_parse_nested_quotation_type() {
1804 let source = ": nested ( [[Int -- Int] -- Bool] -- ) ;";
1806 let mut parser = Parser::new(source);
1807 let program = parser.parse().unwrap();
1808
1809 let effect = program.words[0].effect.as_ref().unwrap();
1810
1811 let (_, top) = effect.inputs.clone().pop().unwrap();
1813 match top {
1814 Type::Quotation(outer_effect) => {
1815 let (_, outer_in_top) = outer_effect.inputs.clone().pop().unwrap();
1817 match outer_in_top {
1818 Type::Quotation(inner_effect) => {
1819 assert!(matches!(
1821 inner_effect.inputs.clone().pop().unwrap().1,
1822 Type::Int
1823 ));
1824 assert!(matches!(
1825 inner_effect.outputs.clone().pop().unwrap().1,
1826 Type::Int
1827 ));
1828 }
1829 _ => panic!("Expected nested Quotation type"),
1830 }
1831
1832 let (_, outer_out_top) = outer_effect.outputs.clone().pop().unwrap();
1834 assert_eq!(outer_out_top, Type::Bool);
1835 }
1836 _ => panic!("Expected Quotation type"),
1837 }
1838 }
1839
1840 #[test]
1841 fn test_parse_deeply_nested_quotation_type_exceeds_limit() {
1842 let mut source = String::from(": deep ( ");
1845
1846 for _ in 0..35 {
1848 source.push_str("[ -- ");
1849 }
1850
1851 source.push_str("Int");
1852
1853 for _ in 0..35 {
1855 source.push_str(" ]");
1856 }
1857
1858 source.push_str(" -- ) ;");
1859
1860 let mut parser = Parser::new(&source);
1861 let result = parser.parse();
1862
1863 assert!(result.is_err());
1865 let err_msg = result.unwrap_err();
1866 assert!(
1867 err_msg.contains("depth") || err_msg.contains("32"),
1868 "Expected depth limit error, got: {}",
1869 err_msg
1870 );
1871 }
1872
1873 #[test]
1874 fn test_parse_empty_quotation_type() {
1875 let source = ": empty-quot ( [ -- ] -- ) ;";
1878 let mut parser = Parser::new(source);
1879 let program = parser.parse().unwrap();
1880
1881 let effect = program.words[0].effect.as_ref().unwrap();
1882
1883 let (_, top) = effect.inputs.clone().pop().unwrap();
1884 match top {
1885 Type::Quotation(quot_effect) => {
1886 assert_eq!(quot_effect.inputs, StackType::RowVar("rest".to_string()));
1888 assert_eq!(quot_effect.outputs, StackType::RowVar("rest".to_string()));
1889 }
1890 _ => panic!("Expected Quotation type"),
1891 }
1892 }
1893
1894 #[test]
1895 fn test_parse_quotation_type_in_output() {
1896 let source = ": maker ( -- [Int -- Int] ) ;";
1898 let mut parser = Parser::new(source);
1899 let program = parser.parse().unwrap();
1900
1901 let effect = program.words[0].effect.as_ref().unwrap();
1902
1903 let (_, top) = effect.outputs.clone().pop().unwrap();
1905 match top {
1906 Type::Quotation(quot_effect) => {
1907 assert!(matches!(
1908 quot_effect.inputs.clone().pop().unwrap().1,
1909 Type::Int
1910 ));
1911 assert!(matches!(
1912 quot_effect.outputs.clone().pop().unwrap().1,
1913 Type::Int
1914 ));
1915 }
1916 _ => panic!("Expected Quotation type"),
1917 }
1918 }
1919
1920 #[test]
1921 fn test_parse_unclosed_quotation_type() {
1922 let source = ": broken ( [Int -- Int -- ) ;";
1924 let mut parser = Parser::new(source);
1925 let result = parser.parse();
1926
1927 assert!(result.is_err());
1928 let err_msg = result.unwrap_err();
1929 assert!(
1932 err_msg.contains("Unclosed")
1933 || err_msg.contains("Expected")
1934 || err_msg.contains("Unexpected"),
1935 "Got error: {}",
1936 err_msg
1937 );
1938 }
1939
1940 #[test]
1941 fn test_parse_multiple_quotation_types() {
1942 let source = ": multi ( [Int -- Int] [String -- Bool] -- ) ;";
1944 let mut parser = Parser::new(source);
1945 let program = parser.parse().unwrap();
1946
1947 let effect = program.words[0].effect.as_ref().unwrap();
1948
1949 let (rest, top) = effect.inputs.clone().pop().unwrap();
1951 match top {
1952 Type::Quotation(quot_effect) => {
1953 assert!(matches!(
1954 quot_effect.inputs.clone().pop().unwrap().1,
1955 Type::String
1956 ));
1957 assert!(matches!(
1958 quot_effect.outputs.clone().pop().unwrap().1,
1959 Type::Bool
1960 ));
1961 }
1962 _ => panic!("Expected Quotation type"),
1963 }
1964
1965 let (_, top2) = rest.pop().unwrap();
1967 match top2 {
1968 Type::Quotation(quot_effect) => {
1969 assert!(matches!(
1970 quot_effect.inputs.clone().pop().unwrap().1,
1971 Type::Int
1972 ));
1973 assert!(matches!(
1974 quot_effect.outputs.clone().pop().unwrap().1,
1975 Type::Int
1976 ));
1977 }
1978 _ => panic!("Expected Quotation type"),
1979 }
1980 }
1981
1982 #[test]
1983 fn test_parse_quotation_type_without_separator() {
1984 let source = ": consumer ( [Int] -- ) ;";
1995 let mut parser = Parser::new(source);
1996 let result = parser.parse();
1997
1998 assert!(result.is_err());
2000 let err_msg = result.unwrap_err();
2001 assert!(
2002 err_msg.contains("require") && err_msg.contains("--"),
2003 "Expected error about missing '--' separator, got: {}",
2004 err_msg
2005 );
2006 }
2007
2008 #[test]
2009 fn test_parse_bare_quotation_type_rejected() {
2010 let source = ": apply-twice ( Int Quotation -- Int ) ;";
2015 let mut parser = Parser::new(source);
2016 let result = parser.parse();
2017
2018 assert!(result.is_err());
2019 let err_msg = result.unwrap_err();
2020 assert!(
2021 err_msg.contains("Quotation") && err_msg.contains("not a valid type"),
2022 "Expected error about 'Quotation' not being valid, got: {}",
2023 err_msg
2024 );
2025 assert!(
2026 err_msg.contains("[Int -- Int]") || err_msg.contains("[ -- ]"),
2027 "Expected error to suggest explicit syntax, got: {}",
2028 err_msg
2029 );
2030 }
2031
2032 #[test]
2033 fn test_parse_no_stack_effect() {
2034 let source = ": test 1 2 add ;";
2036 let mut parser = Parser::new(source);
2037 let program = parser.parse().unwrap();
2038
2039 assert_eq!(program.words.len(), 1);
2040 assert!(program.words[0].effect.is_none());
2041 }
2042
2043 #[test]
2044 fn test_parse_simple_quotation() {
2045 let source = r#"
2046: test ( -- Quot )
2047 [ 1 add ] ;
2048"#;
2049
2050 let mut parser = Parser::new(source);
2051 let program = parser.parse().unwrap();
2052
2053 assert_eq!(program.words.len(), 1);
2054 assert_eq!(program.words[0].name, "test");
2055 assert_eq!(program.words[0].body.len(), 1);
2056
2057 match &program.words[0].body[0] {
2058 Statement::Quotation { body, .. } => {
2059 assert_eq!(body.len(), 2);
2060 assert_eq!(body[0], Statement::IntLiteral(1));
2061 assert!(matches!(&body[1], Statement::WordCall { name, .. } if name == "add"));
2062 }
2063 _ => panic!("Expected Quotation statement"),
2064 }
2065 }
2066
2067 #[test]
2068 fn test_parse_empty_quotation() {
2069 let source = ": test [ ] ;";
2070
2071 let mut parser = Parser::new(source);
2072 let program = parser.parse().unwrap();
2073
2074 assert_eq!(program.words.len(), 1);
2075
2076 match &program.words[0].body[0] {
2077 Statement::Quotation { body, .. } => {
2078 assert_eq!(body.len(), 0);
2079 }
2080 _ => panic!("Expected Quotation statement"),
2081 }
2082 }
2083
2084 #[test]
2085 fn test_parse_quotation_with_call() {
2086 let source = r#"
2087: test ( -- )
2088 5 [ 1 add ] call ;
2089"#;
2090
2091 let mut parser = Parser::new(source);
2092 let program = parser.parse().unwrap();
2093
2094 assert_eq!(program.words.len(), 1);
2095 assert_eq!(program.words[0].body.len(), 3);
2096
2097 assert_eq!(program.words[0].body[0], Statement::IntLiteral(5));
2098
2099 match &program.words[0].body[1] {
2100 Statement::Quotation { body, .. } => {
2101 assert_eq!(body.len(), 2);
2102 }
2103 _ => panic!("Expected Quotation"),
2104 }
2105
2106 assert!(matches!(
2107 &program.words[0].body[2],
2108 Statement::WordCall { name, .. } if name == "call"
2109 ));
2110 }
2111
2112 #[test]
2113 fn test_parse_nested_quotation() {
2114 let source = ": test [ [ 1 add ] call ] ;";
2115
2116 let mut parser = Parser::new(source);
2117 let program = parser.parse().unwrap();
2118
2119 assert_eq!(program.words.len(), 1);
2120
2121 match &program.words[0].body[0] {
2122 Statement::Quotation {
2123 body: outer_body, ..
2124 } => {
2125 assert_eq!(outer_body.len(), 2);
2126
2127 match &outer_body[0] {
2128 Statement::Quotation {
2129 body: inner_body, ..
2130 } => {
2131 assert_eq!(inner_body.len(), 2);
2132 assert_eq!(inner_body[0], Statement::IntLiteral(1));
2133 assert!(
2134 matches!(&inner_body[1], Statement::WordCall { name, .. } if name == "add")
2135 );
2136 }
2137 _ => panic!("Expected nested Quotation"),
2138 }
2139
2140 assert!(
2141 matches!(&outer_body[1], Statement::WordCall { name, .. } if name == "call")
2142 );
2143 }
2144 _ => panic!("Expected Quotation"),
2145 }
2146 }
2147
2148 #[test]
2149 fn test_parse_while_with_quotations() {
2150 let source = r#"
2151: countdown ( Int -- )
2152 [ dup 0 > ] [ 1 subtract ] while drop ;
2153"#;
2154
2155 let mut parser = Parser::new(source);
2156 let program = parser.parse().unwrap();
2157
2158 assert_eq!(program.words.len(), 1);
2159 assert_eq!(program.words[0].body.len(), 4);
2160
2161 match &program.words[0].body[0] {
2163 Statement::Quotation { body: pred, .. } => {
2164 assert_eq!(pred.len(), 3);
2165 assert!(matches!(&pred[0], Statement::WordCall { name, .. } if name == "dup"));
2166 assert_eq!(pred[1], Statement::IntLiteral(0));
2167 assert!(matches!(&pred[2], Statement::WordCall { name, .. } if name == ">"));
2168 }
2169 _ => panic!("Expected predicate quotation"),
2170 }
2171
2172 match &program.words[0].body[1] {
2174 Statement::Quotation { body, .. } => {
2175 assert_eq!(body.len(), 2);
2176 assert_eq!(body[0], Statement::IntLiteral(1));
2177 assert!(matches!(&body[1], Statement::WordCall { name, .. } if name == "subtract"));
2178 }
2179 _ => panic!("Expected body quotation"),
2180 }
2181
2182 assert!(matches!(
2184 &program.words[0].body[2],
2185 Statement::WordCall { name, .. } if name == "while"
2186 ));
2187
2188 assert!(matches!(
2190 &program.words[0].body[3],
2191 Statement::WordCall { name, .. } if name == "drop"
2192 ));
2193 }
2194
2195 #[test]
2196 fn test_parse_simple_closure_type() {
2197 let source = ": make-adder ( Int -- Closure[Int -- Int] ) ;";
2199 let mut parser = Parser::new(source);
2200 let program = parser.parse().unwrap();
2201
2202 assert_eq!(program.words.len(), 1);
2203 let word = &program.words[0];
2204 assert!(word.effect.is_some());
2205
2206 let effect = word.effect.as_ref().unwrap();
2207
2208 let (input_rest, input_top) = effect.inputs.clone().pop().unwrap();
2210 assert_eq!(input_top, Type::Int);
2211 assert_eq!(input_rest, StackType::RowVar("rest".to_string()));
2212
2213 let (output_rest, output_top) = effect.outputs.clone().pop().unwrap();
2215 match output_top {
2216 Type::Closure { effect, captures } => {
2217 assert_eq!(
2219 effect.inputs,
2220 StackType::Cons {
2221 rest: Box::new(StackType::RowVar("rest".to_string())),
2222 top: Type::Int
2223 }
2224 );
2225 assert_eq!(
2226 effect.outputs,
2227 StackType::Cons {
2228 rest: Box::new(StackType::RowVar("rest".to_string())),
2229 top: Type::Int
2230 }
2231 );
2232 assert_eq!(captures.len(), 0);
2234 }
2235 _ => panic!("Expected Closure type, got {:?}", output_top),
2236 }
2237 assert_eq!(output_rest, StackType::RowVar("rest".to_string()));
2238 }
2239
2240 #[test]
2241 fn test_parse_closure_type_with_row_vars() {
2242 let source = ": make-handler ( ..a Config -- ..a Closure[Request -- Response] ) ;";
2244 let mut parser = Parser::new(source);
2245 let program = parser.parse().unwrap();
2246
2247 let effect = program.words[0].effect.as_ref().unwrap();
2248
2249 let (rest, top) = effect.outputs.clone().pop().unwrap();
2251 match top {
2252 Type::Closure { effect, .. } => {
2253 let (_, in_top) = effect.inputs.clone().pop().unwrap();
2255 assert_eq!(in_top, Type::Var("Request".to_string()));
2256 let (_, out_top) = effect.outputs.clone().pop().unwrap();
2257 assert_eq!(out_top, Type::Var("Response".to_string()));
2258 }
2259 _ => panic!("Expected Closure type"),
2260 }
2261 assert_eq!(rest, StackType::RowVar("a".to_string()));
2262 }
2263
2264 #[test]
2265 fn test_parse_closure_type_missing_bracket() {
2266 let source = ": broken ( Int -- Closure ) ;";
2268 let mut parser = Parser::new(source);
2269 let result = parser.parse();
2270
2271 assert!(result.is_err());
2272 let err_msg = result.unwrap_err();
2273 assert!(
2274 err_msg.contains("[") && err_msg.contains("Closure"),
2275 "Expected error about missing '[' after Closure, got: {}",
2276 err_msg
2277 );
2278 }
2279
2280 #[test]
2281 fn test_parse_closure_type_in_input() {
2282 let source = ": apply-closure ( Closure[Int -- Int] -- ) ;";
2284 let mut parser = Parser::new(source);
2285 let program = parser.parse().unwrap();
2286
2287 let effect = program.words[0].effect.as_ref().unwrap();
2288
2289 let (_, top) = effect.inputs.clone().pop().unwrap();
2291 match top {
2292 Type::Closure { effect, .. } => {
2293 assert!(matches!(effect.inputs.clone().pop().unwrap().1, Type::Int));
2295 assert!(matches!(effect.outputs.clone().pop().unwrap().1, Type::Int));
2296 }
2297 _ => panic!("Expected Closure type in input"),
2298 }
2299 }
2300
2301 #[test]
2304 fn test_token_position_single_line() {
2305 let source = ": main ( -- ) ;";
2307 let tokens = tokenize(source);
2308
2309 assert_eq!(tokens[0].text, ":");
2311 assert_eq!(tokens[0].line, 0);
2312 assert_eq!(tokens[0].column, 0);
2313
2314 assert_eq!(tokens[1].text, "main");
2316 assert_eq!(tokens[1].line, 0);
2317 assert_eq!(tokens[1].column, 2);
2318
2319 assert_eq!(tokens[2].text, "(");
2321 assert_eq!(tokens[2].line, 0);
2322 assert_eq!(tokens[2].column, 7);
2323 }
2324
2325 #[test]
2326 fn test_token_position_multiline() {
2327 let source = ": main ( -- )\n 42\n;";
2329 let tokens = tokenize(source);
2330
2331 let token_42 = tokens.iter().find(|t| t.text == "42").unwrap();
2333 assert_eq!(token_42.line, 1);
2334 assert_eq!(token_42.column, 2); let token_semi = tokens.iter().find(|t| t.text == ";").unwrap();
2338 assert_eq!(token_semi.line, 2);
2339 assert_eq!(token_semi.column, 0);
2340 }
2341
2342 #[test]
2343 fn test_word_def_source_location_span() {
2344 let source = r#": helper ( -- )
2346 "hello"
2347 write_line
2348;
2349
2350: main ( -- )
2351 helper
2352;"#;
2353
2354 let mut parser = Parser::new(source);
2355 let program = parser.parse().unwrap();
2356
2357 assert_eq!(program.words.len(), 2);
2358
2359 let helper = &program.words[0];
2361 assert_eq!(helper.name, "helper");
2362 let helper_source = helper.source.as_ref().unwrap();
2363 assert_eq!(helper_source.start_line, 0);
2364 assert_eq!(helper_source.end_line, 3);
2365
2366 let main_word = &program.words[1];
2368 assert_eq!(main_word.name, "main");
2369 let main_source = main_word.source.as_ref().unwrap();
2370 assert_eq!(main_source.start_line, 5);
2371 assert_eq!(main_source.end_line, 7);
2372 }
2373
2374 #[test]
2375 fn test_token_position_string_with_newline() {
2376 let source = "\"line1\\nline2\"";
2378 let tokens = tokenize(source);
2379
2380 assert_eq!(tokens.len(), 1);
2382 assert_eq!(tokens[0].line, 0);
2383 assert_eq!(tokens[0].column, 0);
2384 }
2385
2386 #[test]
2391 fn test_parse_simple_union() {
2392 let source = r#"
2393union Message {
2394 Get { response-chan: Int }
2395 Set { value: Int }
2396}
2397
2398: main ( -- ) ;
2399"#;
2400
2401 let mut parser = Parser::new(source);
2402 let program = parser.parse().unwrap();
2403
2404 assert_eq!(program.unions.len(), 1);
2405 let union_def = &program.unions[0];
2406 assert_eq!(union_def.name, "Message");
2407 assert_eq!(union_def.variants.len(), 2);
2408
2409 assert_eq!(union_def.variants[0].name, "Get");
2411 assert_eq!(union_def.variants[0].fields.len(), 1);
2412 assert_eq!(union_def.variants[0].fields[0].name, "response-chan");
2413 assert_eq!(union_def.variants[0].fields[0].type_name, "Int");
2414
2415 assert_eq!(union_def.variants[1].name, "Set");
2417 assert_eq!(union_def.variants[1].fields.len(), 1);
2418 assert_eq!(union_def.variants[1].fields[0].name, "value");
2419 assert_eq!(union_def.variants[1].fields[0].type_name, "Int");
2420 }
2421
2422 #[test]
2423 fn test_parse_union_with_multiple_fields() {
2424 let source = r#"
2425union Report {
2426 Data { op: Int, delta: Int, total: Int }
2427 Empty
2428}
2429
2430: main ( -- ) ;
2431"#;
2432
2433 let mut parser = Parser::new(source);
2434 let program = parser.parse().unwrap();
2435
2436 assert_eq!(program.unions.len(), 1);
2437 let union_def = &program.unions[0];
2438 assert_eq!(union_def.name, "Report");
2439 assert_eq!(union_def.variants.len(), 2);
2440
2441 let data_variant = &union_def.variants[0];
2443 assert_eq!(data_variant.name, "Data");
2444 assert_eq!(data_variant.fields.len(), 3);
2445 assert_eq!(data_variant.fields[0].name, "op");
2446 assert_eq!(data_variant.fields[1].name, "delta");
2447 assert_eq!(data_variant.fields[2].name, "total");
2448
2449 let empty_variant = &union_def.variants[1];
2451 assert_eq!(empty_variant.name, "Empty");
2452 assert_eq!(empty_variant.fields.len(), 0);
2453 }
2454
2455 #[test]
2456 fn test_parse_union_lowercase_name_error() {
2457 let source = r#"
2458union message {
2459 Get { }
2460}
2461"#;
2462
2463 let mut parser = Parser::new(source);
2464 let result = parser.parse();
2465 assert!(result.is_err());
2466 assert!(result.unwrap_err().contains("uppercase"));
2467 }
2468
2469 #[test]
2470 fn test_parse_union_empty_error() {
2471 let source = r#"
2472union Message {
2473}
2474"#;
2475
2476 let mut parser = Parser::new(source);
2477 let result = parser.parse();
2478 assert!(result.is_err());
2479 assert!(result.unwrap_err().contains("at least one variant"));
2480 }
2481
2482 #[test]
2483 fn test_parse_union_duplicate_variant_error() {
2484 let source = r#"
2485union Message {
2486 Get { x: Int }
2487 Get { y: String }
2488}
2489"#;
2490
2491 let mut parser = Parser::new(source);
2492 let result = parser.parse();
2493 assert!(result.is_err());
2494 let err = result.unwrap_err();
2495 assert!(err.contains("Duplicate variant name"));
2496 assert!(err.contains("Get"));
2497 }
2498
2499 #[test]
2500 fn test_parse_union_duplicate_field_error() {
2501 let source = r#"
2502union Data {
2503 Record { x: Int, x: String }
2504}
2505"#;
2506
2507 let mut parser = Parser::new(source);
2508 let result = parser.parse();
2509 assert!(result.is_err());
2510 let err = result.unwrap_err();
2511 assert!(err.contains("Duplicate field name"));
2512 assert!(err.contains("x"));
2513 }
2514
2515 #[test]
2516 fn test_parse_simple_match() {
2517 let source = r#"
2518: handle ( -- )
2519 match
2520 Get -> send-response
2521 Set -> process-set
2522 end
2523;
2524"#;
2525
2526 let mut parser = Parser::new(source);
2527 let program = parser.parse().unwrap();
2528
2529 assert_eq!(program.words.len(), 1);
2530 assert_eq!(program.words[0].body.len(), 1);
2531
2532 match &program.words[0].body[0] {
2533 Statement::Match { arms } => {
2534 assert_eq!(arms.len(), 2);
2535
2536 match &arms[0].pattern {
2538 Pattern::Variant(name) => assert_eq!(name, "Get"),
2539 _ => panic!("Expected Variant pattern"),
2540 }
2541 assert_eq!(arms[0].body.len(), 1);
2542
2543 match &arms[1].pattern {
2545 Pattern::Variant(name) => assert_eq!(name, "Set"),
2546 _ => panic!("Expected Variant pattern"),
2547 }
2548 assert_eq!(arms[1].body.len(), 1);
2549 }
2550 _ => panic!("Expected Match statement"),
2551 }
2552 }
2553
2554 #[test]
2555 fn test_parse_match_with_bindings() {
2556 let source = r#"
2557: handle ( -- )
2558 match
2559 Get { >chan } -> chan send-response
2560 Report { >delta >total } -> delta total process
2561 end
2562;
2563"#;
2564
2565 let mut parser = Parser::new(source);
2566 let program = parser.parse().unwrap();
2567
2568 assert_eq!(program.words.len(), 1);
2569
2570 match &program.words[0].body[0] {
2571 Statement::Match { arms } => {
2572 assert_eq!(arms.len(), 2);
2573
2574 match &arms[0].pattern {
2576 Pattern::VariantWithBindings { name, bindings } => {
2577 assert_eq!(name, "Get");
2578 assert_eq!(bindings.len(), 1);
2579 assert_eq!(bindings[0], "chan");
2580 }
2581 _ => panic!("Expected VariantWithBindings pattern"),
2582 }
2583
2584 match &arms[1].pattern {
2586 Pattern::VariantWithBindings { name, bindings } => {
2587 assert_eq!(name, "Report");
2588 assert_eq!(bindings.len(), 2);
2589 assert_eq!(bindings[0], "delta");
2590 assert_eq!(bindings[1], "total");
2591 }
2592 _ => panic!("Expected VariantWithBindings pattern"),
2593 }
2594 }
2595 _ => panic!("Expected Match statement"),
2596 }
2597 }
2598
2599 #[test]
2600 fn test_parse_match_bindings_require_prefix() {
2601 let source = r#"
2603: handle ( -- )
2604 match
2605 Get { chan } -> chan send-response
2606 end
2607;
2608"#;
2609
2610 let mut parser = Parser::new(source);
2611 let result = parser.parse();
2612 assert!(result.is_err());
2613 let err = result.unwrap_err();
2614 assert!(err.contains(">chan"));
2615 assert!(err.contains("stack extraction"));
2616 }
2617
2618 #[test]
2619 fn test_parse_match_with_body_statements() {
2620 let source = r#"
2621: handle ( -- )
2622 match
2623 Get -> 1 2 add send-response
2624 Set -> process-value store
2625 end
2626;
2627"#;
2628
2629 let mut parser = Parser::new(source);
2630 let program = parser.parse().unwrap();
2631
2632 match &program.words[0].body[0] {
2633 Statement::Match { arms } => {
2634 assert_eq!(arms[0].body.len(), 4);
2636 assert_eq!(arms[0].body[0], Statement::IntLiteral(1));
2637 assert_eq!(arms[0].body[1], Statement::IntLiteral(2));
2638 assert!(
2639 matches!(&arms[0].body[2], Statement::WordCall { name, .. } if name == "add")
2640 );
2641
2642 assert_eq!(arms[1].body.len(), 2);
2644 }
2645 _ => panic!("Expected Match statement"),
2646 }
2647 }
2648
2649 #[test]
2650 fn test_parse_match_empty_error() {
2651 let source = r#"
2652: handle ( -- )
2653 match
2654 end
2655;
2656"#;
2657
2658 let mut parser = Parser::new(source);
2659 let result = parser.parse();
2660 assert!(result.is_err());
2661 assert!(result.unwrap_err().contains("at least one arm"));
2662 }
2663
2664 #[test]
2665 fn test_parse_symbol_literal() {
2666 let source = r#"
2667: main ( -- )
2668 :hello drop
2669;
2670"#;
2671
2672 let mut parser = Parser::new(source);
2673 let program = parser.parse().unwrap();
2674 assert_eq!(program.words.len(), 1);
2675
2676 let main = &program.words[0];
2677 assert_eq!(main.body.len(), 2);
2678
2679 match &main.body[0] {
2680 Statement::Symbol(name) => assert_eq!(name, "hello"),
2681 _ => panic!("Expected Symbol statement, got {:?}", main.body[0]),
2682 }
2683 }
2684
2685 #[test]
2686 fn test_parse_symbol_with_hyphen() {
2687 let source = r#"
2688: main ( -- )
2689 :hello-world drop
2690;
2691"#;
2692
2693 let mut parser = Parser::new(source);
2694 let program = parser.parse().unwrap();
2695
2696 match &program.words[0].body[0] {
2697 Statement::Symbol(name) => assert_eq!(name, "hello-world"),
2698 _ => panic!("Expected Symbol statement"),
2699 }
2700 }
2701
2702 #[test]
2703 fn test_parse_symbol_starting_with_digit_fails() {
2704 let source = r#"
2705: main ( -- )
2706 :123abc drop
2707;
2708"#;
2709
2710 let mut parser = Parser::new(source);
2711 let result = parser.parse();
2712 assert!(result.is_err());
2713 assert!(result.unwrap_err().contains("cannot start with a digit"));
2714 }
2715
2716 #[test]
2717 fn test_parse_symbol_with_invalid_char_fails() {
2718 let source = r#"
2719: main ( -- )
2720 :hello@world drop
2721;
2722"#;
2723
2724 let mut parser = Parser::new(source);
2725 let result = parser.parse();
2726 assert!(result.is_err());
2727 assert!(result.unwrap_err().contains("invalid character"));
2728 }
2729
2730 #[test]
2731 fn test_parse_symbol_special_chars_allowed() {
2732 let source = r#"
2734: main ( -- )
2735 :empty? drop
2736 :save! drop
2737;
2738"#;
2739
2740 let mut parser = Parser::new(source);
2741 let program = parser.parse().unwrap();
2742
2743 match &program.words[0].body[0] {
2744 Statement::Symbol(name) => assert_eq!(name, "empty?"),
2745 _ => panic!("Expected Symbol statement"),
2746 }
2747 match &program.words[0].body[2] {
2748 Statement::Symbol(name) => assert_eq!(name, "save!"),
2749 _ => panic!("Expected Symbol statement"),
2750 }
2751 }
2752}