1use crate::ast::{
12 Include, MatchArm, Pattern, Program, SourceLocation, Statement, UnionDef, UnionField,
13 UnionVariant, WordDef,
14};
15use crate::types::{Effect, SideEffect, StackType, Type};
16
/// A single lexical token together with the position where it starts.
#[derive(Debug, Clone)]
pub struct Token {
    /// The token text as produced by `tokenize` (string literals keep their quotes).
    pub text: String,
    /// Zero-based line of the token's first character (error messages add 1).
    pub line: usize,
    /// Zero-based column of the token's first character, counted in `char`s.
    pub column: usize,
}
26
27impl Token {
28 fn new(text: String, line: usize, column: usize) -> Self {
29 Token { text, line, column }
30 }
31}
32
33impl PartialEq<&str> for Token {
34 fn eq(&self, other: &&str) -> bool {
35 self.text == *other
36 }
37}
38
39impl PartialEq<str> for Token {
40 fn eq(&self, other: &str) -> bool {
41 self.text == other
42 }
43}
44
/// Parser that walks a pre-tokenized source text and builds a `Program`.
pub struct Parser {
    /// All tokens of the source, produced by `tokenize` in `Parser::new`.
    tokens: Vec<Token>,
    /// Index into `tokens` of the next token to be consumed.
    pos: usize,
    /// Id assigned to the next parsed quotation; incremented after each use.
    next_quotation_id: usize,
}
52
53impl Parser {
54 pub fn new(source: &str) -> Self {
55 let tokens = tokenize(source);
56 Parser {
57 tokens,
58 pos: 0,
59 next_quotation_id: 0,
60 }
61 }
62
63 pub fn parse(&mut self) -> Result<Program, String> {
64 let mut program = Program::new();
65
66 if let Some(error_token) = self.tokens.iter().find(|t| *t == "<<<UNCLOSED_STRING>>>") {
68 return Err(format!(
69 "Unclosed string literal at line {}, column {} - missing closing quote",
70 error_token.line + 1, error_token.column + 1
72 ));
73 }
74
75 while !self.is_at_end() {
76 self.skip_comments();
77 if self.is_at_end() {
78 break;
79 }
80
81 if self.check("include") {
83 let include = self.parse_include()?;
84 program.includes.push(include);
85 continue;
86 }
87
88 if self.check("union") {
90 let union_def = self.parse_union_def()?;
91 program.unions.push(union_def);
92 continue;
93 }
94
95 let word = self.parse_word_def()?;
96 program.words.push(word);
97 }
98
99 Ok(program)
100 }
101
102 fn parse_include(&mut self) -> Result<Include, String> {
107 self.consume("include");
108
109 let token = self
110 .advance()
111 .ok_or("Expected module name after 'include'")?
112 .clone();
113
114 if token == "std" {
116 if !self.consume(":") {
118 return Err("Expected ':' after 'std' in include statement".to_string());
119 }
120 let name = self
122 .advance()
123 .ok_or("Expected module name after 'std:'")?
124 .clone();
125 return Ok(Include::Std(name));
126 }
127
128 if token == "ffi" {
130 if !self.consume(":") {
132 return Err("Expected ':' after 'ffi' in include statement".to_string());
133 }
134 let name = self
136 .advance()
137 .ok_or("Expected library name after 'ffi:'")?
138 .clone();
139 return Ok(Include::Ffi(name));
140 }
141
142 if token.starts_with('"') && token.ends_with('"') {
144 let path = token.trim_start_matches('"').trim_end_matches('"');
145 return Ok(Include::Relative(path.to_string()));
146 }
147
148 Err(format!(
149 "Invalid include syntax '{}'. Use 'include std:name', 'include ffi:lib', or 'include \"path\"'",
150 token
151 ))
152 }
153
154 fn parse_union_def(&mut self) -> Result<UnionDef, String> {
161 let start_line = self.current_token().map(|t| t.line).unwrap_or(0);
163
164 self.consume("union");
166
167 let name = self
169 .advance()
170 .ok_or("Expected union name after 'union'")?
171 .clone();
172
173 if !name
174 .chars()
175 .next()
176 .map(|c| c.is_uppercase())
177 .unwrap_or(false)
178 {
179 return Err(format!(
180 "Union name '{}' must start with an uppercase letter",
181 name
182 ));
183 }
184
185 self.skip_comments();
187
188 if !self.consume("{") {
190 return Err(format!(
191 "Expected '{{' after union name '{}', got '{}'",
192 name,
193 self.current()
194 ));
195 }
196
197 let mut variants = Vec::new();
199 loop {
200 self.skip_comments();
201
202 if self.check("}") {
203 break;
204 }
205
206 if self.is_at_end() {
207 return Err(format!("Unexpected end of file in union '{}'", name));
208 }
209
210 variants.push(self.parse_union_variant()?);
211 }
212
213 let end_line = self.current_token().map(|t| t.line).unwrap_or(start_line);
215
216 self.consume("}");
218
219 if variants.is_empty() {
220 return Err(format!("Union '{}' must have at least one variant", name));
221 }
222
223 let mut seen_variants = std::collections::HashSet::new();
225 for variant in &variants {
226 if !seen_variants.insert(&variant.name) {
227 return Err(format!(
228 "Duplicate variant name '{}' in union '{}'",
229 variant.name, name
230 ));
231 }
232 }
233
234 Ok(UnionDef {
235 name,
236 variants,
237 source: Some(SourceLocation::span(
238 std::path::PathBuf::new(),
239 start_line,
240 end_line,
241 )),
242 })
243 }
244
245 fn parse_union_variant(&mut self) -> Result<UnionVariant, String> {
249 let start_line = self.current_token().map(|t| t.line).unwrap_or(0);
250
251 let name = self.advance().ok_or("Expected variant name")?.clone();
253
254 if !name
255 .chars()
256 .next()
257 .map(|c| c.is_uppercase())
258 .unwrap_or(false)
259 {
260 return Err(format!(
261 "Variant name '{}' must start with an uppercase letter",
262 name
263 ));
264 }
265
266 self.skip_comments();
267
268 let fields = if self.check("{") {
270 self.consume("{");
271 let fields = self.parse_union_fields()?;
272 if !self.consume("}") {
273 return Err(format!("Expected '}}' after variant '{}' fields", name));
274 }
275 fields
276 } else {
277 Vec::new()
278 };
279
280 Ok(UnionVariant {
281 name,
282 fields,
283 source: Some(SourceLocation::new(std::path::PathBuf::new(), start_line)),
284 })
285 }
286
287 fn parse_union_fields(&mut self) -> Result<Vec<UnionField>, String> {
289 let mut fields = Vec::new();
290
291 loop {
292 self.skip_comments();
293
294 if self.check("}") {
295 break;
296 }
297
298 let field_name = self.advance().ok_or("Expected field name")?.clone();
300
301 if !self.consume(":") {
303 return Err(format!(
304 "Expected ':' after field name '{}', got '{}'",
305 field_name,
306 self.current()
307 ));
308 }
309
310 let type_name = self
312 .advance()
313 .ok_or("Expected type name after ':'")?
314 .clone();
315
316 fields.push(UnionField {
317 name: field_name,
318 type_name,
319 });
320
321 self.skip_comments();
323 self.consume(",");
324 }
325
326 let mut seen_fields = std::collections::HashSet::new();
328 for field in &fields {
329 if !seen_fields.insert(&field.name) {
330 return Err(format!("Duplicate field name '{}' in variant", field.name));
331 }
332 }
333
334 Ok(fields)
335 }
336
337 fn parse_word_def(&mut self) -> Result<WordDef, String> {
338 let start_line = self.current_token().map(|t| t.line).unwrap_or(0);
340
341 if !self.consume(":") {
343 return Err(format!(
344 "Expected ':' to start word definition, got '{}'",
345 self.current()
346 ));
347 }
348
349 let name = self
351 .advance()
352 .ok_or("Expected word name after ':'")?
353 .clone();
354
355 let effect = if self.check("(") {
357 Some(self.parse_stack_effect()?)
358 } else {
359 None
360 };
361
362 let mut body = Vec::new();
364 while !self.check(";") {
365 if self.is_at_end() {
366 return Err(format!("Unexpected end of file in word '{}'", name));
367 }
368
369 self.skip_comments();
371 if self.check(";") {
372 break;
373 }
374
375 body.push(self.parse_statement()?);
376 }
377
378 let end_line = self.current_token().map(|t| t.line).unwrap_or(start_line);
380
381 self.consume(";");
383
384 Ok(WordDef {
385 name,
386 effect,
387 body,
388 source: Some(crate::ast::SourceLocation::span(
389 std::path::PathBuf::new(),
390 start_line,
391 end_line,
392 )),
393 })
394 }
395
396 fn parse_statement(&mut self) -> Result<Statement, String> {
397 use crate::ast::Span;
398 let tok = self.advance_token().ok_or("Unexpected end of file")?;
399 let token = &tok.text;
400 let tok_line = tok.line;
401 let tok_column = tok.column;
402 let tok_len = tok.text.len();
403
404 if let Some(f) = is_float_literal(token)
407 .then(|| token.parse::<f64>().ok())
408 .flatten()
409 {
410 return Ok(Statement::FloatLiteral(f));
411 }
412
413 if let Some(hex) = token
415 .strip_prefix("0x")
416 .or_else(|| token.strip_prefix("0X"))
417 {
418 return i64::from_str_radix(hex, 16)
419 .map(Statement::IntLiteral)
420 .map_err(|_| format!("Invalid hex literal: {}", token));
421 }
422
423 if let Some(bin) = token
425 .strip_prefix("0b")
426 .or_else(|| token.strip_prefix("0B"))
427 {
428 return i64::from_str_radix(bin, 2)
429 .map(Statement::IntLiteral)
430 .map_err(|_| format!("Invalid binary literal: {}", token));
431 }
432
433 if let Ok(n) = token.parse::<i64>() {
435 return Ok(Statement::IntLiteral(n));
436 }
437
438 if token == "true" {
440 return Ok(Statement::BoolLiteral(true));
441 }
442 if token == "false" {
443 return Ok(Statement::BoolLiteral(false));
444 }
445
446 if token == ":" {
448 let name_tok = self
450 .advance_token()
451 .ok_or("Expected symbol name after ':', got end of input")?;
452 let name = &name_tok.text;
453 if name.is_empty() {
455 return Err("Symbol name cannot be empty".to_string());
456 }
457 if name.starts_with(|c: char| c.is_ascii_digit()) {
458 return Err(format!(
459 "Symbol name cannot start with a digit: ':{}'\n Hint: Symbol names must start with a letter",
460 name
461 ));
462 }
463 if let Some(bad_char) = name.chars().find(|c| {
464 !c.is_alphanumeric()
465 && *c != '-'
466 && *c != '_'
467 && *c != '.'
468 && *c != '?'
469 && *c != '!'
470 }) {
471 return Err(format!(
472 "Symbol name contains invalid character '{}': ':{}'\n Hint: Allowed: letters, digits, - _ . ? !",
473 bad_char, name
474 ));
475 }
476 return Ok(Statement::Symbol(name.clone()));
477 }
478
479 if token.starts_with('"') {
481 if token.len() < 2 || !token.ends_with('"') {
483 return Err(format!("Malformed string literal: {}", token));
484 }
485 let raw = &token[1..token.len() - 1];
488 let unescaped = unescape_string(raw)?;
489 return Ok(Statement::StringLiteral(unescaped));
490 }
491
492 if token == "if" {
494 return self.parse_if();
495 }
496
497 if token == "[" {
499 return self.parse_quotation(tok_line, tok_column);
500 }
501
502 if token == "match" {
504 return self.parse_match();
505 }
506
507 Ok(Statement::WordCall {
509 name: token.to_string(),
510 span: Some(Span::new(tok_line, tok_column, tok_len)),
511 })
512 }
513
514 fn parse_if(&mut self) -> Result<Statement, String> {
515 let mut then_branch = Vec::new();
516
517 loop {
519 if self.is_at_end() {
520 return Err("Unexpected end of file in 'if' statement".to_string());
521 }
522
523 self.skip_comments();
525
526 if self.check("else") {
527 self.advance();
528 break;
530 }
531
532 if self.check("then") {
533 self.advance();
534 return Ok(Statement::If {
536 then_branch,
537 else_branch: None,
538 });
539 }
540
541 then_branch.push(self.parse_statement()?);
542 }
543
544 let mut else_branch = Vec::new();
546 loop {
547 if self.is_at_end() {
548 return Err("Unexpected end of file in 'else' branch".to_string());
549 }
550
551 self.skip_comments();
553
554 if self.check("then") {
555 self.advance();
556 return Ok(Statement::If {
557 then_branch,
558 else_branch: Some(else_branch),
559 });
560 }
561
562 else_branch.push(self.parse_statement()?);
563 }
564 }
565
566 fn parse_quotation(
567 &mut self,
568 start_line: usize,
569 start_column: usize,
570 ) -> Result<Statement, String> {
571 use crate::ast::QuotationSpan;
572 let mut body = Vec::new();
573
574 loop {
576 if self.is_at_end() {
577 return Err("Unexpected end of file in quotation".to_string());
578 }
579
580 self.skip_comments();
582
583 if self.check("]") {
584 let end_tok = self.advance_token().unwrap();
585 let end_line = end_tok.line;
586 let end_column = end_tok.column + 1; let id = self.next_quotation_id;
588 self.next_quotation_id += 1;
589 let span = QuotationSpan::new(start_line, start_column, end_line, end_column);
591 return Ok(Statement::Quotation {
592 id,
593 body,
594 span: Some(span),
595 });
596 }
597
598 body.push(self.parse_statement()?);
599 }
600 }
601
602 fn parse_match(&mut self) -> Result<Statement, String> {
609 let mut arms = Vec::new();
610
611 loop {
612 self.skip_comments();
613
614 if self.check("end") {
616 self.advance();
617 break;
618 }
619
620 if self.is_at_end() {
621 return Err("Unexpected end of file in match expression".to_string());
622 }
623
624 arms.push(self.parse_match_arm()?);
625 }
626
627 if arms.is_empty() {
628 return Err("Match expression must have at least one arm".to_string());
629 }
630
631 Ok(Statement::Match { arms })
632 }
633
634 fn parse_match_arm(&mut self) -> Result<MatchArm, String> {
639 let variant_name = self
641 .advance()
642 .ok_or("Expected variant name in match arm")?
643 .clone();
644
645 self.skip_comments();
646
647 let pattern = if self.check("{") {
649 self.consume("{");
650 let mut bindings = Vec::new();
651
652 loop {
653 self.skip_comments();
654
655 if self.check("}") {
656 break;
657 }
658
659 if self.is_at_end() {
660 return Err(format!(
661 "Unexpected end of file in match arm bindings for '{}'",
662 variant_name
663 ));
664 }
665
666 let token = self.advance().ok_or("Expected binding name")?.clone();
667
668 if let Some(field_name) = token.strip_prefix('>') {
670 if field_name.is_empty() {
671 return Err(format!(
672 "Expected field name after '>' in match bindings for '{}'",
673 variant_name
674 ));
675 }
676 bindings.push(field_name.to_string());
677 } else {
678 return Err(format!(
679 "Match bindings must use '>' prefix to indicate stack extraction. \
680 Use '>{}' instead of '{}' in pattern for '{}'",
681 token, token, variant_name
682 ));
683 }
684 }
685
686 self.consume("}");
687 Pattern::VariantWithBindings {
688 name: variant_name,
689 bindings,
690 }
691 } else {
692 Pattern::Variant(variant_name.clone())
693 };
694
695 self.skip_comments();
696
697 if !self.consume("->") {
699 return Err(format!(
700 "Expected '->' after pattern '{}', got '{}'",
701 match &pattern {
702 Pattern::Variant(n) => n.clone(),
703 Pattern::VariantWithBindings { name, .. } => name.clone(),
704 },
705 self.current()
706 ));
707 }
708
709 let mut body = Vec::new();
711 loop {
712 self.skip_comments();
713
714 if self.check("end") {
716 break;
717 }
718
719 if let Some(token) = self.current_token()
723 && let Some(first_char) = token.text.chars().next()
724 && first_char.is_uppercase()
725 {
726 if let Some(next) = self.peek_at(1)
728 && (next == "->" || next == "{")
729 {
730 break;
732 }
733 }
735
736 if self.is_at_end() {
737 return Err("Unexpected end of file in match arm body".to_string());
738 }
739
740 body.push(self.parse_statement()?);
741 }
742
743 Ok(MatchArm { pattern, body })
744 }
745
746 fn parse_stack_effect(&mut self) -> Result<Effect, String> {
749 if !self.consume("(") {
751 return Err("Expected '(' to start stack effect".to_string());
752 }
753
754 let (input_row_var, input_types) =
756 self.parse_type_list_until(&["--", ")"], "stack effect inputs", 0)?;
757
758 if !self.consume("--") {
760 return Err("Expected '--' separator in stack effect".to_string());
761 }
762
763 let (output_row_var, output_types) =
765 self.parse_type_list_until(&[")", "|"], "stack effect outputs", 0)?;
766
767 let effects = if self.consume("|") {
769 self.parse_effect_annotations()?
770 } else {
771 Vec::new()
772 };
773
774 if !self.consume(")") {
776 return Err("Expected ')' to end stack effect".to_string());
777 }
778
779 let inputs = self.build_stack_type(input_row_var, input_types);
781 let outputs = self.build_stack_type(output_row_var, output_types);
782
783 Ok(Effect::with_effects(inputs, outputs, effects))
784 }
785
786 fn parse_effect_annotations(&mut self) -> Result<Vec<SideEffect>, String> {
789 let mut effects = Vec::new();
790
791 while let Some(token) = self.peek_at(0) {
793 if token == ")" {
794 break;
795 }
796
797 match token {
798 "Yield" => {
799 self.advance(); if let Some(type_token) = self.current_token() {
802 if type_token.text == ")" {
803 return Err("Expected type after 'Yield'".to_string());
804 }
805 let type_token = type_token.clone();
806 self.advance();
807 let yield_type = self.parse_type(&type_token)?;
808 effects.push(SideEffect::Yield(Box::new(yield_type)));
809 } else {
810 return Err("Expected type after 'Yield'".to_string());
811 }
812 }
813 _ => {
814 return Err(format!("Unknown effect '{}'. Expected 'Yield'", token));
815 }
816 }
817 }
818
819 if effects.is_empty() {
820 return Err("Expected at least one effect after '|'".to_string());
821 }
822
823 Ok(effects)
824 }
825
826 fn parse_type(&self, token: &Token) -> Result<Type, String> {
828 match token.text.as_str() {
829 "Int" => Ok(Type::Int),
830 "Float" => Ok(Type::Float),
831 "Bool" => Ok(Type::Bool),
832 "String" => Ok(Type::String),
833 _ => {
834 if let Some(first_char) = token.text.chars().next() {
836 if first_char.is_uppercase() {
837 Ok(Type::Var(token.text.to_string()))
838 } else {
839 Err(format!(
840 "Unknown type: '{}' at line {}, column {}. Expected Int, Bool, String, Closure, or a type variable (uppercase)",
841 token.text.escape_default(),
842 token.line + 1, token.column + 1
844 ))
845 }
846 } else {
847 Err(format!(
848 "Invalid type: '{}' at line {}, column {}",
849 token.text.escape_default(),
850 token.line + 1,
851 token.column + 1
852 ))
853 }
854 }
855 }
856 }
857
858 fn validate_row_var_name(&self, name: &str) -> Result<(), String> {
861 if name.is_empty() {
862 return Err("Row variable must have a name after '..'".to_string());
863 }
864
865 let first_char = name.chars().next().unwrap();
867 if !first_char.is_ascii_lowercase() {
868 return Err(format!(
869 "Row variable '..{}' must start with a lowercase letter (a-z)",
870 name
871 ));
872 }
873
874 for ch in name.chars() {
876 if !ch.is_alphanumeric() && ch != '_' {
877 return Err(format!(
878 "Row variable '..{}' can only contain letters, numbers, and underscores",
879 name
880 ));
881 }
882 }
883
884 match name {
886 "Int" | "Bool" | "String" => {
887 return Err(format!(
888 "Row variable '..{}' cannot use type name as identifier",
889 name
890 ));
891 }
892 _ => {}
893 }
894
895 Ok(())
896 }
897
898 fn parse_type_list_until(
904 &mut self,
905 terminators: &[&str],
906 context: &str,
907 depth: usize,
908 ) -> Result<(Option<String>, Vec<Type>), String> {
909 const MAX_QUOTATION_DEPTH: usize = 32;
910
911 if depth > MAX_QUOTATION_DEPTH {
912 return Err(format!(
913 "Quotation type nesting exceeds maximum depth of {} (possible deeply nested types or DOS attack)",
914 MAX_QUOTATION_DEPTH
915 ));
916 }
917
918 let mut types = Vec::new();
919 let mut row_var = None;
920
921 while !terminators.iter().any(|t| self.check(t)) {
922 self.skip_comments();
924
925 if terminators.iter().any(|t| self.check(t)) {
927 break;
928 }
929
930 if self.is_at_end() {
931 return Err(format!(
932 "Unexpected end while parsing {} - expected one of: {}",
933 context,
934 terminators.join(", ")
935 ));
936 }
937
938 let token = self
939 .advance_token()
940 .ok_or_else(|| format!("Unexpected end in {}", context))?
941 .clone();
942
943 if token.text.starts_with("..") {
945 let var_name = token.text.trim_start_matches("..").to_string();
946 self.validate_row_var_name(&var_name)?;
947 row_var = Some(var_name);
948 } else if token.text == "Closure" {
949 if !self.consume("[") {
951 return Err("Expected '[' after 'Closure' in type signature".to_string());
952 }
953 let effect_type = self.parse_quotation_type(depth)?;
954 match effect_type {
955 Type::Quotation(effect) => {
956 types.push(Type::Closure {
957 effect,
958 captures: Vec::new(), });
960 }
961 _ => unreachable!("parse_quotation_type should return Quotation"),
962 }
963 } else if token.text == "[" {
964 types.push(self.parse_quotation_type(depth)?);
966 } else {
967 types.push(self.parse_type(&token)?);
969 }
970 }
971
972 Ok((row_var, types))
973 }
974
975 fn parse_quotation_type(&mut self, depth: usize) -> Result<Type, String> {
980 let (input_row_var, input_types) =
982 self.parse_type_list_until(&["--", "]"], "quotation type inputs", depth + 1)?;
983
984 if !self.consume("--") {
986 if self.check("]") {
988 return Err(
989 "Quotation types require '--' separator. Did you mean '[Int -- ]' or '[ -- Int]'?"
990 .to_string(),
991 );
992 }
993 return Err("Expected '--' separator in quotation type".to_string());
994 }
995
996 let (output_row_var, output_types) =
998 self.parse_type_list_until(&["]"], "quotation type outputs", depth + 1)?;
999
1000 if !self.consume("]") {
1002 return Err("Expected ']' to end quotation type".to_string());
1003 }
1004
1005 let inputs = self.build_stack_type(input_row_var, input_types);
1007 let outputs = self.build_stack_type(output_row_var, output_types);
1008
1009 Ok(Type::Quotation(Box::new(Effect::new(inputs, outputs))))
1010 }
1011
1012 fn build_stack_type(&self, row_var: Option<String>, types: Vec<Type>) -> StackType {
1022 let base = match row_var {
1024 Some(name) => StackType::RowVar(name),
1025 None => StackType::RowVar("rest".to_string()),
1026 };
1027
1028 types.into_iter().fold(base, |stack, ty| stack.push(ty))
1030 }
1031
1032 fn skip_comments(&mut self) {
1033 loop {
1034 if self.check("#") {
1035 while !self.is_at_end() && self.current() != "\n" {
1037 self.advance();
1038 }
1039 if !self.is_at_end() {
1040 self.advance(); }
1042 } else if self.check("\n") {
1043 self.advance();
1045 } else {
1046 break;
1047 }
1048 }
1049 }
1050
1051 fn check(&self, expected: &str) -> bool {
1052 if self.is_at_end() {
1053 return false;
1054 }
1055 self.current() == expected
1056 }
1057
1058 fn consume(&mut self, expected: &str) -> bool {
1059 if self.check(expected) {
1060 self.advance();
1061 true
1062 } else {
1063 false
1064 }
1065 }
1066
1067 fn current(&self) -> &str {
1069 if self.is_at_end() {
1070 ""
1071 } else {
1072 &self.tokens[self.pos].text
1073 }
1074 }
1075
1076 fn current_token(&self) -> Option<&Token> {
1078 if self.is_at_end() {
1079 None
1080 } else {
1081 Some(&self.tokens[self.pos])
1082 }
1083 }
1084
1085 fn peek_at(&self, n: usize) -> Option<&str> {
1087 let idx = self.pos + n;
1088 if idx < self.tokens.len() {
1089 Some(&self.tokens[idx].text)
1090 } else {
1091 None
1092 }
1093 }
1094
1095 fn advance(&mut self) -> Option<&String> {
1097 if self.is_at_end() {
1098 None
1099 } else {
1100 let token = &self.tokens[self.pos];
1101 self.pos += 1;
1102 Some(&token.text)
1103 }
1104 }
1105
1106 fn advance_token(&mut self) -> Option<&Token> {
1108 if self.is_at_end() {
1109 None
1110 } else {
1111 let token = &self.tokens[self.pos];
1112 self.pos += 1;
1113 Some(token)
1114 }
1115 }
1116
1117 fn is_at_end(&self) -> bool {
1118 self.pos >= self.tokens.len()
1119 }
1120}
1121
/// Cheap pre-check for tokens that might be float literals.
///
/// After dropping one optional leading `-`, the remainder must be non-empty
/// and contain a `.`, `e` or `E`. This is only a filter: the caller still has
/// to parse the token as `f64` to confirm it.
fn is_float_literal(token: &str) -> bool {
    let unsigned = match token.strip_prefix('-') {
        Some(rest) => rest,
        None => token,
    };

    !unsigned.is_empty() && unsigned.chars().any(|c| matches!(c, '.' | 'e' | 'E'))
}
1142
/// Resolves backslash escape sequences in the contents of a string literal.
///
/// Supported escapes are `\"`, `\\`, `\n`, `\r` and `\t`; every other
/// character is copied through unchanged.
///
/// # Errors
/// Returns an error for an unknown escape sequence or for a trailing
/// backslash with nothing after it.
fn unescape_string(s: &str) -> Result<String, String> {
    let mut out = String::new();
    let mut pending = s.chars();

    while let Some(c) = pending.next() {
        if c != '\\' {
            out.push(c);
            continue;
        }

        // A backslash was seen: the next character selects the escape.
        let decoded = match pending.next() {
            None => {
                return Err("String ends with incomplete escape sequence '\\'".to_string());
            }
            Some('"') => '"',
            Some('\\') => '\\',
            Some('n') => '\n',
            Some('r') => '\r',
            Some('t') => '\t',
            Some(other) => {
                return Err(format!(
                    "Unknown escape sequence '\\{}' in string literal. \
                     Supported: \\\" \\\\ \\n \\r \\t",
                    other
                ));
            }
        };
        out.push(decoded);
    }

    Ok(out)
}
1184
1185fn tokenize(source: &str) -> Vec<Token> {
1186 let mut tokens = Vec::new();
1187 let mut current = String::new();
1188 let mut current_start_line = 0;
1189 let mut current_start_col = 0;
1190 let mut in_string = false;
1191 let mut prev_was_backslash = false;
1192
1193 let mut line = 0;
1195 let mut col = 0;
1196
1197 for ch in source.chars() {
1198 if in_string {
1199 current.push(ch);
1200 if ch == '"' && !prev_was_backslash {
1201 in_string = false;
1203 tokens.push(Token::new(
1204 current.clone(),
1205 current_start_line,
1206 current_start_col,
1207 ));
1208 current.clear();
1209 prev_was_backslash = false;
1210 } else if ch == '\\' && !prev_was_backslash {
1211 prev_was_backslash = true;
1213 } else {
1214 prev_was_backslash = false;
1216 }
1217 if ch == '\n' {
1219 line += 1;
1220 col = 0;
1221 } else {
1222 col += 1;
1223 }
1224 } else if ch == '"' {
1225 if !current.is_empty() {
1226 tokens.push(Token::new(
1227 current.clone(),
1228 current_start_line,
1229 current_start_col,
1230 ));
1231 current.clear();
1232 }
1233 in_string = true;
1234 current_start_line = line;
1235 current_start_col = col;
1236 current.push(ch);
1237 prev_was_backslash = false;
1238 col += 1;
1239 } else if ch.is_whitespace() {
1240 if !current.is_empty() {
1241 tokens.push(Token::new(
1242 current.clone(),
1243 current_start_line,
1244 current_start_col,
1245 ));
1246 current.clear();
1247 }
1248 if ch == '\n' {
1250 tokens.push(Token::new("\n".to_string(), line, col));
1251 line += 1;
1252 col = 0;
1253 } else {
1254 col += 1;
1255 }
1256 } else if "():;[]{},".contains(ch) {
1257 if !current.is_empty() {
1258 tokens.push(Token::new(
1259 current.clone(),
1260 current_start_line,
1261 current_start_col,
1262 ));
1263 current.clear();
1264 }
1265 tokens.push(Token::new(ch.to_string(), line, col));
1266 col += 1;
1267 } else {
1268 if current.is_empty() {
1269 current_start_line = line;
1270 current_start_col = col;
1271 }
1272 current.push(ch);
1273 col += 1;
1274 }
1275 }
1276
1277 if in_string {
1279 tokens.push(Token::new(
1282 "<<<UNCLOSED_STRING>>>".to_string(),
1283 current_start_line,
1284 current_start_col,
1285 ));
1286 } else if !current.is_empty() {
1287 tokens.push(Token::new(current, current_start_line, current_start_col));
1288 }
1289
1290 tokens
1291}
1292
1293#[cfg(test)]
1294mod tests {
1295 use super::*;
1296
1297 #[test]
1298 fn test_parse_hello_world() {
1299 let source = r#"
1300: main ( -- )
1301 "Hello, World!" write_line ;
1302"#;
1303
1304 let mut parser = Parser::new(source);
1305 let program = parser.parse().unwrap();
1306
1307 assert_eq!(program.words.len(), 1);
1308 assert_eq!(program.words[0].name, "main");
1309 assert_eq!(program.words[0].body.len(), 2);
1310
1311 match &program.words[0].body[0] {
1312 Statement::StringLiteral(s) => assert_eq!(s, "Hello, World!"),
1313 _ => panic!("Expected StringLiteral"),
1314 }
1315
1316 match &program.words[0].body[1] {
1317 Statement::WordCall { name, .. } => assert_eq!(name, "write_line"),
1318 _ => panic!("Expected WordCall"),
1319 }
1320 }
1321
1322 #[test]
1323 fn test_parse_with_numbers() {
1324 let source = ": add-example ( -- ) 2 3 add ;";
1325
1326 let mut parser = Parser::new(source);
1327 let program = parser.parse().unwrap();
1328
1329 assert_eq!(program.words[0].body.len(), 3);
1330 assert_eq!(program.words[0].body[0], Statement::IntLiteral(2));
1331 assert_eq!(program.words[0].body[1], Statement::IntLiteral(3));
1332 assert!(matches!(
1333 &program.words[0].body[2],
1334 Statement::WordCall { name, .. } if name == "add"
1335 ));
1336 }
1337
1338 #[test]
1339 fn test_parse_hex_literals() {
1340 let source = ": test ( -- ) 0xFF 0x10 0X1A ;";
1341 let mut parser = Parser::new(source);
1342 let program = parser.parse().unwrap();
1343
1344 assert_eq!(program.words[0].body[0], Statement::IntLiteral(255));
1345 assert_eq!(program.words[0].body[1], Statement::IntLiteral(16));
1346 assert_eq!(program.words[0].body[2], Statement::IntLiteral(26));
1347 }
1348
1349 #[test]
1350 fn test_parse_binary_literals() {
1351 let source = ": test ( -- ) 0b1010 0B1111 0b0 ;";
1352 let mut parser = Parser::new(source);
1353 let program = parser.parse().unwrap();
1354
1355 assert_eq!(program.words[0].body[0], Statement::IntLiteral(10));
1356 assert_eq!(program.words[0].body[1], Statement::IntLiteral(15));
1357 assert_eq!(program.words[0].body[2], Statement::IntLiteral(0));
1358 }
1359
1360 #[test]
1361 fn test_parse_invalid_hex_literal() {
1362 let source = ": test ( -- ) 0xGG ;";
1363 let mut parser = Parser::new(source);
1364 let err = parser.parse().unwrap_err();
1365 assert!(err.contains("Invalid hex literal"));
1366 }
1367
1368 #[test]
1369 fn test_parse_invalid_binary_literal() {
1370 let source = ": test ( -- ) 0b123 ;";
1371 let mut parser = Parser::new(source);
1372 let err = parser.parse().unwrap_err();
1373 assert!(err.contains("Invalid binary literal"));
1374 }
1375
1376 #[test]
1377 fn test_parse_escaped_quotes() {
1378 let source = r#": main ( -- ) "Say \"hello\" there" write_line ;"#;
1379
1380 let mut parser = Parser::new(source);
1381 let program = parser.parse().unwrap();
1382
1383 assert_eq!(program.words.len(), 1);
1384 assert_eq!(program.words[0].body.len(), 2);
1385
1386 match &program.words[0].body[0] {
1387 Statement::StringLiteral(s) => assert_eq!(s, "Say \"hello\" there"),
1389 _ => panic!("Expected StringLiteral with escaped quotes"),
1390 }
1391 }
1392
1393 #[test]
1396 fn test_escaped_quote_at_end_of_string() {
1397 let source = r#": main ( -- ) "hello\"" io.write-line ;"#;
1398
1399 let mut parser = Parser::new(source);
1400 let program = parser.parse().unwrap();
1401
1402 assert_eq!(program.words.len(), 1);
1403 match &program.words[0].body[0] {
1404 Statement::StringLiteral(s) => assert_eq!(s, "hello\""),
1405 _ => panic!("Expected StringLiteral ending with escaped quote"),
1406 }
1407 }
1408
1409 #[test]
1411 fn test_escaped_quote_at_start_of_string() {
1412 let source = r#": main ( -- ) "\"hello" io.write-line ;"#;
1413
1414 let mut parser = Parser::new(source);
1415 let program = parser.parse().unwrap();
1416
1417 match &program.words[0].body[0] {
1418 Statement::StringLiteral(s) => assert_eq!(s, "\"hello"),
1419 _ => panic!("Expected StringLiteral starting with escaped quote"),
1420 }
1421 }
1422
1423 #[test]
1424 fn test_escape_sequences() {
1425 let source = r#": main ( -- ) "Line 1\nLine 2\tTabbed" write_line ;"#;
1426
1427 let mut parser = Parser::new(source);
1428 let program = parser.parse().unwrap();
1429
1430 match &program.words[0].body[0] {
1431 Statement::StringLiteral(s) => assert_eq!(s, "Line 1\nLine 2\tTabbed"),
1432 _ => panic!("Expected StringLiteral"),
1433 }
1434 }
1435
1436 #[test]
1437 fn test_unknown_escape_sequence() {
1438 let source = r#": main ( -- ) "Bad \x sequence" write_line ;"#;
1439
1440 let mut parser = Parser::new(source);
1441 let result = parser.parse();
1442
1443 assert!(result.is_err());
1444 assert!(result.unwrap_err().contains("Unknown escape sequence"));
1445 }
1446
1447 #[test]
1448 fn test_unclosed_string_literal() {
1449 let source = r#": main ( -- ) "unclosed string ;"#;
1450
1451 let mut parser = Parser::new(source);
1452 let result = parser.parse();
1453
1454 assert!(result.is_err());
1455 let err_msg = result.unwrap_err();
1456 assert!(err_msg.contains("Unclosed string literal"));
1457 assert!(
1459 err_msg.contains("line 1"),
1460 "Expected line number in error: {}",
1461 err_msg
1462 );
1463 assert!(
1464 err_msg.contains("column 15"),
1465 "Expected column number in error: {}",
1466 err_msg
1467 );
1468 }
1469
1470 #[test]
1471 fn test_multiple_word_definitions() {
1472 let source = r#"
1473: double ( Int -- Int )
1474 2 multiply ;
1475
1476: quadruple ( Int -- Int )
1477 double double ;
1478"#;
1479
1480 let mut parser = Parser::new(source);
1481 let program = parser.parse().unwrap();
1482
1483 assert_eq!(program.words.len(), 2);
1484 assert_eq!(program.words[0].name, "double");
1485 assert_eq!(program.words[1].name, "quadruple");
1486
1487 assert!(program.words[0].effect.is_some());
1489 assert!(program.words[1].effect.is_some());
1490 }
1491
1492 #[test]
1493 fn test_user_word_calling_user_word() {
1494 let source = r#"
1495: helper ( -- )
1496 "helper called" write_line ;
1497
1498: main ( -- )
1499 helper ;
1500"#;
1501
1502 let mut parser = Parser::new(source);
1503 let program = parser.parse().unwrap();
1504
1505 assert_eq!(program.words.len(), 2);
1506
1507 match &program.words[1].body[0] {
1509 Statement::WordCall { name, .. } => assert_eq!(name, "helper"),
1510 _ => panic!("Expected WordCall to helper"),
1511 }
1512 }
1513
1514 #[test]
1515 fn test_parse_simple_stack_effect() {
1516 let source = ": test ( Int -- Bool ) 1 ;";
1519 let mut parser = Parser::new(source);
1520 let program = parser.parse().unwrap();
1521
1522 assert_eq!(program.words.len(), 1);
1523 let word = &program.words[0];
1524 assert!(word.effect.is_some());
1525
1526 let effect = word.effect.as_ref().unwrap();
1527
1528 assert_eq!(
1530 effect.inputs,
1531 StackType::Cons {
1532 rest: Box::new(StackType::RowVar("rest".to_string())),
1533 top: Type::Int
1534 }
1535 );
1536
1537 assert_eq!(
1539 effect.outputs,
1540 StackType::Cons {
1541 rest: Box::new(StackType::RowVar("rest".to_string())),
1542 top: Type::Bool
1543 }
1544 );
1545 }
1546
1547 #[test]
1548 fn test_parse_row_polymorphic_stack_effect() {
1549 let source = ": test ( ..a Int -- ..a Bool ) 1 ;";
1551 let mut parser = Parser::new(source);
1552 let program = parser.parse().unwrap();
1553
1554 assert_eq!(program.words.len(), 1);
1555 let word = &program.words[0];
1556 assert!(word.effect.is_some());
1557
1558 let effect = word.effect.as_ref().unwrap();
1559
1560 assert_eq!(
1562 effect.inputs,
1563 StackType::Cons {
1564 rest: Box::new(StackType::RowVar("a".to_string())),
1565 top: Type::Int
1566 }
1567 );
1568
1569 assert_eq!(
1571 effect.outputs,
1572 StackType::Cons {
1573 rest: Box::new(StackType::RowVar("a".to_string())),
1574 top: Type::Bool
1575 }
1576 );
1577 }
1578
1579 #[test]
1580 fn test_parse_invalid_row_var_starts_with_digit() {
1581 let source = ": test ( ..123 Int -- ) ;";
1583 let mut parser = Parser::new(source);
1584 let result = parser.parse();
1585
1586 assert!(result.is_err());
1587 let err_msg = result.unwrap_err();
1588 assert!(
1589 err_msg.contains("lowercase letter"),
1590 "Expected error about lowercase letter, got: {}",
1591 err_msg
1592 );
1593 }
1594
1595 #[test]
1596 fn test_parse_invalid_row_var_starts_with_uppercase() {
1597 let source = ": test ( ..Int Int -- ) ;";
1599 let mut parser = Parser::new(source);
1600 let result = parser.parse();
1601
1602 assert!(result.is_err());
1603 let err_msg = result.unwrap_err();
1604 assert!(
1605 err_msg.contains("lowercase letter") || err_msg.contains("type name"),
1606 "Expected error about lowercase letter or type name, got: {}",
1607 err_msg
1608 );
1609 }
1610
1611 #[test]
1612 fn test_parse_invalid_row_var_with_special_chars() {
1613 let source = ": test ( ..a-b Int -- ) ;";
1615 let mut parser = Parser::new(source);
1616 let result = parser.parse();
1617
1618 assert!(result.is_err());
1619 let err_msg = result.unwrap_err();
1620 assert!(
1621 err_msg.contains("letters, numbers, and underscores")
1622 || err_msg.contains("Unknown type"),
1623 "Expected error about valid characters, got: {}",
1624 err_msg
1625 );
1626 }
1627
1628 #[test]
1629 fn test_parse_valid_row_var_with_underscore() {
1630 let source = ": test ( ..my_row Int -- ..my_row Bool ) ;";
1632 let mut parser = Parser::new(source);
1633 let result = parser.parse();
1634
1635 assert!(result.is_ok(), "Should accept row variable with underscore");
1636 }
1637
1638 #[test]
1639 fn test_parse_multiple_types_stack_effect() {
1640 let source = ": test ( Int String -- Bool ) 1 ;";
1643 let mut parser = Parser::new(source);
1644 let program = parser.parse().unwrap();
1645
1646 let effect = program.words[0].effect.as_ref().unwrap();
1647
1648 let (rest, top) = effect.inputs.clone().pop().unwrap();
1650 assert_eq!(top, Type::String);
1651 let (rest2, top2) = rest.pop().unwrap();
1652 assert_eq!(top2, Type::Int);
1653 assert_eq!(rest2, StackType::RowVar("rest".to_string()));
1654
1655 assert_eq!(
1657 effect.outputs,
1658 StackType::Cons {
1659 rest: Box::new(StackType::RowVar("rest".to_string())),
1660 top: Type::Bool
1661 }
1662 );
1663 }
1664
1665 #[test]
1666 fn test_parse_type_variable() {
1667 let source = ": dup ( ..a T -- ..a T T ) ;";
1669 let mut parser = Parser::new(source);
1670 let program = parser.parse().unwrap();
1671
1672 let effect = program.words[0].effect.as_ref().unwrap();
1673
1674 assert_eq!(
1676 effect.inputs,
1677 StackType::Cons {
1678 rest: Box::new(StackType::RowVar("a".to_string())),
1679 top: Type::Var("T".to_string())
1680 }
1681 );
1682
1683 let (rest, top) = effect.outputs.clone().pop().unwrap();
1685 assert_eq!(top, Type::Var("T".to_string()));
1686 let (rest2, top2) = rest.pop().unwrap();
1687 assert_eq!(top2, Type::Var("T".to_string()));
1688 assert_eq!(rest2, StackType::RowVar("a".to_string()));
1689 }
1690
1691 #[test]
1692 fn test_parse_empty_stack_effect() {
1693 let source = ": test ( -- ) ;";
1697 let mut parser = Parser::new(source);
1698 let program = parser.parse().unwrap();
1699
1700 let effect = program.words[0].effect.as_ref().unwrap();
1701
1702 assert_eq!(effect.inputs, StackType::RowVar("rest".to_string()));
1704 assert_eq!(effect.outputs, StackType::RowVar("rest".to_string()));
1705 }
1706
1707 #[test]
1708 fn test_parse_invalid_type() {
1709 let source = ": test ( invalid -- Bool ) ;";
1711 let mut parser = Parser::new(source);
1712 let result = parser.parse();
1713
1714 assert!(result.is_err());
1715 assert!(result.unwrap_err().contains("Unknown type"));
1716 }
1717
1718 #[test]
1719 fn test_parse_unclosed_stack_effect() {
1720 let source = ": test ( Int -- Bool body ;";
1723 let mut parser = Parser::new(source);
1724 let result = parser.parse();
1725
1726 assert!(result.is_err());
1727 let err_msg = result.unwrap_err();
1728 assert!(err_msg.contains("Unknown type"));
1730 }
1731
1732 #[test]
1733 fn test_parse_simple_quotation_type() {
1734 let source = ": apply ( [Int -- Int] -- ) ;";
1736 let mut parser = Parser::new(source);
1737 let program = parser.parse().unwrap();
1738
1739 let effect = program.words[0].effect.as_ref().unwrap();
1740
1741 let (rest, top) = effect.inputs.clone().pop().unwrap();
1743 match top {
1744 Type::Quotation(quot_effect) => {
1745 assert_eq!(
1747 quot_effect.inputs,
1748 StackType::Cons {
1749 rest: Box::new(StackType::RowVar("rest".to_string())),
1750 top: Type::Int
1751 }
1752 );
1753 assert_eq!(
1755 quot_effect.outputs,
1756 StackType::Cons {
1757 rest: Box::new(StackType::RowVar("rest".to_string())),
1758 top: Type::Int
1759 }
1760 );
1761 }
1762 _ => panic!("Expected Quotation type, got {:?}", top),
1763 }
1764 assert_eq!(rest, StackType::RowVar("rest".to_string()));
1765 }
1766
1767 #[test]
1768 fn test_parse_quotation_type_with_row_vars() {
1769 let source = ": test ( ..a [..a T -- ..a Bool] -- ..a ) ;";
1771 let mut parser = Parser::new(source);
1772 let program = parser.parse().unwrap();
1773
1774 let effect = program.words[0].effect.as_ref().unwrap();
1775
1776 let (rest, top) = effect.inputs.clone().pop().unwrap();
1778 match top {
1779 Type::Quotation(quot_effect) => {
1780 let (q_in_rest, q_in_top) = quot_effect.inputs.clone().pop().unwrap();
1782 assert_eq!(q_in_top, Type::Var("T".to_string()));
1783 assert_eq!(q_in_rest, StackType::RowVar("a".to_string()));
1784
1785 let (q_out_rest, q_out_top) = quot_effect.outputs.clone().pop().unwrap();
1787 assert_eq!(q_out_top, Type::Bool);
1788 assert_eq!(q_out_rest, StackType::RowVar("a".to_string()));
1789 }
1790 _ => panic!("Expected Quotation type, got {:?}", top),
1791 }
1792 assert_eq!(rest, StackType::RowVar("a".to_string()));
1793 }
1794
1795 #[test]
1796 fn test_parse_nested_quotation_type() {
1797 let source = ": nested ( [[Int -- Int] -- Bool] -- ) ;";
1799 let mut parser = Parser::new(source);
1800 let program = parser.parse().unwrap();
1801
1802 let effect = program.words[0].effect.as_ref().unwrap();
1803
1804 let (_, top) = effect.inputs.clone().pop().unwrap();
1806 match top {
1807 Type::Quotation(outer_effect) => {
1808 let (_, outer_in_top) = outer_effect.inputs.clone().pop().unwrap();
1810 match outer_in_top {
1811 Type::Quotation(inner_effect) => {
1812 assert!(matches!(
1814 inner_effect.inputs.clone().pop().unwrap().1,
1815 Type::Int
1816 ));
1817 assert!(matches!(
1818 inner_effect.outputs.clone().pop().unwrap().1,
1819 Type::Int
1820 ));
1821 }
1822 _ => panic!("Expected nested Quotation type"),
1823 }
1824
1825 let (_, outer_out_top) = outer_effect.outputs.clone().pop().unwrap();
1827 assert_eq!(outer_out_top, Type::Bool);
1828 }
1829 _ => panic!("Expected Quotation type"),
1830 }
1831 }
1832
1833 #[test]
1834 fn test_parse_deeply_nested_quotation_type_exceeds_limit() {
1835 let mut source = String::from(": deep ( ");
1838
1839 for _ in 0..35 {
1841 source.push_str("[ -- ");
1842 }
1843
1844 source.push_str("Int");
1845
1846 for _ in 0..35 {
1848 source.push_str(" ]");
1849 }
1850
1851 source.push_str(" -- ) ;");
1852
1853 let mut parser = Parser::new(&source);
1854 let result = parser.parse();
1855
1856 assert!(result.is_err());
1858 let err_msg = result.unwrap_err();
1859 assert!(
1860 err_msg.contains("depth") || err_msg.contains("32"),
1861 "Expected depth limit error, got: {}",
1862 err_msg
1863 );
1864 }
1865
1866 #[test]
1867 fn test_parse_empty_quotation_type() {
1868 let source = ": empty-quot ( [ -- ] -- ) ;";
1871 let mut parser = Parser::new(source);
1872 let program = parser.parse().unwrap();
1873
1874 let effect = program.words[0].effect.as_ref().unwrap();
1875
1876 let (_, top) = effect.inputs.clone().pop().unwrap();
1877 match top {
1878 Type::Quotation(quot_effect) => {
1879 assert_eq!(quot_effect.inputs, StackType::RowVar("rest".to_string()));
1881 assert_eq!(quot_effect.outputs, StackType::RowVar("rest".to_string()));
1882 }
1883 _ => panic!("Expected Quotation type"),
1884 }
1885 }
1886
1887 #[test]
1888 fn test_parse_quotation_type_in_output() {
1889 let source = ": maker ( -- [Int -- Int] ) ;";
1891 let mut parser = Parser::new(source);
1892 let program = parser.parse().unwrap();
1893
1894 let effect = program.words[0].effect.as_ref().unwrap();
1895
1896 let (_, top) = effect.outputs.clone().pop().unwrap();
1898 match top {
1899 Type::Quotation(quot_effect) => {
1900 assert!(matches!(
1901 quot_effect.inputs.clone().pop().unwrap().1,
1902 Type::Int
1903 ));
1904 assert!(matches!(
1905 quot_effect.outputs.clone().pop().unwrap().1,
1906 Type::Int
1907 ));
1908 }
1909 _ => panic!("Expected Quotation type"),
1910 }
1911 }
1912
1913 #[test]
1914 fn test_parse_unclosed_quotation_type() {
1915 let source = ": broken ( [Int -- Int -- ) ;";
1917 let mut parser = Parser::new(source);
1918 let result = parser.parse();
1919
1920 assert!(result.is_err());
1921 let err_msg = result.unwrap_err();
1922 assert!(
1925 err_msg.contains("Unclosed")
1926 || err_msg.contains("Expected")
1927 || err_msg.contains("Unexpected"),
1928 "Got error: {}",
1929 err_msg
1930 );
1931 }
1932
1933 #[test]
1934 fn test_parse_multiple_quotation_types() {
1935 let source = ": multi ( [Int -- Int] [String -- Bool] -- ) ;";
1937 let mut parser = Parser::new(source);
1938 let program = parser.parse().unwrap();
1939
1940 let effect = program.words[0].effect.as_ref().unwrap();
1941
1942 let (rest, top) = effect.inputs.clone().pop().unwrap();
1944 match top {
1945 Type::Quotation(quot_effect) => {
1946 assert!(matches!(
1947 quot_effect.inputs.clone().pop().unwrap().1,
1948 Type::String
1949 ));
1950 assert!(matches!(
1951 quot_effect.outputs.clone().pop().unwrap().1,
1952 Type::Bool
1953 ));
1954 }
1955 _ => panic!("Expected Quotation type"),
1956 }
1957
1958 let (_, top2) = rest.pop().unwrap();
1960 match top2 {
1961 Type::Quotation(quot_effect) => {
1962 assert!(matches!(
1963 quot_effect.inputs.clone().pop().unwrap().1,
1964 Type::Int
1965 ));
1966 assert!(matches!(
1967 quot_effect.outputs.clone().pop().unwrap().1,
1968 Type::Int
1969 ));
1970 }
1971 _ => panic!("Expected Quotation type"),
1972 }
1973 }
1974
1975 #[test]
1976 fn test_parse_quotation_type_without_separator() {
1977 let source = ": consumer ( [Int] -- ) ;";
1988 let mut parser = Parser::new(source);
1989 let result = parser.parse();
1990
1991 assert!(result.is_err());
1993 let err_msg = result.unwrap_err();
1994 assert!(
1995 err_msg.contains("require") && err_msg.contains("--"),
1996 "Expected error about missing '--' separator, got: {}",
1997 err_msg
1998 );
1999 }
2000
2001 #[test]
2002 fn test_parse_no_stack_effect() {
2003 let source = ": test 1 2 add ;";
2005 let mut parser = Parser::new(source);
2006 let program = parser.parse().unwrap();
2007
2008 assert_eq!(program.words.len(), 1);
2009 assert!(program.words[0].effect.is_none());
2010 }
2011
2012 #[test]
2013 fn test_parse_simple_quotation() {
2014 let source = r#"
2015: test ( -- Quot )
2016 [ 1 add ] ;
2017"#;
2018
2019 let mut parser = Parser::new(source);
2020 let program = parser.parse().unwrap();
2021
2022 assert_eq!(program.words.len(), 1);
2023 assert_eq!(program.words[0].name, "test");
2024 assert_eq!(program.words[0].body.len(), 1);
2025
2026 match &program.words[0].body[0] {
2027 Statement::Quotation { body, .. } => {
2028 assert_eq!(body.len(), 2);
2029 assert_eq!(body[0], Statement::IntLiteral(1));
2030 assert!(matches!(&body[1], Statement::WordCall { name, .. } if name == "add"));
2031 }
2032 _ => panic!("Expected Quotation statement"),
2033 }
2034 }
2035
2036 #[test]
2037 fn test_parse_empty_quotation() {
2038 let source = ": test [ ] ;";
2039
2040 let mut parser = Parser::new(source);
2041 let program = parser.parse().unwrap();
2042
2043 assert_eq!(program.words.len(), 1);
2044
2045 match &program.words[0].body[0] {
2046 Statement::Quotation { body, .. } => {
2047 assert_eq!(body.len(), 0);
2048 }
2049 _ => panic!("Expected Quotation statement"),
2050 }
2051 }
2052
2053 #[test]
2054 fn test_parse_quotation_with_call() {
2055 let source = r#"
2056: test ( -- )
2057 5 [ 1 add ] call ;
2058"#;
2059
2060 let mut parser = Parser::new(source);
2061 let program = parser.parse().unwrap();
2062
2063 assert_eq!(program.words.len(), 1);
2064 assert_eq!(program.words[0].body.len(), 3);
2065
2066 assert_eq!(program.words[0].body[0], Statement::IntLiteral(5));
2067
2068 match &program.words[0].body[1] {
2069 Statement::Quotation { body, .. } => {
2070 assert_eq!(body.len(), 2);
2071 }
2072 _ => panic!("Expected Quotation"),
2073 }
2074
2075 assert!(matches!(
2076 &program.words[0].body[2],
2077 Statement::WordCall { name, .. } if name == "call"
2078 ));
2079 }
2080
2081 #[test]
2082 fn test_parse_nested_quotation() {
2083 let source = ": test [ [ 1 add ] call ] ;";
2084
2085 let mut parser = Parser::new(source);
2086 let program = parser.parse().unwrap();
2087
2088 assert_eq!(program.words.len(), 1);
2089
2090 match &program.words[0].body[0] {
2091 Statement::Quotation {
2092 body: outer_body, ..
2093 } => {
2094 assert_eq!(outer_body.len(), 2);
2095
2096 match &outer_body[0] {
2097 Statement::Quotation {
2098 body: inner_body, ..
2099 } => {
2100 assert_eq!(inner_body.len(), 2);
2101 assert_eq!(inner_body[0], Statement::IntLiteral(1));
2102 assert!(
2103 matches!(&inner_body[1], Statement::WordCall { name, .. } if name == "add")
2104 );
2105 }
2106 _ => panic!("Expected nested Quotation"),
2107 }
2108
2109 assert!(
2110 matches!(&outer_body[1], Statement::WordCall { name, .. } if name == "call")
2111 );
2112 }
2113 _ => panic!("Expected Quotation"),
2114 }
2115 }
2116
2117 #[test]
2118 fn test_parse_while_with_quotations() {
2119 let source = r#"
2120: countdown ( Int -- )
2121 [ dup 0 > ] [ 1 subtract ] while drop ;
2122"#;
2123
2124 let mut parser = Parser::new(source);
2125 let program = parser.parse().unwrap();
2126
2127 assert_eq!(program.words.len(), 1);
2128 assert_eq!(program.words[0].body.len(), 4);
2129
2130 match &program.words[0].body[0] {
2132 Statement::Quotation { body: pred, .. } => {
2133 assert_eq!(pred.len(), 3);
2134 assert!(matches!(&pred[0], Statement::WordCall { name, .. } if name == "dup"));
2135 assert_eq!(pred[1], Statement::IntLiteral(0));
2136 assert!(matches!(&pred[2], Statement::WordCall { name, .. } if name == ">"));
2137 }
2138 _ => panic!("Expected predicate quotation"),
2139 }
2140
2141 match &program.words[0].body[1] {
2143 Statement::Quotation { body, .. } => {
2144 assert_eq!(body.len(), 2);
2145 assert_eq!(body[0], Statement::IntLiteral(1));
2146 assert!(matches!(&body[1], Statement::WordCall { name, .. } if name == "subtract"));
2147 }
2148 _ => panic!("Expected body quotation"),
2149 }
2150
2151 assert!(matches!(
2153 &program.words[0].body[2],
2154 Statement::WordCall { name, .. } if name == "while"
2155 ));
2156
2157 assert!(matches!(
2159 &program.words[0].body[3],
2160 Statement::WordCall { name, .. } if name == "drop"
2161 ));
2162 }
2163
2164 #[test]
2165 fn test_parse_simple_closure_type() {
2166 let source = ": make-adder ( Int -- Closure[Int -- Int] ) ;";
2168 let mut parser = Parser::new(source);
2169 let program = parser.parse().unwrap();
2170
2171 assert_eq!(program.words.len(), 1);
2172 let word = &program.words[0];
2173 assert!(word.effect.is_some());
2174
2175 let effect = word.effect.as_ref().unwrap();
2176
2177 let (input_rest, input_top) = effect.inputs.clone().pop().unwrap();
2179 assert_eq!(input_top, Type::Int);
2180 assert_eq!(input_rest, StackType::RowVar("rest".to_string()));
2181
2182 let (output_rest, output_top) = effect.outputs.clone().pop().unwrap();
2184 match output_top {
2185 Type::Closure { effect, captures } => {
2186 assert_eq!(
2188 effect.inputs,
2189 StackType::Cons {
2190 rest: Box::new(StackType::RowVar("rest".to_string())),
2191 top: Type::Int
2192 }
2193 );
2194 assert_eq!(
2195 effect.outputs,
2196 StackType::Cons {
2197 rest: Box::new(StackType::RowVar("rest".to_string())),
2198 top: Type::Int
2199 }
2200 );
2201 assert_eq!(captures.len(), 0);
2203 }
2204 _ => panic!("Expected Closure type, got {:?}", output_top),
2205 }
2206 assert_eq!(output_rest, StackType::RowVar("rest".to_string()));
2207 }
2208
2209 #[test]
2210 fn test_parse_closure_type_with_row_vars() {
2211 let source = ": make-handler ( ..a Config -- ..a Closure[Request -- Response] ) ;";
2213 let mut parser = Parser::new(source);
2214 let program = parser.parse().unwrap();
2215
2216 let effect = program.words[0].effect.as_ref().unwrap();
2217
2218 let (rest, top) = effect.outputs.clone().pop().unwrap();
2220 match top {
2221 Type::Closure { effect, .. } => {
2222 let (_, in_top) = effect.inputs.clone().pop().unwrap();
2224 assert_eq!(in_top, Type::Var("Request".to_string()));
2225 let (_, out_top) = effect.outputs.clone().pop().unwrap();
2226 assert_eq!(out_top, Type::Var("Response".to_string()));
2227 }
2228 _ => panic!("Expected Closure type"),
2229 }
2230 assert_eq!(rest, StackType::RowVar("a".to_string()));
2231 }
2232
2233 #[test]
2234 fn test_parse_closure_type_missing_bracket() {
2235 let source = ": broken ( Int -- Closure ) ;";
2237 let mut parser = Parser::new(source);
2238 let result = parser.parse();
2239
2240 assert!(result.is_err());
2241 let err_msg = result.unwrap_err();
2242 assert!(
2243 err_msg.contains("[") && err_msg.contains("Closure"),
2244 "Expected error about missing '[' after Closure, got: {}",
2245 err_msg
2246 );
2247 }
2248
2249 #[test]
2250 fn test_parse_closure_type_in_input() {
2251 let source = ": apply-closure ( Closure[Int -- Int] -- ) ;";
2253 let mut parser = Parser::new(source);
2254 let program = parser.parse().unwrap();
2255
2256 let effect = program.words[0].effect.as_ref().unwrap();
2257
2258 let (_, top) = effect.inputs.clone().pop().unwrap();
2260 match top {
2261 Type::Closure { effect, .. } => {
2262 assert!(matches!(effect.inputs.clone().pop().unwrap().1, Type::Int));
2264 assert!(matches!(effect.outputs.clone().pop().unwrap().1, Type::Int));
2265 }
2266 _ => panic!("Expected Closure type in input"),
2267 }
2268 }
2269
2270 #[test]
2273 fn test_token_position_single_line() {
2274 let source = ": main ( -- ) ;";
2276 let tokens = tokenize(source);
2277
2278 assert_eq!(tokens[0].text, ":");
2280 assert_eq!(tokens[0].line, 0);
2281 assert_eq!(tokens[0].column, 0);
2282
2283 assert_eq!(tokens[1].text, "main");
2285 assert_eq!(tokens[1].line, 0);
2286 assert_eq!(tokens[1].column, 2);
2287
2288 assert_eq!(tokens[2].text, "(");
2290 assert_eq!(tokens[2].line, 0);
2291 assert_eq!(tokens[2].column, 7);
2292 }
2293
2294 #[test]
2295 fn test_token_position_multiline() {
2296 let source = ": main ( -- )\n 42\n;";
2298 let tokens = tokenize(source);
2299
2300 let token_42 = tokens.iter().find(|t| t.text == "42").unwrap();
2302 assert_eq!(token_42.line, 1);
2303 assert_eq!(token_42.column, 2); let token_semi = tokens.iter().find(|t| t.text == ";").unwrap();
2307 assert_eq!(token_semi.line, 2);
2308 assert_eq!(token_semi.column, 0);
2309 }
2310
2311 #[test]
2312 fn test_word_def_source_location_span() {
2313 let source = r#": helper ( -- )
2315 "hello"
2316 write_line
2317;
2318
2319: main ( -- )
2320 helper
2321;"#;
2322
2323 let mut parser = Parser::new(source);
2324 let program = parser.parse().unwrap();
2325
2326 assert_eq!(program.words.len(), 2);
2327
2328 let helper = &program.words[0];
2330 assert_eq!(helper.name, "helper");
2331 let helper_source = helper.source.as_ref().unwrap();
2332 assert_eq!(helper_source.start_line, 0);
2333 assert_eq!(helper_source.end_line, 3);
2334
2335 let main_word = &program.words[1];
2337 assert_eq!(main_word.name, "main");
2338 let main_source = main_word.source.as_ref().unwrap();
2339 assert_eq!(main_source.start_line, 5);
2340 assert_eq!(main_source.end_line, 7);
2341 }
2342
2343 #[test]
2344 fn test_token_position_string_with_newline() {
2345 let source = "\"line1\\nline2\"";
2347 let tokens = tokenize(source);
2348
2349 assert_eq!(tokens.len(), 1);
2351 assert_eq!(tokens[0].line, 0);
2352 assert_eq!(tokens[0].column, 0);
2353 }
2354
2355 #[test]
2360 fn test_parse_simple_union() {
2361 let source = r#"
2362union Message {
2363 Get { response-chan: Int }
2364 Set { value: Int }
2365}
2366
2367: main ( -- ) ;
2368"#;
2369
2370 let mut parser = Parser::new(source);
2371 let program = parser.parse().unwrap();
2372
2373 assert_eq!(program.unions.len(), 1);
2374 let union_def = &program.unions[0];
2375 assert_eq!(union_def.name, "Message");
2376 assert_eq!(union_def.variants.len(), 2);
2377
2378 assert_eq!(union_def.variants[0].name, "Get");
2380 assert_eq!(union_def.variants[0].fields.len(), 1);
2381 assert_eq!(union_def.variants[0].fields[0].name, "response-chan");
2382 assert_eq!(union_def.variants[0].fields[0].type_name, "Int");
2383
2384 assert_eq!(union_def.variants[1].name, "Set");
2386 assert_eq!(union_def.variants[1].fields.len(), 1);
2387 assert_eq!(union_def.variants[1].fields[0].name, "value");
2388 assert_eq!(union_def.variants[1].fields[0].type_name, "Int");
2389 }
2390
2391 #[test]
2392 fn test_parse_union_with_multiple_fields() {
2393 let source = r#"
2394union Report {
2395 Data { op: Int, delta: Int, total: Int }
2396 Empty
2397}
2398
2399: main ( -- ) ;
2400"#;
2401
2402 let mut parser = Parser::new(source);
2403 let program = parser.parse().unwrap();
2404
2405 assert_eq!(program.unions.len(), 1);
2406 let union_def = &program.unions[0];
2407 assert_eq!(union_def.name, "Report");
2408 assert_eq!(union_def.variants.len(), 2);
2409
2410 let data_variant = &union_def.variants[0];
2412 assert_eq!(data_variant.name, "Data");
2413 assert_eq!(data_variant.fields.len(), 3);
2414 assert_eq!(data_variant.fields[0].name, "op");
2415 assert_eq!(data_variant.fields[1].name, "delta");
2416 assert_eq!(data_variant.fields[2].name, "total");
2417
2418 let empty_variant = &union_def.variants[1];
2420 assert_eq!(empty_variant.name, "Empty");
2421 assert_eq!(empty_variant.fields.len(), 0);
2422 }
2423
2424 #[test]
2425 fn test_parse_union_lowercase_name_error() {
2426 let source = r#"
2427union message {
2428 Get { }
2429}
2430"#;
2431
2432 let mut parser = Parser::new(source);
2433 let result = parser.parse();
2434 assert!(result.is_err());
2435 assert!(result.unwrap_err().contains("uppercase"));
2436 }
2437
2438 #[test]
2439 fn test_parse_union_empty_error() {
2440 let source = r#"
2441union Message {
2442}
2443"#;
2444
2445 let mut parser = Parser::new(source);
2446 let result = parser.parse();
2447 assert!(result.is_err());
2448 assert!(result.unwrap_err().contains("at least one variant"));
2449 }
2450
2451 #[test]
2452 fn test_parse_union_duplicate_variant_error() {
2453 let source = r#"
2454union Message {
2455 Get { x: Int }
2456 Get { y: String }
2457}
2458"#;
2459
2460 let mut parser = Parser::new(source);
2461 let result = parser.parse();
2462 assert!(result.is_err());
2463 let err = result.unwrap_err();
2464 assert!(err.contains("Duplicate variant name"));
2465 assert!(err.contains("Get"));
2466 }
2467
2468 #[test]
2469 fn test_parse_union_duplicate_field_error() {
2470 let source = r#"
2471union Data {
2472 Record { x: Int, x: String }
2473}
2474"#;
2475
2476 let mut parser = Parser::new(source);
2477 let result = parser.parse();
2478 assert!(result.is_err());
2479 let err = result.unwrap_err();
2480 assert!(err.contains("Duplicate field name"));
2481 assert!(err.contains("x"));
2482 }
2483
2484 #[test]
2485 fn test_parse_simple_match() {
2486 let source = r#"
2487: handle ( -- )
2488 match
2489 Get -> send-response
2490 Set -> process-set
2491 end
2492;
2493"#;
2494
2495 let mut parser = Parser::new(source);
2496 let program = parser.parse().unwrap();
2497
2498 assert_eq!(program.words.len(), 1);
2499 assert_eq!(program.words[0].body.len(), 1);
2500
2501 match &program.words[0].body[0] {
2502 Statement::Match { arms } => {
2503 assert_eq!(arms.len(), 2);
2504
2505 match &arms[0].pattern {
2507 Pattern::Variant(name) => assert_eq!(name, "Get"),
2508 _ => panic!("Expected Variant pattern"),
2509 }
2510 assert_eq!(arms[0].body.len(), 1);
2511
2512 match &arms[1].pattern {
2514 Pattern::Variant(name) => assert_eq!(name, "Set"),
2515 _ => panic!("Expected Variant pattern"),
2516 }
2517 assert_eq!(arms[1].body.len(), 1);
2518 }
2519 _ => panic!("Expected Match statement"),
2520 }
2521 }
2522
2523 #[test]
2524 fn test_parse_match_with_bindings() {
2525 let source = r#"
2526: handle ( -- )
2527 match
2528 Get { >chan } -> chan send-response
2529 Report { >delta >total } -> delta total process
2530 end
2531;
2532"#;
2533
2534 let mut parser = Parser::new(source);
2535 let program = parser.parse().unwrap();
2536
2537 assert_eq!(program.words.len(), 1);
2538
2539 match &program.words[0].body[0] {
2540 Statement::Match { arms } => {
2541 assert_eq!(arms.len(), 2);
2542
2543 match &arms[0].pattern {
2545 Pattern::VariantWithBindings { name, bindings } => {
2546 assert_eq!(name, "Get");
2547 assert_eq!(bindings.len(), 1);
2548 assert_eq!(bindings[0], "chan");
2549 }
2550 _ => panic!("Expected VariantWithBindings pattern"),
2551 }
2552
2553 match &arms[1].pattern {
2555 Pattern::VariantWithBindings { name, bindings } => {
2556 assert_eq!(name, "Report");
2557 assert_eq!(bindings.len(), 2);
2558 assert_eq!(bindings[0], "delta");
2559 assert_eq!(bindings[1], "total");
2560 }
2561 _ => panic!("Expected VariantWithBindings pattern"),
2562 }
2563 }
2564 _ => panic!("Expected Match statement"),
2565 }
2566 }
2567
2568 #[test]
2569 fn test_parse_match_bindings_require_prefix() {
2570 let source = r#"
2572: handle ( -- )
2573 match
2574 Get { chan } -> chan send-response
2575 end
2576;
2577"#;
2578
2579 let mut parser = Parser::new(source);
2580 let result = parser.parse();
2581 assert!(result.is_err());
2582 let err = result.unwrap_err();
2583 assert!(err.contains(">chan"));
2584 assert!(err.contains("stack extraction"));
2585 }
2586
2587 #[test]
2588 fn test_parse_match_with_body_statements() {
2589 let source = r#"
2590: handle ( -- )
2591 match
2592 Get -> 1 2 add send-response
2593 Set -> process-value store
2594 end
2595;
2596"#;
2597
2598 let mut parser = Parser::new(source);
2599 let program = parser.parse().unwrap();
2600
2601 match &program.words[0].body[0] {
2602 Statement::Match { arms } => {
2603 assert_eq!(arms[0].body.len(), 4);
2605 assert_eq!(arms[0].body[0], Statement::IntLiteral(1));
2606 assert_eq!(arms[0].body[1], Statement::IntLiteral(2));
2607 assert!(
2608 matches!(&arms[0].body[2], Statement::WordCall { name, .. } if name == "add")
2609 );
2610
2611 assert_eq!(arms[1].body.len(), 2);
2613 }
2614 _ => panic!("Expected Match statement"),
2615 }
2616 }
2617
2618 #[test]
2619 fn test_parse_match_empty_error() {
2620 let source = r#"
2621: handle ( -- )
2622 match
2623 end
2624;
2625"#;
2626
2627 let mut parser = Parser::new(source);
2628 let result = parser.parse();
2629 assert!(result.is_err());
2630 assert!(result.unwrap_err().contains("at least one arm"));
2631 }
2632
2633 #[test]
2634 fn test_parse_symbol_literal() {
2635 let source = r#"
2636: main ( -- )
2637 :hello drop
2638;
2639"#;
2640
2641 let mut parser = Parser::new(source);
2642 let program = parser.parse().unwrap();
2643 assert_eq!(program.words.len(), 1);
2644
2645 let main = &program.words[0];
2646 assert_eq!(main.body.len(), 2);
2647
2648 match &main.body[0] {
2649 Statement::Symbol(name) => assert_eq!(name, "hello"),
2650 _ => panic!("Expected Symbol statement, got {:?}", main.body[0]),
2651 }
2652 }
2653
2654 #[test]
2655 fn test_parse_symbol_with_hyphen() {
2656 let source = r#"
2657: main ( -- )
2658 :hello-world drop
2659;
2660"#;
2661
2662 let mut parser = Parser::new(source);
2663 let program = parser.parse().unwrap();
2664
2665 match &program.words[0].body[0] {
2666 Statement::Symbol(name) => assert_eq!(name, "hello-world"),
2667 _ => panic!("Expected Symbol statement"),
2668 }
2669 }
2670
2671 #[test]
2672 fn test_parse_symbol_starting_with_digit_fails() {
2673 let source = r#"
2674: main ( -- )
2675 :123abc drop
2676;
2677"#;
2678
2679 let mut parser = Parser::new(source);
2680 let result = parser.parse();
2681 assert!(result.is_err());
2682 assert!(result.unwrap_err().contains("cannot start with a digit"));
2683 }
2684
2685 #[test]
2686 fn test_parse_symbol_with_invalid_char_fails() {
2687 let source = r#"
2688: main ( -- )
2689 :hello@world drop
2690;
2691"#;
2692
2693 let mut parser = Parser::new(source);
2694 let result = parser.parse();
2695 assert!(result.is_err());
2696 assert!(result.unwrap_err().contains("invalid character"));
2697 }
2698
2699 #[test]
2700 fn test_parse_symbol_special_chars_allowed() {
2701 let source = r#"
2703: main ( -- )
2704 :empty? drop
2705 :save! drop
2706;
2707"#;
2708
2709 let mut parser = Parser::new(source);
2710 let program = parser.parse().unwrap();
2711
2712 match &program.words[0].body[0] {
2713 Statement::Symbol(name) => assert_eq!(name, "empty?"),
2714 _ => panic!("Expected Symbol statement"),
2715 }
2716 match &program.words[0].body[2] {
2717 Statement::Symbol(name) => assert_eq!(name, "save!"),
2718 _ => panic!("Expected Symbol statement"),
2719 }
2720 }
2721}