seqc/
parser.rs

1//! Simple parser for Seq syntax
2//!
3//! Syntax:
4//! ```text
5//! : word-name ( stack-effect )
6//!   statement1
7//!   statement2
8//!   ... ;
9//! ```
10
11use crate::ast::{
12    Include, MatchArm, Pattern, Program, SourceLocation, Statement, UnionDef, UnionField,
13    UnionVariant, WordDef,
14};
15use crate::types::{Effect, SideEffect, StackType, Type};
16
/// A lexical token together with the position where it starts in the source.
#[derive(Debug, Clone)]
pub struct Token {
    /// Text of the token
    pub text: String,
    /// Line number (0-indexed for LSP compatibility)
    pub line: usize,
    /// Column number (0-indexed)
    pub column: usize,
}

impl Token {
    /// Build a token from its text and its 0-indexed line and column.
    fn new(text: String, line: usize, column: usize) -> Self {
        Self { text, line, column }
    }
}

/// Compare a token against a `&str` by text only; position is ignored.
impl PartialEq<&str> for Token {
    fn eq(&self, other: &&str) -> bool {
        self.text.as_str() == *other
    }
}

/// Compare a token against a `str` by text only; position is ignored.
impl PartialEq<str> for Token {
    fn eq(&self, other: &str) -> bool {
        self.text.as_str() == other
    }
}
44
/// Parser state over the token stream produced from Seq source text.
pub struct Parser {
    /// Tokens being parsed
    tokens: Vec<Token>,
    /// Cursor into `tokens`; starts at 0 (presumably the index of the current
    /// token - the helpers that move it are defined elsewhere in this file)
    pos: usize,
    /// Counter for assigning unique IDs to quotations
    /// Used by the typechecker to track inferred types
    next_quotation_id: usize,
}
52
53impl Parser {
54    pub fn new(source: &str) -> Self {
55        let tokens = tokenize(source);
56        Parser {
57            tokens,
58            pos: 0,
59            next_quotation_id: 0,
60        }
61    }
62
63    pub fn parse(&mut self) -> Result<Program, String> {
64        let mut program = Program::new();
65
66        // Check for unclosed string error from tokenizer
67        if let Some(error_token) = self.tokens.iter().find(|t| *t == "<<<UNCLOSED_STRING>>>") {
68            return Err(format!(
69                "Unclosed string literal at line {}, column {} - missing closing quote",
70                error_token.line + 1, // 1-indexed for user display
71                error_token.column + 1
72            ));
73        }
74
75        while !self.is_at_end() {
76            self.skip_comments();
77            if self.is_at_end() {
78                break;
79            }
80
81            // Check for include statement
82            if self.check("include") {
83                let include = self.parse_include()?;
84                program.includes.push(include);
85                continue;
86            }
87
88            // Check for union definition
89            if self.check("union") {
90                let union_def = self.parse_union_def()?;
91                program.unions.push(union_def);
92                continue;
93            }
94
95            let word = self.parse_word_def()?;
96            program.words.push(word);
97        }
98
99        Ok(program)
100    }
101
102    /// Parse an include statement:
103    ///   include std:http     -> Include::Std("http")
104    ///   include ffi:readline -> Include::Ffi("readline")
105    ///   include "my-utils"   -> Include::Relative("my-utils")
106    fn parse_include(&mut self) -> Result<Include, String> {
107        self.consume("include");
108
109        let token = self
110            .advance()
111            .ok_or("Expected module name after 'include'")?
112            .clone();
113
114        // Check for std: prefix (tokenizer splits this into "std", ":", "name")
115        if token == "std" {
116            // Expect : token
117            if !self.consume(":") {
118                return Err("Expected ':' after 'std' in include statement".to_string());
119            }
120            // Get the module name
121            let name = self
122                .advance()
123                .ok_or("Expected module name after 'std:'")?
124                .clone();
125            return Ok(Include::Std(name));
126        }
127
128        // Check for ffi: prefix
129        if token == "ffi" {
130            // Expect : token
131            if !self.consume(":") {
132                return Err("Expected ':' after 'ffi' in include statement".to_string());
133            }
134            // Get the library name
135            let name = self
136                .advance()
137                .ok_or("Expected library name after 'ffi:'")?
138                .clone();
139            return Ok(Include::Ffi(name));
140        }
141
142        // Check for quoted string (relative path)
143        if token.starts_with('"') && token.ends_with('"') {
144            let path = token.trim_start_matches('"').trim_end_matches('"');
145            return Ok(Include::Relative(path.to_string()));
146        }
147
148        Err(format!(
149            "Invalid include syntax '{}'. Use 'include std:name', 'include ffi:lib', or 'include \"path\"'",
150            token
151        ))
152    }
153
154    /// Parse a union type definition:
155    ///   union Message {
156    ///     Get { response-chan: Int }
157    ///     Increment { response-chan: Int }
158    ///     Report { op: Int, delta: Int, total: Int }
159    ///   }
160    fn parse_union_def(&mut self) -> Result<UnionDef, String> {
161        // Capture start line from 'union' token
162        let start_line = self.current_token().map(|t| t.line).unwrap_or(0);
163
164        // Consume 'union' keyword
165        self.consume("union");
166
167        // Get union name (must start with uppercase)
168        let name = self
169            .advance()
170            .ok_or("Expected union name after 'union'")?
171            .clone();
172
173        if !name
174            .chars()
175            .next()
176            .map(|c| c.is_uppercase())
177            .unwrap_or(false)
178        {
179            return Err(format!(
180                "Union name '{}' must start with an uppercase letter",
181                name
182            ));
183        }
184
185        // Skip comments and newlines
186        self.skip_comments();
187
188        // Expect '{'
189        if !self.consume("{") {
190            return Err(format!(
191                "Expected '{{' after union name '{}', got '{}'",
192                name,
193                self.current()
194            ));
195        }
196
197        // Parse variants until '}'
198        let mut variants = Vec::new();
199        loop {
200            self.skip_comments();
201
202            if self.check("}") {
203                break;
204            }
205
206            if self.is_at_end() {
207                return Err(format!("Unexpected end of file in union '{}'", name));
208            }
209
210            variants.push(self.parse_union_variant()?);
211        }
212
213        // Capture end line from '}' token before consuming
214        let end_line = self.current_token().map(|t| t.line).unwrap_or(start_line);
215
216        // Consume '}'
217        self.consume("}");
218
219        if variants.is_empty() {
220            return Err(format!("Union '{}' must have at least one variant", name));
221        }
222
223        // Check for duplicate variant names
224        let mut seen_variants = std::collections::HashSet::new();
225        for variant in &variants {
226            if !seen_variants.insert(&variant.name) {
227                return Err(format!(
228                    "Duplicate variant name '{}' in union '{}'",
229                    variant.name, name
230                ));
231            }
232        }
233
234        Ok(UnionDef {
235            name,
236            variants,
237            source: Some(SourceLocation::span(
238                std::path::PathBuf::new(),
239                start_line,
240                end_line,
241            )),
242        })
243    }
244
245    /// Parse a single union variant:
246    ///   Get { response-chan: Int }
247    ///   or just: Empty (no fields)
248    fn parse_union_variant(&mut self) -> Result<UnionVariant, String> {
249        let start_line = self.current_token().map(|t| t.line).unwrap_or(0);
250
251        // Get variant name (must start with uppercase)
252        let name = self.advance().ok_or("Expected variant name")?.clone();
253
254        if !name
255            .chars()
256            .next()
257            .map(|c| c.is_uppercase())
258            .unwrap_or(false)
259        {
260            return Err(format!(
261                "Variant name '{}' must start with an uppercase letter",
262                name
263            ));
264        }
265
266        self.skip_comments();
267
268        // Check for optional fields
269        let fields = if self.check("{") {
270            self.consume("{");
271            let fields = self.parse_union_fields()?;
272            if !self.consume("}") {
273                return Err(format!("Expected '}}' after variant '{}' fields", name));
274            }
275            fields
276        } else {
277            Vec::new()
278        };
279
280        Ok(UnionVariant {
281            name,
282            fields,
283            source: Some(SourceLocation::new(std::path::PathBuf::new(), start_line)),
284        })
285    }
286
287    /// Parse union fields: name: Type, name: Type, ...
288    fn parse_union_fields(&mut self) -> Result<Vec<UnionField>, String> {
289        let mut fields = Vec::new();
290
291        loop {
292            self.skip_comments();
293
294            if self.check("}") {
295                break;
296            }
297
298            // Get field name
299            let field_name = self.advance().ok_or("Expected field name")?.clone();
300
301            // Expect ':'
302            if !self.consume(":") {
303                return Err(format!(
304                    "Expected ':' after field name '{}', got '{}'",
305                    field_name,
306                    self.current()
307                ));
308            }
309
310            // Get type name
311            let type_name = self
312                .advance()
313                .ok_or("Expected type name after ':'")?
314                .clone();
315
316            fields.push(UnionField {
317                name: field_name,
318                type_name,
319            });
320
321            // Optional comma separator
322            self.skip_comments();
323            self.consume(",");
324        }
325
326        // Check for duplicate field names
327        let mut seen_fields = std::collections::HashSet::new();
328        for field in &fields {
329            if !seen_fields.insert(&field.name) {
330                return Err(format!("Duplicate field name '{}' in variant", field.name));
331            }
332        }
333
334        Ok(fields)
335    }
336
337    fn parse_word_def(&mut self) -> Result<WordDef, String> {
338        // Capture start line from ':' token
339        let start_line = self.current_token().map(|t| t.line).unwrap_or(0);
340
341        // Expect ':'
342        if !self.consume(":") {
343            return Err(format!(
344                "Expected ':' to start word definition, got '{}'",
345                self.current()
346            ));
347        }
348
349        // Get word name
350        let name = self
351            .advance()
352            .ok_or("Expected word name after ':'")?
353            .clone();
354
355        // Parse stack effect if present: ( ..a Int -- ..a Bool )
356        let effect = if self.check("(") {
357            Some(self.parse_stack_effect()?)
358        } else {
359            None
360        };
361
362        // Parse body until ';'
363        let mut body = Vec::new();
364        while !self.check(";") {
365            if self.is_at_end() {
366                return Err(format!("Unexpected end of file in word '{}'", name));
367            }
368
369            // Skip comments and newlines in body
370            self.skip_comments();
371            if self.check(";") {
372                break;
373            }
374
375            body.push(self.parse_statement()?);
376        }
377
378        // Capture end line from ';' token before consuming
379        let end_line = self.current_token().map(|t| t.line).unwrap_or(start_line);
380
381        // Consume ';'
382        self.consume(";");
383
384        Ok(WordDef {
385            name,
386            effect,
387            body,
388            source: Some(crate::ast::SourceLocation::span(
389                std::path::PathBuf::new(),
390                start_line,
391                end_line,
392            )),
393        })
394    }
395
396    fn parse_statement(&mut self) -> Result<Statement, String> {
397        use crate::ast::Span;
398        let tok = self.advance_token().ok_or("Unexpected end of file")?;
399        let token = &tok.text;
400        let tok_line = tok.line;
401        let tok_column = tok.column;
402        let tok_len = tok.text.len();
403
404        // Check if it looks like a float literal (contains . or scientific notation)
405        // Must check this BEFORE integer parsing
406        if let Some(f) = is_float_literal(token)
407            .then(|| token.parse::<f64>().ok())
408            .flatten()
409        {
410            return Ok(Statement::FloatLiteral(f));
411        }
412
413        // Try to parse as hex literal (0x or 0X prefix)
414        if let Some(hex) = token
415            .strip_prefix("0x")
416            .or_else(|| token.strip_prefix("0X"))
417        {
418            return i64::from_str_radix(hex, 16)
419                .map(Statement::IntLiteral)
420                .map_err(|_| format!("Invalid hex literal: {}", token));
421        }
422
423        // Try to parse as binary literal (0b or 0B prefix)
424        if let Some(bin) = token
425            .strip_prefix("0b")
426            .or_else(|| token.strip_prefix("0B"))
427        {
428            return i64::from_str_radix(bin, 2)
429                .map(Statement::IntLiteral)
430                .map_err(|_| format!("Invalid binary literal: {}", token));
431        }
432
433        // Try to parse as decimal integer literal
434        if let Ok(n) = token.parse::<i64>() {
435            return Ok(Statement::IntLiteral(n));
436        }
437
438        // Try to parse as boolean literal
439        if token == "true" {
440            return Ok(Statement::BoolLiteral(true));
441        }
442        if token == "false" {
443            return Ok(Statement::BoolLiteral(false));
444        }
445
446        // Try to parse as symbol literal (:foo, :some-name)
447        if token == ":" {
448            // Get the next token as the symbol name
449            let name_tok = self
450                .advance_token()
451                .ok_or("Expected symbol name after ':', got end of input")?;
452            let name = &name_tok.text;
453            // Validate symbol name (identifier-like, kebab-case allowed)
454            if name.is_empty() {
455                return Err("Symbol name cannot be empty".to_string());
456            }
457            if name.starts_with(|c: char| c.is_ascii_digit()) {
458                return Err(format!(
459                    "Symbol name cannot start with a digit: ':{}'\n  Hint: Symbol names must start with a letter",
460                    name
461                ));
462            }
463            if let Some(bad_char) = name.chars().find(|c| {
464                !c.is_alphanumeric()
465                    && *c != '-'
466                    && *c != '_'
467                    && *c != '.'
468                    && *c != '?'
469                    && *c != '!'
470            }) {
471                return Err(format!(
472                    "Symbol name contains invalid character '{}': ':{}'\n  Hint: Allowed: letters, digits, - _ . ? !",
473                    bad_char, name
474                ));
475            }
476            return Ok(Statement::Symbol(name.clone()));
477        }
478
479        // Try to parse as string literal
480        if token.starts_with('"') {
481            // Validate token has at least opening and closing quotes
482            if token.len() < 2 || !token.ends_with('"') {
483                return Err(format!("Malformed string literal: {}", token));
484            }
485            // Strip exactly one quote from each end (not all quotes, which would
486            // incorrectly handle escaped quotes at string boundaries like "hello\"")
487            let raw = &token[1..token.len() - 1];
488            let unescaped = unescape_string(raw)?;
489            return Ok(Statement::StringLiteral(unescaped));
490        }
491
492        // Check for conditional
493        if token == "if" {
494            return self.parse_if();
495        }
496
497        // Check for quotation
498        if token == "[" {
499            return self.parse_quotation(tok_line, tok_column);
500        }
501
502        // Check for match expression
503        if token == "match" {
504            return self.parse_match();
505        }
506
507        // Otherwise it's a word call - preserve source span for precise diagnostics
508        Ok(Statement::WordCall {
509            name: token.to_string(),
510            span: Some(Span::new(tok_line, tok_column, tok_len)),
511        })
512    }
513
514    fn parse_if(&mut self) -> Result<Statement, String> {
515        let mut then_branch = Vec::new();
516
517        // Parse then branch until 'else' or 'then'
518        loop {
519            if self.is_at_end() {
520                return Err("Unexpected end of file in 'if' statement".to_string());
521            }
522
523            // Skip comments and newlines
524            self.skip_comments();
525
526            if self.check("else") {
527                self.advance();
528                // Parse else branch
529                break;
530            }
531
532            if self.check("then") {
533                self.advance();
534                // End of if without else
535                return Ok(Statement::If {
536                    then_branch,
537                    else_branch: None,
538                });
539            }
540
541            then_branch.push(self.parse_statement()?);
542        }
543
544        // Parse else branch until 'then'
545        let mut else_branch = Vec::new();
546        loop {
547            if self.is_at_end() {
548                return Err("Unexpected end of file in 'else' branch".to_string());
549            }
550
551            // Skip comments and newlines
552            self.skip_comments();
553
554            if self.check("then") {
555                self.advance();
556                return Ok(Statement::If {
557                    then_branch,
558                    else_branch: Some(else_branch),
559                });
560            }
561
562            else_branch.push(self.parse_statement()?);
563        }
564    }
565
566    fn parse_quotation(
567        &mut self,
568        start_line: usize,
569        start_column: usize,
570    ) -> Result<Statement, String> {
571        use crate::ast::QuotationSpan;
572        let mut body = Vec::new();
573
574        // Parse statements until ']'
575        loop {
576            if self.is_at_end() {
577                return Err("Unexpected end of file in quotation".to_string());
578            }
579
580            // Skip comments and newlines
581            self.skip_comments();
582
583            if self.check("]") {
584                let end_tok = self.advance_token().unwrap();
585                let end_line = end_tok.line;
586                let end_column = end_tok.column + 1; // exclusive
587                let id = self.next_quotation_id;
588                self.next_quotation_id += 1;
589                // Span from '[' to ']' inclusive
590                let span = QuotationSpan::new(start_line, start_column, end_line, end_column);
591                return Ok(Statement::Quotation {
592                    id,
593                    body,
594                    span: Some(span),
595                });
596            }
597
598            body.push(self.parse_statement()?);
599        }
600    }
601
602    /// Parse a match expression:
603    ///   match
604    ///     Get -> send-response
605    ///     Increment -> do-increment send-response
606    ///     Report -> aggregate-add
607    ///   end
608    fn parse_match(&mut self) -> Result<Statement, String> {
609        let mut arms = Vec::new();
610
611        loop {
612            self.skip_comments();
613
614            // Check for 'end' to terminate match
615            if self.check("end") {
616                self.advance();
617                break;
618            }
619
620            if self.is_at_end() {
621                return Err("Unexpected end of file in match expression".to_string());
622            }
623
624            arms.push(self.parse_match_arm()?);
625        }
626
627        if arms.is_empty() {
628            return Err("Match expression must have at least one arm".to_string());
629        }
630
631        Ok(Statement::Match { arms })
632    }
633
634    /// Parse a single match arm:
635    ///   Get -> send-response
636    ///   or with bindings:
637    ///   Get { chan } -> chan send-response
638    fn parse_match_arm(&mut self) -> Result<MatchArm, String> {
639        // Get variant name
640        let variant_name = self
641            .advance()
642            .ok_or("Expected variant name in match arm")?
643            .clone();
644
645        self.skip_comments();
646
647        // Check for optional bindings: { field1 field2 }
648        let pattern = if self.check("{") {
649            self.consume("{");
650            let mut bindings = Vec::new();
651
652            loop {
653                self.skip_comments();
654
655                if self.check("}") {
656                    break;
657                }
658
659                if self.is_at_end() {
660                    return Err(format!(
661                        "Unexpected end of file in match arm bindings for '{}'",
662                        variant_name
663                    ));
664                }
665
666                let token = self.advance().ok_or("Expected binding name")?.clone();
667
668                // Require > prefix to make clear these are stack extractions, not variables
669                if let Some(field_name) = token.strip_prefix('>') {
670                    if field_name.is_empty() {
671                        return Err(format!(
672                            "Expected field name after '>' in match bindings for '{}'",
673                            variant_name
674                        ));
675                    }
676                    bindings.push(field_name.to_string());
677                } else {
678                    return Err(format!(
679                        "Match bindings must use '>' prefix to indicate stack extraction. \
680                         Use '>{}' instead of '{}' in pattern for '{}'",
681                        token, token, variant_name
682                    ));
683                }
684            }
685
686            self.consume("}");
687            Pattern::VariantWithBindings {
688                name: variant_name,
689                bindings,
690            }
691        } else {
692            Pattern::Variant(variant_name.clone())
693        };
694
695        self.skip_comments();
696
697        // Expect '->' arrow
698        if !self.consume("->") {
699            return Err(format!(
700                "Expected '->' after pattern '{}', got '{}'",
701                match &pattern {
702                    Pattern::Variant(n) => n.clone(),
703                    Pattern::VariantWithBindings { name, .. } => name.clone(),
704                },
705                self.current()
706            ));
707        }
708
709        // Parse body until next pattern or 'end'
710        let mut body = Vec::new();
711        loop {
712            self.skip_comments();
713
714            // Check for end of arm (next pattern starts with uppercase, or 'end')
715            if self.check("end") {
716                break;
717            }
718
719            // Check if next token looks like a match pattern (not just any uppercase word).
720            // A pattern is: UppercaseName followed by '->' or '{'
721            // This prevents confusing 'Make-Get' (constructor call) with a pattern.
722            if let Some(token) = self.current_token()
723                && let Some(first_char) = token.text.chars().next()
724                && first_char.is_uppercase()
725            {
726                // Peek at next token to see if this is a pattern (followed by -> or {)
727                if let Some(next) = self.peek_at(1)
728                    && (next == "->" || next == "{")
729                {
730                    // This is the next pattern
731                    break;
732                }
733                // Otherwise it's just an uppercase word call (like Make-Get), continue parsing body
734            }
735
736            if self.is_at_end() {
737                return Err("Unexpected end of file in match arm body".to_string());
738            }
739
740            body.push(self.parse_statement()?);
741        }
742
743        Ok(MatchArm { pattern, body })
744    }
745
746    /// Parse a stack effect declaration: ( ..a Int -- ..a Bool )
747    /// With optional computational effects: ( ..a Int -- ..a Bool | Yield Int )
748    fn parse_stack_effect(&mut self) -> Result<Effect, String> {
749        // Consume '('
750        if !self.consume("(") {
751            return Err("Expected '(' to start stack effect".to_string());
752        }
753
754        // Parse input stack types (until '--' or ')')
755        let (input_row_var, input_types) =
756            self.parse_type_list_until(&["--", ")"], "stack effect inputs", 0)?;
757
758        // Consume '--'
759        if !self.consume("--") {
760            return Err("Expected '--' separator in stack effect".to_string());
761        }
762
763        // Parse output stack types (until ')' or '|')
764        let (output_row_var, output_types) =
765            self.parse_type_list_until(&[")", "|"], "stack effect outputs", 0)?;
766
767        // Parse optional computational effects after '|'
768        let effects = if self.consume("|") {
769            self.parse_effect_annotations()?
770        } else {
771            Vec::new()
772        };
773
774        // Consume ')'
775        if !self.consume(")") {
776            return Err("Expected ')' to end stack effect".to_string());
777        }
778
779        // Build input and output StackTypes
780        let inputs = self.build_stack_type(input_row_var, input_types);
781        let outputs = self.build_stack_type(output_row_var, output_types);
782
783        Ok(Effect::with_effects(inputs, outputs, effects))
784    }
785
786    /// Parse computational effect annotations after '|'
787    /// Example: | Yield Int
788    fn parse_effect_annotations(&mut self) -> Result<Vec<SideEffect>, String> {
789        let mut effects = Vec::new();
790
791        // Parse effects until we hit ')'
792        while let Some(token) = self.peek_at(0) {
793            if token == ")" {
794                break;
795            }
796
797            match token {
798                "Yield" => {
799                    self.advance(); // consume "Yield"
800                    // Parse the yield type
801                    if let Some(type_token) = self.current_token() {
802                        if type_token.text == ")" {
803                            return Err("Expected type after 'Yield'".to_string());
804                        }
805                        let type_token = type_token.clone();
806                        self.advance();
807                        let yield_type = self.parse_type(&type_token)?;
808                        effects.push(SideEffect::Yield(Box::new(yield_type)));
809                    } else {
810                        return Err("Expected type after 'Yield'".to_string());
811                    }
812                }
813                _ => {
814                    return Err(format!("Unknown effect '{}'. Expected 'Yield'", token));
815                }
816            }
817        }
818
819        if effects.is_empty() {
820            return Err("Expected at least one effect after '|'".to_string());
821        }
822
823        Ok(effects)
824    }
825
826    /// Parse a single type token into a Type
827    fn parse_type(&self, token: &Token) -> Result<Type, String> {
828        match token.text.as_str() {
829            "Int" => Ok(Type::Int),
830            "Float" => Ok(Type::Float),
831            "Bool" => Ok(Type::Bool),
832            "String" => Ok(Type::String),
833            // Reject 'Quotation' - it looks like a type but would be silently treated as a type variable.
834            // Users must use explicit effect syntax like [Int -- Int] instead.
835            "Quotation" => Err(format!(
836                "'Quotation' is not a valid type at line {}, column {}. Use explicit quotation syntax like [Int -- Int] or [ -- ] instead.",
837                token.line + 1,
838                token.column + 1
839            )),
840            _ => {
841                // Check if it's a type variable (starts with uppercase)
842                if let Some(first_char) = token.text.chars().next() {
843                    if first_char.is_uppercase() {
844                        Ok(Type::Var(token.text.to_string()))
845                    } else {
846                        Err(format!(
847                            "Unknown type: '{}' at line {}, column {}. Expected Int, Bool, String, Closure, or a type variable (uppercase)",
848                            token.text.escape_default(),
849                            token.line + 1, // 1-indexed for user display
850                            token.column + 1
851                        ))
852                    }
853                } else {
854                    Err(format!(
855                        "Invalid type: '{}' at line {}, column {}",
856                        token.text.escape_default(),
857                        token.line + 1,
858                        token.column + 1
859                    ))
860                }
861            }
862        }
863    }
864
865    /// Validate row variable name
866    /// Row variables must start with a lowercase letter and contain only alphanumeric characters
867    fn validate_row_var_name(&self, name: &str) -> Result<(), String> {
868        if name.is_empty() {
869            return Err("Row variable must have a name after '..'".to_string());
870        }
871
872        // Must start with lowercase letter
873        let first_char = name.chars().next().unwrap();
874        if !first_char.is_ascii_lowercase() {
875            return Err(format!(
876                "Row variable '..{}' must start with a lowercase letter (a-z)",
877                name
878            ));
879        }
880
881        // Rest must be alphanumeric or underscore
882        for ch in name.chars() {
883            if !ch.is_alphanumeric() && ch != '_' {
884                return Err(format!(
885                    "Row variable '..{}' can only contain letters, numbers, and underscores",
886                    name
887                ));
888            }
889        }
890
891        // Check for reserved keywords (type names that might confuse users)
892        match name {
893            "Int" | "Bool" | "String" => {
894                return Err(format!(
895                    "Row variable '..{}' cannot use type name as identifier",
896                    name
897                ));
898            }
899            _ => {}
900        }
901
902        Ok(())
903    }
904
905    /// Parse a list of types until one of the given terminators is reached
906    /// Returns (optional row variable, list of types)
907    /// Used by both parse_stack_effect and parse_quotation_type
908    ///
909    /// depth: Current nesting depth for quotation types (0 at top level)
910    fn parse_type_list_until(
911        &mut self,
912        terminators: &[&str],
913        context: &str,
914        depth: usize,
915    ) -> Result<(Option<String>, Vec<Type>), String> {
916        const MAX_QUOTATION_DEPTH: usize = 32;
917
918        if depth > MAX_QUOTATION_DEPTH {
919            return Err(format!(
920                "Quotation type nesting exceeds maximum depth of {} (possible deeply nested types or DOS attack)",
921                MAX_QUOTATION_DEPTH
922            ));
923        }
924
925        let mut types = Vec::new();
926        let mut row_var = None;
927
928        while !terminators.iter().any(|t| self.check(t)) {
929            // Skip comments and blank lines within type lists
930            self.skip_comments();
931
932            // Re-check terminators after skipping comments
933            if terminators.iter().any(|t| self.check(t)) {
934                break;
935            }
936
937            if self.is_at_end() {
938                return Err(format!(
939                    "Unexpected end while parsing {} - expected one of: {}",
940                    context,
941                    terminators.join(", ")
942                ));
943            }
944
945            let token = self
946                .advance_token()
947                .ok_or_else(|| format!("Unexpected end in {}", context))?
948                .clone();
949
950            // Check for row variable: ..name
951            if token.text.starts_with("..") {
952                let var_name = token.text.trim_start_matches("..").to_string();
953                self.validate_row_var_name(&var_name)?;
954                row_var = Some(var_name);
955            } else if token.text == "Closure" {
956                // Closure type: Closure[effect]
957                if !self.consume("[") {
958                    return Err("Expected '[' after 'Closure' in type signature".to_string());
959                }
960                let effect_type = self.parse_quotation_type(depth)?;
961                match effect_type {
962                    Type::Quotation(effect) => {
963                        types.push(Type::Closure {
964                            effect,
965                            captures: Vec::new(), // Filled in by type checker
966                        });
967                    }
968                    _ => unreachable!("parse_quotation_type should return Quotation"),
969                }
970            } else if token.text == "[" {
971                // Nested quotation type
972                types.push(self.parse_quotation_type(depth)?);
973            } else {
974                // Parse as concrete type
975                types.push(self.parse_type(&token)?);
976            }
977        }
978
979        Ok((row_var, types))
980    }
981
982    /// Parse a quotation type: [inputs -- outputs]
983    /// Note: The opening '[' has already been consumed
984    ///
985    /// depth: Current nesting depth (incremented for each nested quotation)
986    fn parse_quotation_type(&mut self, depth: usize) -> Result<Type, String> {
987        // Parse input stack types (until '--' or ']')
988        let (input_row_var, input_types) =
989            self.parse_type_list_until(&["--", "]"], "quotation type inputs", depth + 1)?;
990
991        // Require '--' separator for clarity
992        if !self.consume("--") {
993            // Check if user closed with ] without separator
994            if self.check("]") {
995                return Err(
996                    "Quotation types require '--' separator. Did you mean '[Int -- ]' or '[ -- Int]'?"
997                        .to_string(),
998                );
999            }
1000            return Err("Expected '--' separator in quotation type".to_string());
1001        }
1002
1003        // Parse output stack types (until ']')
1004        let (output_row_var, output_types) =
1005            self.parse_type_list_until(&["]"], "quotation type outputs", depth + 1)?;
1006
1007        // Consume ']'
1008        if !self.consume("]") {
1009            return Err("Expected ']' to end quotation type".to_string());
1010        }
1011
1012        // Build input and output StackTypes
1013        let inputs = self.build_stack_type(input_row_var, input_types);
1014        let outputs = self.build_stack_type(output_row_var, output_types);
1015
1016        Ok(Type::Quotation(Box::new(Effect::new(inputs, outputs))))
1017    }
1018
1019    /// Build a StackType from an optional row variable and a list of types
1020    /// Example: row_var="a", types=[Int, Bool] => RowVar("a") with Int on top of Bool
1021    ///
1022    /// IMPORTANT: ALL stack effects are implicitly row-polymorphic in concatenative languages.
1023    /// This means:
1024    ///   ( -- )        becomes  ( ..rest -- ..rest )       - no-op, preserves stack
1025    ///   ( -- Int )    becomes  ( ..rest -- ..rest Int )   - pushes Int
1026    ///   ( Int -- )    becomes  ( ..rest Int -- ..rest )   - consumes Int
1027    ///   ( Int -- Int) becomes  ( ..rest Int -- ..rest Int ) - transforms top
1028    fn build_stack_type(&self, row_var: Option<String>, types: Vec<Type>) -> StackType {
1029        // Always use row polymorphism - this is fundamental to concatenative semantics
1030        let base = match row_var {
1031            Some(name) => StackType::RowVar(name),
1032            None => StackType::RowVar("rest".to_string()),
1033        };
1034
1035        // Push types onto the stack (bottom to top order)
1036        types.into_iter().fold(base, |stack, ty| stack.push(ty))
1037    }
1038
1039    fn skip_comments(&mut self) {
1040        loop {
1041            if self.check("#") {
1042                // Skip until newline
1043                while !self.is_at_end() && self.current() != "\n" {
1044                    self.advance();
1045                }
1046                if !self.is_at_end() {
1047                    self.advance(); // skip newline
1048                }
1049            } else if self.check("\n") {
1050                // Skip blank lines
1051                self.advance();
1052            } else {
1053                break;
1054            }
1055        }
1056    }
1057
1058    fn check(&self, expected: &str) -> bool {
1059        if self.is_at_end() {
1060            return false;
1061        }
1062        self.current() == expected
1063    }
1064
1065    fn consume(&mut self, expected: &str) -> bool {
1066        if self.check(expected) {
1067            self.advance();
1068            true
1069        } else {
1070            false
1071        }
1072    }
1073
1074    /// Get the text of the current token
1075    fn current(&self) -> &str {
1076        if self.is_at_end() {
1077            ""
1078        } else {
1079            &self.tokens[self.pos].text
1080        }
1081    }
1082
1083    /// Get the full current token with position info
1084    fn current_token(&self) -> Option<&Token> {
1085        if self.is_at_end() {
1086            None
1087        } else {
1088            Some(&self.tokens[self.pos])
1089        }
1090    }
1091
1092    /// Peek at a token N positions ahead without consuming
1093    fn peek_at(&self, n: usize) -> Option<&str> {
1094        let idx = self.pos + n;
1095        if idx < self.tokens.len() {
1096            Some(&self.tokens[idx].text)
1097        } else {
1098            None
1099        }
1100    }
1101
1102    /// Advance and return the token text (for compatibility with existing code)
1103    fn advance(&mut self) -> Option<&String> {
1104        if self.is_at_end() {
1105            None
1106        } else {
1107            let token = &self.tokens[self.pos];
1108            self.pos += 1;
1109            Some(&token.text)
1110        }
1111    }
1112
1113    /// Advance and return the full token with position info
1114    fn advance_token(&mut self) -> Option<&Token> {
1115        if self.is_at_end() {
1116            None
1117        } else {
1118            let token = &self.tokens[self.pos];
1119            self.pos += 1;
1120            Some(token)
1121        }
1122    }
1123
1124    fn is_at_end(&self) -> bool {
1125        self.pos >= self.tokens.len()
1126    }
1127}
1128
/// Check if a token looks like a float literal
///
/// After an optional leading `-`, a float-looking token contains at least
/// one ASCII digit AND either:
/// - A decimal point: `3.14`, `.5`, `5.`
/// - Scientific notation: `1e10`, `1E-5`, `1.5e3`
///
/// The digit requirement keeps ordinary words that merely contain `.` or
/// `e` (for example `hello` or `io.write-line`) from being classified as
/// floats. This is only a shape check: a `true` result does not guarantee
/// the token parses as a number (e.g. `1.2.3` passes).
///
/// This check must happen BEFORE integer parsing to avoid
/// parsing "5" in "5.0" as an integer.
fn is_float_literal(token: &str) -> bool {
    // Skip leading minus sign for negative numbers
    let unsigned = token.strip_prefix('-').unwrap_or(token);

    // Must have at least one digit (this also rejects the empty string)
    let has_digit = unsigned.bytes().any(|b| b.is_ascii_digit());

    // Decimal point or scientific notation marker
    let has_float_marker = unsigned.contains(|c: char| matches!(c, '.' | 'e' | 'E'));

    has_digit && has_float_marker
}
1149
/// Process escape sequences in a string literal
///
/// Supported escape sequences:
/// - `\"` -> `"`  (quote)
/// - `\\` -> `\`  (backslash)
/// - `\n` -> newline
/// - `\r` -> carriage return
/// - `\t` -> tab
/// - `\xNN` -> Unicode code point U+00NN (hex value 00-FF)
///
/// # Note on `\xNN` encoding
///
/// The `\xNN` escape creates a Unicode code point U+00NN, not a raw byte.
/// For values 0x00-0x7F (ASCII), this maps directly to the byte value.
/// For values 0x80-0xFF (Latin-1 Supplement), the character is stored as
/// a multi-byte UTF-8 sequence. For example:
/// - `\x41` -> 'A' (1 byte in UTF-8)
/// - `\x1b` -> ESC (1 byte in UTF-8, used for ANSI terminal codes)
/// - `\xFF` -> 'ÿ' (U+00FF, 2 bytes in UTF-8: 0xC3 0xBF)
///
/// Both characters after `\x` must be hex digits (`0-9`, `a-f`, `A-F`);
/// a sign character is rejected, so `\x+F` is an error.
///
/// # Errors
/// Returns an error for an unknown escape sequence, for a `\x` escape that
/// is missing a digit or contains a non-hex character, and for a trailing
/// lone backslash.
fn unescape_string(s: &str) -> Result<String, String> {
    // Every escape is at least as long as the text it produces, so the
    // input length is an upper bound for the output length.
    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars();

    while let Some(ch) = chars.next() {
        if ch != '\\' {
            result.push(ch);
            continue;
        }

        match chars.next() {
            Some('"') => result.push('"'),
            Some('\\') => result.push('\\'),
            Some('n') => result.push('\n'),
            Some('r') => result.push('\r'),
            Some('t') => result.push('\t'),
            Some('x') => {
                // Hex escape: \xNN
                let hex1 = chars.next().ok_or_else(|| {
                    "Incomplete hex escape sequence '\\x' - expected 2 hex digits".to_string()
                })?;
                let hex2 = chars.next().ok_or_else(|| {
                    format!(
                        "Incomplete hex escape sequence '\\x{}' - expected 2 hex digits",
                        hex1
                    )
                })?;

                // Validate each character on its own: an integer parser such as
                // `u8::from_str_radix` would also accept a leading '+' sign.
                let code = match (hex1.to_digit(16), hex2.to_digit(16)) {
                    (Some(hi), Some(lo)) => hi * 16 + lo,
                    _ => {
                        return Err(format!(
                            "Invalid hex escape sequence '\\x{}{}' - expected 2 hex digits (00-FF)",
                            hex1, hex2
                        ));
                    }
                };

                // Two hex digits give at most 0xFF, so the cast cannot truncate.
                result.push(char::from(code as u8));
            }
            Some(c) => {
                return Err(format!(
                    "Unknown escape sequence '\\{}' in string literal. \
                     Supported: \\\" \\\\ \\n \\r \\t \\xNN",
                    c
                ));
            }
            None => {
                return Err("String ends with incomplete escape sequence '\\'".to_string());
            }
        }
    }

    Ok(result)
}
1227
1228fn tokenize(source: &str) -> Vec<Token> {
1229    let mut tokens = Vec::new();
1230    let mut current = String::new();
1231    let mut current_start_line = 0;
1232    let mut current_start_col = 0;
1233    let mut in_string = false;
1234    let mut prev_was_backslash = false;
1235
1236    // Track current position (0-indexed)
1237    let mut line = 0;
1238    let mut col = 0;
1239
1240    for ch in source.chars() {
1241        if in_string {
1242            current.push(ch);
1243            if ch == '"' && !prev_was_backslash {
1244                // Unescaped quote ends the string
1245                in_string = false;
1246                tokens.push(Token::new(
1247                    current.clone(),
1248                    current_start_line,
1249                    current_start_col,
1250                ));
1251                current.clear();
1252                prev_was_backslash = false;
1253            } else if ch == '\\' && !prev_was_backslash {
1254                // Start of escape sequence
1255                prev_was_backslash = true;
1256            } else {
1257                // Regular character or escaped character
1258                prev_was_backslash = false;
1259            }
1260            // Track newlines inside strings
1261            if ch == '\n' {
1262                line += 1;
1263                col = 0;
1264            } else {
1265                col += 1;
1266            }
1267        } else if ch == '"' {
1268            if !current.is_empty() {
1269                tokens.push(Token::new(
1270                    current.clone(),
1271                    current_start_line,
1272                    current_start_col,
1273                ));
1274                current.clear();
1275            }
1276            in_string = true;
1277            current_start_line = line;
1278            current_start_col = col;
1279            current.push(ch);
1280            prev_was_backslash = false;
1281            col += 1;
1282        } else if ch.is_whitespace() {
1283            if !current.is_empty() {
1284                tokens.push(Token::new(
1285                    current.clone(),
1286                    current_start_line,
1287                    current_start_col,
1288                ));
1289                current.clear();
1290            }
1291            // Preserve newlines for comment handling
1292            if ch == '\n' {
1293                tokens.push(Token::new("\n".to_string(), line, col));
1294                line += 1;
1295                col = 0;
1296            } else {
1297                col += 1;
1298            }
1299        } else if "():;[]{},".contains(ch) {
1300            if !current.is_empty() {
1301                tokens.push(Token::new(
1302                    current.clone(),
1303                    current_start_line,
1304                    current_start_col,
1305                ));
1306                current.clear();
1307            }
1308            tokens.push(Token::new(ch.to_string(), line, col));
1309            col += 1;
1310        } else {
1311            if current.is_empty() {
1312                current_start_line = line;
1313                current_start_col = col;
1314            }
1315            current.push(ch);
1316            col += 1;
1317        }
1318    }
1319
1320    // Check for unclosed string literal
1321    if in_string {
1322        // Return error by adding a special error token
1323        // The parser will handle this as a parse error
1324        tokens.push(Token::new(
1325            "<<<UNCLOSED_STRING>>>".to_string(),
1326            current_start_line,
1327            current_start_col,
1328        ));
1329    } else if !current.is_empty() {
1330        tokens.push(Token::new(current, current_start_line, current_start_col));
1331    }
1332
1333    tokens
1334}
1335
1336#[cfg(test)]
1337mod tests {
1338    use super::*;
1339
1340    #[test]
1341    fn test_parse_hello_world() {
1342        let source = r#"
1343: main ( -- )
1344  "Hello, World!" write_line ;
1345"#;
1346
1347        let mut parser = Parser::new(source);
1348        let program = parser.parse().unwrap();
1349
1350        assert_eq!(program.words.len(), 1);
1351        assert_eq!(program.words[0].name, "main");
1352        assert_eq!(program.words[0].body.len(), 2);
1353
1354        match &program.words[0].body[0] {
1355            Statement::StringLiteral(s) => assert_eq!(s, "Hello, World!"),
1356            _ => panic!("Expected StringLiteral"),
1357        }
1358
1359        match &program.words[0].body[1] {
1360            Statement::WordCall { name, .. } => assert_eq!(name, "write_line"),
1361            _ => panic!("Expected WordCall"),
1362        }
1363    }
1364
1365    #[test]
1366    fn test_parse_with_numbers() {
1367        let source = ": add-example ( -- ) 2 3 add ;";
1368
1369        let mut parser = Parser::new(source);
1370        let program = parser.parse().unwrap();
1371
1372        assert_eq!(program.words[0].body.len(), 3);
1373        assert_eq!(program.words[0].body[0], Statement::IntLiteral(2));
1374        assert_eq!(program.words[0].body[1], Statement::IntLiteral(3));
1375        assert!(matches!(
1376            &program.words[0].body[2],
1377            Statement::WordCall { name, .. } if name == "add"
1378        ));
1379    }
1380
1381    #[test]
1382    fn test_parse_hex_literals() {
1383        let source = ": test ( -- ) 0xFF 0x10 0X1A ;";
1384        let mut parser = Parser::new(source);
1385        let program = parser.parse().unwrap();
1386
1387        assert_eq!(program.words[0].body[0], Statement::IntLiteral(255));
1388        assert_eq!(program.words[0].body[1], Statement::IntLiteral(16));
1389        assert_eq!(program.words[0].body[2], Statement::IntLiteral(26));
1390    }
1391
1392    #[test]
1393    fn test_parse_binary_literals() {
1394        let source = ": test ( -- ) 0b1010 0B1111 0b0 ;";
1395        let mut parser = Parser::new(source);
1396        let program = parser.parse().unwrap();
1397
1398        assert_eq!(program.words[0].body[0], Statement::IntLiteral(10));
1399        assert_eq!(program.words[0].body[1], Statement::IntLiteral(15));
1400        assert_eq!(program.words[0].body[2], Statement::IntLiteral(0));
1401    }
1402
1403    #[test]
1404    fn test_parse_invalid_hex_literal() {
1405        let source = ": test ( -- ) 0xGG ;";
1406        let mut parser = Parser::new(source);
1407        let err = parser.parse().unwrap_err();
1408        assert!(err.contains("Invalid hex literal"));
1409    }
1410
1411    #[test]
1412    fn test_parse_invalid_binary_literal() {
1413        let source = ": test ( -- ) 0b123 ;";
1414        let mut parser = Parser::new(source);
1415        let err = parser.parse().unwrap_err();
1416        assert!(err.contains("Invalid binary literal"));
1417    }
1418
1419    #[test]
1420    fn test_parse_escaped_quotes() {
1421        let source = r#": main ( -- ) "Say \"hello\" there" write_line ;"#;
1422
1423        let mut parser = Parser::new(source);
1424        let program = parser.parse().unwrap();
1425
1426        assert_eq!(program.words.len(), 1);
1427        assert_eq!(program.words[0].body.len(), 2);
1428
1429        match &program.words[0].body[0] {
1430            // Escape sequences should be processed: \" becomes actual quote
1431            Statement::StringLiteral(s) => assert_eq!(s, "Say \"hello\" there"),
1432            _ => panic!("Expected StringLiteral with escaped quotes"),
1433        }
1434    }
1435
1436    /// Regression test for issue #117: escaped quote at end of string
1437    /// Previously failed with "String ends with incomplete escape sequence"
1438    #[test]
1439    fn test_escaped_quote_at_end_of_string() {
1440        let source = r#": main ( -- ) "hello\"" io.write-line ;"#;
1441
1442        let mut parser = Parser::new(source);
1443        let program = parser.parse().unwrap();
1444
1445        assert_eq!(program.words.len(), 1);
1446        match &program.words[0].body[0] {
1447            Statement::StringLiteral(s) => assert_eq!(s, "hello\""),
1448            _ => panic!("Expected StringLiteral ending with escaped quote"),
1449        }
1450    }
1451
1452    /// Test escaped quote at start of string (boundary case)
1453    #[test]
1454    fn test_escaped_quote_at_start_of_string() {
1455        let source = r#": main ( -- ) "\"hello" io.write-line ;"#;
1456
1457        let mut parser = Parser::new(source);
1458        let program = parser.parse().unwrap();
1459
1460        match &program.words[0].body[0] {
1461            Statement::StringLiteral(s) => assert_eq!(s, "\"hello"),
1462            _ => panic!("Expected StringLiteral starting with escaped quote"),
1463        }
1464    }
1465
1466    #[test]
1467    fn test_escape_sequences() {
1468        let source = r#": main ( -- ) "Line 1\nLine 2\tTabbed" write_line ;"#;
1469
1470        let mut parser = Parser::new(source);
1471        let program = parser.parse().unwrap();
1472
1473        match &program.words[0].body[0] {
1474            Statement::StringLiteral(s) => assert_eq!(s, "Line 1\nLine 2\tTabbed"),
1475            _ => panic!("Expected StringLiteral"),
1476        }
1477    }
1478
1479    #[test]
1480    fn test_unknown_escape_sequence() {
1481        let source = r#": main ( -- ) "Bad \q sequence" write_line ;"#;
1482
1483        let mut parser = Parser::new(source);
1484        let result = parser.parse();
1485
1486        assert!(result.is_err());
1487        assert!(result.unwrap_err().contains("Unknown escape sequence"));
1488    }
1489
1490    #[test]
1491    fn test_hex_escape_sequence() {
1492        // \x1b is ESC (27), \x41 is 'A' (65)
1493        let source = r#": main ( -- ) "\x1b[2K\x41" io.write-line ;"#;
1494
1495        let mut parser = Parser::new(source);
1496        let program = parser.parse().unwrap();
1497
1498        match &program.words[0].body[0] {
1499            Statement::StringLiteral(s) => {
1500                assert_eq!(s.len(), 5); // ESC [ 2 K A
1501                assert_eq!(s.as_bytes()[0], 0x1b); // ESC
1502                assert_eq!(s.as_bytes()[4], 0x41); // 'A'
1503            }
1504            _ => panic!("Expected StringLiteral"),
1505        }
1506    }
1507
1508    #[test]
1509    fn test_hex_escape_null_byte() {
1510        let source = r#": main ( -- ) "before\x00after" io.write-line ;"#;
1511
1512        let mut parser = Parser::new(source);
1513        let program = parser.parse().unwrap();
1514
1515        match &program.words[0].body[0] {
1516            Statement::StringLiteral(s) => {
1517                assert_eq!(s.len(), 12); // "before" + NUL + "after"
1518                assert_eq!(s.as_bytes()[6], 0x00);
1519            }
1520            _ => panic!("Expected StringLiteral"),
1521        }
1522    }
1523
1524    #[test]
1525    fn test_hex_escape_uppercase() {
1526        // Both uppercase and lowercase hex digits should work
1527        // Note: Values > 0x7F become Unicode code points (U+00NN), multi-byte in UTF-8
1528        let source = r#": main ( -- ) "\x41\x42\x4F" io.write-line ;"#;
1529
1530        let mut parser = Parser::new(source);
1531        let program = parser.parse().unwrap();
1532
1533        match &program.words[0].body[0] {
1534            Statement::StringLiteral(s) => {
1535                assert_eq!(s, "ABO"); // 0x41='A', 0x42='B', 0x4F='O'
1536            }
1537            _ => panic!("Expected StringLiteral"),
1538        }
1539    }
1540
1541    #[test]
1542    fn test_hex_escape_high_bytes() {
1543        // Values > 0x7F become Unicode code points (Latin-1), which are multi-byte in UTF-8
1544        let source = r#": main ( -- ) "\xFF" io.write-line ;"#;
1545
1546        let mut parser = Parser::new(source);
1547        let program = parser.parse().unwrap();
1548
1549        match &program.words[0].body[0] {
1550            Statement::StringLiteral(s) => {
1551                // \xFF becomes U+00FF (ÿ), which is 2 bytes in UTF-8: C3 BF
1552                assert_eq!(s, "\u{00FF}");
1553                assert_eq!(s.chars().next().unwrap(), 'ÿ');
1554            }
1555            _ => panic!("Expected StringLiteral"),
1556        }
1557    }
1558
1559    #[test]
1560    fn test_hex_escape_incomplete() {
1561        // \x with only one hex digit
1562        let source = r#": main ( -- ) "\x1" io.write-line ;"#;
1563
1564        let mut parser = Parser::new(source);
1565        let result = parser.parse();
1566
1567        assert!(result.is_err());
1568        assert!(result.unwrap_err().contains("Incomplete hex escape"));
1569    }
1570
1571    #[test]
1572    fn test_hex_escape_invalid_digits() {
1573        // \xGG is not valid hex
1574        let source = r#": main ( -- ) "\xGG" io.write-line ;"#;
1575
1576        let mut parser = Parser::new(source);
1577        let result = parser.parse();
1578
1579        assert!(result.is_err());
1580        assert!(result.unwrap_err().contains("Invalid hex escape"));
1581    }
1582
1583    #[test]
1584    fn test_hex_escape_at_end_of_string() {
1585        // \x at end of string with no digits
1586        let source = r#": main ( -- ) "test\x" io.write-line ;"#;
1587
1588        let mut parser = Parser::new(source);
1589        let result = parser.parse();
1590
1591        assert!(result.is_err());
1592        assert!(result.unwrap_err().contains("Incomplete hex escape"));
1593    }
1594
1595    #[test]
1596    fn test_unclosed_string_literal() {
1597        let source = r#": main ( -- ) "unclosed string ;"#;
1598
1599        let mut parser = Parser::new(source);
1600        let result = parser.parse();
1601
1602        assert!(result.is_err());
1603        let err_msg = result.unwrap_err();
1604        assert!(err_msg.contains("Unclosed string literal"));
1605        // Should include position information (line 1, column 15 for the opening quote)
1606        assert!(
1607            err_msg.contains("line 1"),
1608            "Expected line number in error: {}",
1609            err_msg
1610        );
1611        assert!(
1612            err_msg.contains("column 15"),
1613            "Expected column number in error: {}",
1614            err_msg
1615        );
1616    }
1617
1618    #[test]
1619    fn test_multiple_word_definitions() {
1620        let source = r#"
1621: double ( Int -- Int )
1622  2 multiply ;
1623
1624: quadruple ( Int -- Int )
1625  double double ;
1626"#;
1627
1628        let mut parser = Parser::new(source);
1629        let program = parser.parse().unwrap();
1630
1631        assert_eq!(program.words.len(), 2);
1632        assert_eq!(program.words[0].name, "double");
1633        assert_eq!(program.words[1].name, "quadruple");
1634
1635        // Verify stack effects were parsed
1636        assert!(program.words[0].effect.is_some());
1637        assert!(program.words[1].effect.is_some());
1638    }
1639
1640    #[test]
1641    fn test_user_word_calling_user_word() {
1642        let source = r#"
1643: helper ( -- )
1644  "helper called" write_line ;
1645
1646: main ( -- )
1647  helper ;
1648"#;
1649
1650        let mut parser = Parser::new(source);
1651        let program = parser.parse().unwrap();
1652
1653        assert_eq!(program.words.len(), 2);
1654
1655        // Check main calls helper
1656        match &program.words[1].body[0] {
1657            Statement::WordCall { name, .. } => assert_eq!(name, "helper"),
1658            _ => panic!("Expected WordCall to helper"),
1659        }
1660    }
1661
1662    #[test]
1663    fn test_parse_simple_stack_effect() {
1664        // Test: ( Int -- Bool )
1665        // With implicit row polymorphism, this becomes: ( ..rest Int -- ..rest Bool )
1666        let source = ": test ( Int -- Bool ) 1 ;";
1667        let mut parser = Parser::new(source);
1668        let program = parser.parse().unwrap();
1669
1670        assert_eq!(program.words.len(), 1);
1671        let word = &program.words[0];
1672        assert!(word.effect.is_some());
1673
1674        let effect = word.effect.as_ref().unwrap();
1675
1676        // Input: Int on RowVar("rest") (implicit row polymorphism)
1677        assert_eq!(
1678            effect.inputs,
1679            StackType::Cons {
1680                rest: Box::new(StackType::RowVar("rest".to_string())),
1681                top: Type::Int
1682            }
1683        );
1684
1685        // Output: Bool on RowVar("rest") (implicit row polymorphism)
1686        assert_eq!(
1687            effect.outputs,
1688            StackType::Cons {
1689                rest: Box::new(StackType::RowVar("rest".to_string())),
1690                top: Type::Bool
1691            }
1692        );
1693    }
1694
1695    #[test]
1696    fn test_parse_row_polymorphic_stack_effect() {
1697        // Test: ( ..a Int -- ..a Bool )
1698        let source = ": test ( ..a Int -- ..a Bool ) 1 ;";
1699        let mut parser = Parser::new(source);
1700        let program = parser.parse().unwrap();
1701
1702        assert_eq!(program.words.len(), 1);
1703        let word = &program.words[0];
1704        assert!(word.effect.is_some());
1705
1706        let effect = word.effect.as_ref().unwrap();
1707
1708        // Input: Int on RowVar("a")
1709        assert_eq!(
1710            effect.inputs,
1711            StackType::Cons {
1712                rest: Box::new(StackType::RowVar("a".to_string())),
1713                top: Type::Int
1714            }
1715        );
1716
1717        // Output: Bool on RowVar("a")
1718        assert_eq!(
1719            effect.outputs,
1720            StackType::Cons {
1721                rest: Box::new(StackType::RowVar("a".to_string())),
1722                top: Type::Bool
1723            }
1724        );
1725    }
1726
1727    #[test]
1728    fn test_parse_invalid_row_var_starts_with_digit() {
1729        // Test: Row variable cannot start with digit
1730        let source = ": test ( ..123 Int -- ) ;";
1731        let mut parser = Parser::new(source);
1732        let result = parser.parse();
1733
1734        assert!(result.is_err());
1735        let err_msg = result.unwrap_err();
1736        assert!(
1737            err_msg.contains("lowercase letter"),
1738            "Expected error about lowercase letter, got: {}",
1739            err_msg
1740        );
1741    }
1742
1743    #[test]
1744    fn test_parse_invalid_row_var_starts_with_uppercase() {
1745        // Test: Row variable cannot start with uppercase (that's a type variable)
1746        let source = ": test ( ..Int Int -- ) ;";
1747        let mut parser = Parser::new(source);
1748        let result = parser.parse();
1749
1750        assert!(result.is_err());
1751        let err_msg = result.unwrap_err();
1752        assert!(
1753            err_msg.contains("lowercase letter") || err_msg.contains("type name"),
1754            "Expected error about lowercase letter or type name, got: {}",
1755            err_msg
1756        );
1757    }
1758
1759    #[test]
1760    fn test_parse_invalid_row_var_with_special_chars() {
1761        // Test: Row variable cannot contain special characters
1762        let source = ": test ( ..a-b Int -- ) ;";
1763        let mut parser = Parser::new(source);
1764        let result = parser.parse();
1765
1766        assert!(result.is_err());
1767        let err_msg = result.unwrap_err();
1768        assert!(
1769            err_msg.contains("letters, numbers, and underscores")
1770                || err_msg.contains("Unknown type"),
1771            "Expected error about valid characters, got: {}",
1772            err_msg
1773        );
1774    }
1775
1776    #[test]
1777    fn test_parse_valid_row_var_with_underscore() {
1778        // Test: Row variable CAN contain underscore
1779        let source = ": test ( ..my_row Int -- ..my_row Bool ) ;";
1780        let mut parser = Parser::new(source);
1781        let result = parser.parse();
1782
1783        assert!(result.is_ok(), "Should accept row variable with underscore");
1784    }
1785
1786    #[test]
1787    fn test_parse_multiple_types_stack_effect() {
1788        // Test: ( Int String -- Bool )
1789        // With implicit row polymorphism: ( ..rest Int String -- ..rest Bool )
1790        let source = ": test ( Int String -- Bool ) 1 ;";
1791        let mut parser = Parser::new(source);
1792        let program = parser.parse().unwrap();
1793
1794        let effect = program.words[0].effect.as_ref().unwrap();
1795
1796        // Input: String on Int on RowVar("rest")
1797        let (rest, top) = effect.inputs.clone().pop().unwrap();
1798        assert_eq!(top, Type::String);
1799        let (rest2, top2) = rest.pop().unwrap();
1800        assert_eq!(top2, Type::Int);
1801        assert_eq!(rest2, StackType::RowVar("rest".to_string()));
1802
1803        // Output: Bool on RowVar("rest") (implicit row polymorphism)
1804        assert_eq!(
1805            effect.outputs,
1806            StackType::Cons {
1807                rest: Box::new(StackType::RowVar("rest".to_string())),
1808                top: Type::Bool
1809            }
1810        );
1811    }
1812
1813    #[test]
1814    fn test_parse_type_variable() {
1815        // Test: ( ..a T -- ..a T T ) for dup
1816        let source = ": dup ( ..a T -- ..a T T ) ;";
1817        let mut parser = Parser::new(source);
1818        let program = parser.parse().unwrap();
1819
1820        let effect = program.words[0].effect.as_ref().unwrap();
1821
1822        // Input: T on RowVar("a")
1823        assert_eq!(
1824            effect.inputs,
1825            StackType::Cons {
1826                rest: Box::new(StackType::RowVar("a".to_string())),
1827                top: Type::Var("T".to_string())
1828            }
1829        );
1830
1831        // Output: T on T on RowVar("a")
1832        let (rest, top) = effect.outputs.clone().pop().unwrap();
1833        assert_eq!(top, Type::Var("T".to_string()));
1834        let (rest2, top2) = rest.pop().unwrap();
1835        assert_eq!(top2, Type::Var("T".to_string()));
1836        assert_eq!(rest2, StackType::RowVar("a".to_string()));
1837    }
1838
1839    #[test]
1840    fn test_parse_empty_stack_effect() {
1841        // Test: ( -- )
1842        // In concatenative languages, even empty effects are row-polymorphic
1843        // ( -- ) means ( ..rest -- ..rest ) - preserves stack
1844        let source = ": test ( -- ) ;";
1845        let mut parser = Parser::new(source);
1846        let program = parser.parse().unwrap();
1847
1848        let effect = program.words[0].effect.as_ref().unwrap();
1849
1850        // Both inputs and outputs should use the same implicit row variable
1851        assert_eq!(effect.inputs, StackType::RowVar("rest".to_string()));
1852        assert_eq!(effect.outputs, StackType::RowVar("rest".to_string()));
1853    }
1854
1855    #[test]
1856    fn test_parse_invalid_type() {
1857        // Test invalid type (lowercase, not a row var)
1858        let source = ": test ( invalid -- Bool ) ;";
1859        let mut parser = Parser::new(source);
1860        let result = parser.parse();
1861
1862        assert!(result.is_err());
1863        assert!(result.unwrap_err().contains("Unknown type"));
1864    }
1865
1866    #[test]
1867    fn test_parse_unclosed_stack_effect() {
1868        // Test unclosed stack effect - parser tries to parse all tokens until ')' or EOF
1869        // In this case, it encounters "body" which is an invalid type
1870        let source = ": test ( Int -- Bool body ;";
1871        let mut parser = Parser::new(source);
1872        let result = parser.parse();
1873
1874        assert!(result.is_err());
1875        let err_msg = result.unwrap_err();
1876        // Parser will try to parse "body" as a type and fail
1877        assert!(err_msg.contains("Unknown type"));
1878    }
1879
1880    #[test]
1881    fn test_parse_simple_quotation_type() {
1882        // Test: ( [Int -- Int] -- )
1883        let source = ": apply ( [Int -- Int] -- ) ;";
1884        let mut parser = Parser::new(source);
1885        let program = parser.parse().unwrap();
1886
1887        let effect = program.words[0].effect.as_ref().unwrap();
1888
1889        // Input should be: Quotation(Int -- Int) on RowVar("rest")
1890        let (rest, top) = effect.inputs.clone().pop().unwrap();
1891        match top {
1892            Type::Quotation(quot_effect) => {
1893                // Check quotation's input: Int on RowVar("rest")
1894                assert_eq!(
1895                    quot_effect.inputs,
1896                    StackType::Cons {
1897                        rest: Box::new(StackType::RowVar("rest".to_string())),
1898                        top: Type::Int
1899                    }
1900                );
1901                // Check quotation's output: Int on RowVar("rest")
1902                assert_eq!(
1903                    quot_effect.outputs,
1904                    StackType::Cons {
1905                        rest: Box::new(StackType::RowVar("rest".to_string())),
1906                        top: Type::Int
1907                    }
1908                );
1909            }
1910            _ => panic!("Expected Quotation type, got {:?}", top),
1911        }
1912        assert_eq!(rest, StackType::RowVar("rest".to_string()));
1913    }
1914
1915    #[test]
1916    fn test_parse_quotation_type_with_row_vars() {
1917        // Test: ( ..a [..a T -- ..a Bool] -- ..a )
1918        let source = ": test ( ..a [..a T -- ..a Bool] -- ..a ) ;";
1919        let mut parser = Parser::new(source);
1920        let program = parser.parse().unwrap();
1921
1922        let effect = program.words[0].effect.as_ref().unwrap();
1923
1924        // Input: Quotation on RowVar("a")
1925        let (rest, top) = effect.inputs.clone().pop().unwrap();
1926        match top {
1927            Type::Quotation(quot_effect) => {
1928                // Check quotation's input: T on RowVar("a")
1929                let (q_in_rest, q_in_top) = quot_effect.inputs.clone().pop().unwrap();
1930                assert_eq!(q_in_top, Type::Var("T".to_string()));
1931                assert_eq!(q_in_rest, StackType::RowVar("a".to_string()));
1932
1933                // Check quotation's output: Bool on RowVar("a")
1934                let (q_out_rest, q_out_top) = quot_effect.outputs.clone().pop().unwrap();
1935                assert_eq!(q_out_top, Type::Bool);
1936                assert_eq!(q_out_rest, StackType::RowVar("a".to_string()));
1937            }
1938            _ => panic!("Expected Quotation type, got {:?}", top),
1939        }
1940        assert_eq!(rest, StackType::RowVar("a".to_string()));
1941    }
1942
1943    #[test]
1944    fn test_parse_nested_quotation_type() {
1945        // Test: ( [[Int -- Int] -- Bool] -- )
1946        let source = ": nested ( [[Int -- Int] -- Bool] -- ) ;";
1947        let mut parser = Parser::new(source);
1948        let program = parser.parse().unwrap();
1949
1950        let effect = program.words[0].effect.as_ref().unwrap();
1951
1952        // Input: Quotation([Int -- Int] -- Bool) on RowVar("rest")
1953        let (_, top) = effect.inputs.clone().pop().unwrap();
1954        match top {
1955            Type::Quotation(outer_effect) => {
1956                // Outer quotation input: [Int -- Int] on RowVar("rest")
1957                let (_, outer_in_top) = outer_effect.inputs.clone().pop().unwrap();
1958                match outer_in_top {
1959                    Type::Quotation(inner_effect) => {
1960                        // Inner quotation: Int -- Int
1961                        assert!(matches!(
1962                            inner_effect.inputs.clone().pop().unwrap().1,
1963                            Type::Int
1964                        ));
1965                        assert!(matches!(
1966                            inner_effect.outputs.clone().pop().unwrap().1,
1967                            Type::Int
1968                        ));
1969                    }
1970                    _ => panic!("Expected nested Quotation type"),
1971                }
1972
1973                // Outer quotation output: Bool
1974                let (_, outer_out_top) = outer_effect.outputs.clone().pop().unwrap();
1975                assert_eq!(outer_out_top, Type::Bool);
1976            }
1977            _ => panic!("Expected Quotation type"),
1978        }
1979    }
1980
1981    #[test]
1982    fn test_parse_deeply_nested_quotation_type_exceeds_limit() {
1983        // Test: Deeply nested quotation types should fail with max depth error
1984        // Build a quotation type nested 35 levels deep (exceeds MAX_QUOTATION_DEPTH = 32)
1985        let mut source = String::from(": deep ( ");
1986
1987        // Build opening brackets: [[[[[[...
1988        for _ in 0..35 {
1989            source.push_str("[ -- ");
1990        }
1991
1992        source.push_str("Int");
1993
1994        // Build closing brackets: ...]]]]]]
1995        for _ in 0..35 {
1996            source.push_str(" ]");
1997        }
1998
1999        source.push_str(" -- ) ;");
2000
2001        let mut parser = Parser::new(&source);
2002        let result = parser.parse();
2003
2004        // Should fail with depth limit error
2005        assert!(result.is_err());
2006        let err_msg = result.unwrap_err();
2007        assert!(
2008            err_msg.contains("depth") || err_msg.contains("32"),
2009            "Expected depth limit error, got: {}",
2010            err_msg
2011        );
2012    }
2013
2014    #[test]
2015    fn test_parse_empty_quotation_type() {
2016        // Test: ( [ -- ] -- )
2017        // An empty quotation type is also row-polymorphic: [ ..rest -- ..rest ]
2018        let source = ": empty-quot ( [ -- ] -- ) ;";
2019        let mut parser = Parser::new(source);
2020        let program = parser.parse().unwrap();
2021
2022        let effect = program.words[0].effect.as_ref().unwrap();
2023
2024        let (_, top) = effect.inputs.clone().pop().unwrap();
2025        match top {
2026            Type::Quotation(quot_effect) => {
2027                // Empty quotation preserves the stack (row-polymorphic)
2028                assert_eq!(quot_effect.inputs, StackType::RowVar("rest".to_string()));
2029                assert_eq!(quot_effect.outputs, StackType::RowVar("rest".to_string()));
2030            }
2031            _ => panic!("Expected Quotation type"),
2032        }
2033    }
2034
2035    #[test]
2036    fn test_parse_quotation_type_in_output() {
2037        // Test: ( -- [Int -- Int] )
2038        let source = ": maker ( -- [Int -- Int] ) ;";
2039        let mut parser = Parser::new(source);
2040        let program = parser.parse().unwrap();
2041
2042        let effect = program.words[0].effect.as_ref().unwrap();
2043
2044        // Output should be: Quotation(Int -- Int) on RowVar("rest")
2045        let (_, top) = effect.outputs.clone().pop().unwrap();
2046        match top {
2047            Type::Quotation(quot_effect) => {
2048                assert!(matches!(
2049                    quot_effect.inputs.clone().pop().unwrap().1,
2050                    Type::Int
2051                ));
2052                assert!(matches!(
2053                    quot_effect.outputs.clone().pop().unwrap().1,
2054                    Type::Int
2055                ));
2056            }
2057            _ => panic!("Expected Quotation type"),
2058        }
2059    }
2060
2061    #[test]
2062    fn test_parse_unclosed_quotation_type() {
2063        // Test: ( [Int -- Int -- )  (missing ])
2064        let source = ": broken ( [Int -- Int -- ) ;";
2065        let mut parser = Parser::new(source);
2066        let result = parser.parse();
2067
2068        assert!(result.is_err());
2069        let err_msg = result.unwrap_err();
2070        // Parser might error with various messages depending on where it fails
2071        // It should at least indicate a parsing problem
2072        assert!(
2073            err_msg.contains("Unclosed")
2074                || err_msg.contains("Expected")
2075                || err_msg.contains("Unexpected"),
2076            "Got error: {}",
2077            err_msg
2078        );
2079    }
2080
2081    #[test]
2082    fn test_parse_multiple_quotation_types() {
2083        // Test: ( [Int -- Int] [String -- Bool] -- )
2084        let source = ": multi ( [Int -- Int] [String -- Bool] -- ) ;";
2085        let mut parser = Parser::new(source);
2086        let program = parser.parse().unwrap();
2087
2088        let effect = program.words[0].effect.as_ref().unwrap();
2089
2090        // Pop second quotation (String -- Bool)
2091        let (rest, top) = effect.inputs.clone().pop().unwrap();
2092        match top {
2093            Type::Quotation(quot_effect) => {
2094                assert!(matches!(
2095                    quot_effect.inputs.clone().pop().unwrap().1,
2096                    Type::String
2097                ));
2098                assert!(matches!(
2099                    quot_effect.outputs.clone().pop().unwrap().1,
2100                    Type::Bool
2101                ));
2102            }
2103            _ => panic!("Expected Quotation type"),
2104        }
2105
2106        // Pop first quotation (Int -- Int)
2107        let (_, top2) = rest.pop().unwrap();
2108        match top2 {
2109            Type::Quotation(quot_effect) => {
2110                assert!(matches!(
2111                    quot_effect.inputs.clone().pop().unwrap().1,
2112                    Type::Int
2113                ));
2114                assert!(matches!(
2115                    quot_effect.outputs.clone().pop().unwrap().1,
2116                    Type::Int
2117                ));
2118            }
2119            _ => panic!("Expected Quotation type"),
2120        }
2121    }
2122
2123    #[test]
2124    fn test_parse_quotation_type_without_separator() {
2125        // Test: ( [Int] -- ) should be REJECTED
2126        //
2127        // Design decision: The '--' separator is REQUIRED for clarity.
2128        // [Int] looks like a list type in most languages, not a consumer function.
2129        // This would confuse users.
2130        //
2131        // Require explicit syntax:
2132        // - `[Int -- ]` for quotation that consumes Int and produces nothing
2133        // - `[ -- Int]` for quotation that produces Int
2134        // - `[Int -- Int]` for transformation
2135        let source = ": consumer ( [Int] -- ) ;";
2136        let mut parser = Parser::new(source);
2137        let result = parser.parse();
2138
2139        // Should fail with helpful error message
2140        assert!(result.is_err());
2141        let err_msg = result.unwrap_err();
2142        assert!(
2143            err_msg.contains("require") && err_msg.contains("--"),
2144            "Expected error about missing '--' separator, got: {}",
2145            err_msg
2146        );
2147    }
2148
2149    #[test]
2150    fn test_parse_bare_quotation_type_rejected() {
2151        // Test: ( Int Quotation -- Int ) should be REJECTED
2152        //
2153        // 'Quotation' looks like a type name but would be silently treated as a
2154        // type variable without this check. Users must use explicit effect syntax.
2155        let source = ": apply-twice ( Int Quotation -- Int ) ;";
2156        let mut parser = Parser::new(source);
2157        let result = parser.parse();
2158
2159        assert!(result.is_err());
2160        let err_msg = result.unwrap_err();
2161        assert!(
2162            err_msg.contains("Quotation") && err_msg.contains("not a valid type"),
2163            "Expected error about 'Quotation' not being valid, got: {}",
2164            err_msg
2165        );
2166        assert!(
2167            err_msg.contains("[Int -- Int]") || err_msg.contains("[ -- ]"),
2168            "Expected error to suggest explicit syntax, got: {}",
2169            err_msg
2170        );
2171    }
2172
2173    #[test]
2174    fn test_parse_no_stack_effect() {
2175        // Test word without stack effect (should still work)
2176        let source = ": test 1 2 add ;";
2177        let mut parser = Parser::new(source);
2178        let program = parser.parse().unwrap();
2179
2180        assert_eq!(program.words.len(), 1);
2181        assert!(program.words[0].effect.is_none());
2182    }
2183
2184    #[test]
2185    fn test_parse_simple_quotation() {
2186        let source = r#"
2187: test ( -- Quot )
2188  [ 1 add ] ;
2189"#;
2190
2191        let mut parser = Parser::new(source);
2192        let program = parser.parse().unwrap();
2193
2194        assert_eq!(program.words.len(), 1);
2195        assert_eq!(program.words[0].name, "test");
2196        assert_eq!(program.words[0].body.len(), 1);
2197
2198        match &program.words[0].body[0] {
2199            Statement::Quotation { body, .. } => {
2200                assert_eq!(body.len(), 2);
2201                assert_eq!(body[0], Statement::IntLiteral(1));
2202                assert!(matches!(&body[1], Statement::WordCall { name, .. } if name == "add"));
2203            }
2204            _ => panic!("Expected Quotation statement"),
2205        }
2206    }
2207
2208    #[test]
2209    fn test_parse_empty_quotation() {
2210        let source = ": test [ ] ;";
2211
2212        let mut parser = Parser::new(source);
2213        let program = parser.parse().unwrap();
2214
2215        assert_eq!(program.words.len(), 1);
2216
2217        match &program.words[0].body[0] {
2218            Statement::Quotation { body, .. } => {
2219                assert_eq!(body.len(), 0);
2220            }
2221            _ => panic!("Expected Quotation statement"),
2222        }
2223    }
2224
2225    #[test]
2226    fn test_parse_quotation_with_call() {
2227        let source = r#"
2228: test ( -- )
2229  5 [ 1 add ] call ;
2230"#;
2231
2232        let mut parser = Parser::new(source);
2233        let program = parser.parse().unwrap();
2234
2235        assert_eq!(program.words.len(), 1);
2236        assert_eq!(program.words[0].body.len(), 3);
2237
2238        assert_eq!(program.words[0].body[0], Statement::IntLiteral(5));
2239
2240        match &program.words[0].body[1] {
2241            Statement::Quotation { body, .. } => {
2242                assert_eq!(body.len(), 2);
2243            }
2244            _ => panic!("Expected Quotation"),
2245        }
2246
2247        assert!(matches!(
2248            &program.words[0].body[2],
2249            Statement::WordCall { name, .. } if name == "call"
2250        ));
2251    }
2252
2253    #[test]
2254    fn test_parse_nested_quotation() {
2255        let source = ": test [ [ 1 add ] call ] ;";
2256
2257        let mut parser = Parser::new(source);
2258        let program = parser.parse().unwrap();
2259
2260        assert_eq!(program.words.len(), 1);
2261
2262        match &program.words[0].body[0] {
2263            Statement::Quotation {
2264                body: outer_body, ..
2265            } => {
2266                assert_eq!(outer_body.len(), 2);
2267
2268                match &outer_body[0] {
2269                    Statement::Quotation {
2270                        body: inner_body, ..
2271                    } => {
2272                        assert_eq!(inner_body.len(), 2);
2273                        assert_eq!(inner_body[0], Statement::IntLiteral(1));
2274                        assert!(
2275                            matches!(&inner_body[1], Statement::WordCall { name, .. } if name == "add")
2276                        );
2277                    }
2278                    _ => panic!("Expected nested Quotation"),
2279                }
2280
2281                assert!(
2282                    matches!(&outer_body[1], Statement::WordCall { name, .. } if name == "call")
2283                );
2284            }
2285            _ => panic!("Expected Quotation"),
2286        }
2287    }
2288
2289    #[test]
2290    fn test_parse_while_with_quotations() {
2291        let source = r#"
2292: countdown ( Int -- )
2293  [ dup 0 > ] [ 1 subtract ] while drop ;
2294"#;
2295
2296        let mut parser = Parser::new(source);
2297        let program = parser.parse().unwrap();
2298
2299        assert_eq!(program.words.len(), 1);
2300        assert_eq!(program.words[0].body.len(), 4);
2301
2302        // First quotation: [ dup 0 > ]
2303        match &program.words[0].body[0] {
2304            Statement::Quotation { body: pred, .. } => {
2305                assert_eq!(pred.len(), 3);
2306                assert!(matches!(&pred[0], Statement::WordCall { name, .. } if name == "dup"));
2307                assert_eq!(pred[1], Statement::IntLiteral(0));
2308                assert!(matches!(&pred[2], Statement::WordCall { name, .. } if name == ">"));
2309            }
2310            _ => panic!("Expected predicate quotation"),
2311        }
2312
2313        // Second quotation: [ 1 subtract ]
2314        match &program.words[0].body[1] {
2315            Statement::Quotation { body, .. } => {
2316                assert_eq!(body.len(), 2);
2317                assert_eq!(body[0], Statement::IntLiteral(1));
2318                assert!(matches!(&body[1], Statement::WordCall { name, .. } if name == "subtract"));
2319            }
2320            _ => panic!("Expected body quotation"),
2321        }
2322
2323        // while call
2324        assert!(matches!(
2325            &program.words[0].body[2],
2326            Statement::WordCall { name, .. } if name == "while"
2327        ));
2328
2329        // drop
2330        assert!(matches!(
2331            &program.words[0].body[3],
2332            Statement::WordCall { name, .. } if name == "drop"
2333        ));
2334    }
2335
2336    #[test]
2337    fn test_parse_simple_closure_type() {
2338        // Test: ( Int -- Closure[Int -- Int] )
2339        let source = ": make-adder ( Int -- Closure[Int -- Int] ) ;";
2340        let mut parser = Parser::new(source);
2341        let program = parser.parse().unwrap();
2342
2343        assert_eq!(program.words.len(), 1);
2344        let word = &program.words[0];
2345        assert!(word.effect.is_some());
2346
2347        let effect = word.effect.as_ref().unwrap();
2348
2349        // Input: Int on RowVar("rest")
2350        let (input_rest, input_top) = effect.inputs.clone().pop().unwrap();
2351        assert_eq!(input_top, Type::Int);
2352        assert_eq!(input_rest, StackType::RowVar("rest".to_string()));
2353
2354        // Output: Closure[Int -- Int] on RowVar("rest")
2355        let (output_rest, output_top) = effect.outputs.clone().pop().unwrap();
2356        match output_top {
2357            Type::Closure { effect, captures } => {
2358                // Closure effect: Int -> Int
2359                assert_eq!(
2360                    effect.inputs,
2361                    StackType::Cons {
2362                        rest: Box::new(StackType::RowVar("rest".to_string())),
2363                        top: Type::Int
2364                    }
2365                );
2366                assert_eq!(
2367                    effect.outputs,
2368                    StackType::Cons {
2369                        rest: Box::new(StackType::RowVar("rest".to_string())),
2370                        top: Type::Int
2371                    }
2372                );
2373                // Captures should be empty (filled in by type checker)
2374                assert_eq!(captures.len(), 0);
2375            }
2376            _ => panic!("Expected Closure type, got {:?}", output_top),
2377        }
2378        assert_eq!(output_rest, StackType::RowVar("rest".to_string()));
2379    }
2380
2381    #[test]
2382    fn test_parse_closure_type_with_row_vars() {
2383        // Test: ( ..a Config -- ..a Closure[Request -- Response] )
2384        let source = ": make-handler ( ..a Config -- ..a Closure[Request -- Response] ) ;";
2385        let mut parser = Parser::new(source);
2386        let program = parser.parse().unwrap();
2387
2388        let effect = program.words[0].effect.as_ref().unwrap();
2389
2390        // Output: Closure on RowVar("a")
2391        let (rest, top) = effect.outputs.clone().pop().unwrap();
2392        match top {
2393            Type::Closure { effect, .. } => {
2394                // Closure effect: Request -> Response
2395                let (_, in_top) = effect.inputs.clone().pop().unwrap();
2396                assert_eq!(in_top, Type::Var("Request".to_string()));
2397                let (_, out_top) = effect.outputs.clone().pop().unwrap();
2398                assert_eq!(out_top, Type::Var("Response".to_string()));
2399            }
2400            _ => panic!("Expected Closure type"),
2401        }
2402        assert_eq!(rest, StackType::RowVar("a".to_string()));
2403    }
2404
2405    #[test]
2406    fn test_parse_closure_type_missing_bracket() {
2407        // Test: ( Int -- Closure ) should fail
2408        let source = ": broken ( Int -- Closure ) ;";
2409        let mut parser = Parser::new(source);
2410        let result = parser.parse();
2411
2412        assert!(result.is_err());
2413        let err_msg = result.unwrap_err();
2414        assert!(
2415            err_msg.contains("[") && err_msg.contains("Closure"),
2416            "Expected error about missing '[' after Closure, got: {}",
2417            err_msg
2418        );
2419    }
2420
2421    #[test]
2422    fn test_parse_closure_type_in_input() {
2423        // Test: ( Closure[Int -- Int] -- )
2424        let source = ": apply-closure ( Closure[Int -- Int] -- ) ;";
2425        let mut parser = Parser::new(source);
2426        let program = parser.parse().unwrap();
2427
2428        let effect = program.words[0].effect.as_ref().unwrap();
2429
2430        // Input: Closure[Int -- Int] on RowVar("rest")
2431        let (_, top) = effect.inputs.clone().pop().unwrap();
2432        match top {
2433            Type::Closure { effect, .. } => {
2434                // Verify closure effect
2435                assert!(matches!(effect.inputs.clone().pop().unwrap().1, Type::Int));
2436                assert!(matches!(effect.outputs.clone().pop().unwrap().1, Type::Int));
2437            }
2438            _ => panic!("Expected Closure type in input"),
2439        }
2440    }
2441
2442    // Tests for token position tracking
2443
2444    #[test]
2445    fn test_token_position_single_line() {
2446        // Test token positions on a single line
2447        let source = ": main ( -- ) ;";
2448        let tokens = tokenize(source);
2449
2450        // : is at line 0, column 0
2451        assert_eq!(tokens[0].text, ":");
2452        assert_eq!(tokens[0].line, 0);
2453        assert_eq!(tokens[0].column, 0);
2454
2455        // main is at line 0, column 2
2456        assert_eq!(tokens[1].text, "main");
2457        assert_eq!(tokens[1].line, 0);
2458        assert_eq!(tokens[1].column, 2);
2459
2460        // ( is at line 0, column 7
2461        assert_eq!(tokens[2].text, "(");
2462        assert_eq!(tokens[2].line, 0);
2463        assert_eq!(tokens[2].column, 7);
2464    }
2465
2466    #[test]
2467    fn test_token_position_multiline() {
2468        // Test token positions across multiple lines
2469        let source = ": main ( -- )\n  42\n;";
2470        let tokens = tokenize(source);
2471
2472        // Find the 42 token (after the newline)
2473        let token_42 = tokens.iter().find(|t| t.text == "42").unwrap();
2474        assert_eq!(token_42.line, 1);
2475        assert_eq!(token_42.column, 2); // After 2 spaces of indentation
2476
2477        // Find the ; token (on line 2)
2478        let token_semi = tokens.iter().find(|t| t.text == ";").unwrap();
2479        assert_eq!(token_semi.line, 2);
2480        assert_eq!(token_semi.column, 0);
2481    }
2482
2483    #[test]
2484    fn test_word_def_source_location_span() {
2485        // Test that word definitions capture correct start and end lines
2486        let source = r#": helper ( -- )
2487  "hello"
2488  write_line
2489;
2490
2491: main ( -- )
2492  helper
2493;"#;
2494
2495        let mut parser = Parser::new(source);
2496        let program = parser.parse().unwrap();
2497
2498        assert_eq!(program.words.len(), 2);
2499
2500        // First word: helper spans lines 0-3
2501        let helper = &program.words[0];
2502        assert_eq!(helper.name, "helper");
2503        let helper_source = helper.source.as_ref().unwrap();
2504        assert_eq!(helper_source.start_line, 0);
2505        assert_eq!(helper_source.end_line, 3);
2506
2507        // Second word: main spans lines 5-7
2508        let main_word = &program.words[1];
2509        assert_eq!(main_word.name, "main");
2510        let main_source = main_word.source.as_ref().unwrap();
2511        assert_eq!(main_source.start_line, 5);
2512        assert_eq!(main_source.end_line, 7);
2513    }
2514
2515    #[test]
2516    fn test_token_position_string_with_newline() {
2517        // Test that newlines inside strings are tracked correctly
2518        let source = "\"line1\\nline2\"";
2519        let tokens = tokenize(source);
2520
2521        // The string token should start at line 0, column 0
2522        assert_eq!(tokens.len(), 1);
2523        assert_eq!(tokens[0].line, 0);
2524        assert_eq!(tokens[0].column, 0);
2525    }
2526
2527    // ============================================================================
2528    //                         ADT PARSING TESTS
2529    // ============================================================================
2530
2531    #[test]
2532    fn test_parse_simple_union() {
2533        let source = r#"
2534union Message {
2535  Get { response-chan: Int }
2536  Set { value: Int }
2537}
2538
2539: main ( -- ) ;
2540"#;
2541
2542        let mut parser = Parser::new(source);
2543        let program = parser.parse().unwrap();
2544
2545        assert_eq!(program.unions.len(), 1);
2546        let union_def = &program.unions[0];
2547        assert_eq!(union_def.name, "Message");
2548        assert_eq!(union_def.variants.len(), 2);
2549
2550        // Check first variant
2551        assert_eq!(union_def.variants[0].name, "Get");
2552        assert_eq!(union_def.variants[0].fields.len(), 1);
2553        assert_eq!(union_def.variants[0].fields[0].name, "response-chan");
2554        assert_eq!(union_def.variants[0].fields[0].type_name, "Int");
2555
2556        // Check second variant
2557        assert_eq!(union_def.variants[1].name, "Set");
2558        assert_eq!(union_def.variants[1].fields.len(), 1);
2559        assert_eq!(union_def.variants[1].fields[0].name, "value");
2560        assert_eq!(union_def.variants[1].fields[0].type_name, "Int");
2561    }
2562
2563    #[test]
2564    fn test_parse_union_with_multiple_fields() {
2565        let source = r#"
2566union Report {
2567  Data { op: Int, delta: Int, total: Int }
2568  Empty
2569}
2570
2571: main ( -- ) ;
2572"#;
2573
2574        let mut parser = Parser::new(source);
2575        let program = parser.parse().unwrap();
2576
2577        assert_eq!(program.unions.len(), 1);
2578        let union_def = &program.unions[0];
2579        assert_eq!(union_def.name, "Report");
2580        assert_eq!(union_def.variants.len(), 2);
2581
2582        // Check Data variant with 3 fields
2583        let data_variant = &union_def.variants[0];
2584        assert_eq!(data_variant.name, "Data");
2585        assert_eq!(data_variant.fields.len(), 3);
2586        assert_eq!(data_variant.fields[0].name, "op");
2587        assert_eq!(data_variant.fields[1].name, "delta");
2588        assert_eq!(data_variant.fields[2].name, "total");
2589
2590        // Check Empty variant with no fields
2591        let empty_variant = &union_def.variants[1];
2592        assert_eq!(empty_variant.name, "Empty");
2593        assert_eq!(empty_variant.fields.len(), 0);
2594    }
2595
2596    #[test]
2597    fn test_parse_union_lowercase_name_error() {
2598        let source = r#"
2599union message {
2600  Get { }
2601}
2602"#;
2603
2604        let mut parser = Parser::new(source);
2605        let result = parser.parse();
2606        assert!(result.is_err());
2607        assert!(result.unwrap_err().contains("uppercase"));
2608    }
2609
2610    #[test]
2611    fn test_parse_union_empty_error() {
2612        let source = r#"
2613union Message {
2614}
2615"#;
2616
2617        let mut parser = Parser::new(source);
2618        let result = parser.parse();
2619        assert!(result.is_err());
2620        assert!(result.unwrap_err().contains("at least one variant"));
2621    }
2622
2623    #[test]
2624    fn test_parse_union_duplicate_variant_error() {
2625        let source = r#"
2626union Message {
2627  Get { x: Int }
2628  Get { y: String }
2629}
2630"#;
2631
2632        let mut parser = Parser::new(source);
2633        let result = parser.parse();
2634        assert!(result.is_err());
2635        let err = result.unwrap_err();
2636        assert!(err.contains("Duplicate variant name"));
2637        assert!(err.contains("Get"));
2638    }
2639
2640    #[test]
2641    fn test_parse_union_duplicate_field_error() {
2642        let source = r#"
2643union Data {
2644  Record { x: Int, x: String }
2645}
2646"#;
2647
2648        let mut parser = Parser::new(source);
2649        let result = parser.parse();
2650        assert!(result.is_err());
2651        let err = result.unwrap_err();
2652        assert!(err.contains("Duplicate field name"));
2653        assert!(err.contains("x"));
2654    }
2655
2656    #[test]
2657    fn test_parse_simple_match() {
2658        let source = r#"
2659: handle ( -- )
2660  match
2661    Get -> send-response
2662    Set -> process-set
2663  end
2664;
2665"#;
2666
2667        let mut parser = Parser::new(source);
2668        let program = parser.parse().unwrap();
2669
2670        assert_eq!(program.words.len(), 1);
2671        assert_eq!(program.words[0].body.len(), 1);
2672
2673        match &program.words[0].body[0] {
2674            Statement::Match { arms } => {
2675                assert_eq!(arms.len(), 2);
2676
2677                // First arm: Get ->
2678                match &arms[0].pattern {
2679                    Pattern::Variant(name) => assert_eq!(name, "Get"),
2680                    _ => panic!("Expected Variant pattern"),
2681                }
2682                assert_eq!(arms[0].body.len(), 1);
2683
2684                // Second arm: Set ->
2685                match &arms[1].pattern {
2686                    Pattern::Variant(name) => assert_eq!(name, "Set"),
2687                    _ => panic!("Expected Variant pattern"),
2688                }
2689                assert_eq!(arms[1].body.len(), 1);
2690            }
2691            _ => panic!("Expected Match statement"),
2692        }
2693    }
2694
2695    #[test]
2696    fn test_parse_match_with_bindings() {
2697        let source = r#"
2698: handle ( -- )
2699  match
2700    Get { >chan } -> chan send-response
2701    Report { >delta >total } -> delta total process
2702  end
2703;
2704"#;
2705
2706        let mut parser = Parser::new(source);
2707        let program = parser.parse().unwrap();
2708
2709        assert_eq!(program.words.len(), 1);
2710
2711        match &program.words[0].body[0] {
2712            Statement::Match { arms } => {
2713                assert_eq!(arms.len(), 2);
2714
2715                // First arm: Get { chan } ->
2716                match &arms[0].pattern {
2717                    Pattern::VariantWithBindings { name, bindings } => {
2718                        assert_eq!(name, "Get");
2719                        assert_eq!(bindings.len(), 1);
2720                        assert_eq!(bindings[0], "chan");
2721                    }
2722                    _ => panic!("Expected VariantWithBindings pattern"),
2723                }
2724
2725                // Second arm: Report { delta total } ->
2726                match &arms[1].pattern {
2727                    Pattern::VariantWithBindings { name, bindings } => {
2728                        assert_eq!(name, "Report");
2729                        assert_eq!(bindings.len(), 2);
2730                        assert_eq!(bindings[0], "delta");
2731                        assert_eq!(bindings[1], "total");
2732                    }
2733                    _ => panic!("Expected VariantWithBindings pattern"),
2734                }
2735            }
2736            _ => panic!("Expected Match statement"),
2737        }
2738    }
2739
2740    #[test]
2741    fn test_parse_match_bindings_require_prefix() {
2742        // Old syntax without > prefix should error
2743        let source = r#"
2744: handle ( -- )
2745  match
2746    Get { chan } -> chan send-response
2747  end
2748;
2749"#;
2750
2751        let mut parser = Parser::new(source);
2752        let result = parser.parse();
2753        assert!(result.is_err());
2754        let err = result.unwrap_err();
2755        assert!(err.contains(">chan"));
2756        assert!(err.contains("stack extraction"));
2757    }
2758
2759    #[test]
2760    fn test_parse_match_with_body_statements() {
2761        let source = r#"
2762: handle ( -- )
2763  match
2764    Get -> 1 2 add send-response
2765    Set -> process-value store
2766  end
2767;
2768"#;
2769
2770        let mut parser = Parser::new(source);
2771        let program = parser.parse().unwrap();
2772
2773        match &program.words[0].body[0] {
2774            Statement::Match { arms } => {
2775                // Get arm has 4 statements: 1, 2, add, send-response
2776                assert_eq!(arms[0].body.len(), 4);
2777                assert_eq!(arms[0].body[0], Statement::IntLiteral(1));
2778                assert_eq!(arms[0].body[1], Statement::IntLiteral(2));
2779                assert!(
2780                    matches!(&arms[0].body[2], Statement::WordCall { name, .. } if name == "add")
2781                );
2782
2783                // Set arm has 2 statements: process-value, store
2784                assert_eq!(arms[1].body.len(), 2);
2785            }
2786            _ => panic!("Expected Match statement"),
2787        }
2788    }
2789
2790    #[test]
2791    fn test_parse_match_empty_error() {
2792        let source = r#"
2793: handle ( -- )
2794  match
2795  end
2796;
2797"#;
2798
2799        let mut parser = Parser::new(source);
2800        let result = parser.parse();
2801        assert!(result.is_err());
2802        assert!(result.unwrap_err().contains("at least one arm"));
2803    }
2804
2805    #[test]
2806    fn test_parse_symbol_literal() {
2807        let source = r#"
2808: main ( -- )
2809    :hello drop
2810;
2811"#;
2812
2813        let mut parser = Parser::new(source);
2814        let program = parser.parse().unwrap();
2815        assert_eq!(program.words.len(), 1);
2816
2817        let main = &program.words[0];
2818        assert_eq!(main.body.len(), 2);
2819
2820        match &main.body[0] {
2821            Statement::Symbol(name) => assert_eq!(name, "hello"),
2822            _ => panic!("Expected Symbol statement, got {:?}", main.body[0]),
2823        }
2824    }
2825
2826    #[test]
2827    fn test_parse_symbol_with_hyphen() {
2828        let source = r#"
2829: main ( -- )
2830    :hello-world drop
2831;
2832"#;
2833
2834        let mut parser = Parser::new(source);
2835        let program = parser.parse().unwrap();
2836
2837        match &program.words[0].body[0] {
2838            Statement::Symbol(name) => assert_eq!(name, "hello-world"),
2839            _ => panic!("Expected Symbol statement"),
2840        }
2841    }
2842
2843    #[test]
2844    fn test_parse_symbol_starting_with_digit_fails() {
2845        let source = r#"
2846: main ( -- )
2847    :123abc drop
2848;
2849"#;
2850
2851        let mut parser = Parser::new(source);
2852        let result = parser.parse();
2853        assert!(result.is_err());
2854        assert!(result.unwrap_err().contains("cannot start with a digit"));
2855    }
2856
2857    #[test]
2858    fn test_parse_symbol_with_invalid_char_fails() {
2859        let source = r#"
2860: main ( -- )
2861    :hello@world drop
2862;
2863"#;
2864
2865        let mut parser = Parser::new(source);
2866        let result = parser.parse();
2867        assert!(result.is_err());
2868        assert!(result.unwrap_err().contains("invalid character"));
2869    }
2870
2871    #[test]
2872    fn test_parse_symbol_special_chars_allowed() {
2873        // Test that ? and ! are allowed in symbol names
2874        let source = r#"
2875: main ( -- )
2876    :empty? drop
2877    :save! drop
2878;
2879"#;
2880
2881        let mut parser = Parser::new(source);
2882        let program = parser.parse().unwrap();
2883
2884        match &program.words[0].body[0] {
2885            Statement::Symbol(name) => assert_eq!(name, "empty?"),
2886            _ => panic!("Expected Symbol statement"),
2887        }
2888        match &program.words[0].body[2] {
2889            Statement::Symbol(name) => assert_eq!(name, "save!"),
2890            _ => panic!("Expected Symbol statement"),
2891        }
2892    }
2893}