Skip to main content

seqc/
parser.rs

1//! Simple parser for Seq syntax
2//!
3//! Syntax:
4//! ```text
5//! : word-name ( stack-effect )
6//!   statement1
7//!   statement2
8//!   ... ;
9//! ```
10
11use crate::ast::{
12    Include, MatchArm, Pattern, Program, SourceLocation, Span, Statement, UnionDef, UnionField,
13    UnionVariant, WordDef,
14};
15use crate::types::{Effect, SideEffect, StackType, Type};
16
/// A single token together with the source position where it starts.
#[derive(Debug, Clone)]
pub struct Token {
    /// The token's text.
    pub text: String,
    /// Line number (0-indexed for LSP compatibility).
    pub line: usize,
    /// Column number (0-indexed).
    pub column: usize,
}

impl Token {
    /// Builds a token from its text and 0-indexed line/column.
    fn new(text: String, line: usize, column: usize) -> Self {
        Self { text, line, column }
    }
}

/// Compares only the token text against a `&str`; position is ignored.
impl PartialEq<&str> for Token {
    fn eq(&self, rhs: &&str) -> bool {
        self.text.as_str() == *rhs
    }
}

/// Compares only the token text against a `str`; position is ignored.
impl PartialEq<str> for Token {
    fn eq(&self, rhs: &str) -> bool {
        self.text.as_str() == rhs
    }
}
44
45pub struct Parser {
46    tokens: Vec<Token>,
47    pos: usize,
48    /// Counter for assigning unique IDs to quotations
49    /// Used by the typechecker to track inferred types
50    next_quotation_id: usize,
51    /// Pending lint annotations collected from `# seq:allow(lint-id)` comments
52    pending_allowed_lints: Vec<String>,
53    /// Known union type names - used to distinguish union types from type variables
54    /// RFC #345: Union types in stack effects must be recognized as concrete types
55    known_unions: std::collections::HashSet<String>,
56}
57
58/// Prepend "at line N: " to a parser error so the LSP can surface it at the
59/// correct source line. If the message already starts with "at line " (from a
60/// nested sub-parser that annotated with more specific info) we leave it as-is
61/// to avoid double-wrapping.
62fn annotate_error_with_line(msg: String, tok: Option<&Token>) -> String {
63    if msg.starts_with("at line ") {
64        return msg;
65    }
66    let line = tok.map(|t| t.line).unwrap_or(0);
67    format!("at line {}: {}", line + 1, msg)
68}
69
70impl Parser {
71    pub fn new(source: &str) -> Self {
72        let tokens = tokenize(source);
73        Parser {
74            tokens,
75            pos: 0,
76            next_quotation_id: 0,
77            pending_allowed_lints: Vec::new(),
78            known_unions: std::collections::HashSet::new(),
79        }
80    }
81
82    /// Register external union names (e.g., from included modules)
83    /// These union types will be recognized in stack effect declarations.
84    pub fn register_external_unions(&mut self, union_names: &[&str]) {
85        for name in union_names {
86            self.known_unions.insert(name.to_string());
87        }
88    }
89
90    pub fn parse(&mut self) -> Result<Program, String> {
91        let mut program = Program::new();
92
93        // Check for unclosed string error from tokenizer
94        if let Some(error_token) = self.tokens.iter().find(|t| *t == "<<<UNCLOSED_STRING>>>") {
95            return Err(format!(
96                "Unclosed string literal at line {}, column {} - missing closing quote",
97                error_token.line + 1, // 1-indexed for user display
98                error_token.column + 1
99            ));
100        }
101
102        while !self.is_at_end() {
103            self.skip_comments();
104            if self.is_at_end() {
105                break;
106            }
107
108            // Dispatch to the appropriate sub-parser. If the sub-parser returns
109            // an error, annotate it with the current token's line so the LSP
110            // can surface the diagnostic at the offending location rather than
111            // defaulting to line 1.
112            let result = if self.check("include") {
113                self.parse_include().map(|inc| program.includes.push(inc))
114            } else if self.check("union") {
115                self.parse_union_def().map(|u| program.unions.push(u))
116            } else {
117                self.parse_word_def().map(|w| program.words.push(w))
118            };
119
120            if let Err(msg) = result {
121                // Prefer the token we were looking at when the error fired.
122                // If we were already at EOF, fall back to the final token's line
123                // so the diagnostic lands near the unterminated construct
124                // instead of on line 1.
125                let loc_token = self.current_token().or_else(|| self.tokens.last());
126                return Err(annotate_error_with_line(msg, loc_token));
127            }
128        }
129
130        Ok(program)
131    }
132
133    /// Parse an include statement:
134    ///   include std:http     -> Include::Std("http")
135    ///   include ffi:readline -> Include::Ffi("readline")
136    ///   include "my-utils"   -> Include::Relative("my-utils")
137    fn parse_include(&mut self) -> Result<Include, String> {
138        self.consume("include");
139
140        let token = self
141            .advance()
142            .ok_or("Expected module name after 'include'")?
143            .clone();
144
145        // Check for std: prefix (tokenizer splits this into "std", ":", "name")
146        if token == "std" {
147            // Expect : token
148            if !self.consume(":") {
149                return Err("Expected ':' after 'std' in include statement".to_string());
150            }
151            // Get the module name
152            let name = self
153                .advance()
154                .ok_or("Expected module name after 'std:'")?
155                .clone();
156            return Ok(Include::Std(name));
157        }
158
159        // Check for ffi: prefix
160        if token == "ffi" {
161            // Expect : token
162            if !self.consume(":") {
163                return Err("Expected ':' after 'ffi' in include statement".to_string());
164            }
165            // Get the library name
166            let name = self
167                .advance()
168                .ok_or("Expected library name after 'ffi:'")?
169                .clone();
170            return Ok(Include::Ffi(name));
171        }
172
173        // Check for quoted string (relative path)
174        if token.starts_with('"') && token.ends_with('"') {
175            let path = token.trim_start_matches('"').trim_end_matches('"');
176            return Ok(Include::Relative(path.to_string()));
177        }
178
179        Err(format!(
180            "Invalid include syntax '{}'. Use 'include std:name', 'include ffi:lib', or 'include \"path\"'",
181            token
182        ))
183    }
184
185    /// Parse a union type definition:
186    ///   union Message {
187    ///     Get { response-chan: Int }
188    ///     Increment { response-chan: Int }
189    ///     Report { op: Int, delta: Int, total: Int }
190    ///   }
191    fn parse_union_def(&mut self) -> Result<UnionDef, String> {
192        // Capture start line from 'union' token
193        let start_line = self.current_token().map(|t| t.line).unwrap_or(0);
194
195        // Consume 'union' keyword
196        self.consume("union");
197
198        // Get union name (must start with uppercase)
199        let name = self
200            .advance()
201            .ok_or("Expected union name after 'union'")?
202            .clone();
203
204        if !name
205            .chars()
206            .next()
207            .map(|c| c.is_uppercase())
208            .unwrap_or(false)
209        {
210            return Err(format!(
211                "Union name '{}' must start with an uppercase letter",
212                name
213            ));
214        }
215
216        // RFC #345: Register this union name so it can be recognized in stack effects
217        // This allows ( UnionName -- ) to parse as Union type, not a type variable
218        self.known_unions.insert(name.clone());
219
220        // Skip comments and newlines
221        self.skip_comments();
222
223        // Expect '{'
224        if !self.consume("{") {
225            return Err(format!(
226                "Expected '{{' after union name '{}', got '{}'",
227                name,
228                self.current()
229            ));
230        }
231
232        // Parse variants until '}'
233        let mut variants = Vec::new();
234        loop {
235            self.skip_comments();
236
237            if self.check("}") {
238                break;
239            }
240
241            if self.is_at_end() {
242                return Err(format!("Unexpected end of file in union '{}'", name));
243            }
244
245            variants.push(self.parse_union_variant()?);
246        }
247
248        // Capture end line from '}' token before consuming
249        let end_line = self.current_token().map(|t| t.line).unwrap_or(start_line);
250
251        // Consume '}'
252        self.consume("}");
253
254        if variants.is_empty() {
255            return Err(format!("Union '{}' must have at least one variant", name));
256        }
257
258        // Check for duplicate variant names
259        let mut seen_variants = std::collections::HashSet::new();
260        for variant in &variants {
261            if !seen_variants.insert(&variant.name) {
262                return Err(format!(
263                    "Duplicate variant name '{}' in union '{}'",
264                    variant.name, name
265                ));
266            }
267        }
268
269        Ok(UnionDef {
270            name,
271            variants,
272            source: Some(SourceLocation::span(
273                std::path::PathBuf::new(),
274                start_line,
275                end_line,
276            )),
277        })
278    }
279
280    /// Parse a single union variant:
281    ///   Get { response-chan: Int }
282    ///   or just: Empty (no fields)
283    fn parse_union_variant(&mut self) -> Result<UnionVariant, String> {
284        let start_line = self.current_token().map(|t| t.line).unwrap_or(0);
285
286        // Get variant name (must start with uppercase)
287        let name = self.advance().ok_or("Expected variant name")?.clone();
288
289        if !name
290            .chars()
291            .next()
292            .map(|c| c.is_uppercase())
293            .unwrap_or(false)
294        {
295            return Err(format!(
296                "Variant name '{}' must start with an uppercase letter",
297                name
298            ));
299        }
300
301        self.skip_comments();
302
303        // Check for optional fields
304        let fields = if self.check("{") {
305            self.consume("{");
306            let fields = self.parse_union_fields()?;
307            if !self.consume("}") {
308                return Err(format!("Expected '}}' after variant '{}' fields", name));
309            }
310            fields
311        } else {
312            Vec::new()
313        };
314
315        Ok(UnionVariant {
316            name,
317            fields,
318            source: Some(SourceLocation::new(std::path::PathBuf::new(), start_line)),
319        })
320    }
321
322    /// Parse union fields: name: Type, name: Type, ...
323    fn parse_union_fields(&mut self) -> Result<Vec<UnionField>, String> {
324        let mut fields = Vec::new();
325
326        loop {
327            self.skip_comments();
328
329            if self.check("}") {
330                break;
331            }
332
333            // Get field name
334            let field_name = self.advance().ok_or("Expected field name")?.clone();
335
336            // Expect ':'
337            if !self.consume(":") {
338                return Err(format!(
339                    "Expected ':' after field name '{}', got '{}'",
340                    field_name,
341                    self.current()
342                ));
343            }
344
345            // Get type name
346            let type_name = self
347                .advance()
348                .ok_or("Expected type name after ':'")?
349                .clone();
350
351            fields.push(UnionField {
352                name: field_name,
353                type_name,
354            });
355
356            // Optional comma separator
357            self.skip_comments();
358            self.consume(",");
359        }
360
361        // Check for duplicate field names
362        let mut seen_fields = std::collections::HashSet::new();
363        for field in &fields {
364            if !seen_fields.insert(&field.name) {
365                return Err(format!("Duplicate field name '{}' in variant", field.name));
366            }
367        }
368
369        Ok(fields)
370    }
371
372    fn parse_word_def(&mut self) -> Result<WordDef, String> {
373        // Consume any pending lint annotations collected from comments before this word
374        let allowed_lints = std::mem::take(&mut self.pending_allowed_lints);
375
376        // Capture start line from ':' token
377        let start_line = self.current_token().map(|t| t.line).unwrap_or(0);
378
379        // Expect ':'
380        if !self.consume(":") {
381            return Err(format!(
382                "Expected ':' to start word definition, got '{}'",
383                self.current()
384            ));
385        }
386
387        // Get word name
388        let name = self
389            .advance()
390            .ok_or("Expected word name after ':'")?
391            .clone();
392
393        // Parse stack effect if present: ( ..a Int -- ..a Bool )
394        let effect = if self.check("(") {
395            Some(self.parse_stack_effect()?)
396        } else {
397            None
398        };
399
400        // Parse body until ';'
401        let mut body = Vec::new();
402        while !self.check(";") {
403            if self.is_at_end() {
404                return Err(format!("Unexpected end of file in word '{}'", name));
405            }
406
407            // Skip comments and newlines in body
408            self.skip_comments();
409            if self.check(";") {
410                break;
411            }
412
413            body.push(self.parse_statement()?);
414        }
415
416        // Capture end line from ';' token before consuming
417        let end_line = self.current_token().map(|t| t.line).unwrap_or(start_line);
418
419        // Consume ';'
420        self.consume(";");
421
422        Ok(WordDef {
423            name,
424            effect,
425            body,
426            source: Some(crate::ast::SourceLocation::span(
427                std::path::PathBuf::new(),
428                start_line,
429                end_line,
430            )),
431            allowed_lints,
432        })
433    }
434
435    fn parse_statement(&mut self) -> Result<Statement, String> {
436        use crate::ast::Span;
437        let tok = self.advance_token().ok_or("Unexpected end of file")?;
438        let token = &tok.text;
439        let tok_line = tok.line;
440        let tok_column = tok.column;
441        let tok_len = tok.text.len();
442
443        // Check if it looks like a float literal (contains . or scientific notation)
444        // Must check this BEFORE integer parsing
445        if let Some(f) = is_float_literal(token)
446            .then(|| token.parse::<f64>().ok())
447            .flatten()
448        {
449            return Ok(Statement::FloatLiteral(f));
450        }
451
452        // Try to parse as hex literal (0x or 0X prefix)
453        if let Some(hex) = token
454            .strip_prefix("0x")
455            .or_else(|| token.strip_prefix("0X"))
456        {
457            return i64::from_str_radix(hex, 16)
458                .map(Statement::IntLiteral)
459                .map_err(|_| format!("Invalid hex literal: {}", token));
460        }
461
462        // Try to parse as binary literal (0b or 0B prefix)
463        if let Some(bin) = token
464            .strip_prefix("0b")
465            .or_else(|| token.strip_prefix("0B"))
466        {
467            return i64::from_str_radix(bin, 2)
468                .map(Statement::IntLiteral)
469                .map_err(|_| format!("Invalid binary literal: {}", token));
470        }
471
472        // Try to parse as decimal integer literal
473        if let Ok(n) = token.parse::<i64>() {
474            return Ok(Statement::IntLiteral(n));
475        }
476
477        // Try to parse as boolean literal
478        if token == "true" {
479            return Ok(Statement::BoolLiteral(true));
480        }
481        if token == "false" {
482            return Ok(Statement::BoolLiteral(false));
483        }
484
485        // Try to parse as symbol literal (:foo, :some-name)
486        if token == ":" {
487            // Get the next token as the symbol name
488            let name_tok = self
489                .advance_token()
490                .ok_or("Expected symbol name after ':', got end of input")?;
491            let name = &name_tok.text;
492            // Validate symbol name (identifier-like, kebab-case allowed)
493            if name.is_empty() {
494                return Err("Symbol name cannot be empty".to_string());
495            }
496            if name.starts_with(|c: char| c.is_ascii_digit()) {
497                return Err(format!(
498                    "Symbol name cannot start with a digit: ':{}'\n  Hint: Symbol names must start with a letter",
499                    name
500                ));
501            }
502            if let Some(bad_char) = name.chars().find(|c| {
503                !c.is_alphanumeric()
504                    && *c != '-'
505                    && *c != '_'
506                    && *c != '.'
507                    && *c != '?'
508                    && *c != '!'
509            }) {
510                return Err(format!(
511                    "Symbol name contains invalid character '{}': ':{}'\n  Hint: Allowed: letters, digits, - _ . ? !",
512                    bad_char, name
513                ));
514            }
515            return Ok(Statement::Symbol(name.clone()));
516        }
517
518        // Try to parse as string literal
519        if token.starts_with('"') {
520            // Validate token has at least opening and closing quotes
521            if token.len() < 2 || !token.ends_with('"') {
522                return Err(format!("Malformed string literal: {}", token));
523            }
524            // Strip exactly one quote from each end (not all quotes, which would
525            // incorrectly handle escaped quotes at string boundaries like "hello\"")
526            let raw = &token[1..token.len() - 1];
527            let unescaped = unescape_string(raw)?;
528            return Ok(Statement::StringLiteral(unescaped));
529        }
530
531        // Check for conditional
532        if token == "if" {
533            return self.parse_if(tok_line, tok_column);
534        }
535
536        // Check for quotation
537        if token == "[" {
538            return self.parse_quotation(tok_line, tok_column);
539        }
540
541        // Check for match expression
542        if token == "match" {
543            return self.parse_match(tok_line, tok_column);
544        }
545
546        // Otherwise it's a word call - preserve source span for precise diagnostics
547        Ok(Statement::WordCall {
548            name: token.to_string(),
549            span: Some(Span::new(tok_line, tok_column, tok_len)),
550        })
551    }
552
553    fn parse_if(&mut self, start_line: usize, start_column: usize) -> Result<Statement, String> {
554        let mut then_branch = Vec::new();
555
556        // Parse then branch until 'else' or 'then'
557        loop {
558            if self.is_at_end() {
559                return Err("Unexpected end of file in 'if' statement".to_string());
560            }
561
562            // Skip comments and newlines
563            self.skip_comments();
564
565            if self.check("else") {
566                self.advance();
567                // Parse else branch
568                break;
569            }
570
571            if self.check("then") {
572                self.advance();
573                // End of if without else
574                return Ok(Statement::If {
575                    then_branch,
576                    else_branch: None,
577                    span: Some(Span::new(start_line, start_column, "if".len())),
578                });
579            }
580
581            then_branch.push(self.parse_statement()?);
582        }
583
584        // Parse else branch until 'then'
585        let mut else_branch = Vec::new();
586        loop {
587            if self.is_at_end() {
588                return Err("Unexpected end of file in 'else' branch".to_string());
589            }
590
591            // Skip comments and newlines
592            self.skip_comments();
593
594            if self.check("then") {
595                self.advance();
596                return Ok(Statement::If {
597                    then_branch,
598                    else_branch: Some(else_branch),
599                    span: Some(Span::new(start_line, start_column, "if".len())),
600                });
601            }
602
603            else_branch.push(self.parse_statement()?);
604        }
605    }
606
607    fn parse_quotation(
608        &mut self,
609        start_line: usize,
610        start_column: usize,
611    ) -> Result<Statement, String> {
612        use crate::ast::QuotationSpan;
613        let mut body = Vec::new();
614
615        // Parse statements until ']'
616        loop {
617            if self.is_at_end() {
618                return Err("Unexpected end of file in quotation".to_string());
619            }
620
621            // Skip comments and newlines
622            self.skip_comments();
623
624            if self.check("]") {
625                let end_tok = self.advance_token().unwrap();
626                let end_line = end_tok.line;
627                let end_column = end_tok.column + 1; // exclusive
628                let id = self.next_quotation_id;
629                self.next_quotation_id += 1;
630                // Span from '[' to ']' inclusive
631                let span = QuotationSpan::new(start_line, start_column, end_line, end_column);
632                return Ok(Statement::Quotation {
633                    id,
634                    body,
635                    span: Some(span),
636                });
637            }
638
639            body.push(self.parse_statement()?);
640        }
641    }
642
643    /// Parse a match expression:
644    ///   match
645    ///     Get -> send-response
646    ///     Increment -> do-increment send-response
647    ///     Report -> aggregate-add
648    ///   end
649    fn parse_match(&mut self, start_line: usize, start_column: usize) -> Result<Statement, String> {
650        let mut arms = Vec::new();
651
652        loop {
653            self.skip_comments();
654
655            // Check for 'end' to terminate match
656            if self.check("end") {
657                self.advance();
658                break;
659            }
660
661            if self.is_at_end() {
662                return Err("Unexpected end of file in match expression".to_string());
663            }
664
665            arms.push(self.parse_match_arm()?);
666        }
667
668        if arms.is_empty() {
669            return Err("Match expression must have at least one arm".to_string());
670        }
671
672        Ok(Statement::Match {
673            arms,
674            span: Some(Span::new(start_line, start_column, "match".len())),
675        })
676    }
677
678    /// Parse a single match arm:
679    ///   Get -> send-response
680    ///   or with bindings:
681    ///   Get { chan } -> chan send-response
682    fn parse_match_arm(&mut self) -> Result<MatchArm, String> {
683        // Get variant name with position info
684        let variant_token = self
685            .advance_token()
686            .ok_or("Expected variant name in match arm")?;
687        let variant_name = variant_token.text.clone();
688        let arm_line = variant_token.line;
689        let arm_column = variant_token.column;
690        let arm_length = variant_name.len();
691
692        self.skip_comments();
693
694        // Check for optional bindings: { field1 field2 }
695        let pattern = if self.check("{") {
696            self.consume("{");
697            let mut bindings = Vec::new();
698
699            loop {
700                self.skip_comments();
701
702                if self.check("}") {
703                    break;
704                }
705
706                if self.is_at_end() {
707                    return Err(format!(
708                        "Unexpected end of file in match arm bindings for '{}'",
709                        variant_name
710                    ));
711                }
712
713                let token = self.advance().ok_or("Expected binding name")?.clone();
714
715                // Require > prefix to make clear these are stack extractions, not variables
716                if let Some(field_name) = token.strip_prefix('>') {
717                    if field_name.is_empty() {
718                        return Err(format!(
719                            "Expected field name after '>' in match bindings for '{}'",
720                            variant_name
721                        ));
722                    }
723                    bindings.push(field_name.to_string());
724                } else {
725                    return Err(format!(
726                        "Match bindings must use '>' prefix to indicate stack extraction. \
727                         Use '>{}' instead of '{}' in pattern for '{}'",
728                        token, token, variant_name
729                    ));
730                }
731            }
732
733            self.consume("}");
734            Pattern::VariantWithBindings {
735                name: variant_name,
736                bindings,
737            }
738        } else {
739            Pattern::Variant(variant_name.clone())
740        };
741
742        self.skip_comments();
743
744        // Expect '->' arrow
745        if !self.consume("->") {
746            return Err(format!(
747                "Expected '->' after pattern '{}', got '{}'",
748                match &pattern {
749                    Pattern::Variant(n) => n.clone(),
750                    Pattern::VariantWithBindings { name, .. } => name.clone(),
751                },
752                self.current()
753            ));
754        }
755
756        // Parse body until next pattern or 'end'
757        let mut body = Vec::new();
758        loop {
759            self.skip_comments();
760
761            // Check for end of arm (next pattern starts with uppercase, or 'end')
762            if self.check("end") {
763                break;
764            }
765
766            // Check if next token looks like a match pattern (not just any uppercase word).
767            // A pattern is: UppercaseName followed by '->' or '{'
768            // This prevents confusing 'Make-Get' (constructor call) with a pattern.
769            if let Some(token) = self.current_token()
770                && let Some(first_char) = token.text.chars().next()
771                && first_char.is_uppercase()
772            {
773                // Peek at next token to see if this is a pattern (followed by -> or {)
774                if let Some(next) = self.peek_at(1)
775                    && (next == "->" || next == "{")
776                {
777                    // This is the next pattern
778                    break;
779                }
780                // Otherwise it's just an uppercase word call (like Make-Get), continue parsing body
781            }
782
783            if self.is_at_end() {
784                return Err("Unexpected end of file in match arm body".to_string());
785            }
786
787            body.push(self.parse_statement()?);
788        }
789
790        Ok(MatchArm {
791            pattern,
792            body,
793            span: Some(Span::new(arm_line, arm_column, arm_length)),
794        })
795    }
796
797    /// Parse a stack effect declaration: ( ..a Int -- ..a Bool )
798    /// With optional computational effects: ( ..a Int -- ..a Bool | Yield Int )
799    fn parse_stack_effect(&mut self) -> Result<Effect, String> {
800        // Consume '('
801        if !self.consume("(") {
802            return Err("Expected '(' to start stack effect".to_string());
803        }
804
805        // Parse input stack types (until '--' or ')')
806        let (input_row_var, input_types) =
807            self.parse_type_list_until(&["--", ")"], "stack effect inputs", 0)?;
808
809        // Consume '--'
810        if !self.consume("--") {
811            return Err("Expected '--' separator in stack effect".to_string());
812        }
813
814        // Parse output stack types (until ')' or '|')
815        let (output_row_var, output_types) =
816            self.parse_type_list_until(&[")", "|"], "stack effect outputs", 0)?;
817
818        // Parse optional computational effects after '|'
819        let effects = if self.consume("|") {
820            self.parse_effect_annotations()?
821        } else {
822            Vec::new()
823        };
824
825        // Consume ')'
826        if !self.consume(")") {
827            return Err("Expected ')' to end stack effect".to_string());
828        }
829
830        // Build input and output StackTypes
831        let inputs = self.build_stack_type(input_row_var, input_types);
832        let outputs = self.build_stack_type(output_row_var, output_types);
833
834        Ok(Effect::with_effects(inputs, outputs, effects))
835    }
836
837    /// Parse computational effect annotations after '|'
838    /// Example: | Yield Int
839    fn parse_effect_annotations(&mut self) -> Result<Vec<SideEffect>, String> {
840        let mut effects = Vec::new();
841
842        // Parse effects until we hit ')'
843        while let Some(token) = self.peek_at(0) {
844            if token == ")" {
845                break;
846            }
847
848            match token {
849                "Yield" => {
850                    self.advance(); // consume "Yield"
851                    // Parse the yield type
852                    if let Some(type_token) = self.current_token() {
853                        if type_token.text == ")" {
854                            return Err("Expected type after 'Yield'".to_string());
855                        }
856                        let type_token = type_token.clone();
857                        self.advance();
858                        let yield_type = self.parse_type(&type_token)?;
859                        effects.push(SideEffect::Yield(Box::new(yield_type)));
860                    } else {
861                        return Err("Expected type after 'Yield'".to_string());
862                    }
863                }
864                _ => {
865                    return Err(format!("Unknown effect '{}'. Expected 'Yield'", token));
866                }
867            }
868        }
869
870        if effects.is_empty() {
871            return Err("Expected at least one effect after '|'".to_string());
872        }
873
874        Ok(effects)
875    }
876
877    /// Parse a single type token into a Type
878    fn parse_type(&self, token: &Token) -> Result<Type, String> {
879        match token.text.as_str() {
880            "Int" => Ok(Type::Int),
881            "Float" => Ok(Type::Float),
882            "Bool" => Ok(Type::Bool),
883            "String" => Ok(Type::String),
884            // Reject 'Quotation' - it looks like a type but would be silently treated as a type variable.
885            // Users must use explicit effect syntax like [Int -- Int] instead.
886            "Quotation" => Err(format!(
887                "'Quotation' is not a valid type at line {}, column {}. Use explicit quotation syntax like [Int -- Int] or [ -- ] instead.",
888                token.line + 1,
889                token.column + 1
890            )),
891            _ => {
892                // Check if it's a type variable (starts with uppercase)
893                if let Some(first_char) = token.text.chars().next() {
894                    if first_char.is_uppercase() {
895                        // RFC #345: Check if this is a known union type name
896                        // Union types are nominal and should NOT unify with each other
897                        if self.known_unions.contains(&token.text) {
898                            Ok(Type::Union(token.text.to_string()))
899                        } else {
900                            // Unknown uppercase identifier - treat as type variable
901                            Ok(Type::Var(token.text.to_string()))
902                        }
903                    } else {
904                        Err(format!(
905                            "Unknown type: '{}' at line {}, column {}. Expected Int, Bool, String, Closure, or a type variable (uppercase)",
906                            token.text.escape_default(),
907                            token.line + 1, // 1-indexed for user display
908                            token.column + 1
909                        ))
910                    }
911                } else {
912                    Err(format!(
913                        "Invalid type: '{}' at line {}, column {}",
914                        token.text.escape_default(),
915                        token.line + 1,
916                        token.column + 1
917                    ))
918                }
919            }
920        }
921    }
922
923    /// Validate row variable name
924    /// Row variables must start with a lowercase letter and contain only alphanumeric characters
925    fn validate_row_var_name(&self, name: &str) -> Result<(), String> {
926        if name.is_empty() {
927            return Err("Row variable must have a name after '..'".to_string());
928        }
929
930        // Must start with lowercase letter
931        let first_char = name.chars().next().unwrap();
932        if !first_char.is_ascii_lowercase() {
933            return Err(format!(
934                "Row variable '..{}' must start with a lowercase letter (a-z)",
935                name
936            ));
937        }
938
939        // Rest must be alphanumeric or underscore
940        for ch in name.chars() {
941            if !ch.is_alphanumeric() && ch != '_' {
942                return Err(format!(
943                    "Row variable '..{}' can only contain letters, numbers, and underscores",
944                    name
945                ));
946            }
947        }
948
949        // Check for reserved keywords (type names that might confuse users)
950        match name {
951            "Int" | "Bool" | "String" => {
952                return Err(format!(
953                    "Row variable '..{}' cannot use type name as identifier",
954                    name
955                ));
956            }
957            _ => {}
958        }
959
960        Ok(())
961    }
962
963    /// Parse a list of types until one of the given terminators is reached
964    /// Returns (optional row variable, list of types)
965    /// Used by both parse_stack_effect and parse_quotation_type
966    ///
967    /// depth: Current nesting depth for quotation types (0 at top level)
968    fn parse_type_list_until(
969        &mut self,
970        terminators: &[&str],
971        context: &str,
972        depth: usize,
973    ) -> Result<(Option<String>, Vec<Type>), String> {
974        const MAX_QUOTATION_DEPTH: usize = 32;
975
976        if depth > MAX_QUOTATION_DEPTH {
977            return Err(format!(
978                "Quotation type nesting exceeds maximum depth of {} (possible deeply nested types or DOS attack)",
979                MAX_QUOTATION_DEPTH
980            ));
981        }
982
983        let mut types = Vec::new();
984        let mut row_var = None;
985
986        while !terminators.iter().any(|t| self.check(t)) {
987            // Skip comments and blank lines within type lists
988            self.skip_comments();
989
990            // Re-check terminators after skipping comments
991            if terminators.iter().any(|t| self.check(t)) {
992                break;
993            }
994
995            if self.is_at_end() {
996                return Err(format!(
997                    "Unexpected end while parsing {} - expected one of: {}",
998                    context,
999                    terminators.join(", ")
1000                ));
1001            }
1002
1003            let token = self
1004                .advance_token()
1005                .ok_or_else(|| format!("Unexpected end in {}", context))?
1006                .clone();
1007
1008            // Check for row variable: ..name
1009            if token.text.starts_with("..") {
1010                let var_name = token.text.trim_start_matches("..").to_string();
1011                self.validate_row_var_name(&var_name)?;
1012                row_var = Some(var_name);
1013            } else if token.text == "Closure" {
1014                // Closure type: Closure[effect]
1015                if !self.consume("[") {
1016                    return Err("Expected '[' after 'Closure' in type signature".to_string());
1017                }
1018                let effect_type = self.parse_quotation_type(depth)?;
1019                match effect_type {
1020                    Type::Quotation(effect) => {
1021                        types.push(Type::Closure {
1022                            effect,
1023                            captures: Vec::new(), // Filled in by type checker
1024                        });
1025                    }
1026                    _ => unreachable!("parse_quotation_type should return Quotation"),
1027                }
1028            } else if token.text == "[" {
1029                // Nested quotation type
1030                types.push(self.parse_quotation_type(depth)?);
1031            } else {
1032                // Parse as concrete type
1033                types.push(self.parse_type(&token)?);
1034            }
1035        }
1036
1037        Ok((row_var, types))
1038    }
1039
1040    /// Parse a quotation type: [inputs -- outputs]
1041    /// Note: The opening '[' has already been consumed
1042    ///
1043    /// depth: Current nesting depth (incremented for each nested quotation)
1044    fn parse_quotation_type(&mut self, depth: usize) -> Result<Type, String> {
1045        // Parse input stack types (until '--' or ']')
1046        let (input_row_var, input_types) =
1047            self.parse_type_list_until(&["--", "]"], "quotation type inputs", depth + 1)?;
1048
1049        // Require '--' separator for clarity
1050        if !self.consume("--") {
1051            // Check if user closed with ] without separator
1052            if self.check("]") {
1053                return Err(
1054                    "Quotation types require '--' separator. Did you mean '[Int -- ]' or '[ -- Int]'?"
1055                        .to_string(),
1056                );
1057            }
1058            return Err("Expected '--' separator in quotation type".to_string());
1059        }
1060
1061        // Parse output stack types (until ']')
1062        let (output_row_var, output_types) =
1063            self.parse_type_list_until(&["]"], "quotation type outputs", depth + 1)?;
1064
1065        // Consume ']'
1066        if !self.consume("]") {
1067            return Err("Expected ']' to end quotation type".to_string());
1068        }
1069
1070        // Build input and output StackTypes
1071        let inputs = self.build_stack_type(input_row_var, input_types);
1072        let outputs = self.build_stack_type(output_row_var, output_types);
1073
1074        Ok(Type::Quotation(Box::new(Effect::new(inputs, outputs))))
1075    }
1076
1077    /// Build a StackType from an optional row variable and a list of types
1078    /// Example: row_var="a", types=[Int, Bool] => RowVar("a") with Int on top of Bool
1079    ///
1080    /// IMPORTANT: ALL stack effects are implicitly row-polymorphic in concatenative languages.
1081    /// This means:
1082    ///   ( -- )        becomes  ( ..rest -- ..rest )       - no-op, preserves stack
1083    ///   ( -- Int )    becomes  ( ..rest -- ..rest Int )   - pushes Int
1084    ///   ( Int -- )    becomes  ( ..rest Int -- ..rest )   - consumes Int
1085    ///   ( Int -- Int) becomes  ( ..rest Int -- ..rest Int ) - transforms top
1086    fn build_stack_type(&self, row_var: Option<String>, types: Vec<Type>) -> StackType {
1087        // Always use row polymorphism - this is fundamental to concatenative semantics
1088        let base = match row_var {
1089            Some(name) => StackType::RowVar(name),
1090            None => StackType::RowVar("rest".to_string()),
1091        };
1092
1093        // Push types onto the stack (bottom to top order)
1094        types.into_iter().fold(base, |stack, ty| stack.push(ty))
1095    }
1096
1097    fn skip_comments(&mut self) {
1098        loop {
1099            // Check for comment: either standalone "#" or token starting with "#"
1100            // The latter handles shebangs like "#!/usr/bin/env seqc"
1101            let is_comment = if self.is_at_end() {
1102                false
1103            } else {
1104                let tok = self.current();
1105                tok == "#" || tok.starts_with("#!")
1106            };
1107
1108            if is_comment {
1109                self.advance(); // consume # or shebang token
1110
1111                // Collect all tokens until newline to reconstruct the comment text
1112                let mut comment_parts: Vec<String> = Vec::new();
1113                while !self.is_at_end() && self.current() != "\n" {
1114                    comment_parts.push(self.current().to_string());
1115                    self.advance();
1116                }
1117                if !self.is_at_end() {
1118                    self.advance(); // skip newline
1119                }
1120
1121                // Join parts and check for seq:allow annotation
1122                // Format: # seq:allow(lint-id) -> parts = ["seq", ":", "allow", "(", "lint-id", ")"]
1123                let comment = comment_parts.join("");
1124                if let Some(lint_id) = comment
1125                    .strip_prefix("seq:allow(")
1126                    .and_then(|s| s.strip_suffix(")"))
1127                {
1128                    self.pending_allowed_lints.push(lint_id.to_string());
1129                }
1130            } else if self.check("\n") {
1131                // Skip blank lines
1132                self.advance();
1133            } else {
1134                break;
1135            }
1136        }
1137    }
1138
1139    fn check(&self, expected: &str) -> bool {
1140        if self.is_at_end() {
1141            return false;
1142        }
1143        self.current() == expected
1144    }
1145
1146    fn consume(&mut self, expected: &str) -> bool {
1147        if self.check(expected) {
1148            self.advance();
1149            true
1150        } else {
1151            false
1152        }
1153    }
1154
1155    /// Get the text of the current token
1156    fn current(&self) -> &str {
1157        if self.is_at_end() {
1158            ""
1159        } else {
1160            &self.tokens[self.pos].text
1161        }
1162    }
1163
1164    /// Get the full current token with position info
1165    fn current_token(&self) -> Option<&Token> {
1166        if self.is_at_end() {
1167            None
1168        } else {
1169            Some(&self.tokens[self.pos])
1170        }
1171    }
1172
1173    /// Peek at a token N positions ahead without consuming
1174    fn peek_at(&self, n: usize) -> Option<&str> {
1175        let idx = self.pos + n;
1176        if idx < self.tokens.len() {
1177            Some(&self.tokens[idx].text)
1178        } else {
1179            None
1180        }
1181    }
1182
1183    /// Advance and return the token text (for compatibility with existing code)
1184    fn advance(&mut self) -> Option<&String> {
1185        if self.is_at_end() {
1186            None
1187        } else {
1188            let token = &self.tokens[self.pos];
1189            self.pos += 1;
1190            Some(&token.text)
1191        }
1192    }
1193
1194    /// Advance and return the full token with position info
1195    fn advance_token(&mut self) -> Option<&Token> {
1196        if self.is_at_end() {
1197            None
1198        } else {
1199            let token = &self.tokens[self.pos];
1200            self.pos += 1;
1201            Some(token)
1202        }
1203    }
1204
1205    fn is_at_end(&self) -> bool {
1206        self.pos >= self.tokens.len()
1207    }
1208}
1209
/// Check if a token looks like a float literal
///
/// After an optional leading minus sign, a float-like token has at least one
/// ASCII digit and contains either:
/// - A decimal point: `3.14`, `.5`, `5.`
/// - Scientific notation: `1e10`, `1E-5`, `1.5e3`
///
/// This is only a shape check; it does not guarantee the token parses as a
/// float. It must happen BEFORE integer parsing to avoid parsing "5" in "5.0"
/// as an integer.
fn is_float_literal(token: &str) -> bool {
    // Skip leading minus sign for negative numbers
    let unsigned = token.strip_prefix('-').unwrap_or(token);

    // Must have at least one digit; this rules out the empty string, a bare
    // "-" or ".", and ordinary words that merely contain '.' or 'e'.
    let has_digit = unsigned.chars().any(|c| c.is_ascii_digit());

    // Decimal point or scientific notation marker
    let has_float_marker = unsigned.contains(|c: char| matches!(c, '.' | 'e' | 'E'));

    has_digit && has_float_marker
}
1230
/// Process escape sequences in a string literal
///
/// Supported escape sequences:
/// - `\"` -> `"`  (quote)
/// - `\\` -> `\`  (backslash)
/// - `\n` -> newline
/// - `\r` -> carriage return
/// - `\t` -> tab
/// - `\xNN` -> Unicode code point U+00NN (hex value 00-FF)
///
/// # Note on `\xNN` encoding
///
/// The `\xNN` escape creates a Unicode code point U+00NN, not a raw byte.
/// For values 0x00-0x7F (ASCII), this maps directly to the byte value.
/// For values 0x80-0xFF (Latin-1 Supplement), the character is stored as
/// a multi-byte UTF-8 sequence. For example:
/// - `\x41` -> 'A' (1 byte in UTF-8)
/// - `\x1b` -> ESC (1 byte in UTF-8, used for ANSI terminal codes)
/// - `\xFF` -> 'ÿ' (U+00FF, 2 bytes in UTF-8: 0xC3 0xBF)
///
/// Both characters after `\x` must be hex digits (`0-9`, `a-f`, `A-F`);
/// a sign such as `\x+1` is rejected.
///
/// # Errors
/// Returns an error for an unknown escape sequence, a backslash at the end of
/// the input, or a `\x` escape that is incomplete or has non-hex digits.
fn unescape_string(s: &str) -> Result<String, String> {
    // The output is never longer (in bytes) than the escaped input for ASCII
    // escapes, so the input length is a good capacity estimate.
    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars();

    while let Some(ch) = chars.next() {
        if ch != '\\' {
            result.push(ch);
            continue;
        }

        match chars.next() {
            Some('"') => result.push('"'),
            Some('\\') => result.push('\\'),
            Some('n') => result.push('\n'),
            Some('r') => result.push('\r'),
            Some('t') => result.push('\t'),
            Some('x') => {
                // Hex escape: \xNN
                let hex1 = chars.next().ok_or_else(|| {
                    "Incomplete hex escape sequence '\\x' - expected 2 hex digits".to_string()
                })?;
                let hex2 = chars.next().ok_or_else(|| {
                    format!(
                        "Incomplete hex escape sequence '\\x{}' - expected 2 hex digits",
                        hex1
                    )
                })?;

                // Validate each digit on its own: integer parsing of the pair
                // would also accept a leading '+' sign.
                let decoded = hex1
                    .to_digit(16)
                    .zip(hex2.to_digit(16))
                    .map(|(high, low)| high * 16 + low)
                    .and_then(char::from_u32)
                    .ok_or_else(|| {
                        format!(
                            "Invalid hex escape sequence '\\x{}{}' - expected 2 hex digits (00-FF)",
                            hex1, hex2
                        )
                    })?;

                result.push(decoded);
            }
            Some(c) => {
                return Err(format!(
                    "Unknown escape sequence '\\{}' in string literal. \
                     Supported: \\\" \\\\ \\n \\r \\t \\xNN",
                    c
                ));
            }
            None => {
                return Err("String ends with incomplete escape sequence '\\'".to_string());
            }
        }
    }

    Ok(result)
}
1308
1309fn tokenize(source: &str) -> Vec<Token> {
1310    let mut tokens = Vec::new();
1311    let mut current = String::new();
1312    let mut current_start_line = 0;
1313    let mut current_start_col = 0;
1314    let mut in_string = false;
1315    let mut prev_was_backslash = false;
1316
1317    // Track current position (0-indexed)
1318    let mut line = 0;
1319    let mut col = 0;
1320
1321    for ch in source.chars() {
1322        if in_string {
1323            current.push(ch);
1324            if ch == '"' && !prev_was_backslash {
1325                // Unescaped quote ends the string
1326                in_string = false;
1327                tokens.push(Token::new(
1328                    current.clone(),
1329                    current_start_line,
1330                    current_start_col,
1331                ));
1332                current.clear();
1333                prev_was_backslash = false;
1334            } else if ch == '\\' && !prev_was_backslash {
1335                // Start of escape sequence
1336                prev_was_backslash = true;
1337            } else {
1338                // Regular character or escaped character
1339                prev_was_backslash = false;
1340            }
1341            // Track newlines inside strings
1342            if ch == '\n' {
1343                line += 1;
1344                col = 0;
1345            } else {
1346                col += 1;
1347            }
1348        } else if ch == '"' {
1349            if !current.is_empty() {
1350                tokens.push(Token::new(
1351                    current.clone(),
1352                    current_start_line,
1353                    current_start_col,
1354                ));
1355                current.clear();
1356            }
1357            in_string = true;
1358            current_start_line = line;
1359            current_start_col = col;
1360            current.push(ch);
1361            prev_was_backslash = false;
1362            col += 1;
1363        } else if ch.is_whitespace() {
1364            if !current.is_empty() {
1365                tokens.push(Token::new(
1366                    current.clone(),
1367                    current_start_line,
1368                    current_start_col,
1369                ));
1370                current.clear();
1371            }
1372            // Preserve newlines for comment handling
1373            if ch == '\n' {
1374                tokens.push(Token::new("\n".to_string(), line, col));
1375                line += 1;
1376                col = 0;
1377            } else {
1378                col += 1;
1379            }
1380        } else if "():;[]{},".contains(ch) {
1381            if !current.is_empty() {
1382                tokens.push(Token::new(
1383                    current.clone(),
1384                    current_start_line,
1385                    current_start_col,
1386                ));
1387                current.clear();
1388            }
1389            tokens.push(Token::new(ch.to_string(), line, col));
1390            col += 1;
1391        } else {
1392            if current.is_empty() {
1393                current_start_line = line;
1394                current_start_col = col;
1395            }
1396            current.push(ch);
1397            col += 1;
1398        }
1399    }
1400
1401    // Check for unclosed string literal
1402    if in_string {
1403        // Return error by adding a special error token
1404        // The parser will handle this as a parse error
1405        tokens.push(Token::new(
1406            "<<<UNCLOSED_STRING>>>".to_string(),
1407            current_start_line,
1408            current_start_col,
1409        ));
1410    } else if !current.is_empty() {
1411        tokens.push(Token::new(current, current_start_line, current_start_col));
1412    }
1413
1414    tokens
1415}
1416
1417#[cfg(test)]
1418mod tests {
1419    use super::*;
1420
1421    #[test]
1422    fn test_parse_hello_world() {
1423        let source = r#"
1424: main ( -- )
1425  "Hello, World!" write_line ;
1426"#;
1427
1428        let mut parser = Parser::new(source);
1429        let program = parser.parse().unwrap();
1430
1431        assert_eq!(program.words.len(), 1);
1432        assert_eq!(program.words[0].name, "main");
1433        assert_eq!(program.words[0].body.len(), 2);
1434
1435        match &program.words[0].body[0] {
1436            Statement::StringLiteral(s) => assert_eq!(s, "Hello, World!"),
1437            _ => panic!("Expected StringLiteral"),
1438        }
1439
1440        match &program.words[0].body[1] {
1441            Statement::WordCall { name, .. } => assert_eq!(name, "write_line"),
1442            _ => panic!("Expected WordCall"),
1443        }
1444    }
1445
1446    #[test]
1447    fn test_parse_with_numbers() {
1448        let source = ": add-example ( -- ) 2 3 add ;";
1449
1450        let mut parser = Parser::new(source);
1451        let program = parser.parse().unwrap();
1452
1453        assert_eq!(program.words[0].body.len(), 3);
1454        assert_eq!(program.words[0].body[0], Statement::IntLiteral(2));
1455        assert_eq!(program.words[0].body[1], Statement::IntLiteral(3));
1456        assert!(matches!(
1457            &program.words[0].body[2],
1458            Statement::WordCall { name, .. } if name == "add"
1459        ));
1460    }
1461
1462    #[test]
1463    fn test_parse_hex_literals() {
1464        let source = ": test ( -- ) 0xFF 0x10 0X1A ;";
1465        let mut parser = Parser::new(source);
1466        let program = parser.parse().unwrap();
1467
1468        assert_eq!(program.words[0].body[0], Statement::IntLiteral(255));
1469        assert_eq!(program.words[0].body[1], Statement::IntLiteral(16));
1470        assert_eq!(program.words[0].body[2], Statement::IntLiteral(26));
1471    }
1472
1473    #[test]
1474    fn test_parse_binary_literals() {
1475        let source = ": test ( -- ) 0b1010 0B1111 0b0 ;";
1476        let mut parser = Parser::new(source);
1477        let program = parser.parse().unwrap();
1478
1479        assert_eq!(program.words[0].body[0], Statement::IntLiteral(10));
1480        assert_eq!(program.words[0].body[1], Statement::IntLiteral(15));
1481        assert_eq!(program.words[0].body[2], Statement::IntLiteral(0));
1482    }
1483
1484    #[test]
1485    fn test_parse_invalid_hex_literal() {
1486        let source = ": test ( -- ) 0xGG ;";
1487        let mut parser = Parser::new(source);
1488        let err = parser.parse().unwrap_err();
1489        assert!(err.contains("Invalid hex literal"));
1490    }
1491
1492    #[test]
1493    fn test_parse_invalid_binary_literal() {
1494        let source = ": test ( -- ) 0b123 ;";
1495        let mut parser = Parser::new(source);
1496        let err = parser.parse().unwrap_err();
1497        assert!(err.contains("Invalid binary literal"));
1498    }
1499
1500    #[test]
1501    fn test_parse_escaped_quotes() {
1502        let source = r#": main ( -- ) "Say \"hello\" there" write_line ;"#;
1503
1504        let mut parser = Parser::new(source);
1505        let program = parser.parse().unwrap();
1506
1507        assert_eq!(program.words.len(), 1);
1508        assert_eq!(program.words[0].body.len(), 2);
1509
1510        match &program.words[0].body[0] {
1511            // Escape sequences should be processed: \" becomes actual quote
1512            Statement::StringLiteral(s) => assert_eq!(s, "Say \"hello\" there"),
1513            _ => panic!("Expected StringLiteral with escaped quotes"),
1514        }
1515    }
1516
1517    /// Regression test for issue #117: escaped quote at end of string
1518    /// Previously failed with "String ends with incomplete escape sequence"
1519    #[test]
1520    fn test_escaped_quote_at_end_of_string() {
1521        let source = r#": main ( -- ) "hello\"" io.write-line ;"#;
1522
1523        let mut parser = Parser::new(source);
1524        let program = parser.parse().unwrap();
1525
1526        assert_eq!(program.words.len(), 1);
1527        match &program.words[0].body[0] {
1528            Statement::StringLiteral(s) => assert_eq!(s, "hello\""),
1529            _ => panic!("Expected StringLiteral ending with escaped quote"),
1530        }
1531    }
1532
1533    /// Test escaped quote at start of string (boundary case)
1534    #[test]
1535    fn test_escaped_quote_at_start_of_string() {
1536        let source = r#": main ( -- ) "\"hello" io.write-line ;"#;
1537
1538        let mut parser = Parser::new(source);
1539        let program = parser.parse().unwrap();
1540
1541        match &program.words[0].body[0] {
1542            Statement::StringLiteral(s) => assert_eq!(s, "\"hello"),
1543            _ => panic!("Expected StringLiteral starting with escaped quote"),
1544        }
1545    }
1546
1547    #[test]
1548    fn test_escape_sequences() {
1549        let source = r#": main ( -- ) "Line 1\nLine 2\tTabbed" write_line ;"#;
1550
1551        let mut parser = Parser::new(source);
1552        let program = parser.parse().unwrap();
1553
1554        match &program.words[0].body[0] {
1555            Statement::StringLiteral(s) => assert_eq!(s, "Line 1\nLine 2\tTabbed"),
1556            _ => panic!("Expected StringLiteral"),
1557        }
1558    }
1559
1560    #[test]
1561    fn test_unknown_escape_sequence() {
1562        let source = r#": main ( -- ) "Bad \q sequence" write_line ;"#;
1563
1564        let mut parser = Parser::new(source);
1565        let result = parser.parse();
1566
1567        assert!(result.is_err());
1568        assert!(result.unwrap_err().contains("Unknown escape sequence"));
1569    }
1570
1571    #[test]
1572    fn test_hex_escape_sequence() {
1573        // \x1b is ESC (27), \x41 is 'A' (65)
1574        let source = r#": main ( -- ) "\x1b[2K\x41" io.write-line ;"#;
1575
1576        let mut parser = Parser::new(source);
1577        let program = parser.parse().unwrap();
1578
1579        match &program.words[0].body[0] {
1580            Statement::StringLiteral(s) => {
1581                assert_eq!(s.len(), 5); // ESC [ 2 K A
1582                assert_eq!(s.as_bytes()[0], 0x1b); // ESC
1583                assert_eq!(s.as_bytes()[4], 0x41); // 'A'
1584            }
1585            _ => panic!("Expected StringLiteral"),
1586        }
1587    }
1588
1589    #[test]
1590    fn test_hex_escape_null_byte() {
1591        let source = r#": main ( -- ) "before\x00after" io.write-line ;"#;
1592
1593        let mut parser = Parser::new(source);
1594        let program = parser.parse().unwrap();
1595
1596        match &program.words[0].body[0] {
1597            Statement::StringLiteral(s) => {
1598                assert_eq!(s.len(), 12); // "before" + NUL + "after"
1599                assert_eq!(s.as_bytes()[6], 0x00);
1600            }
1601            _ => panic!("Expected StringLiteral"),
1602        }
1603    }
1604
1605    #[test]
1606    fn test_hex_escape_uppercase() {
1607        // Both uppercase and lowercase hex digits should work
1608        // Note: Values > 0x7F become Unicode code points (U+00NN), multi-byte in UTF-8
1609        let source = r#": main ( -- ) "\x41\x42\x4F" io.write-line ;"#;
1610
1611        let mut parser = Parser::new(source);
1612        let program = parser.parse().unwrap();
1613
1614        match &program.words[0].body[0] {
1615            Statement::StringLiteral(s) => {
1616                assert_eq!(s, "ABO"); // 0x41='A', 0x42='B', 0x4F='O'
1617            }
1618            _ => panic!("Expected StringLiteral"),
1619        }
1620    }
1621
1622    #[test]
1623    fn test_hex_escape_high_bytes() {
1624        // Values > 0x7F become Unicode code points (Latin-1), which are multi-byte in UTF-8
1625        let source = r#": main ( -- ) "\xFF" io.write-line ;"#;
1626
1627        let mut parser = Parser::new(source);
1628        let program = parser.parse().unwrap();
1629
1630        match &program.words[0].body[0] {
1631            Statement::StringLiteral(s) => {
1632                // \xFF becomes U+00FF (ÿ), which is 2 bytes in UTF-8: C3 BF
1633                assert_eq!(s, "\u{00FF}");
1634                assert_eq!(s.chars().next().unwrap(), 'ÿ');
1635            }
1636            _ => panic!("Expected StringLiteral"),
1637        }
1638    }
1639
1640    #[test]
1641    fn test_hex_escape_incomplete() {
1642        // \x with only one hex digit
1643        let source = r#": main ( -- ) "\x1" io.write-line ;"#;
1644
1645        let mut parser = Parser::new(source);
1646        let result = parser.parse();
1647
1648        assert!(result.is_err());
1649        assert!(result.unwrap_err().contains("Incomplete hex escape"));
1650    }
1651
1652    #[test]
1653    fn test_hex_escape_invalid_digits() {
1654        // \xGG is not valid hex
1655        let source = r#": main ( -- ) "\xGG" io.write-line ;"#;
1656
1657        let mut parser = Parser::new(source);
1658        let result = parser.parse();
1659
1660        assert!(result.is_err());
1661        assert!(result.unwrap_err().contains("Invalid hex escape"));
1662    }
1663
1664    #[test]
1665    fn test_hex_escape_at_end_of_string() {
1666        // \x at end of string with no digits
1667        let source = r#": main ( -- ) "test\x" io.write-line ;"#;
1668
1669        let mut parser = Parser::new(source);
1670        let result = parser.parse();
1671
1672        assert!(result.is_err());
1673        assert!(result.unwrap_err().contains("Incomplete hex escape"));
1674    }
1675
1676    #[test]
1677    fn test_unclosed_string_literal() {
1678        let source = r#": main ( -- ) "unclosed string ;"#;
1679
1680        let mut parser = Parser::new(source);
1681        let result = parser.parse();
1682
1683        assert!(result.is_err());
1684        let err_msg = result.unwrap_err();
1685        assert!(err_msg.contains("Unclosed string literal"));
1686        // Should include position information (line 1, column 15 for the opening quote)
1687        assert!(
1688            err_msg.contains("line 1"),
1689            "Expected line number in error: {}",
1690            err_msg
1691        );
1692        assert!(
1693            err_msg.contains("column 15"),
1694            "Expected column number in error: {}",
1695            err_msg
1696        );
1697    }
1698
1699    #[test]
1700    fn test_multiple_word_definitions() {
1701        let source = r#"
1702: double ( Int -- Int )
1703  2 multiply ;
1704
1705: quadruple ( Int -- Int )
1706  double double ;
1707"#;
1708
1709        let mut parser = Parser::new(source);
1710        let program = parser.parse().unwrap();
1711
1712        assert_eq!(program.words.len(), 2);
1713        assert_eq!(program.words[0].name, "double");
1714        assert_eq!(program.words[1].name, "quadruple");
1715
1716        // Verify stack effects were parsed
1717        assert!(program.words[0].effect.is_some());
1718        assert!(program.words[1].effect.is_some());
1719    }
1720
1721    #[test]
1722    fn test_user_word_calling_user_word() {
1723        let source = r#"
1724: helper ( -- )
1725  "helper called" write_line ;
1726
1727: main ( -- )
1728  helper ;
1729"#;
1730
1731        let mut parser = Parser::new(source);
1732        let program = parser.parse().unwrap();
1733
1734        assert_eq!(program.words.len(), 2);
1735
1736        // Check main calls helper
1737        match &program.words[1].body[0] {
1738            Statement::WordCall { name, .. } => assert_eq!(name, "helper"),
1739            _ => panic!("Expected WordCall to helper"),
1740        }
1741    }
1742
1743    #[test]
1744    fn test_parse_simple_stack_effect() {
1745        // Test: ( Int -- Bool )
1746        // With implicit row polymorphism, this becomes: ( ..rest Int -- ..rest Bool )
1747        let source = ": test ( Int -- Bool ) 1 ;";
1748        let mut parser = Parser::new(source);
1749        let program = parser.parse().unwrap();
1750
1751        assert_eq!(program.words.len(), 1);
1752        let word = &program.words[0];
1753        assert!(word.effect.is_some());
1754
1755        let effect = word.effect.as_ref().unwrap();
1756
1757        // Input: Int on RowVar("rest") (implicit row polymorphism)
1758        assert_eq!(
1759            effect.inputs,
1760            StackType::Cons {
1761                rest: Box::new(StackType::RowVar("rest".to_string())),
1762                top: Type::Int
1763            }
1764        );
1765
1766        // Output: Bool on RowVar("rest") (implicit row polymorphism)
1767        assert_eq!(
1768            effect.outputs,
1769            StackType::Cons {
1770                rest: Box::new(StackType::RowVar("rest".to_string())),
1771                top: Type::Bool
1772            }
1773        );
1774    }
1775
1776    #[test]
1777    fn test_parse_row_polymorphic_stack_effect() {
1778        // Test: ( ..a Int -- ..a Bool )
1779        let source = ": test ( ..a Int -- ..a Bool ) 1 ;";
1780        let mut parser = Parser::new(source);
1781        let program = parser.parse().unwrap();
1782
1783        assert_eq!(program.words.len(), 1);
1784        let word = &program.words[0];
1785        assert!(word.effect.is_some());
1786
1787        let effect = word.effect.as_ref().unwrap();
1788
1789        // Input: Int on RowVar("a")
1790        assert_eq!(
1791            effect.inputs,
1792            StackType::Cons {
1793                rest: Box::new(StackType::RowVar("a".to_string())),
1794                top: Type::Int
1795            }
1796        );
1797
1798        // Output: Bool on RowVar("a")
1799        assert_eq!(
1800            effect.outputs,
1801            StackType::Cons {
1802                rest: Box::new(StackType::RowVar("a".to_string())),
1803                top: Type::Bool
1804            }
1805        );
1806    }
1807
1808    #[test]
1809    fn test_parse_invalid_row_var_starts_with_digit() {
1810        // Test: Row variable cannot start with digit
1811        let source = ": test ( ..123 Int -- ) ;";
1812        let mut parser = Parser::new(source);
1813        let result = parser.parse();
1814
1815        assert!(result.is_err());
1816        let err_msg = result.unwrap_err();
1817        assert!(
1818            err_msg.contains("lowercase letter"),
1819            "Expected error about lowercase letter, got: {}",
1820            err_msg
1821        );
1822    }
1823
1824    #[test]
1825    fn test_parse_invalid_row_var_starts_with_uppercase() {
1826        // Test: Row variable cannot start with uppercase (that's a type variable)
1827        let source = ": test ( ..Int Int -- ) ;";
1828        let mut parser = Parser::new(source);
1829        let result = parser.parse();
1830
1831        assert!(result.is_err());
1832        let err_msg = result.unwrap_err();
1833        assert!(
1834            err_msg.contains("lowercase letter") || err_msg.contains("type name"),
1835            "Expected error about lowercase letter or type name, got: {}",
1836            err_msg
1837        );
1838    }
1839
1840    #[test]
1841    fn test_parse_invalid_row_var_with_special_chars() {
1842        // Test: Row variable cannot contain special characters
1843        let source = ": test ( ..a-b Int -- ) ;";
1844        let mut parser = Parser::new(source);
1845        let result = parser.parse();
1846
1847        assert!(result.is_err());
1848        let err_msg = result.unwrap_err();
1849        assert!(
1850            err_msg.contains("letters, numbers, and underscores")
1851                || err_msg.contains("Unknown type"),
1852            "Expected error about valid characters, got: {}",
1853            err_msg
1854        );
1855    }
1856
1857    #[test]
1858    fn test_parse_valid_row_var_with_underscore() {
1859        // Test: Row variable CAN contain underscore
1860        let source = ": test ( ..my_row Int -- ..my_row Bool ) ;";
1861        let mut parser = Parser::new(source);
1862        let result = parser.parse();
1863
1864        assert!(result.is_ok(), "Should accept row variable with underscore");
1865    }
1866
1867    #[test]
1868    fn test_parse_multiple_types_stack_effect() {
1869        // Test: ( Int String -- Bool )
1870        // With implicit row polymorphism: ( ..rest Int String -- ..rest Bool )
1871        let source = ": test ( Int String -- Bool ) 1 ;";
1872        let mut parser = Parser::new(source);
1873        let program = parser.parse().unwrap();
1874
1875        let effect = program.words[0].effect.as_ref().unwrap();
1876
1877        // Input: String on Int on RowVar("rest")
1878        let (rest, top) = effect.inputs.clone().pop().unwrap();
1879        assert_eq!(top, Type::String);
1880        let (rest2, top2) = rest.pop().unwrap();
1881        assert_eq!(top2, Type::Int);
1882        assert_eq!(rest2, StackType::RowVar("rest".to_string()));
1883
1884        // Output: Bool on RowVar("rest") (implicit row polymorphism)
1885        assert_eq!(
1886            effect.outputs,
1887            StackType::Cons {
1888                rest: Box::new(StackType::RowVar("rest".to_string())),
1889                top: Type::Bool
1890            }
1891        );
1892    }
1893
1894    #[test]
1895    fn test_parse_type_variable() {
1896        // Test: ( ..a T -- ..a T T ) for dup
1897        let source = ": dup ( ..a T -- ..a T T ) ;";
1898        let mut parser = Parser::new(source);
1899        let program = parser.parse().unwrap();
1900
1901        let effect = program.words[0].effect.as_ref().unwrap();
1902
1903        // Input: T on RowVar("a")
1904        assert_eq!(
1905            effect.inputs,
1906            StackType::Cons {
1907                rest: Box::new(StackType::RowVar("a".to_string())),
1908                top: Type::Var("T".to_string())
1909            }
1910        );
1911
1912        // Output: T on T on RowVar("a")
1913        let (rest, top) = effect.outputs.clone().pop().unwrap();
1914        assert_eq!(top, Type::Var("T".to_string()));
1915        let (rest2, top2) = rest.pop().unwrap();
1916        assert_eq!(top2, Type::Var("T".to_string()));
1917        assert_eq!(rest2, StackType::RowVar("a".to_string()));
1918    }
1919
1920    #[test]
1921    fn test_parse_empty_stack_effect() {
1922        // Test: ( -- )
1923        // In concatenative languages, even empty effects are row-polymorphic
1924        // ( -- ) means ( ..rest -- ..rest ) - preserves stack
1925        let source = ": test ( -- ) ;";
1926        let mut parser = Parser::new(source);
1927        let program = parser.parse().unwrap();
1928
1929        let effect = program.words[0].effect.as_ref().unwrap();
1930
1931        // Both inputs and outputs should use the same implicit row variable
1932        assert_eq!(effect.inputs, StackType::RowVar("rest".to_string()));
1933        assert_eq!(effect.outputs, StackType::RowVar("rest".to_string()));
1934    }
1935
1936    #[test]
1937    fn test_parse_invalid_type() {
1938        // Test invalid type (lowercase, not a row var)
1939        let source = ": test ( invalid -- Bool ) ;";
1940        let mut parser = Parser::new(source);
1941        let result = parser.parse();
1942
1943        assert!(result.is_err());
1944        assert!(result.unwrap_err().contains("Unknown type"));
1945    }
1946
1947    #[test]
1948    fn test_parse_unclosed_stack_effect() {
1949        // Test unclosed stack effect - parser tries to parse all tokens until ')' or EOF
1950        // In this case, it encounters "body" which is an invalid type
1951        let source = ": test ( Int -- Bool body ;";
1952        let mut parser = Parser::new(source);
1953        let result = parser.parse();
1954
1955        assert!(result.is_err());
1956        let err_msg = result.unwrap_err();
1957        // Parser will try to parse "body" as a type and fail
1958        assert!(err_msg.contains("Unknown type"));
1959    }
1960
1961    #[test]
1962    fn test_parse_simple_quotation_type() {
1963        // Test: ( [Int -- Int] -- )
1964        let source = ": apply ( [Int -- Int] -- ) ;";
1965        let mut parser = Parser::new(source);
1966        let program = parser.parse().unwrap();
1967
1968        let effect = program.words[0].effect.as_ref().unwrap();
1969
1970        // Input should be: Quotation(Int -- Int) on RowVar("rest")
1971        let (rest, top) = effect.inputs.clone().pop().unwrap();
1972        match top {
1973            Type::Quotation(quot_effect) => {
1974                // Check quotation's input: Int on RowVar("rest")
1975                assert_eq!(
1976                    quot_effect.inputs,
1977                    StackType::Cons {
1978                        rest: Box::new(StackType::RowVar("rest".to_string())),
1979                        top: Type::Int
1980                    }
1981                );
1982                // Check quotation's output: Int on RowVar("rest")
1983                assert_eq!(
1984                    quot_effect.outputs,
1985                    StackType::Cons {
1986                        rest: Box::new(StackType::RowVar("rest".to_string())),
1987                        top: Type::Int
1988                    }
1989                );
1990            }
1991            _ => panic!("Expected Quotation type, got {:?}", top),
1992        }
1993        assert_eq!(rest, StackType::RowVar("rest".to_string()));
1994    }
1995
1996    #[test]
1997    fn test_parse_quotation_type_with_row_vars() {
1998        // Test: ( ..a [..a T -- ..a Bool] -- ..a )
1999        let source = ": test ( ..a [..a T -- ..a Bool] -- ..a ) ;";
2000        let mut parser = Parser::new(source);
2001        let program = parser.parse().unwrap();
2002
2003        let effect = program.words[0].effect.as_ref().unwrap();
2004
2005        // Input: Quotation on RowVar("a")
2006        let (rest, top) = effect.inputs.clone().pop().unwrap();
2007        match top {
2008            Type::Quotation(quot_effect) => {
2009                // Check quotation's input: T on RowVar("a")
2010                let (q_in_rest, q_in_top) = quot_effect.inputs.clone().pop().unwrap();
2011                assert_eq!(q_in_top, Type::Var("T".to_string()));
2012                assert_eq!(q_in_rest, StackType::RowVar("a".to_string()));
2013
2014                // Check quotation's output: Bool on RowVar("a")
2015                let (q_out_rest, q_out_top) = quot_effect.outputs.clone().pop().unwrap();
2016                assert_eq!(q_out_top, Type::Bool);
2017                assert_eq!(q_out_rest, StackType::RowVar("a".to_string()));
2018            }
2019            _ => panic!("Expected Quotation type, got {:?}", top),
2020        }
2021        assert_eq!(rest, StackType::RowVar("a".to_string()));
2022    }
2023
2024    #[test]
2025    fn test_parse_nested_quotation_type() {
2026        // Test: ( [[Int -- Int] -- Bool] -- )
2027        let source = ": nested ( [[Int -- Int] -- Bool] -- ) ;";
2028        let mut parser = Parser::new(source);
2029        let program = parser.parse().unwrap();
2030
2031        let effect = program.words[0].effect.as_ref().unwrap();
2032
2033        // Input: Quotation([Int -- Int] -- Bool) on RowVar("rest")
2034        let (_, top) = effect.inputs.clone().pop().unwrap();
2035        match top {
2036            Type::Quotation(outer_effect) => {
2037                // Outer quotation input: [Int -- Int] on RowVar("rest")
2038                let (_, outer_in_top) = outer_effect.inputs.clone().pop().unwrap();
2039                match outer_in_top {
2040                    Type::Quotation(inner_effect) => {
2041                        // Inner quotation: Int -- Int
2042                        assert!(matches!(
2043                            inner_effect.inputs.clone().pop().unwrap().1,
2044                            Type::Int
2045                        ));
2046                        assert!(matches!(
2047                            inner_effect.outputs.clone().pop().unwrap().1,
2048                            Type::Int
2049                        ));
2050                    }
2051                    _ => panic!("Expected nested Quotation type"),
2052                }
2053
2054                // Outer quotation output: Bool
2055                let (_, outer_out_top) = outer_effect.outputs.clone().pop().unwrap();
2056                assert_eq!(outer_out_top, Type::Bool);
2057            }
2058            _ => panic!("Expected Quotation type"),
2059        }
2060    }
2061
2062    #[test]
2063    fn test_parse_deeply_nested_quotation_type_exceeds_limit() {
2064        // Test: Deeply nested quotation types should fail with max depth error
2065        // Build a quotation type nested 35 levels deep (exceeds MAX_QUOTATION_DEPTH = 32)
2066        let mut source = String::from(": deep ( ");
2067
2068        // Build opening brackets: [[[[[[...
2069        for _ in 0..35 {
2070            source.push_str("[ -- ");
2071        }
2072
2073        source.push_str("Int");
2074
2075        // Build closing brackets: ...]]]]]]
2076        for _ in 0..35 {
2077            source.push_str(" ]");
2078        }
2079
2080        source.push_str(" -- ) ;");
2081
2082        let mut parser = Parser::new(&source);
2083        let result = parser.parse();
2084
2085        // Should fail with depth limit error
2086        assert!(result.is_err());
2087        let err_msg = result.unwrap_err();
2088        assert!(
2089            err_msg.contains("depth") || err_msg.contains("32"),
2090            "Expected depth limit error, got: {}",
2091            err_msg
2092        );
2093    }
2094
2095    #[test]
2096    fn test_parse_empty_quotation_type() {
2097        // Test: ( [ -- ] -- )
2098        // An empty quotation type is also row-polymorphic: [ ..rest -- ..rest ]
2099        let source = ": empty-quot ( [ -- ] -- ) ;";
2100        let mut parser = Parser::new(source);
2101        let program = parser.parse().unwrap();
2102
2103        let effect = program.words[0].effect.as_ref().unwrap();
2104
2105        let (_, top) = effect.inputs.clone().pop().unwrap();
2106        match top {
2107            Type::Quotation(quot_effect) => {
2108                // Empty quotation preserves the stack (row-polymorphic)
2109                assert_eq!(quot_effect.inputs, StackType::RowVar("rest".to_string()));
2110                assert_eq!(quot_effect.outputs, StackType::RowVar("rest".to_string()));
2111            }
2112            _ => panic!("Expected Quotation type"),
2113        }
2114    }
2115
2116    #[test]
2117    fn test_parse_quotation_type_in_output() {
2118        // Test: ( -- [Int -- Int] )
2119        let source = ": maker ( -- [Int -- Int] ) ;";
2120        let mut parser = Parser::new(source);
2121        let program = parser.parse().unwrap();
2122
2123        let effect = program.words[0].effect.as_ref().unwrap();
2124
2125        // Output should be: Quotation(Int -- Int) on RowVar("rest")
2126        let (_, top) = effect.outputs.clone().pop().unwrap();
2127        match top {
2128            Type::Quotation(quot_effect) => {
2129                assert!(matches!(
2130                    quot_effect.inputs.clone().pop().unwrap().1,
2131                    Type::Int
2132                ));
2133                assert!(matches!(
2134                    quot_effect.outputs.clone().pop().unwrap().1,
2135                    Type::Int
2136                ));
2137            }
2138            _ => panic!("Expected Quotation type"),
2139        }
2140    }
2141
2142    #[test]
2143    fn test_parse_unclosed_quotation_type() {
2144        // Test: ( [Int -- Int -- )  (missing ])
2145        let source = ": broken ( [Int -- Int -- ) ;";
2146        let mut parser = Parser::new(source);
2147        let result = parser.parse();
2148
2149        assert!(result.is_err());
2150        let err_msg = result.unwrap_err();
2151        // Parser might error with various messages depending on where it fails
2152        // It should at least indicate a parsing problem
2153        assert!(
2154            err_msg.contains("Unclosed")
2155                || err_msg.contains("Expected")
2156                || err_msg.contains("Unexpected"),
2157            "Got error: {}",
2158            err_msg
2159        );
2160    }
2161
2162    #[test]
2163    fn test_parse_multiple_quotation_types() {
2164        // Test: ( [Int -- Int] [String -- Bool] -- )
2165        let source = ": multi ( [Int -- Int] [String -- Bool] -- ) ;";
2166        let mut parser = Parser::new(source);
2167        let program = parser.parse().unwrap();
2168
2169        let effect = program.words[0].effect.as_ref().unwrap();
2170
2171        // Pop second quotation (String -- Bool)
2172        let (rest, top) = effect.inputs.clone().pop().unwrap();
2173        match top {
2174            Type::Quotation(quot_effect) => {
2175                assert!(matches!(
2176                    quot_effect.inputs.clone().pop().unwrap().1,
2177                    Type::String
2178                ));
2179                assert!(matches!(
2180                    quot_effect.outputs.clone().pop().unwrap().1,
2181                    Type::Bool
2182                ));
2183            }
2184            _ => panic!("Expected Quotation type"),
2185        }
2186
2187        // Pop first quotation (Int -- Int)
2188        let (_, top2) = rest.pop().unwrap();
2189        match top2 {
2190            Type::Quotation(quot_effect) => {
2191                assert!(matches!(
2192                    quot_effect.inputs.clone().pop().unwrap().1,
2193                    Type::Int
2194                ));
2195                assert!(matches!(
2196                    quot_effect.outputs.clone().pop().unwrap().1,
2197                    Type::Int
2198                ));
2199            }
2200            _ => panic!("Expected Quotation type"),
2201        }
2202    }
2203
2204    #[test]
2205    fn test_parse_quotation_type_without_separator() {
2206        // Test: ( [Int] -- ) should be REJECTED
2207        //
2208        // Design decision: The '--' separator is REQUIRED for clarity.
2209        // [Int] looks like a list type in most languages, not a consumer function.
2210        // This would confuse users.
2211        //
2212        // Require explicit syntax:
2213        // - `[Int -- ]` for quotation that consumes Int and produces nothing
2214        // - `[ -- Int]` for quotation that produces Int
2215        // - `[Int -- Int]` for transformation
2216        let source = ": consumer ( [Int] -- ) ;";
2217        let mut parser = Parser::new(source);
2218        let result = parser.parse();
2219
2220        // Should fail with helpful error message
2221        assert!(result.is_err());
2222        let err_msg = result.unwrap_err();
2223        assert!(
2224            err_msg.contains("require") && err_msg.contains("--"),
2225            "Expected error about missing '--' separator, got: {}",
2226            err_msg
2227        );
2228    }
2229
2230    #[test]
2231    fn test_parse_bare_quotation_type_rejected() {
2232        // Test: ( Int Quotation -- Int ) should be REJECTED
2233        //
2234        // 'Quotation' looks like a type name but would be silently treated as a
2235        // type variable without this check. Users must use explicit effect syntax.
2236        let source = ": apply-twice ( Int Quotation -- Int ) ;";
2237        let mut parser = Parser::new(source);
2238        let result = parser.parse();
2239
2240        assert!(result.is_err());
2241        let err_msg = result.unwrap_err();
2242        assert!(
2243            err_msg.contains("Quotation") && err_msg.contains("not a valid type"),
2244            "Expected error about 'Quotation' not being valid, got: {}",
2245            err_msg
2246        );
2247        assert!(
2248            err_msg.contains("[Int -- Int]") || err_msg.contains("[ -- ]"),
2249            "Expected error to suggest explicit syntax, got: {}",
2250            err_msg
2251        );
2252    }
2253
2254    #[test]
2255    fn test_parse_no_stack_effect() {
2256        // Test word without stack effect (should still work)
2257        let source = ": test 1 2 add ;";
2258        let mut parser = Parser::new(source);
2259        let program = parser.parse().unwrap();
2260
2261        assert_eq!(program.words.len(), 1);
2262        assert!(program.words[0].effect.is_none());
2263    }
2264
2265    #[test]
2266    fn test_parse_simple_quotation() {
2267        let source = r#"
2268: test ( -- Quot )
2269  [ 1 add ] ;
2270"#;
2271
2272        let mut parser = Parser::new(source);
2273        let program = parser.parse().unwrap();
2274
2275        assert_eq!(program.words.len(), 1);
2276        assert_eq!(program.words[0].name, "test");
2277        assert_eq!(program.words[0].body.len(), 1);
2278
2279        match &program.words[0].body[0] {
2280            Statement::Quotation { body, .. } => {
2281                assert_eq!(body.len(), 2);
2282                assert_eq!(body[0], Statement::IntLiteral(1));
2283                assert!(matches!(&body[1], Statement::WordCall { name, .. } if name == "add"));
2284            }
2285            _ => panic!("Expected Quotation statement"),
2286        }
2287    }
2288
2289    #[test]
2290    fn test_parse_empty_quotation() {
2291        let source = ": test [ ] ;";
2292
2293        let mut parser = Parser::new(source);
2294        let program = parser.parse().unwrap();
2295
2296        assert_eq!(program.words.len(), 1);
2297
2298        match &program.words[0].body[0] {
2299            Statement::Quotation { body, .. } => {
2300                assert_eq!(body.len(), 0);
2301            }
2302            _ => panic!("Expected Quotation statement"),
2303        }
2304    }
2305
2306    #[test]
2307    fn test_parse_quotation_with_call() {
2308        let source = r#"
2309: test ( -- )
2310  5 [ 1 add ] call ;
2311"#;
2312
2313        let mut parser = Parser::new(source);
2314        let program = parser.parse().unwrap();
2315
2316        assert_eq!(program.words.len(), 1);
2317        assert_eq!(program.words[0].body.len(), 3);
2318
2319        assert_eq!(program.words[0].body[0], Statement::IntLiteral(5));
2320
2321        match &program.words[0].body[1] {
2322            Statement::Quotation { body, .. } => {
2323                assert_eq!(body.len(), 2);
2324            }
2325            _ => panic!("Expected Quotation"),
2326        }
2327
2328        assert!(matches!(
2329            &program.words[0].body[2],
2330            Statement::WordCall { name, .. } if name == "call"
2331        ));
2332    }
2333
2334    #[test]
2335    fn test_parse_nested_quotation() {
2336        let source = ": test [ [ 1 add ] call ] ;";
2337
2338        let mut parser = Parser::new(source);
2339        let program = parser.parse().unwrap();
2340
2341        assert_eq!(program.words.len(), 1);
2342
2343        match &program.words[0].body[0] {
2344            Statement::Quotation {
2345                body: outer_body, ..
2346            } => {
2347                assert_eq!(outer_body.len(), 2);
2348
2349                match &outer_body[0] {
2350                    Statement::Quotation {
2351                        body: inner_body, ..
2352                    } => {
2353                        assert_eq!(inner_body.len(), 2);
2354                        assert_eq!(inner_body[0], Statement::IntLiteral(1));
2355                        assert!(
2356                            matches!(&inner_body[1], Statement::WordCall { name, .. } if name == "add")
2357                        );
2358                    }
2359                    _ => panic!("Expected nested Quotation"),
2360                }
2361
2362                assert!(
2363                    matches!(&outer_body[1], Statement::WordCall { name, .. } if name == "call")
2364                );
2365            }
2366            _ => panic!("Expected Quotation"),
2367        }
2368    }
2369
2370    #[test]
2371    fn test_parse_while_with_quotations() {
2372        let source = r#"
2373: countdown ( Int -- )
2374  [ dup 0 > ] [ 1 subtract ] while drop ;
2375"#;
2376
2377        let mut parser = Parser::new(source);
2378        let program = parser.parse().unwrap();
2379
2380        assert_eq!(program.words.len(), 1);
2381        assert_eq!(program.words[0].body.len(), 4);
2382
2383        // First quotation: [ dup 0 > ]
2384        match &program.words[0].body[0] {
2385            Statement::Quotation { body: pred, .. } => {
2386                assert_eq!(pred.len(), 3);
2387                assert!(matches!(&pred[0], Statement::WordCall { name, .. } if name == "dup"));
2388                assert_eq!(pred[1], Statement::IntLiteral(0));
2389                assert!(matches!(&pred[2], Statement::WordCall { name, .. } if name == ">"));
2390            }
2391            _ => panic!("Expected predicate quotation"),
2392        }
2393
2394        // Second quotation: [ 1 subtract ]
2395        match &program.words[0].body[1] {
2396            Statement::Quotation { body, .. } => {
2397                assert_eq!(body.len(), 2);
2398                assert_eq!(body[0], Statement::IntLiteral(1));
2399                assert!(matches!(&body[1], Statement::WordCall { name, .. } if name == "subtract"));
2400            }
2401            _ => panic!("Expected body quotation"),
2402        }
2403
2404        // while call
2405        assert!(matches!(
2406            &program.words[0].body[2],
2407            Statement::WordCall { name, .. } if name == "while"
2408        ));
2409
2410        // drop
2411        assert!(matches!(
2412            &program.words[0].body[3],
2413            Statement::WordCall { name, .. } if name == "drop"
2414        ));
2415    }
2416
2417    #[test]
2418    fn test_parse_simple_closure_type() {
2419        // Test: ( Int -- Closure[Int -- Int] )
2420        let source = ": make-adder ( Int -- Closure[Int -- Int] ) ;";
2421        let mut parser = Parser::new(source);
2422        let program = parser.parse().unwrap();
2423
2424        assert_eq!(program.words.len(), 1);
2425        let word = &program.words[0];
2426        assert!(word.effect.is_some());
2427
2428        let effect = word.effect.as_ref().unwrap();
2429
2430        // Input: Int on RowVar("rest")
2431        let (input_rest, input_top) = effect.inputs.clone().pop().unwrap();
2432        assert_eq!(input_top, Type::Int);
2433        assert_eq!(input_rest, StackType::RowVar("rest".to_string()));
2434
2435        // Output: Closure[Int -- Int] on RowVar("rest")
2436        let (output_rest, output_top) = effect.outputs.clone().pop().unwrap();
2437        match output_top {
2438            Type::Closure { effect, captures } => {
2439                // Closure effect: Int -> Int
2440                assert_eq!(
2441                    effect.inputs,
2442                    StackType::Cons {
2443                        rest: Box::new(StackType::RowVar("rest".to_string())),
2444                        top: Type::Int
2445                    }
2446                );
2447                assert_eq!(
2448                    effect.outputs,
2449                    StackType::Cons {
2450                        rest: Box::new(StackType::RowVar("rest".to_string())),
2451                        top: Type::Int
2452                    }
2453                );
2454                // Captures should be empty (filled in by type checker)
2455                assert_eq!(captures.len(), 0);
2456            }
2457            _ => panic!("Expected Closure type, got {:?}", output_top),
2458        }
2459        assert_eq!(output_rest, StackType::RowVar("rest".to_string()));
2460    }
2461
2462    #[test]
2463    fn test_parse_closure_type_with_row_vars() {
2464        // Test: ( ..a Config -- ..a Closure[Request -- Response] )
2465        let source = ": make-handler ( ..a Config -- ..a Closure[Request -- Response] ) ;";
2466        let mut parser = Parser::new(source);
2467        let program = parser.parse().unwrap();
2468
2469        let effect = program.words[0].effect.as_ref().unwrap();
2470
2471        // Output: Closure on RowVar("a")
2472        let (rest, top) = effect.outputs.clone().pop().unwrap();
2473        match top {
2474            Type::Closure { effect, .. } => {
2475                // Closure effect: Request -> Response
2476                let (_, in_top) = effect.inputs.clone().pop().unwrap();
2477                assert_eq!(in_top, Type::Var("Request".to_string()));
2478                let (_, out_top) = effect.outputs.clone().pop().unwrap();
2479                assert_eq!(out_top, Type::Var("Response".to_string()));
2480            }
2481            _ => panic!("Expected Closure type"),
2482        }
2483        assert_eq!(rest, StackType::RowVar("a".to_string()));
2484    }
2485
2486    #[test]
2487    fn test_parse_closure_type_missing_bracket() {
2488        // Test: ( Int -- Closure ) should fail
2489        let source = ": broken ( Int -- Closure ) ;";
2490        let mut parser = Parser::new(source);
2491        let result = parser.parse();
2492
2493        assert!(result.is_err());
2494        let err_msg = result.unwrap_err();
2495        assert!(
2496            err_msg.contains("[") && err_msg.contains("Closure"),
2497            "Expected error about missing '[' after Closure, got: {}",
2498            err_msg
2499        );
2500    }
2501
2502    #[test]
2503    fn test_parse_closure_type_in_input() {
2504        // Test: ( Closure[Int -- Int] -- )
2505        let source = ": apply-closure ( Closure[Int -- Int] -- ) ;";
2506        let mut parser = Parser::new(source);
2507        let program = parser.parse().unwrap();
2508
2509        let effect = program.words[0].effect.as_ref().unwrap();
2510
2511        // Input: Closure[Int -- Int] on RowVar("rest")
2512        let (_, top) = effect.inputs.clone().pop().unwrap();
2513        match top {
2514            Type::Closure { effect, .. } => {
2515                // Verify closure effect
2516                assert!(matches!(effect.inputs.clone().pop().unwrap().1, Type::Int));
2517                assert!(matches!(effect.outputs.clone().pop().unwrap().1, Type::Int));
2518            }
2519            _ => panic!("Expected Closure type in input"),
2520        }
2521    }
2522
2523    // Tests for token position tracking
2524
2525    #[test]
2526    fn test_token_position_single_line() {
2527        // Test token positions on a single line
2528        let source = ": main ( -- ) ;";
2529        let tokens = tokenize(source);
2530
2531        // : is at line 0, column 0
2532        assert_eq!(tokens[0].text, ":");
2533        assert_eq!(tokens[0].line, 0);
2534        assert_eq!(tokens[0].column, 0);
2535
2536        // main is at line 0, column 2
2537        assert_eq!(tokens[1].text, "main");
2538        assert_eq!(tokens[1].line, 0);
2539        assert_eq!(tokens[1].column, 2);
2540
2541        // ( is at line 0, column 7
2542        assert_eq!(tokens[2].text, "(");
2543        assert_eq!(tokens[2].line, 0);
2544        assert_eq!(tokens[2].column, 7);
2545    }
2546
2547    #[test]
2548    fn test_token_position_multiline() {
2549        // Test token positions across multiple lines
2550        let source = ": main ( -- )\n  42\n;";
2551        let tokens = tokenize(source);
2552
2553        // Find the 42 token (after the newline)
2554        let token_42 = tokens.iter().find(|t| t.text == "42").unwrap();
2555        assert_eq!(token_42.line, 1);
2556        assert_eq!(token_42.column, 2); // After 2 spaces of indentation
2557
2558        // Find the ; token (on line 2)
2559        let token_semi = tokens.iter().find(|t| t.text == ";").unwrap();
2560        assert_eq!(token_semi.line, 2);
2561        assert_eq!(token_semi.column, 0);
2562    }
2563
2564    #[test]
2565    fn test_word_def_source_location_span() {
2566        // Test that word definitions capture correct start and end lines
2567        let source = r#": helper ( -- )
2568  "hello"
2569  write_line
2570;
2571
2572: main ( -- )
2573  helper
2574;"#;
2575
2576        let mut parser = Parser::new(source);
2577        let program = parser.parse().unwrap();
2578
2579        assert_eq!(program.words.len(), 2);
2580
2581        // First word: helper spans lines 0-3
2582        let helper = &program.words[0];
2583        assert_eq!(helper.name, "helper");
2584        let helper_source = helper.source.as_ref().unwrap();
2585        assert_eq!(helper_source.start_line, 0);
2586        assert_eq!(helper_source.end_line, 3);
2587
2588        // Second word: main spans lines 5-7
2589        let main_word = &program.words[1];
2590        assert_eq!(main_word.name, "main");
2591        let main_source = main_word.source.as_ref().unwrap();
2592        assert_eq!(main_source.start_line, 5);
2593        assert_eq!(main_source.end_line, 7);
2594    }
2595
2596    #[test]
2597    fn test_token_position_string_with_newline() {
2598        // Test that newlines inside strings are tracked correctly
2599        let source = "\"line1\\nline2\"";
2600        let tokens = tokenize(source);
2601
2602        // The string token should start at line 0, column 0
2603        assert_eq!(tokens.len(), 1);
2604        assert_eq!(tokens[0].line, 0);
2605        assert_eq!(tokens[0].column, 0);
2606    }
2607
2608    // ============================================================================
2609    //                         ADT PARSING TESTS
2610    // ============================================================================
2611
2612    #[test]
2613    fn test_parse_simple_union() {
2614        let source = r#"
2615union Message {
2616  Get { response-chan: Int }
2617  Set { value: Int }
2618}
2619
2620: main ( -- ) ;
2621"#;
2622
2623        let mut parser = Parser::new(source);
2624        let program = parser.parse().unwrap();
2625
2626        assert_eq!(program.unions.len(), 1);
2627        let union_def = &program.unions[0];
2628        assert_eq!(union_def.name, "Message");
2629        assert_eq!(union_def.variants.len(), 2);
2630
2631        // Check first variant
2632        assert_eq!(union_def.variants[0].name, "Get");
2633        assert_eq!(union_def.variants[0].fields.len(), 1);
2634        assert_eq!(union_def.variants[0].fields[0].name, "response-chan");
2635        assert_eq!(union_def.variants[0].fields[0].type_name, "Int");
2636
2637        // Check second variant
2638        assert_eq!(union_def.variants[1].name, "Set");
2639        assert_eq!(union_def.variants[1].fields.len(), 1);
2640        assert_eq!(union_def.variants[1].fields[0].name, "value");
2641        assert_eq!(union_def.variants[1].fields[0].type_name, "Int");
2642    }
2643
2644    #[test]
2645    fn test_parse_union_with_multiple_fields() {
2646        let source = r#"
2647union Report {
2648  Data { op: Int, delta: Int, total: Int }
2649  Empty
2650}
2651
2652: main ( -- ) ;
2653"#;
2654
2655        let mut parser = Parser::new(source);
2656        let program = parser.parse().unwrap();
2657
2658        assert_eq!(program.unions.len(), 1);
2659        let union_def = &program.unions[0];
2660        assert_eq!(union_def.name, "Report");
2661        assert_eq!(union_def.variants.len(), 2);
2662
2663        // Check Data variant with 3 fields
2664        let data_variant = &union_def.variants[0];
2665        assert_eq!(data_variant.name, "Data");
2666        assert_eq!(data_variant.fields.len(), 3);
2667        assert_eq!(data_variant.fields[0].name, "op");
2668        assert_eq!(data_variant.fields[1].name, "delta");
2669        assert_eq!(data_variant.fields[2].name, "total");
2670
2671        // Check Empty variant with no fields
2672        let empty_variant = &union_def.variants[1];
2673        assert_eq!(empty_variant.name, "Empty");
2674        assert_eq!(empty_variant.fields.len(), 0);
2675    }
2676
2677    #[test]
2678    fn test_parse_union_lowercase_name_error() {
2679        let source = r#"
2680union message {
2681  Get { }
2682}
2683"#;
2684
2685        let mut parser = Parser::new(source);
2686        let result = parser.parse();
2687        assert!(result.is_err());
2688        assert!(result.unwrap_err().contains("uppercase"));
2689    }
2690
2691    #[test]
2692    fn test_parse_union_empty_error() {
2693        let source = r#"
2694union Message {
2695}
2696"#;
2697
2698        let mut parser = Parser::new(source);
2699        let result = parser.parse();
2700        assert!(result.is_err());
2701        assert!(result.unwrap_err().contains("at least one variant"));
2702    }
2703
2704    #[test]
2705    fn test_parse_union_duplicate_variant_error() {
2706        let source = r#"
2707union Message {
2708  Get { x: Int }
2709  Get { y: String }
2710}
2711"#;
2712
2713        let mut parser = Parser::new(source);
2714        let result = parser.parse();
2715        assert!(result.is_err());
2716        let err = result.unwrap_err();
2717        assert!(err.contains("Duplicate variant name"));
2718        assert!(err.contains("Get"));
2719    }
2720
2721    #[test]
2722    fn test_parse_union_duplicate_field_error() {
2723        let source = r#"
2724union Data {
2725  Record { x: Int, x: String }
2726}
2727"#;
2728
2729        let mut parser = Parser::new(source);
2730        let result = parser.parse();
2731        assert!(result.is_err());
2732        let err = result.unwrap_err();
2733        assert!(err.contains("Duplicate field name"));
2734        assert!(err.contains("x"));
2735    }
2736
2737    #[test]
2738    fn test_parse_simple_match() {
2739        let source = r#"
2740: handle ( -- )
2741  match
2742    Get -> send-response
2743    Set -> process-set
2744  end
2745;
2746"#;
2747
2748        let mut parser = Parser::new(source);
2749        let program = parser.parse().unwrap();
2750
2751        assert_eq!(program.words.len(), 1);
2752        assert_eq!(program.words[0].body.len(), 1);
2753
2754        match &program.words[0].body[0] {
2755            Statement::Match { arms, span: _ } => {
2756                assert_eq!(arms.len(), 2);
2757
2758                // First arm: Get ->
2759                match &arms[0].pattern {
2760                    Pattern::Variant(name) => assert_eq!(name, "Get"),
2761                    _ => panic!("Expected Variant pattern"),
2762                }
2763                assert_eq!(arms[0].body.len(), 1);
2764
2765                // Second arm: Set ->
2766                match &arms[1].pattern {
2767                    Pattern::Variant(name) => assert_eq!(name, "Set"),
2768                    _ => panic!("Expected Variant pattern"),
2769                }
2770                assert_eq!(arms[1].body.len(), 1);
2771            }
2772            _ => panic!("Expected Match statement"),
2773        }
2774    }
2775
2776    #[test]
2777    fn test_parse_match_with_bindings() {
2778        let source = r#"
2779: handle ( -- )
2780  match
2781    Get { >chan } -> chan send-response
2782    Report { >delta >total } -> delta total process
2783  end
2784;
2785"#;
2786
2787        let mut parser = Parser::new(source);
2788        let program = parser.parse().unwrap();
2789
2790        assert_eq!(program.words.len(), 1);
2791
2792        match &program.words[0].body[0] {
2793            Statement::Match { arms, span: _ } => {
2794                assert_eq!(arms.len(), 2);
2795
2796                // First arm: Get { chan } ->
2797                match &arms[0].pattern {
2798                    Pattern::VariantWithBindings { name, bindings } => {
2799                        assert_eq!(name, "Get");
2800                        assert_eq!(bindings.len(), 1);
2801                        assert_eq!(bindings[0], "chan");
2802                    }
2803                    _ => panic!("Expected VariantWithBindings pattern"),
2804                }
2805
2806                // Second arm: Report { delta total } ->
2807                match &arms[1].pattern {
2808                    Pattern::VariantWithBindings { name, bindings } => {
2809                        assert_eq!(name, "Report");
2810                        assert_eq!(bindings.len(), 2);
2811                        assert_eq!(bindings[0], "delta");
2812                        assert_eq!(bindings[1], "total");
2813                    }
2814                    _ => panic!("Expected VariantWithBindings pattern"),
2815                }
2816            }
2817            _ => panic!("Expected Match statement"),
2818        }
2819    }
2820
2821    #[test]
2822    fn test_parse_match_bindings_require_prefix() {
2823        // Old syntax without > prefix should error
2824        let source = r#"
2825: handle ( -- )
2826  match
2827    Get { chan } -> chan send-response
2828  end
2829;
2830"#;
2831
2832        let mut parser = Parser::new(source);
2833        let result = parser.parse();
2834        assert!(result.is_err());
2835        let err = result.unwrap_err();
2836        assert!(err.contains(">chan"));
2837        assert!(err.contains("stack extraction"));
2838    }
2839
2840    #[test]
2841    fn test_parse_match_with_body_statements() {
2842        let source = r#"
2843: handle ( -- )
2844  match
2845    Get -> 1 2 add send-response
2846    Set -> process-value store
2847  end
2848;
2849"#;
2850
2851        let mut parser = Parser::new(source);
2852        let program = parser.parse().unwrap();
2853
2854        match &program.words[0].body[0] {
2855            Statement::Match { arms, span: _ } => {
2856                // Get arm has 4 statements: 1, 2, add, send-response
2857                assert_eq!(arms[0].body.len(), 4);
2858                assert_eq!(arms[0].body[0], Statement::IntLiteral(1));
2859                assert_eq!(arms[0].body[1], Statement::IntLiteral(2));
2860                assert!(
2861                    matches!(&arms[0].body[2], Statement::WordCall { name, .. } if name == "add")
2862                );
2863
2864                // Set arm has 2 statements: process-value, store
2865                assert_eq!(arms[1].body.len(), 2);
2866            }
2867            _ => panic!("Expected Match statement"),
2868        }
2869    }
2870
2871    #[test]
2872    fn test_parse_match_empty_error() {
2873        let source = r#"
2874: handle ( -- )
2875  match
2876  end
2877;
2878"#;
2879
2880        let mut parser = Parser::new(source);
2881        let result = parser.parse();
2882        assert!(result.is_err());
2883        assert!(result.unwrap_err().contains("at least one arm"));
2884    }
2885
2886    #[test]
2887    fn test_parse_symbol_literal() {
2888        let source = r#"
2889: main ( -- )
2890    :hello drop
2891;
2892"#;
2893
2894        let mut parser = Parser::new(source);
2895        let program = parser.parse().unwrap();
2896        assert_eq!(program.words.len(), 1);
2897
2898        let main = &program.words[0];
2899        assert_eq!(main.body.len(), 2);
2900
2901        match &main.body[0] {
2902            Statement::Symbol(name) => assert_eq!(name, "hello"),
2903            _ => panic!("Expected Symbol statement, got {:?}", main.body[0]),
2904        }
2905    }
2906
2907    #[test]
2908    fn test_parse_symbol_with_hyphen() {
2909        let source = r#"
2910: main ( -- )
2911    :hello-world drop
2912;
2913"#;
2914
2915        let mut parser = Parser::new(source);
2916        let program = parser.parse().unwrap();
2917
2918        match &program.words[0].body[0] {
2919            Statement::Symbol(name) => assert_eq!(name, "hello-world"),
2920            _ => panic!("Expected Symbol statement"),
2921        }
2922    }
2923
2924    #[test]
2925    fn test_parse_symbol_starting_with_digit_fails() {
2926        let source = r#"
2927: main ( -- )
2928    :123abc drop
2929;
2930"#;
2931
2932        let mut parser = Parser::new(source);
2933        let result = parser.parse();
2934        assert!(result.is_err());
2935        assert!(result.unwrap_err().contains("cannot start with a digit"));
2936    }
2937
2938    #[test]
2939    fn test_parse_symbol_with_invalid_char_fails() {
2940        let source = r#"
2941: main ( -- )
2942    :hello@world drop
2943;
2944"#;
2945
2946        let mut parser = Parser::new(source);
2947        let result = parser.parse();
2948        assert!(result.is_err());
2949        assert!(result.unwrap_err().contains("invalid character"));
2950    }
2951
2952    #[test]
2953    fn test_parse_symbol_special_chars_allowed() {
2954        // Test that ? and ! are allowed in symbol names
2955        let source = r#"
2956: main ( -- )
2957    :empty? drop
2958    :save! drop
2959;
2960"#;
2961
2962        let mut parser = Parser::new(source);
2963        let program = parser.parse().unwrap();
2964
2965        match &program.words[0].body[0] {
2966            Statement::Symbol(name) => assert_eq!(name, "empty?"),
2967            _ => panic!("Expected Symbol statement"),
2968        }
2969        match &program.words[0].body[2] {
2970            Statement::Symbol(name) => assert_eq!(name, "save!"),
2971            _ => panic!("Expected Symbol statement"),
2972        }
2973    }
2974}