Skip to main content

seqc/
parser.rs

1//! Simple parser for Seq syntax
2//!
3//! Syntax:
4//! ```text
5//! : word-name ( stack-effect )
6//!   statement1
7//!   statement2
8//!   ... ;
9//! ```
10
11use crate::ast::{
12    Include, MatchArm, Pattern, Program, SourceLocation, Span, Statement, UnionDef, UnionField,
13    UnionVariant, WordDef,
14};
15use crate::types::{Effect, SideEffect, StackType, Type};
16
/// A lexical token together with the position where it begins in the source.
#[derive(Debug, Clone)]
pub struct Token {
    /// The token's text.
    pub text: String,
    /// Line number (0-indexed for LSP compatibility)
    pub line: usize,
    /// Column number (0-indexed)
    pub column: usize,
}

impl Token {
    /// Builds a token from its text and its line/column position.
    fn new(text: String, line: usize, column: usize) -> Self {
        Self { text, line, column }
    }
}

/// Allows `token == "literal"`: only the text is compared, the position is
/// ignored.
impl PartialEq<&str> for Token {
    fn eq(&self, rhs: &&str) -> bool {
        self.text.as_str() == *rhs
    }
}

/// Allows comparing a token with an unsized `str`; only the text is compared.
impl PartialEq<str> for Token {
    fn eq(&self, rhs: &str) -> bool {
        self.text.as_str() == rhs
    }
}
44
/// Parser state: the token stream produced by `tokenize` plus the bookkeeping
/// needed while turning it into a `Program`.
pub struct Parser {
    /// Tokens of the source being parsed (filled in by `Parser::new`).
    tokens: Vec<Token>,
    /// Cursor into `tokens`; starts at 0.
    /// NOTE(review): presumably the index of the next token to consume -
    /// confirm against the `advance`/`check` helpers.
    pos: usize,
    /// Counter for assigning unique IDs to quotations
    /// Used by the typechecker to track inferred types
    next_quotation_id: usize,
    /// Pending lint annotations collected from `# seq:allow(lint-id)` comments
    /// (taken over by the next word definition that is parsed)
    pending_allowed_lints: Vec<String>,
    /// Known union type names - used to distinguish union types from type variables
    /// RFC #345: Union types in stack effects must be recognized as concrete types
    known_unions: std::collections::HashSet<String>,
}
57
58impl Parser {
59    pub fn new(source: &str) -> Self {
60        let tokens = tokenize(source);
61        Parser {
62            tokens,
63            pos: 0,
64            next_quotation_id: 0,
65            pending_allowed_lints: Vec::new(),
66            known_unions: std::collections::HashSet::new(),
67        }
68    }
69
70    /// Register external union names (e.g., from included modules)
71    /// These union types will be recognized in stack effect declarations.
72    pub fn register_external_unions(&mut self, union_names: &[&str]) {
73        for name in union_names {
74            self.known_unions.insert(name.to_string());
75        }
76    }
77
78    pub fn parse(&mut self) -> Result<Program, String> {
79        let mut program = Program::new();
80
81        // Check for unclosed string error from tokenizer
82        if let Some(error_token) = self.tokens.iter().find(|t| *t == "<<<UNCLOSED_STRING>>>") {
83            return Err(format!(
84                "Unclosed string literal at line {}, column {} - missing closing quote",
85                error_token.line + 1, // 1-indexed for user display
86                error_token.column + 1
87            ));
88        }
89
90        while !self.is_at_end() {
91            self.skip_comments();
92            if self.is_at_end() {
93                break;
94            }
95
96            // Check for include statement
97            if self.check("include") {
98                let include = self.parse_include()?;
99                program.includes.push(include);
100                continue;
101            }
102
103            // Check for union definition
104            if self.check("union") {
105                let union_def = self.parse_union_def()?;
106                program.unions.push(union_def);
107                continue;
108            }
109
110            let word = self.parse_word_def()?;
111            program.words.push(word);
112        }
113
114        Ok(program)
115    }
116
117    /// Parse an include statement:
118    ///   include std:http     -> Include::Std("http")
119    ///   include ffi:readline -> Include::Ffi("readline")
120    ///   include "my-utils"   -> Include::Relative("my-utils")
121    fn parse_include(&mut self) -> Result<Include, String> {
122        self.consume("include");
123
124        let token = self
125            .advance()
126            .ok_or("Expected module name after 'include'")?
127            .clone();
128
129        // Check for std: prefix (tokenizer splits this into "std", ":", "name")
130        if token == "std" {
131            // Expect : token
132            if !self.consume(":") {
133                return Err("Expected ':' after 'std' in include statement".to_string());
134            }
135            // Get the module name
136            let name = self
137                .advance()
138                .ok_or("Expected module name after 'std:'")?
139                .clone();
140            return Ok(Include::Std(name));
141        }
142
143        // Check for ffi: prefix
144        if token == "ffi" {
145            // Expect : token
146            if !self.consume(":") {
147                return Err("Expected ':' after 'ffi' in include statement".to_string());
148            }
149            // Get the library name
150            let name = self
151                .advance()
152                .ok_or("Expected library name after 'ffi:'")?
153                .clone();
154            return Ok(Include::Ffi(name));
155        }
156
157        // Check for quoted string (relative path)
158        if token.starts_with('"') && token.ends_with('"') {
159            let path = token.trim_start_matches('"').trim_end_matches('"');
160            return Ok(Include::Relative(path.to_string()));
161        }
162
163        Err(format!(
164            "Invalid include syntax '{}'. Use 'include std:name', 'include ffi:lib', or 'include \"path\"'",
165            token
166        ))
167    }
168
169    /// Parse a union type definition:
170    ///   union Message {
171    ///     Get { response-chan: Int }
172    ///     Increment { response-chan: Int }
173    ///     Report { op: Int, delta: Int, total: Int }
174    ///   }
175    fn parse_union_def(&mut self) -> Result<UnionDef, String> {
176        // Capture start line from 'union' token
177        let start_line = self.current_token().map(|t| t.line).unwrap_or(0);
178
179        // Consume 'union' keyword
180        self.consume("union");
181
182        // Get union name (must start with uppercase)
183        let name = self
184            .advance()
185            .ok_or("Expected union name after 'union'")?
186            .clone();
187
188        if !name
189            .chars()
190            .next()
191            .map(|c| c.is_uppercase())
192            .unwrap_or(false)
193        {
194            return Err(format!(
195                "Union name '{}' must start with an uppercase letter",
196                name
197            ));
198        }
199
200        // RFC #345: Register this union name so it can be recognized in stack effects
201        // This allows ( UnionName -- ) to parse as Union type, not a type variable
202        self.known_unions.insert(name.clone());
203
204        // Skip comments and newlines
205        self.skip_comments();
206
207        // Expect '{'
208        if !self.consume("{") {
209            return Err(format!(
210                "Expected '{{' after union name '{}', got '{}'",
211                name,
212                self.current()
213            ));
214        }
215
216        // Parse variants until '}'
217        let mut variants = Vec::new();
218        loop {
219            self.skip_comments();
220
221            if self.check("}") {
222                break;
223            }
224
225            if self.is_at_end() {
226                return Err(format!("Unexpected end of file in union '{}'", name));
227            }
228
229            variants.push(self.parse_union_variant()?);
230        }
231
232        // Capture end line from '}' token before consuming
233        let end_line = self.current_token().map(|t| t.line).unwrap_or(start_line);
234
235        // Consume '}'
236        self.consume("}");
237
238        if variants.is_empty() {
239            return Err(format!("Union '{}' must have at least one variant", name));
240        }
241
242        // Check for duplicate variant names
243        let mut seen_variants = std::collections::HashSet::new();
244        for variant in &variants {
245            if !seen_variants.insert(&variant.name) {
246                return Err(format!(
247                    "Duplicate variant name '{}' in union '{}'",
248                    variant.name, name
249                ));
250            }
251        }
252
253        Ok(UnionDef {
254            name,
255            variants,
256            source: Some(SourceLocation::span(
257                std::path::PathBuf::new(),
258                start_line,
259                end_line,
260            )),
261        })
262    }
263
264    /// Parse a single union variant:
265    ///   Get { response-chan: Int }
266    ///   or just: Empty (no fields)
267    fn parse_union_variant(&mut self) -> Result<UnionVariant, String> {
268        let start_line = self.current_token().map(|t| t.line).unwrap_or(0);
269
270        // Get variant name (must start with uppercase)
271        let name = self.advance().ok_or("Expected variant name")?.clone();
272
273        if !name
274            .chars()
275            .next()
276            .map(|c| c.is_uppercase())
277            .unwrap_or(false)
278        {
279            return Err(format!(
280                "Variant name '{}' must start with an uppercase letter",
281                name
282            ));
283        }
284
285        self.skip_comments();
286
287        // Check for optional fields
288        let fields = if self.check("{") {
289            self.consume("{");
290            let fields = self.parse_union_fields()?;
291            if !self.consume("}") {
292                return Err(format!("Expected '}}' after variant '{}' fields", name));
293            }
294            fields
295        } else {
296            Vec::new()
297        };
298
299        Ok(UnionVariant {
300            name,
301            fields,
302            source: Some(SourceLocation::new(std::path::PathBuf::new(), start_line)),
303        })
304    }
305
306    /// Parse union fields: name: Type, name: Type, ...
307    fn parse_union_fields(&mut self) -> Result<Vec<UnionField>, String> {
308        let mut fields = Vec::new();
309
310        loop {
311            self.skip_comments();
312
313            if self.check("}") {
314                break;
315            }
316
317            // Get field name
318            let field_name = self.advance().ok_or("Expected field name")?.clone();
319
320            // Expect ':'
321            if !self.consume(":") {
322                return Err(format!(
323                    "Expected ':' after field name '{}', got '{}'",
324                    field_name,
325                    self.current()
326                ));
327            }
328
329            // Get type name
330            let type_name = self
331                .advance()
332                .ok_or("Expected type name after ':'")?
333                .clone();
334
335            fields.push(UnionField {
336                name: field_name,
337                type_name,
338            });
339
340            // Optional comma separator
341            self.skip_comments();
342            self.consume(",");
343        }
344
345        // Check for duplicate field names
346        let mut seen_fields = std::collections::HashSet::new();
347        for field in &fields {
348            if !seen_fields.insert(&field.name) {
349                return Err(format!("Duplicate field name '{}' in variant", field.name));
350            }
351        }
352
353        Ok(fields)
354    }
355
356    fn parse_word_def(&mut self) -> Result<WordDef, String> {
357        // Consume any pending lint annotations collected from comments before this word
358        let allowed_lints = std::mem::take(&mut self.pending_allowed_lints);
359
360        // Capture start line from ':' token
361        let start_line = self.current_token().map(|t| t.line).unwrap_or(0);
362
363        // Expect ':'
364        if !self.consume(":") {
365            return Err(format!(
366                "Expected ':' to start word definition, got '{}'",
367                self.current()
368            ));
369        }
370
371        // Get word name
372        let name = self
373            .advance()
374            .ok_or("Expected word name after ':'")?
375            .clone();
376
377        // Parse stack effect if present: ( ..a Int -- ..a Bool )
378        let effect = if self.check("(") {
379            Some(self.parse_stack_effect()?)
380        } else {
381            None
382        };
383
384        // Parse body until ';'
385        let mut body = Vec::new();
386        while !self.check(";") {
387            if self.is_at_end() {
388                return Err(format!("Unexpected end of file in word '{}'", name));
389            }
390
391            // Skip comments and newlines in body
392            self.skip_comments();
393            if self.check(";") {
394                break;
395            }
396
397            body.push(self.parse_statement()?);
398        }
399
400        // Capture end line from ';' token before consuming
401        let end_line = self.current_token().map(|t| t.line).unwrap_or(start_line);
402
403        // Consume ';'
404        self.consume(";");
405
406        Ok(WordDef {
407            name,
408            effect,
409            body,
410            source: Some(crate::ast::SourceLocation::span(
411                std::path::PathBuf::new(),
412                start_line,
413                end_line,
414            )),
415            allowed_lints,
416        })
417    }
418
419    fn parse_statement(&mut self) -> Result<Statement, String> {
420        use crate::ast::Span;
421        let tok = self.advance_token().ok_or("Unexpected end of file")?;
422        let token = &tok.text;
423        let tok_line = tok.line;
424        let tok_column = tok.column;
425        let tok_len = tok.text.len();
426
427        // Check if it looks like a float literal (contains . or scientific notation)
428        // Must check this BEFORE integer parsing
429        if let Some(f) = is_float_literal(token)
430            .then(|| token.parse::<f64>().ok())
431            .flatten()
432        {
433            return Ok(Statement::FloatLiteral(f));
434        }
435
436        // Try to parse as hex literal (0x or 0X prefix)
437        if let Some(hex) = token
438            .strip_prefix("0x")
439            .or_else(|| token.strip_prefix("0X"))
440        {
441            return i64::from_str_radix(hex, 16)
442                .map(Statement::IntLiteral)
443                .map_err(|_| format!("Invalid hex literal: {}", token));
444        }
445
446        // Try to parse as binary literal (0b or 0B prefix)
447        if let Some(bin) = token
448            .strip_prefix("0b")
449            .or_else(|| token.strip_prefix("0B"))
450        {
451            return i64::from_str_radix(bin, 2)
452                .map(Statement::IntLiteral)
453                .map_err(|_| format!("Invalid binary literal: {}", token));
454        }
455
456        // Try to parse as decimal integer literal
457        if let Ok(n) = token.parse::<i64>() {
458            return Ok(Statement::IntLiteral(n));
459        }
460
461        // Try to parse as boolean literal
462        if token == "true" {
463            return Ok(Statement::BoolLiteral(true));
464        }
465        if token == "false" {
466            return Ok(Statement::BoolLiteral(false));
467        }
468
469        // Try to parse as symbol literal (:foo, :some-name)
470        if token == ":" {
471            // Get the next token as the symbol name
472            let name_tok = self
473                .advance_token()
474                .ok_or("Expected symbol name after ':', got end of input")?;
475            let name = &name_tok.text;
476            // Validate symbol name (identifier-like, kebab-case allowed)
477            if name.is_empty() {
478                return Err("Symbol name cannot be empty".to_string());
479            }
480            if name.starts_with(|c: char| c.is_ascii_digit()) {
481                return Err(format!(
482                    "Symbol name cannot start with a digit: ':{}'\n  Hint: Symbol names must start with a letter",
483                    name
484                ));
485            }
486            if let Some(bad_char) = name.chars().find(|c| {
487                !c.is_alphanumeric()
488                    && *c != '-'
489                    && *c != '_'
490                    && *c != '.'
491                    && *c != '?'
492                    && *c != '!'
493            }) {
494                return Err(format!(
495                    "Symbol name contains invalid character '{}': ':{}'\n  Hint: Allowed: letters, digits, - _ . ? !",
496                    bad_char, name
497                ));
498            }
499            return Ok(Statement::Symbol(name.clone()));
500        }
501
502        // Try to parse as string literal
503        if token.starts_with('"') {
504            // Validate token has at least opening and closing quotes
505            if token.len() < 2 || !token.ends_with('"') {
506                return Err(format!("Malformed string literal: {}", token));
507            }
508            // Strip exactly one quote from each end (not all quotes, which would
509            // incorrectly handle escaped quotes at string boundaries like "hello\"")
510            let raw = &token[1..token.len() - 1];
511            let unescaped = unescape_string(raw)?;
512            return Ok(Statement::StringLiteral(unescaped));
513        }
514
515        // Check for conditional
516        if token == "if" {
517            return self.parse_if(tok_line, tok_column);
518        }
519
520        // Check for quotation
521        if token == "[" {
522            return self.parse_quotation(tok_line, tok_column);
523        }
524
525        // Check for match expression
526        if token == "match" {
527            return self.parse_match(tok_line, tok_column);
528        }
529
530        // Otherwise it's a word call - preserve source span for precise diagnostics
531        Ok(Statement::WordCall {
532            name: token.to_string(),
533            span: Some(Span::new(tok_line, tok_column, tok_len)),
534        })
535    }
536
537    fn parse_if(&mut self, start_line: usize, start_column: usize) -> Result<Statement, String> {
538        let mut then_branch = Vec::new();
539
540        // Parse then branch until 'else' or 'then'
541        loop {
542            if self.is_at_end() {
543                return Err("Unexpected end of file in 'if' statement".to_string());
544            }
545
546            // Skip comments and newlines
547            self.skip_comments();
548
549            if self.check("else") {
550                self.advance();
551                // Parse else branch
552                break;
553            }
554
555            if self.check("then") {
556                self.advance();
557                // End of if without else
558                return Ok(Statement::If {
559                    then_branch,
560                    else_branch: None,
561                    span: Some(Span::new(start_line, start_column, "if".len())),
562                });
563            }
564
565            then_branch.push(self.parse_statement()?);
566        }
567
568        // Parse else branch until 'then'
569        let mut else_branch = Vec::new();
570        loop {
571            if self.is_at_end() {
572                return Err("Unexpected end of file in 'else' branch".to_string());
573            }
574
575            // Skip comments and newlines
576            self.skip_comments();
577
578            if self.check("then") {
579                self.advance();
580                return Ok(Statement::If {
581                    then_branch,
582                    else_branch: Some(else_branch),
583                    span: Some(Span::new(start_line, start_column, "if".len())),
584                });
585            }
586
587            else_branch.push(self.parse_statement()?);
588        }
589    }
590
591    fn parse_quotation(
592        &mut self,
593        start_line: usize,
594        start_column: usize,
595    ) -> Result<Statement, String> {
596        use crate::ast::QuotationSpan;
597        let mut body = Vec::new();
598
599        // Parse statements until ']'
600        loop {
601            if self.is_at_end() {
602                return Err("Unexpected end of file in quotation".to_string());
603            }
604
605            // Skip comments and newlines
606            self.skip_comments();
607
608            if self.check("]") {
609                let end_tok = self.advance_token().unwrap();
610                let end_line = end_tok.line;
611                let end_column = end_tok.column + 1; // exclusive
612                let id = self.next_quotation_id;
613                self.next_quotation_id += 1;
614                // Span from '[' to ']' inclusive
615                let span = QuotationSpan::new(start_line, start_column, end_line, end_column);
616                return Ok(Statement::Quotation {
617                    id,
618                    body,
619                    span: Some(span),
620                });
621            }
622
623            body.push(self.parse_statement()?);
624        }
625    }
626
627    /// Parse a match expression:
628    ///   match
629    ///     Get -> send-response
630    ///     Increment -> do-increment send-response
631    ///     Report -> aggregate-add
632    ///   end
633    fn parse_match(&mut self, start_line: usize, start_column: usize) -> Result<Statement, String> {
634        let mut arms = Vec::new();
635
636        loop {
637            self.skip_comments();
638
639            // Check for 'end' to terminate match
640            if self.check("end") {
641                self.advance();
642                break;
643            }
644
645            if self.is_at_end() {
646                return Err("Unexpected end of file in match expression".to_string());
647            }
648
649            arms.push(self.parse_match_arm()?);
650        }
651
652        if arms.is_empty() {
653            return Err("Match expression must have at least one arm".to_string());
654        }
655
656        Ok(Statement::Match {
657            arms,
658            span: Some(Span::new(start_line, start_column, "match".len())),
659        })
660    }
661
662    /// Parse a single match arm:
663    ///   Get -> send-response
664    ///   or with bindings:
665    ///   Get { chan } -> chan send-response
666    fn parse_match_arm(&mut self) -> Result<MatchArm, String> {
667        // Get variant name with position info
668        let variant_token = self
669            .advance_token()
670            .ok_or("Expected variant name in match arm")?;
671        let variant_name = variant_token.text.clone();
672        let arm_line = variant_token.line;
673        let arm_column = variant_token.column;
674        let arm_length = variant_name.len();
675
676        self.skip_comments();
677
678        // Check for optional bindings: { field1 field2 }
679        let pattern = if self.check("{") {
680            self.consume("{");
681            let mut bindings = Vec::new();
682
683            loop {
684                self.skip_comments();
685
686                if self.check("}") {
687                    break;
688                }
689
690                if self.is_at_end() {
691                    return Err(format!(
692                        "Unexpected end of file in match arm bindings for '{}'",
693                        variant_name
694                    ));
695                }
696
697                let token = self.advance().ok_or("Expected binding name")?.clone();
698
699                // Require > prefix to make clear these are stack extractions, not variables
700                if let Some(field_name) = token.strip_prefix('>') {
701                    if field_name.is_empty() {
702                        return Err(format!(
703                            "Expected field name after '>' in match bindings for '{}'",
704                            variant_name
705                        ));
706                    }
707                    bindings.push(field_name.to_string());
708                } else {
709                    return Err(format!(
710                        "Match bindings must use '>' prefix to indicate stack extraction. \
711                         Use '>{}' instead of '{}' in pattern for '{}'",
712                        token, token, variant_name
713                    ));
714                }
715            }
716
717            self.consume("}");
718            Pattern::VariantWithBindings {
719                name: variant_name,
720                bindings,
721            }
722        } else {
723            Pattern::Variant(variant_name.clone())
724        };
725
726        self.skip_comments();
727
728        // Expect '->' arrow
729        if !self.consume("->") {
730            return Err(format!(
731                "Expected '->' after pattern '{}', got '{}'",
732                match &pattern {
733                    Pattern::Variant(n) => n.clone(),
734                    Pattern::VariantWithBindings { name, .. } => name.clone(),
735                },
736                self.current()
737            ));
738        }
739
740        // Parse body until next pattern or 'end'
741        let mut body = Vec::new();
742        loop {
743            self.skip_comments();
744
745            // Check for end of arm (next pattern starts with uppercase, or 'end')
746            if self.check("end") {
747                break;
748            }
749
750            // Check if next token looks like a match pattern (not just any uppercase word).
751            // A pattern is: UppercaseName followed by '->' or '{'
752            // This prevents confusing 'Make-Get' (constructor call) with a pattern.
753            if let Some(token) = self.current_token()
754                && let Some(first_char) = token.text.chars().next()
755                && first_char.is_uppercase()
756            {
757                // Peek at next token to see if this is a pattern (followed by -> or {)
758                if let Some(next) = self.peek_at(1)
759                    && (next == "->" || next == "{")
760                {
761                    // This is the next pattern
762                    break;
763                }
764                // Otherwise it's just an uppercase word call (like Make-Get), continue parsing body
765            }
766
767            if self.is_at_end() {
768                return Err("Unexpected end of file in match arm body".to_string());
769            }
770
771            body.push(self.parse_statement()?);
772        }
773
774        Ok(MatchArm {
775            pattern,
776            body,
777            span: Some(Span::new(arm_line, arm_column, arm_length)),
778        })
779    }
780
781    /// Parse a stack effect declaration: ( ..a Int -- ..a Bool )
782    /// With optional computational effects: ( ..a Int -- ..a Bool | Yield Int )
783    fn parse_stack_effect(&mut self) -> Result<Effect, String> {
784        // Consume '('
785        if !self.consume("(") {
786            return Err("Expected '(' to start stack effect".to_string());
787        }
788
789        // Parse input stack types (until '--' or ')')
790        let (input_row_var, input_types) =
791            self.parse_type_list_until(&["--", ")"], "stack effect inputs", 0)?;
792
793        // Consume '--'
794        if !self.consume("--") {
795            return Err("Expected '--' separator in stack effect".to_string());
796        }
797
798        // Parse output stack types (until ')' or '|')
799        let (output_row_var, output_types) =
800            self.parse_type_list_until(&[")", "|"], "stack effect outputs", 0)?;
801
802        // Parse optional computational effects after '|'
803        let effects = if self.consume("|") {
804            self.parse_effect_annotations()?
805        } else {
806            Vec::new()
807        };
808
809        // Consume ')'
810        if !self.consume(")") {
811            return Err("Expected ')' to end stack effect".to_string());
812        }
813
814        // Build input and output StackTypes
815        let inputs = self.build_stack_type(input_row_var, input_types);
816        let outputs = self.build_stack_type(output_row_var, output_types);
817
818        Ok(Effect::with_effects(inputs, outputs, effects))
819    }
820
821    /// Parse computational effect annotations after '|'
822    /// Example: | Yield Int
823    fn parse_effect_annotations(&mut self) -> Result<Vec<SideEffect>, String> {
824        let mut effects = Vec::new();
825
826        // Parse effects until we hit ')'
827        while let Some(token) = self.peek_at(0) {
828            if token == ")" {
829                break;
830            }
831
832            match token {
833                "Yield" => {
834                    self.advance(); // consume "Yield"
835                    // Parse the yield type
836                    if let Some(type_token) = self.current_token() {
837                        if type_token.text == ")" {
838                            return Err("Expected type after 'Yield'".to_string());
839                        }
840                        let type_token = type_token.clone();
841                        self.advance();
842                        let yield_type = self.parse_type(&type_token)?;
843                        effects.push(SideEffect::Yield(Box::new(yield_type)));
844                    } else {
845                        return Err("Expected type after 'Yield'".to_string());
846                    }
847                }
848                _ => {
849                    return Err(format!("Unknown effect '{}'. Expected 'Yield'", token));
850                }
851            }
852        }
853
854        if effects.is_empty() {
855            return Err("Expected at least one effect after '|'".to_string());
856        }
857
858        Ok(effects)
859    }
860
861    /// Parse a single type token into a Type
862    fn parse_type(&self, token: &Token) -> Result<Type, String> {
863        match token.text.as_str() {
864            "Int" => Ok(Type::Int),
865            "Float" => Ok(Type::Float),
866            "Bool" => Ok(Type::Bool),
867            "String" => Ok(Type::String),
868            // Reject 'Quotation' - it looks like a type but would be silently treated as a type variable.
869            // Users must use explicit effect syntax like [Int -- Int] instead.
870            "Quotation" => Err(format!(
871                "'Quotation' is not a valid type at line {}, column {}. Use explicit quotation syntax like [Int -- Int] or [ -- ] instead.",
872                token.line + 1,
873                token.column + 1
874            )),
875            _ => {
876                // Check if it's a type variable (starts with uppercase)
877                if let Some(first_char) = token.text.chars().next() {
878                    if first_char.is_uppercase() {
879                        // RFC #345: Check if this is a known union type name
880                        // Union types are nominal and should NOT unify with each other
881                        if self.known_unions.contains(&token.text) {
882                            Ok(Type::Union(token.text.to_string()))
883                        } else {
884                            // Unknown uppercase identifier - treat as type variable
885                            Ok(Type::Var(token.text.to_string()))
886                        }
887                    } else {
888                        Err(format!(
889                            "Unknown type: '{}' at line {}, column {}. Expected Int, Bool, String, Closure, or a type variable (uppercase)",
890                            token.text.escape_default(),
891                            token.line + 1, // 1-indexed for user display
892                            token.column + 1
893                        ))
894                    }
895                } else {
896                    Err(format!(
897                        "Invalid type: '{}' at line {}, column {}",
898                        token.text.escape_default(),
899                        token.line + 1,
900                        token.column + 1
901                    ))
902                }
903            }
904        }
905    }
906
907    /// Validate row variable name
908    /// Row variables must start with a lowercase letter and contain only alphanumeric characters
909    fn validate_row_var_name(&self, name: &str) -> Result<(), String> {
910        if name.is_empty() {
911            return Err("Row variable must have a name after '..'".to_string());
912        }
913
914        // Must start with lowercase letter
915        let first_char = name.chars().next().unwrap();
916        if !first_char.is_ascii_lowercase() {
917            return Err(format!(
918                "Row variable '..{}' must start with a lowercase letter (a-z)",
919                name
920            ));
921        }
922
923        // Rest must be alphanumeric or underscore
924        for ch in name.chars() {
925            if !ch.is_alphanumeric() && ch != '_' {
926                return Err(format!(
927                    "Row variable '..{}' can only contain letters, numbers, and underscores",
928                    name
929                ));
930            }
931        }
932
933        // Check for reserved keywords (type names that might confuse users)
934        match name {
935            "Int" | "Bool" | "String" => {
936                return Err(format!(
937                    "Row variable '..{}' cannot use type name as identifier",
938                    name
939                ));
940            }
941            _ => {}
942        }
943
944        Ok(())
945    }
946
947    /// Parse a list of types until one of the given terminators is reached
948    /// Returns (optional row variable, list of types)
949    /// Used by both parse_stack_effect and parse_quotation_type
950    ///
951    /// depth: Current nesting depth for quotation types (0 at top level)
952    fn parse_type_list_until(
953        &mut self,
954        terminators: &[&str],
955        context: &str,
956        depth: usize,
957    ) -> Result<(Option<String>, Vec<Type>), String> {
958        const MAX_QUOTATION_DEPTH: usize = 32;
959
960        if depth > MAX_QUOTATION_DEPTH {
961            return Err(format!(
962                "Quotation type nesting exceeds maximum depth of {} (possible deeply nested types or DOS attack)",
963                MAX_QUOTATION_DEPTH
964            ));
965        }
966
967        let mut types = Vec::new();
968        let mut row_var = None;
969
970        while !terminators.iter().any(|t| self.check(t)) {
971            // Skip comments and blank lines within type lists
972            self.skip_comments();
973
974            // Re-check terminators after skipping comments
975            if terminators.iter().any(|t| self.check(t)) {
976                break;
977            }
978
979            if self.is_at_end() {
980                return Err(format!(
981                    "Unexpected end while parsing {} - expected one of: {}",
982                    context,
983                    terminators.join(", ")
984                ));
985            }
986
987            let token = self
988                .advance_token()
989                .ok_or_else(|| format!("Unexpected end in {}", context))?
990                .clone();
991
992            // Check for row variable: ..name
993            if token.text.starts_with("..") {
994                let var_name = token.text.trim_start_matches("..").to_string();
995                self.validate_row_var_name(&var_name)?;
996                row_var = Some(var_name);
997            } else if token.text == "Closure" {
998                // Closure type: Closure[effect]
999                if !self.consume("[") {
1000                    return Err("Expected '[' after 'Closure' in type signature".to_string());
1001                }
1002                let effect_type = self.parse_quotation_type(depth)?;
1003                match effect_type {
1004                    Type::Quotation(effect) => {
1005                        types.push(Type::Closure {
1006                            effect,
1007                            captures: Vec::new(), // Filled in by type checker
1008                        });
1009                    }
1010                    _ => unreachable!("parse_quotation_type should return Quotation"),
1011                }
1012            } else if token.text == "[" {
1013                // Nested quotation type
1014                types.push(self.parse_quotation_type(depth)?);
1015            } else {
1016                // Parse as concrete type
1017                types.push(self.parse_type(&token)?);
1018            }
1019        }
1020
1021        Ok((row_var, types))
1022    }
1023
1024    /// Parse a quotation type: [inputs -- outputs]
1025    /// Note: The opening '[' has already been consumed
1026    ///
1027    /// depth: Current nesting depth (incremented for each nested quotation)
1028    fn parse_quotation_type(&mut self, depth: usize) -> Result<Type, String> {
1029        // Parse input stack types (until '--' or ']')
1030        let (input_row_var, input_types) =
1031            self.parse_type_list_until(&["--", "]"], "quotation type inputs", depth + 1)?;
1032
1033        // Require '--' separator for clarity
1034        if !self.consume("--") {
1035            // Check if user closed with ] without separator
1036            if self.check("]") {
1037                return Err(
1038                    "Quotation types require '--' separator. Did you mean '[Int -- ]' or '[ -- Int]'?"
1039                        .to_string(),
1040                );
1041            }
1042            return Err("Expected '--' separator in quotation type".to_string());
1043        }
1044
1045        // Parse output stack types (until ']')
1046        let (output_row_var, output_types) =
1047            self.parse_type_list_until(&["]"], "quotation type outputs", depth + 1)?;
1048
1049        // Consume ']'
1050        if !self.consume("]") {
1051            return Err("Expected ']' to end quotation type".to_string());
1052        }
1053
1054        // Build input and output StackTypes
1055        let inputs = self.build_stack_type(input_row_var, input_types);
1056        let outputs = self.build_stack_type(output_row_var, output_types);
1057
1058        Ok(Type::Quotation(Box::new(Effect::new(inputs, outputs))))
1059    }
1060
1061    /// Build a StackType from an optional row variable and a list of types
1062    /// Example: row_var="a", types=[Int, Bool] => RowVar("a") with Int on top of Bool
1063    ///
1064    /// IMPORTANT: ALL stack effects are implicitly row-polymorphic in concatenative languages.
1065    /// This means:
1066    ///   ( -- )        becomes  ( ..rest -- ..rest )       - no-op, preserves stack
1067    ///   ( -- Int )    becomes  ( ..rest -- ..rest Int )   - pushes Int
1068    ///   ( Int -- )    becomes  ( ..rest Int -- ..rest )   - consumes Int
1069    ///   ( Int -- Int) becomes  ( ..rest Int -- ..rest Int ) - transforms top
1070    fn build_stack_type(&self, row_var: Option<String>, types: Vec<Type>) -> StackType {
1071        // Always use row polymorphism - this is fundamental to concatenative semantics
1072        let base = match row_var {
1073            Some(name) => StackType::RowVar(name),
1074            None => StackType::RowVar("rest".to_string()),
1075        };
1076
1077        // Push types onto the stack (bottom to top order)
1078        types.into_iter().fold(base, |stack, ty| stack.push(ty))
1079    }
1080
1081    fn skip_comments(&mut self) {
1082        loop {
1083            // Check for comment: either standalone "#" or token starting with "#"
1084            // The latter handles shebangs like "#!/usr/bin/env seqc"
1085            let is_comment = if self.is_at_end() {
1086                false
1087            } else {
1088                let tok = self.current();
1089                tok == "#" || tok.starts_with("#!")
1090            };
1091
1092            if is_comment {
1093                self.advance(); // consume # or shebang token
1094
1095                // Collect all tokens until newline to reconstruct the comment text
1096                let mut comment_parts: Vec<String> = Vec::new();
1097                while !self.is_at_end() && self.current() != "\n" {
1098                    comment_parts.push(self.current().to_string());
1099                    self.advance();
1100                }
1101                if !self.is_at_end() {
1102                    self.advance(); // skip newline
1103                }
1104
1105                // Join parts and check for seq:allow annotation
1106                // Format: # seq:allow(lint-id) -> parts = ["seq", ":", "allow", "(", "lint-id", ")"]
1107                let comment = comment_parts.join("");
1108                if let Some(lint_id) = comment
1109                    .strip_prefix("seq:allow(")
1110                    .and_then(|s| s.strip_suffix(")"))
1111                {
1112                    self.pending_allowed_lints.push(lint_id.to_string());
1113                }
1114            } else if self.check("\n") {
1115                // Skip blank lines
1116                self.advance();
1117            } else {
1118                break;
1119            }
1120        }
1121    }
1122
1123    fn check(&self, expected: &str) -> bool {
1124        if self.is_at_end() {
1125            return false;
1126        }
1127        self.current() == expected
1128    }
1129
1130    fn consume(&mut self, expected: &str) -> bool {
1131        if self.check(expected) {
1132            self.advance();
1133            true
1134        } else {
1135            false
1136        }
1137    }
1138
1139    /// Get the text of the current token
1140    fn current(&self) -> &str {
1141        if self.is_at_end() {
1142            ""
1143        } else {
1144            &self.tokens[self.pos].text
1145        }
1146    }
1147
1148    /// Get the full current token with position info
1149    fn current_token(&self) -> Option<&Token> {
1150        if self.is_at_end() {
1151            None
1152        } else {
1153            Some(&self.tokens[self.pos])
1154        }
1155    }
1156
1157    /// Peek at a token N positions ahead without consuming
1158    fn peek_at(&self, n: usize) -> Option<&str> {
1159        let idx = self.pos + n;
1160        if idx < self.tokens.len() {
1161            Some(&self.tokens[idx].text)
1162        } else {
1163            None
1164        }
1165    }
1166
1167    /// Advance and return the token text (for compatibility with existing code)
1168    fn advance(&mut self) -> Option<&String> {
1169        if self.is_at_end() {
1170            None
1171        } else {
1172            let token = &self.tokens[self.pos];
1173            self.pos += 1;
1174            Some(&token.text)
1175        }
1176    }
1177
1178    /// Advance and return the full token with position info
1179    fn advance_token(&mut self) -> Option<&Token> {
1180        if self.is_at_end() {
1181            None
1182        } else {
1183            let token = &self.tokens[self.pos];
1184            self.pos += 1;
1185            Some(token)
1186        }
1187    }
1188
1189    fn is_at_end(&self) -> bool {
1190        self.pos >= self.tokens.len()
1191    }
1192}
1193
/// Check if a token looks like a float literal
///
/// A token qualifies when, after an optional leading `-`, it contains at least one
/// ASCII digit and either:
/// - A decimal point: `3.14`, `.5`, `5.`
/// - Scientific notation: `1e10`, `1E-5`, `1.5e3`
///
/// Requiring a digit keeps ordinary words that merely contain `.` or `e`
/// (for example `hello` or `io.write-line`) from being classified as floats.
///
/// This is only a shape check; it does not guarantee the token parses as a float.
///
/// This check must happen BEFORE integer parsing to avoid
/// parsing "5" in "5.0" as an integer.
fn is_float_literal(token: &str) -> bool {
    // Skip leading minus sign for negative numbers
    let unsigned = token.strip_prefix('-').unwrap_or(token);

    // Must have at least one digit
    let has_digit = unsigned.chars().any(|c| c.is_ascii_digit());

    // Must have a decimal point or an exponent marker
    let has_float_marker = unsigned.chars().any(|c| matches!(c, '.' | 'e' | 'E'));

    has_digit && has_float_marker
}
1214
/// Process escape sequences in a string literal
///
/// Supported escape sequences:
/// - `\"` -> `"`  (quote)
/// - `\\` -> `\`  (backslash)
/// - `\n` -> newline
/// - `\r` -> carriage return
/// - `\t` -> tab
/// - `\xNN` -> Unicode code point U+00NN (hex value 00-FF)
///
/// # Note on `\xNN` encoding
///
/// The `\xNN` escape creates a Unicode code point U+00NN, not a raw byte.
/// For values 0x00-0x7F (ASCII), this maps directly to the byte value.
/// For values 0x80-0xFF (Latin-1 Supplement), the character is stored as
/// a multi-byte UTF-8 sequence. For example:
/// - `\x41` -> 'A' (1 byte in UTF-8)
/// - `\x1b` -> ESC (1 byte in UTF-8, used for ANSI terminal codes)
/// - `\xFF` -> 'ÿ' (U+00FF, 2 bytes in UTF-8: 0xC3 0xBF)
///
/// Both characters after `\x` must be hexadecimal digits (`0-9`, `a-f`, `A-F`);
/// anything else, including a sign character such as `+`, is rejected.
///
/// # Errors
/// Returns an error if an unknown escape sequence is encountered, if a `\x`
/// escape is incomplete or contains a non-hex character, or if the input ends
/// with a lone backslash.
fn unescape_string(s: &str) -> Result<String, String> {
    // The output is never longer than the input, so one allocation suffices.
    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars();

    while let Some(ch) = chars.next() {
        if ch != '\\' {
            result.push(ch);
            continue;
        }

        match chars.next() {
            Some('"') => result.push('"'),
            Some('\\') => result.push('\\'),
            Some('n') => result.push('\n'),
            Some('r') => result.push('\r'),
            Some('t') => result.push('\t'),
            Some('x') => {
                // Hex escape: \xNN
                let hex1 = chars.next().ok_or_else(|| {
                    "Incomplete hex escape sequence '\\x' - expected 2 hex digits".to_string()
                })?;
                let hex2 = chars.next().ok_or_else(|| {
                    format!(
                        "Incomplete hex escape sequence '\\x{}' - expected 2 hex digits",
                        hex1
                    )
                })?;

                // Validate each digit on its own: integer parsing of the pair would
                // also accept a leading '+' (e.g. "+F"), which is not a hex digit.
                let byte_val = match (hex1.to_digit(16), hex2.to_digit(16)) {
                    // Two hex digits combine to at most 0xFF, so the cast is lossless.
                    (Some(high), Some(low)) => (high * 16 + low) as u8,
                    _ => {
                        return Err(format!(
                            "Invalid hex escape sequence '\\x{}{}' - expected 2 hex digits (00-FF)",
                            hex1, hex2
                        ));
                    }
                };

                result.push(char::from(byte_val));
            }
            Some(c) => {
                return Err(format!(
                    "Unknown escape sequence '\\{}' in string literal. \
                     Supported: \\\" \\\\ \\n \\r \\t \\xNN",
                    c
                ));
            }
            None => {
                return Err("String ends with incomplete escape sequence '\\'".to_string());
            }
        }
    }

    Ok(result)
}
1292
1293fn tokenize(source: &str) -> Vec<Token> {
1294    let mut tokens = Vec::new();
1295    let mut current = String::new();
1296    let mut current_start_line = 0;
1297    let mut current_start_col = 0;
1298    let mut in_string = false;
1299    let mut prev_was_backslash = false;
1300
1301    // Track current position (0-indexed)
1302    let mut line = 0;
1303    let mut col = 0;
1304
1305    for ch in source.chars() {
1306        if in_string {
1307            current.push(ch);
1308            if ch == '"' && !prev_was_backslash {
1309                // Unescaped quote ends the string
1310                in_string = false;
1311                tokens.push(Token::new(
1312                    current.clone(),
1313                    current_start_line,
1314                    current_start_col,
1315                ));
1316                current.clear();
1317                prev_was_backslash = false;
1318            } else if ch == '\\' && !prev_was_backslash {
1319                // Start of escape sequence
1320                prev_was_backslash = true;
1321            } else {
1322                // Regular character or escaped character
1323                prev_was_backslash = false;
1324            }
1325            // Track newlines inside strings
1326            if ch == '\n' {
1327                line += 1;
1328                col = 0;
1329            } else {
1330                col += 1;
1331            }
1332        } else if ch == '"' {
1333            if !current.is_empty() {
1334                tokens.push(Token::new(
1335                    current.clone(),
1336                    current_start_line,
1337                    current_start_col,
1338                ));
1339                current.clear();
1340            }
1341            in_string = true;
1342            current_start_line = line;
1343            current_start_col = col;
1344            current.push(ch);
1345            prev_was_backslash = false;
1346            col += 1;
1347        } else if ch.is_whitespace() {
1348            if !current.is_empty() {
1349                tokens.push(Token::new(
1350                    current.clone(),
1351                    current_start_line,
1352                    current_start_col,
1353                ));
1354                current.clear();
1355            }
1356            // Preserve newlines for comment handling
1357            if ch == '\n' {
1358                tokens.push(Token::new("\n".to_string(), line, col));
1359                line += 1;
1360                col = 0;
1361            } else {
1362                col += 1;
1363            }
1364        } else if "():;[]{},".contains(ch) {
1365            if !current.is_empty() {
1366                tokens.push(Token::new(
1367                    current.clone(),
1368                    current_start_line,
1369                    current_start_col,
1370                ));
1371                current.clear();
1372            }
1373            tokens.push(Token::new(ch.to_string(), line, col));
1374            col += 1;
1375        } else {
1376            if current.is_empty() {
1377                current_start_line = line;
1378                current_start_col = col;
1379            }
1380            current.push(ch);
1381            col += 1;
1382        }
1383    }
1384
1385    // Check for unclosed string literal
1386    if in_string {
1387        // Return error by adding a special error token
1388        // The parser will handle this as a parse error
1389        tokens.push(Token::new(
1390            "<<<UNCLOSED_STRING>>>".to_string(),
1391            current_start_line,
1392            current_start_col,
1393        ));
1394    } else if !current.is_empty() {
1395        tokens.push(Token::new(current, current_start_line, current_start_col));
1396    }
1397
1398    tokens
1399}
1400
1401#[cfg(test)]
1402mod tests {
1403    use super::*;
1404
1405    #[test]
1406    fn test_parse_hello_world() {
1407        let source = r#"
1408: main ( -- )
1409  "Hello, World!" write_line ;
1410"#;
1411
1412        let mut parser = Parser::new(source);
1413        let program = parser.parse().unwrap();
1414
1415        assert_eq!(program.words.len(), 1);
1416        assert_eq!(program.words[0].name, "main");
1417        assert_eq!(program.words[0].body.len(), 2);
1418
1419        match &program.words[0].body[0] {
1420            Statement::StringLiteral(s) => assert_eq!(s, "Hello, World!"),
1421            _ => panic!("Expected StringLiteral"),
1422        }
1423
1424        match &program.words[0].body[1] {
1425            Statement::WordCall { name, .. } => assert_eq!(name, "write_line"),
1426            _ => panic!("Expected WordCall"),
1427        }
1428    }
1429
1430    #[test]
1431    fn test_parse_with_numbers() {
1432        let source = ": add-example ( -- ) 2 3 add ;";
1433
1434        let mut parser = Parser::new(source);
1435        let program = parser.parse().unwrap();
1436
1437        assert_eq!(program.words[0].body.len(), 3);
1438        assert_eq!(program.words[0].body[0], Statement::IntLiteral(2));
1439        assert_eq!(program.words[0].body[1], Statement::IntLiteral(3));
1440        assert!(matches!(
1441            &program.words[0].body[2],
1442            Statement::WordCall { name, .. } if name == "add"
1443        ));
1444    }
1445
1446    #[test]
1447    fn test_parse_hex_literals() {
1448        let source = ": test ( -- ) 0xFF 0x10 0X1A ;";
1449        let mut parser = Parser::new(source);
1450        let program = parser.parse().unwrap();
1451
1452        assert_eq!(program.words[0].body[0], Statement::IntLiteral(255));
1453        assert_eq!(program.words[0].body[1], Statement::IntLiteral(16));
1454        assert_eq!(program.words[0].body[2], Statement::IntLiteral(26));
1455    }
1456
1457    #[test]
1458    fn test_parse_binary_literals() {
1459        let source = ": test ( -- ) 0b1010 0B1111 0b0 ;";
1460        let mut parser = Parser::new(source);
1461        let program = parser.parse().unwrap();
1462
1463        assert_eq!(program.words[0].body[0], Statement::IntLiteral(10));
1464        assert_eq!(program.words[0].body[1], Statement::IntLiteral(15));
1465        assert_eq!(program.words[0].body[2], Statement::IntLiteral(0));
1466    }
1467
1468    #[test]
1469    fn test_parse_invalid_hex_literal() {
1470        let source = ": test ( -- ) 0xGG ;";
1471        let mut parser = Parser::new(source);
1472        let err = parser.parse().unwrap_err();
1473        assert!(err.contains("Invalid hex literal"));
1474    }
1475
1476    #[test]
1477    fn test_parse_invalid_binary_literal() {
1478        let source = ": test ( -- ) 0b123 ;";
1479        let mut parser = Parser::new(source);
1480        let err = parser.parse().unwrap_err();
1481        assert!(err.contains("Invalid binary literal"));
1482    }
1483
1484    #[test]
1485    fn test_parse_escaped_quotes() {
1486        let source = r#": main ( -- ) "Say \"hello\" there" write_line ;"#;
1487
1488        let mut parser = Parser::new(source);
1489        let program = parser.parse().unwrap();
1490
1491        assert_eq!(program.words.len(), 1);
1492        assert_eq!(program.words[0].body.len(), 2);
1493
1494        match &program.words[0].body[0] {
1495            // Escape sequences should be processed: \" becomes actual quote
1496            Statement::StringLiteral(s) => assert_eq!(s, "Say \"hello\" there"),
1497            _ => panic!("Expected StringLiteral with escaped quotes"),
1498        }
1499    }
1500
1501    /// Regression test for issue #117: escaped quote at end of string
1502    /// Previously failed with "String ends with incomplete escape sequence"
1503    #[test]
1504    fn test_escaped_quote_at_end_of_string() {
1505        let source = r#": main ( -- ) "hello\"" io.write-line ;"#;
1506
1507        let mut parser = Parser::new(source);
1508        let program = parser.parse().unwrap();
1509
1510        assert_eq!(program.words.len(), 1);
1511        match &program.words[0].body[0] {
1512            Statement::StringLiteral(s) => assert_eq!(s, "hello\""),
1513            _ => panic!("Expected StringLiteral ending with escaped quote"),
1514        }
1515    }
1516
1517    /// Test escaped quote at start of string (boundary case)
1518    #[test]
1519    fn test_escaped_quote_at_start_of_string() {
1520        let source = r#": main ( -- ) "\"hello" io.write-line ;"#;
1521
1522        let mut parser = Parser::new(source);
1523        let program = parser.parse().unwrap();
1524
1525        match &program.words[0].body[0] {
1526            Statement::StringLiteral(s) => assert_eq!(s, "\"hello"),
1527            _ => panic!("Expected StringLiteral starting with escaped quote"),
1528        }
1529    }
1530
1531    #[test]
1532    fn test_escape_sequences() {
1533        let source = r#": main ( -- ) "Line 1\nLine 2\tTabbed" write_line ;"#;
1534
1535        let mut parser = Parser::new(source);
1536        let program = parser.parse().unwrap();
1537
1538        match &program.words[0].body[0] {
1539            Statement::StringLiteral(s) => assert_eq!(s, "Line 1\nLine 2\tTabbed"),
1540            _ => panic!("Expected StringLiteral"),
1541        }
1542    }
1543
1544    #[test]
1545    fn test_unknown_escape_sequence() {
1546        let source = r#": main ( -- ) "Bad \q sequence" write_line ;"#;
1547
1548        let mut parser = Parser::new(source);
1549        let result = parser.parse();
1550
1551        assert!(result.is_err());
1552        assert!(result.unwrap_err().contains("Unknown escape sequence"));
1553    }
1554
1555    #[test]
1556    fn test_hex_escape_sequence() {
1557        // \x1b is ESC (27), \x41 is 'A' (65)
1558        let source = r#": main ( -- ) "\x1b[2K\x41" io.write-line ;"#;
1559
1560        let mut parser = Parser::new(source);
1561        let program = parser.parse().unwrap();
1562
1563        match &program.words[0].body[0] {
1564            Statement::StringLiteral(s) => {
1565                assert_eq!(s.len(), 5); // ESC [ 2 K A
1566                assert_eq!(s.as_bytes()[0], 0x1b); // ESC
1567                assert_eq!(s.as_bytes()[4], 0x41); // 'A'
1568            }
1569            _ => panic!("Expected StringLiteral"),
1570        }
1571    }
1572
1573    #[test]
1574    fn test_hex_escape_null_byte() {
1575        let source = r#": main ( -- ) "before\x00after" io.write-line ;"#;
1576
1577        let mut parser = Parser::new(source);
1578        let program = parser.parse().unwrap();
1579
1580        match &program.words[0].body[0] {
1581            Statement::StringLiteral(s) => {
1582                assert_eq!(s.len(), 12); // "before" + NUL + "after"
1583                assert_eq!(s.as_bytes()[6], 0x00);
1584            }
1585            _ => panic!("Expected StringLiteral"),
1586        }
1587    }
1588
1589    #[test]
1590    fn test_hex_escape_uppercase() {
1591        // Both uppercase and lowercase hex digits should work
1592        // Note: Values > 0x7F become Unicode code points (U+00NN), multi-byte in UTF-8
1593        let source = r#": main ( -- ) "\x41\x42\x4F" io.write-line ;"#;
1594
1595        let mut parser = Parser::new(source);
1596        let program = parser.parse().unwrap();
1597
1598        match &program.words[0].body[0] {
1599            Statement::StringLiteral(s) => {
1600                assert_eq!(s, "ABO"); // 0x41='A', 0x42='B', 0x4F='O'
1601            }
1602            _ => panic!("Expected StringLiteral"),
1603        }
1604    }
1605
1606    #[test]
1607    fn test_hex_escape_high_bytes() {
1608        // Values > 0x7F become Unicode code points (Latin-1), which are multi-byte in UTF-8
1609        let source = r#": main ( -- ) "\xFF" io.write-line ;"#;
1610
1611        let mut parser = Parser::new(source);
1612        let program = parser.parse().unwrap();
1613
1614        match &program.words[0].body[0] {
1615            Statement::StringLiteral(s) => {
1616                // \xFF becomes U+00FF (ÿ), which is 2 bytes in UTF-8: C3 BF
1617                assert_eq!(s, "\u{00FF}");
1618                assert_eq!(s.chars().next().unwrap(), 'ÿ');
1619            }
1620            _ => panic!("Expected StringLiteral"),
1621        }
1622    }
1623
1624    #[test]
1625    fn test_hex_escape_incomplete() {
1626        // \x with only one hex digit
1627        let source = r#": main ( -- ) "\x1" io.write-line ;"#;
1628
1629        let mut parser = Parser::new(source);
1630        let result = parser.parse();
1631
1632        assert!(result.is_err());
1633        assert!(result.unwrap_err().contains("Incomplete hex escape"));
1634    }
1635
1636    #[test]
1637    fn test_hex_escape_invalid_digits() {
1638        // \xGG is not valid hex
1639        let source = r#": main ( -- ) "\xGG" io.write-line ;"#;
1640
1641        let mut parser = Parser::new(source);
1642        let result = parser.parse();
1643
1644        assert!(result.is_err());
1645        assert!(result.unwrap_err().contains("Invalid hex escape"));
1646    }
1647
1648    #[test]
1649    fn test_hex_escape_at_end_of_string() {
1650        // \x at end of string with no digits
1651        let source = r#": main ( -- ) "test\x" io.write-line ;"#;
1652
1653        let mut parser = Parser::new(source);
1654        let result = parser.parse();
1655
1656        assert!(result.is_err());
1657        assert!(result.unwrap_err().contains("Incomplete hex escape"));
1658    }
1659
1660    #[test]
1661    fn test_unclosed_string_literal() {
1662        let source = r#": main ( -- ) "unclosed string ;"#;
1663
1664        let mut parser = Parser::new(source);
1665        let result = parser.parse();
1666
1667        assert!(result.is_err());
1668        let err_msg = result.unwrap_err();
1669        assert!(err_msg.contains("Unclosed string literal"));
1670        // Should include position information (line 1, column 15 for the opening quote)
1671        assert!(
1672            err_msg.contains("line 1"),
1673            "Expected line number in error: {}",
1674            err_msg
1675        );
1676        assert!(
1677            err_msg.contains("column 15"),
1678            "Expected column number in error: {}",
1679            err_msg
1680        );
1681    }
1682
1683    #[test]
1684    fn test_multiple_word_definitions() {
1685        let source = r#"
1686: double ( Int -- Int )
1687  2 multiply ;
1688
1689: quadruple ( Int -- Int )
1690  double double ;
1691"#;
1692
1693        let mut parser = Parser::new(source);
1694        let program = parser.parse().unwrap();
1695
1696        assert_eq!(program.words.len(), 2);
1697        assert_eq!(program.words[0].name, "double");
1698        assert_eq!(program.words[1].name, "quadruple");
1699
1700        // Verify stack effects were parsed
1701        assert!(program.words[0].effect.is_some());
1702        assert!(program.words[1].effect.is_some());
1703    }
1704
1705    #[test]
1706    fn test_user_word_calling_user_word() {
1707        let source = r#"
1708: helper ( -- )
1709  "helper called" write_line ;
1710
1711: main ( -- )
1712  helper ;
1713"#;
1714
1715        let mut parser = Parser::new(source);
1716        let program = parser.parse().unwrap();
1717
1718        assert_eq!(program.words.len(), 2);
1719
1720        // Check main calls helper
1721        match &program.words[1].body[0] {
1722            Statement::WordCall { name, .. } => assert_eq!(name, "helper"),
1723            _ => panic!("Expected WordCall to helper"),
1724        }
1725    }
1726
1727    #[test]
1728    fn test_parse_simple_stack_effect() {
1729        // Test: ( Int -- Bool )
1730        // With implicit row polymorphism, this becomes: ( ..rest Int -- ..rest Bool )
1731        let source = ": test ( Int -- Bool ) 1 ;";
1732        let mut parser = Parser::new(source);
1733        let program = parser.parse().unwrap();
1734
1735        assert_eq!(program.words.len(), 1);
1736        let word = &program.words[0];
1737        assert!(word.effect.is_some());
1738
1739        let effect = word.effect.as_ref().unwrap();
1740
1741        // Input: Int on RowVar("rest") (implicit row polymorphism)
1742        assert_eq!(
1743            effect.inputs,
1744            StackType::Cons {
1745                rest: Box::new(StackType::RowVar("rest".to_string())),
1746                top: Type::Int
1747            }
1748        );
1749
1750        // Output: Bool on RowVar("rest") (implicit row polymorphism)
1751        assert_eq!(
1752            effect.outputs,
1753            StackType::Cons {
1754                rest: Box::new(StackType::RowVar("rest".to_string())),
1755                top: Type::Bool
1756            }
1757        );
1758    }
1759
1760    #[test]
1761    fn test_parse_row_polymorphic_stack_effect() {
1762        // Test: ( ..a Int -- ..a Bool )
1763        let source = ": test ( ..a Int -- ..a Bool ) 1 ;";
1764        let mut parser = Parser::new(source);
1765        let program = parser.parse().unwrap();
1766
1767        assert_eq!(program.words.len(), 1);
1768        let word = &program.words[0];
1769        assert!(word.effect.is_some());
1770
1771        let effect = word.effect.as_ref().unwrap();
1772
1773        // Input: Int on RowVar("a")
1774        assert_eq!(
1775            effect.inputs,
1776            StackType::Cons {
1777                rest: Box::new(StackType::RowVar("a".to_string())),
1778                top: Type::Int
1779            }
1780        );
1781
1782        // Output: Bool on RowVar("a")
1783        assert_eq!(
1784            effect.outputs,
1785            StackType::Cons {
1786                rest: Box::new(StackType::RowVar("a".to_string())),
1787                top: Type::Bool
1788            }
1789        );
1790    }
1791
1792    #[test]
1793    fn test_parse_invalid_row_var_starts_with_digit() {
1794        // Test: Row variable cannot start with digit
1795        let source = ": test ( ..123 Int -- ) ;";
1796        let mut parser = Parser::new(source);
1797        let result = parser.parse();
1798
1799        assert!(result.is_err());
1800        let err_msg = result.unwrap_err();
1801        assert!(
1802            err_msg.contains("lowercase letter"),
1803            "Expected error about lowercase letter, got: {}",
1804            err_msg
1805        );
1806    }
1807
1808    #[test]
1809    fn test_parse_invalid_row_var_starts_with_uppercase() {
1810        // Test: Row variable cannot start with uppercase (that's a type variable)
1811        let source = ": test ( ..Int Int -- ) ;";
1812        let mut parser = Parser::new(source);
1813        let result = parser.parse();
1814
1815        assert!(result.is_err());
1816        let err_msg = result.unwrap_err();
1817        assert!(
1818            err_msg.contains("lowercase letter") || err_msg.contains("type name"),
1819            "Expected error about lowercase letter or type name, got: {}",
1820            err_msg
1821        );
1822    }
1823
1824    #[test]
1825    fn test_parse_invalid_row_var_with_special_chars() {
1826        // Test: Row variable cannot contain special characters
1827        let source = ": test ( ..a-b Int -- ) ;";
1828        let mut parser = Parser::new(source);
1829        let result = parser.parse();
1830
1831        assert!(result.is_err());
1832        let err_msg = result.unwrap_err();
1833        assert!(
1834            err_msg.contains("letters, numbers, and underscores")
1835                || err_msg.contains("Unknown type"),
1836            "Expected error about valid characters, got: {}",
1837            err_msg
1838        );
1839    }
1840
1841    #[test]
1842    fn test_parse_valid_row_var_with_underscore() {
1843        // Test: Row variable CAN contain underscore
1844        let source = ": test ( ..my_row Int -- ..my_row Bool ) ;";
1845        let mut parser = Parser::new(source);
1846        let result = parser.parse();
1847
1848        assert!(result.is_ok(), "Should accept row variable with underscore");
1849    }
1850
1851    #[test]
1852    fn test_parse_multiple_types_stack_effect() {
1853        // Test: ( Int String -- Bool )
1854        // With implicit row polymorphism: ( ..rest Int String -- ..rest Bool )
1855        let source = ": test ( Int String -- Bool ) 1 ;";
1856        let mut parser = Parser::new(source);
1857        let program = parser.parse().unwrap();
1858
1859        let effect = program.words[0].effect.as_ref().unwrap();
1860
1861        // Input: String on Int on RowVar("rest")
1862        let (rest, top) = effect.inputs.clone().pop().unwrap();
1863        assert_eq!(top, Type::String);
1864        let (rest2, top2) = rest.pop().unwrap();
1865        assert_eq!(top2, Type::Int);
1866        assert_eq!(rest2, StackType::RowVar("rest".to_string()));
1867
1868        // Output: Bool on RowVar("rest") (implicit row polymorphism)
1869        assert_eq!(
1870            effect.outputs,
1871            StackType::Cons {
1872                rest: Box::new(StackType::RowVar("rest".to_string())),
1873                top: Type::Bool
1874            }
1875        );
1876    }
1877
1878    #[test]
1879    fn test_parse_type_variable() {
1880        // Test: ( ..a T -- ..a T T ) for dup
1881        let source = ": dup ( ..a T -- ..a T T ) ;";
1882        let mut parser = Parser::new(source);
1883        let program = parser.parse().unwrap();
1884
1885        let effect = program.words[0].effect.as_ref().unwrap();
1886
1887        // Input: T on RowVar("a")
1888        assert_eq!(
1889            effect.inputs,
1890            StackType::Cons {
1891                rest: Box::new(StackType::RowVar("a".to_string())),
1892                top: Type::Var("T".to_string())
1893            }
1894        );
1895
1896        // Output: T on T on RowVar("a")
1897        let (rest, top) = effect.outputs.clone().pop().unwrap();
1898        assert_eq!(top, Type::Var("T".to_string()));
1899        let (rest2, top2) = rest.pop().unwrap();
1900        assert_eq!(top2, Type::Var("T".to_string()));
1901        assert_eq!(rest2, StackType::RowVar("a".to_string()));
1902    }
1903
1904    #[test]
1905    fn test_parse_empty_stack_effect() {
1906        // Test: ( -- )
1907        // In concatenative languages, even empty effects are row-polymorphic
1908        // ( -- ) means ( ..rest -- ..rest ) - preserves stack
1909        let source = ": test ( -- ) ;";
1910        let mut parser = Parser::new(source);
1911        let program = parser.parse().unwrap();
1912
1913        let effect = program.words[0].effect.as_ref().unwrap();
1914
1915        // Both inputs and outputs should use the same implicit row variable
1916        assert_eq!(effect.inputs, StackType::RowVar("rest".to_string()));
1917        assert_eq!(effect.outputs, StackType::RowVar("rest".to_string()));
1918    }
1919
1920    #[test]
1921    fn test_parse_invalid_type() {
1922        // Test invalid type (lowercase, not a row var)
1923        let source = ": test ( invalid -- Bool ) ;";
1924        let mut parser = Parser::new(source);
1925        let result = parser.parse();
1926
1927        assert!(result.is_err());
1928        assert!(result.unwrap_err().contains("Unknown type"));
1929    }
1930
1931    #[test]
1932    fn test_parse_unclosed_stack_effect() {
1933        // Test unclosed stack effect - parser tries to parse all tokens until ')' or EOF
1934        // In this case, it encounters "body" which is an invalid type
1935        let source = ": test ( Int -- Bool body ;";
1936        let mut parser = Parser::new(source);
1937        let result = parser.parse();
1938
1939        assert!(result.is_err());
1940        let err_msg = result.unwrap_err();
1941        // Parser will try to parse "body" as a type and fail
1942        assert!(err_msg.contains("Unknown type"));
1943    }
1944
1945    #[test]
1946    fn test_parse_simple_quotation_type() {
1947        // Test: ( [Int -- Int] -- )
1948        let source = ": apply ( [Int -- Int] -- ) ;";
1949        let mut parser = Parser::new(source);
1950        let program = parser.parse().unwrap();
1951
1952        let effect = program.words[0].effect.as_ref().unwrap();
1953
1954        // Input should be: Quotation(Int -- Int) on RowVar("rest")
1955        let (rest, top) = effect.inputs.clone().pop().unwrap();
1956        match top {
1957            Type::Quotation(quot_effect) => {
1958                // Check quotation's input: Int on RowVar("rest")
1959                assert_eq!(
1960                    quot_effect.inputs,
1961                    StackType::Cons {
1962                        rest: Box::new(StackType::RowVar("rest".to_string())),
1963                        top: Type::Int
1964                    }
1965                );
1966                // Check quotation's output: Int on RowVar("rest")
1967                assert_eq!(
1968                    quot_effect.outputs,
1969                    StackType::Cons {
1970                        rest: Box::new(StackType::RowVar("rest".to_string())),
1971                        top: Type::Int
1972                    }
1973                );
1974            }
1975            _ => panic!("Expected Quotation type, got {:?}", top),
1976        }
1977        assert_eq!(rest, StackType::RowVar("rest".to_string()));
1978    }
1979
1980    #[test]
1981    fn test_parse_quotation_type_with_row_vars() {
1982        // Test: ( ..a [..a T -- ..a Bool] -- ..a )
1983        let source = ": test ( ..a [..a T -- ..a Bool] -- ..a ) ;";
1984        let mut parser = Parser::new(source);
1985        let program = parser.parse().unwrap();
1986
1987        let effect = program.words[0].effect.as_ref().unwrap();
1988
1989        // Input: Quotation on RowVar("a")
1990        let (rest, top) = effect.inputs.clone().pop().unwrap();
1991        match top {
1992            Type::Quotation(quot_effect) => {
1993                // Check quotation's input: T on RowVar("a")
1994                let (q_in_rest, q_in_top) = quot_effect.inputs.clone().pop().unwrap();
1995                assert_eq!(q_in_top, Type::Var("T".to_string()));
1996                assert_eq!(q_in_rest, StackType::RowVar("a".to_string()));
1997
1998                // Check quotation's output: Bool on RowVar("a")
1999                let (q_out_rest, q_out_top) = quot_effect.outputs.clone().pop().unwrap();
2000                assert_eq!(q_out_top, Type::Bool);
2001                assert_eq!(q_out_rest, StackType::RowVar("a".to_string()));
2002            }
2003            _ => panic!("Expected Quotation type, got {:?}", top),
2004        }
2005        assert_eq!(rest, StackType::RowVar("a".to_string()));
2006    }
2007
2008    #[test]
2009    fn test_parse_nested_quotation_type() {
2010        // Test: ( [[Int -- Int] -- Bool] -- )
2011        let source = ": nested ( [[Int -- Int] -- Bool] -- ) ;";
2012        let mut parser = Parser::new(source);
2013        let program = parser.parse().unwrap();
2014
2015        let effect = program.words[0].effect.as_ref().unwrap();
2016
2017        // Input: Quotation([Int -- Int] -- Bool) on RowVar("rest")
2018        let (_, top) = effect.inputs.clone().pop().unwrap();
2019        match top {
2020            Type::Quotation(outer_effect) => {
2021                // Outer quotation input: [Int -- Int] on RowVar("rest")
2022                let (_, outer_in_top) = outer_effect.inputs.clone().pop().unwrap();
2023                match outer_in_top {
2024                    Type::Quotation(inner_effect) => {
2025                        // Inner quotation: Int -- Int
2026                        assert!(matches!(
2027                            inner_effect.inputs.clone().pop().unwrap().1,
2028                            Type::Int
2029                        ));
2030                        assert!(matches!(
2031                            inner_effect.outputs.clone().pop().unwrap().1,
2032                            Type::Int
2033                        ));
2034                    }
2035                    _ => panic!("Expected nested Quotation type"),
2036                }
2037
2038                // Outer quotation output: Bool
2039                let (_, outer_out_top) = outer_effect.outputs.clone().pop().unwrap();
2040                assert_eq!(outer_out_top, Type::Bool);
2041            }
2042            _ => panic!("Expected Quotation type"),
2043        }
2044    }
2045
2046    #[test]
2047    fn test_parse_deeply_nested_quotation_type_exceeds_limit() {
2048        // Test: Deeply nested quotation types should fail with max depth error
2049        // Build a quotation type nested 35 levels deep (exceeds MAX_QUOTATION_DEPTH = 32)
2050        let mut source = String::from(": deep ( ");
2051
2052        // Build opening brackets: [[[[[[...
2053        for _ in 0..35 {
2054            source.push_str("[ -- ");
2055        }
2056
2057        source.push_str("Int");
2058
2059        // Build closing brackets: ...]]]]]]
2060        for _ in 0..35 {
2061            source.push_str(" ]");
2062        }
2063
2064        source.push_str(" -- ) ;");
2065
2066        let mut parser = Parser::new(&source);
2067        let result = parser.parse();
2068
2069        // Should fail with depth limit error
2070        assert!(result.is_err());
2071        let err_msg = result.unwrap_err();
2072        assert!(
2073            err_msg.contains("depth") || err_msg.contains("32"),
2074            "Expected depth limit error, got: {}",
2075            err_msg
2076        );
2077    }
2078
2079    #[test]
2080    fn test_parse_empty_quotation_type() {
2081        // Test: ( [ -- ] -- )
2082        // An empty quotation type is also row-polymorphic: [ ..rest -- ..rest ]
2083        let source = ": empty-quot ( [ -- ] -- ) ;";
2084        let mut parser = Parser::new(source);
2085        let program = parser.parse().unwrap();
2086
2087        let effect = program.words[0].effect.as_ref().unwrap();
2088
2089        let (_, top) = effect.inputs.clone().pop().unwrap();
2090        match top {
2091            Type::Quotation(quot_effect) => {
2092                // Empty quotation preserves the stack (row-polymorphic)
2093                assert_eq!(quot_effect.inputs, StackType::RowVar("rest".to_string()));
2094                assert_eq!(quot_effect.outputs, StackType::RowVar("rest".to_string()));
2095            }
2096            _ => panic!("Expected Quotation type"),
2097        }
2098    }
2099
2100    #[test]
2101    fn test_parse_quotation_type_in_output() {
2102        // Test: ( -- [Int -- Int] )
2103        let source = ": maker ( -- [Int -- Int] ) ;";
2104        let mut parser = Parser::new(source);
2105        let program = parser.parse().unwrap();
2106
2107        let effect = program.words[0].effect.as_ref().unwrap();
2108
2109        // Output should be: Quotation(Int -- Int) on RowVar("rest")
2110        let (_, top) = effect.outputs.clone().pop().unwrap();
2111        match top {
2112            Type::Quotation(quot_effect) => {
2113                assert!(matches!(
2114                    quot_effect.inputs.clone().pop().unwrap().1,
2115                    Type::Int
2116                ));
2117                assert!(matches!(
2118                    quot_effect.outputs.clone().pop().unwrap().1,
2119                    Type::Int
2120                ));
2121            }
2122            _ => panic!("Expected Quotation type"),
2123        }
2124    }
2125
2126    #[test]
2127    fn test_parse_unclosed_quotation_type() {
2128        // Test: ( [Int -- Int -- )  (missing ])
2129        let source = ": broken ( [Int -- Int -- ) ;";
2130        let mut parser = Parser::new(source);
2131        let result = parser.parse();
2132
2133        assert!(result.is_err());
2134        let err_msg = result.unwrap_err();
2135        // Parser might error with various messages depending on where it fails
2136        // It should at least indicate a parsing problem
2137        assert!(
2138            err_msg.contains("Unclosed")
2139                || err_msg.contains("Expected")
2140                || err_msg.contains("Unexpected"),
2141            "Got error: {}",
2142            err_msg
2143        );
2144    }
2145
2146    #[test]
2147    fn test_parse_multiple_quotation_types() {
2148        // Test: ( [Int -- Int] [String -- Bool] -- )
2149        let source = ": multi ( [Int -- Int] [String -- Bool] -- ) ;";
2150        let mut parser = Parser::new(source);
2151        let program = parser.parse().unwrap();
2152
2153        let effect = program.words[0].effect.as_ref().unwrap();
2154
2155        // Pop second quotation (String -- Bool)
2156        let (rest, top) = effect.inputs.clone().pop().unwrap();
2157        match top {
2158            Type::Quotation(quot_effect) => {
2159                assert!(matches!(
2160                    quot_effect.inputs.clone().pop().unwrap().1,
2161                    Type::String
2162                ));
2163                assert!(matches!(
2164                    quot_effect.outputs.clone().pop().unwrap().1,
2165                    Type::Bool
2166                ));
2167            }
2168            _ => panic!("Expected Quotation type"),
2169        }
2170
2171        // Pop first quotation (Int -- Int)
2172        let (_, top2) = rest.pop().unwrap();
2173        match top2 {
2174            Type::Quotation(quot_effect) => {
2175                assert!(matches!(
2176                    quot_effect.inputs.clone().pop().unwrap().1,
2177                    Type::Int
2178                ));
2179                assert!(matches!(
2180                    quot_effect.outputs.clone().pop().unwrap().1,
2181                    Type::Int
2182                ));
2183            }
2184            _ => panic!("Expected Quotation type"),
2185        }
2186    }
2187
2188    #[test]
2189    fn test_parse_quotation_type_without_separator() {
2190        // Test: ( [Int] -- ) should be REJECTED
2191        //
2192        // Design decision: The '--' separator is REQUIRED for clarity.
2193        // [Int] looks like a list type in most languages, not a consumer function.
2194        // This would confuse users.
2195        //
2196        // Require explicit syntax:
2197        // - `[Int -- ]` for quotation that consumes Int and produces nothing
2198        // - `[ -- Int]` for quotation that produces Int
2199        // - `[Int -- Int]` for transformation
2200        let source = ": consumer ( [Int] -- ) ;";
2201        let mut parser = Parser::new(source);
2202        let result = parser.parse();
2203
2204        // Should fail with helpful error message
2205        assert!(result.is_err());
2206        let err_msg = result.unwrap_err();
2207        assert!(
2208            err_msg.contains("require") && err_msg.contains("--"),
2209            "Expected error about missing '--' separator, got: {}",
2210            err_msg
2211        );
2212    }
2213
2214    #[test]
2215    fn test_parse_bare_quotation_type_rejected() {
2216        // Test: ( Int Quotation -- Int ) should be REJECTED
2217        //
2218        // 'Quotation' looks like a type name but would be silently treated as a
2219        // type variable without this check. Users must use explicit effect syntax.
2220        let source = ": apply-twice ( Int Quotation -- Int ) ;";
2221        let mut parser = Parser::new(source);
2222        let result = parser.parse();
2223
2224        assert!(result.is_err());
2225        let err_msg = result.unwrap_err();
2226        assert!(
2227            err_msg.contains("Quotation") && err_msg.contains("not a valid type"),
2228            "Expected error about 'Quotation' not being valid, got: {}",
2229            err_msg
2230        );
2231        assert!(
2232            err_msg.contains("[Int -- Int]") || err_msg.contains("[ -- ]"),
2233            "Expected error to suggest explicit syntax, got: {}",
2234            err_msg
2235        );
2236    }
2237
2238    #[test]
2239    fn test_parse_no_stack_effect() {
2240        // Test word without stack effect (should still work)
2241        let source = ": test 1 2 add ;";
2242        let mut parser = Parser::new(source);
2243        let program = parser.parse().unwrap();
2244
2245        assert_eq!(program.words.len(), 1);
2246        assert!(program.words[0].effect.is_none());
2247    }
2248
2249    #[test]
2250    fn test_parse_simple_quotation() {
2251        let source = r#"
2252: test ( -- Quot )
2253  [ 1 add ] ;
2254"#;
2255
2256        let mut parser = Parser::new(source);
2257        let program = parser.parse().unwrap();
2258
2259        assert_eq!(program.words.len(), 1);
2260        assert_eq!(program.words[0].name, "test");
2261        assert_eq!(program.words[0].body.len(), 1);
2262
2263        match &program.words[0].body[0] {
2264            Statement::Quotation { body, .. } => {
2265                assert_eq!(body.len(), 2);
2266                assert_eq!(body[0], Statement::IntLiteral(1));
2267                assert!(matches!(&body[1], Statement::WordCall { name, .. } if name == "add"));
2268            }
2269            _ => panic!("Expected Quotation statement"),
2270        }
2271    }
2272
2273    #[test]
2274    fn test_parse_empty_quotation() {
2275        let source = ": test [ ] ;";
2276
2277        let mut parser = Parser::new(source);
2278        let program = parser.parse().unwrap();
2279
2280        assert_eq!(program.words.len(), 1);
2281
2282        match &program.words[0].body[0] {
2283            Statement::Quotation { body, .. } => {
2284                assert_eq!(body.len(), 0);
2285            }
2286            _ => panic!("Expected Quotation statement"),
2287        }
2288    }
2289
2290    #[test]
2291    fn test_parse_quotation_with_call() {
2292        let source = r#"
2293: test ( -- )
2294  5 [ 1 add ] call ;
2295"#;
2296
2297        let mut parser = Parser::new(source);
2298        let program = parser.parse().unwrap();
2299
2300        assert_eq!(program.words.len(), 1);
2301        assert_eq!(program.words[0].body.len(), 3);
2302
2303        assert_eq!(program.words[0].body[0], Statement::IntLiteral(5));
2304
2305        match &program.words[0].body[1] {
2306            Statement::Quotation { body, .. } => {
2307                assert_eq!(body.len(), 2);
2308            }
2309            _ => panic!("Expected Quotation"),
2310        }
2311
2312        assert!(matches!(
2313            &program.words[0].body[2],
2314            Statement::WordCall { name, .. } if name == "call"
2315        ));
2316    }
2317
2318    #[test]
2319    fn test_parse_nested_quotation() {
2320        let source = ": test [ [ 1 add ] call ] ;";
2321
2322        let mut parser = Parser::new(source);
2323        let program = parser.parse().unwrap();
2324
2325        assert_eq!(program.words.len(), 1);
2326
2327        match &program.words[0].body[0] {
2328            Statement::Quotation {
2329                body: outer_body, ..
2330            } => {
2331                assert_eq!(outer_body.len(), 2);
2332
2333                match &outer_body[0] {
2334                    Statement::Quotation {
2335                        body: inner_body, ..
2336                    } => {
2337                        assert_eq!(inner_body.len(), 2);
2338                        assert_eq!(inner_body[0], Statement::IntLiteral(1));
2339                        assert!(
2340                            matches!(&inner_body[1], Statement::WordCall { name, .. } if name == "add")
2341                        );
2342                    }
2343                    _ => panic!("Expected nested Quotation"),
2344                }
2345
2346                assert!(
2347                    matches!(&outer_body[1], Statement::WordCall { name, .. } if name == "call")
2348                );
2349            }
2350            _ => panic!("Expected Quotation"),
2351        }
2352    }
2353
2354    #[test]
2355    fn test_parse_while_with_quotations() {
2356        let source = r#"
2357: countdown ( Int -- )
2358  [ dup 0 > ] [ 1 subtract ] while drop ;
2359"#;
2360
2361        let mut parser = Parser::new(source);
2362        let program = parser.parse().unwrap();
2363
2364        assert_eq!(program.words.len(), 1);
2365        assert_eq!(program.words[0].body.len(), 4);
2366
2367        // First quotation: [ dup 0 > ]
2368        match &program.words[0].body[0] {
2369            Statement::Quotation { body: pred, .. } => {
2370                assert_eq!(pred.len(), 3);
2371                assert!(matches!(&pred[0], Statement::WordCall { name, .. } if name == "dup"));
2372                assert_eq!(pred[1], Statement::IntLiteral(0));
2373                assert!(matches!(&pred[2], Statement::WordCall { name, .. } if name == ">"));
2374            }
2375            _ => panic!("Expected predicate quotation"),
2376        }
2377
2378        // Second quotation: [ 1 subtract ]
2379        match &program.words[0].body[1] {
2380            Statement::Quotation { body, .. } => {
2381                assert_eq!(body.len(), 2);
2382                assert_eq!(body[0], Statement::IntLiteral(1));
2383                assert!(matches!(&body[1], Statement::WordCall { name, .. } if name == "subtract"));
2384            }
2385            _ => panic!("Expected body quotation"),
2386        }
2387
2388        // while call
2389        assert!(matches!(
2390            &program.words[0].body[2],
2391            Statement::WordCall { name, .. } if name == "while"
2392        ));
2393
2394        // drop
2395        assert!(matches!(
2396            &program.words[0].body[3],
2397            Statement::WordCall { name, .. } if name == "drop"
2398        ));
2399    }
2400
2401    #[test]
2402    fn test_parse_simple_closure_type() {
2403        // Test: ( Int -- Closure[Int -- Int] )
2404        let source = ": make-adder ( Int -- Closure[Int -- Int] ) ;";
2405        let mut parser = Parser::new(source);
2406        let program = parser.parse().unwrap();
2407
2408        assert_eq!(program.words.len(), 1);
2409        let word = &program.words[0];
2410        assert!(word.effect.is_some());
2411
2412        let effect = word.effect.as_ref().unwrap();
2413
2414        // Input: Int on RowVar("rest")
2415        let (input_rest, input_top) = effect.inputs.clone().pop().unwrap();
2416        assert_eq!(input_top, Type::Int);
2417        assert_eq!(input_rest, StackType::RowVar("rest".to_string()));
2418
2419        // Output: Closure[Int -- Int] on RowVar("rest")
2420        let (output_rest, output_top) = effect.outputs.clone().pop().unwrap();
2421        match output_top {
2422            Type::Closure { effect, captures } => {
2423                // Closure effect: Int -> Int
2424                assert_eq!(
2425                    effect.inputs,
2426                    StackType::Cons {
2427                        rest: Box::new(StackType::RowVar("rest".to_string())),
2428                        top: Type::Int
2429                    }
2430                );
2431                assert_eq!(
2432                    effect.outputs,
2433                    StackType::Cons {
2434                        rest: Box::new(StackType::RowVar("rest".to_string())),
2435                        top: Type::Int
2436                    }
2437                );
2438                // Captures should be empty (filled in by type checker)
2439                assert_eq!(captures.len(), 0);
2440            }
2441            _ => panic!("Expected Closure type, got {:?}", output_top),
2442        }
2443        assert_eq!(output_rest, StackType::RowVar("rest".to_string()));
2444    }
2445
2446    #[test]
2447    fn test_parse_closure_type_with_row_vars() {
2448        // Test: ( ..a Config -- ..a Closure[Request -- Response] )
2449        let source = ": make-handler ( ..a Config -- ..a Closure[Request -- Response] ) ;";
2450        let mut parser = Parser::new(source);
2451        let program = parser.parse().unwrap();
2452
2453        let effect = program.words[0].effect.as_ref().unwrap();
2454
2455        // Output: Closure on RowVar("a")
2456        let (rest, top) = effect.outputs.clone().pop().unwrap();
2457        match top {
2458            Type::Closure { effect, .. } => {
2459                // Closure effect: Request -> Response
2460                let (_, in_top) = effect.inputs.clone().pop().unwrap();
2461                assert_eq!(in_top, Type::Var("Request".to_string()));
2462                let (_, out_top) = effect.outputs.clone().pop().unwrap();
2463                assert_eq!(out_top, Type::Var("Response".to_string()));
2464            }
2465            _ => panic!("Expected Closure type"),
2466        }
2467        assert_eq!(rest, StackType::RowVar("a".to_string()));
2468    }
2469
2470    #[test]
2471    fn test_parse_closure_type_missing_bracket() {
2472        // Test: ( Int -- Closure ) should fail
2473        let source = ": broken ( Int -- Closure ) ;";
2474        let mut parser = Parser::new(source);
2475        let result = parser.parse();
2476
2477        assert!(result.is_err());
2478        let err_msg = result.unwrap_err();
2479        assert!(
2480            err_msg.contains("[") && err_msg.contains("Closure"),
2481            "Expected error about missing '[' after Closure, got: {}",
2482            err_msg
2483        );
2484    }
2485
2486    #[test]
2487    fn test_parse_closure_type_in_input() {
2488        // Test: ( Closure[Int -- Int] -- )
2489        let source = ": apply-closure ( Closure[Int -- Int] -- ) ;";
2490        let mut parser = Parser::new(source);
2491        let program = parser.parse().unwrap();
2492
2493        let effect = program.words[0].effect.as_ref().unwrap();
2494
2495        // Input: Closure[Int -- Int] on RowVar("rest")
2496        let (_, top) = effect.inputs.clone().pop().unwrap();
2497        match top {
2498            Type::Closure { effect, .. } => {
2499                // Verify closure effect
2500                assert!(matches!(effect.inputs.clone().pop().unwrap().1, Type::Int));
2501                assert!(matches!(effect.outputs.clone().pop().unwrap().1, Type::Int));
2502            }
2503            _ => panic!("Expected Closure type in input"),
2504        }
2505    }
2506
2507    // Tests for token position tracking
2508
2509    #[test]
2510    fn test_token_position_single_line() {
2511        // Test token positions on a single line
2512        let source = ": main ( -- ) ;";
2513        let tokens = tokenize(source);
2514
2515        // : is at line 0, column 0
2516        assert_eq!(tokens[0].text, ":");
2517        assert_eq!(tokens[0].line, 0);
2518        assert_eq!(tokens[0].column, 0);
2519
2520        // main is at line 0, column 2
2521        assert_eq!(tokens[1].text, "main");
2522        assert_eq!(tokens[1].line, 0);
2523        assert_eq!(tokens[1].column, 2);
2524
2525        // ( is at line 0, column 7
2526        assert_eq!(tokens[2].text, "(");
2527        assert_eq!(tokens[2].line, 0);
2528        assert_eq!(tokens[2].column, 7);
2529    }
2530
2531    #[test]
2532    fn test_token_position_multiline() {
2533        // Test token positions across multiple lines
2534        let source = ": main ( -- )\n  42\n;";
2535        let tokens = tokenize(source);
2536
2537        // Find the 42 token (after the newline)
2538        let token_42 = tokens.iter().find(|t| t.text == "42").unwrap();
2539        assert_eq!(token_42.line, 1);
2540        assert_eq!(token_42.column, 2); // After 2 spaces of indentation
2541
2542        // Find the ; token (on line 2)
2543        let token_semi = tokens.iter().find(|t| t.text == ";").unwrap();
2544        assert_eq!(token_semi.line, 2);
2545        assert_eq!(token_semi.column, 0);
2546    }
2547
#[test]
fn test_word_def_source_location_span() {
    // Each parsed word should carry a source location whose start and end
    // lines (0-indexed) bracket the whole definition.
    let source = r#": helper ( -- )
  "hello"
  write_line
;

: main ( -- )
  helper
;"#;

    let mut parser = Parser::new(source);
    let program = parser.parse().unwrap();

    let words = &program.words;
    assert_eq!(words.len(), 2);

    // `helper` opens on line 0 and its closing `;` is on line 3.
    let helper = &words[0];
    assert_eq!(helper.name, "helper");
    let helper_location = helper.source.as_ref().unwrap();
    assert_eq!(helper_location.start_line, 0);
    assert_eq!(helper_location.end_line, 3);

    // After the blank line 4, `main` runs from line 5 through line 7.
    let main_word = &words[1];
    assert_eq!(main_word.name, "main");
    let main_location = main_word.source.as_ref().unwrap();
    assert_eq!(main_location.start_line, 5);
    assert_eq!(main_location.end_line, 7);
}
2579
#[test]
fn test_token_position_string_with_newline() {
    // The input is one quoted string whose contents include the
    // two-character escape `\n` (a backslash followed by `n`).
    let tokens = tokenize("\"line1\\nline2\"");

    // The whole literal must come back as exactly one token, anchored at
    // the very start of the input: line 0, column 0.
    assert_eq!(tokens.len(), 1);
    let only = &tokens[0];
    assert_eq!(only.line, 0);
    assert_eq!(only.column, 0);
}
2591
2592    // ============================================================================
2593    //                         ADT PARSING TESTS
2594    // ============================================================================
2595
#[test]
fn test_parse_simple_union() {
    // A union with two variants, each carrying a single `Int` field.
    let source = r#"
union Message {
  Get { response-chan: Int }
  Set { value: Int }
}

: main ( -- ) ;
"#;

    let mut parser = Parser::new(source);
    let program = parser.parse().unwrap();

    assert_eq!(program.unions.len(), 1);
    let message = &program.unions[0];
    assert_eq!(message.name, "Message");
    assert_eq!(message.variants.len(), 2);

    // (variant name, name of its only field), in declaration order.
    let expected = [("Get", "response-chan"), ("Set", "value")];
    for (index, (variant_name, field_name)) in expected.iter().enumerate() {
        let variant = &message.variants[index];
        assert_eq!(variant.name, *variant_name);
        assert_eq!(variant.fields.len(), 1);
        assert_eq!(variant.fields[0].name, *field_name);
        assert_eq!(variant.fields[0].type_name, "Int");
    }
}
2627
#[test]
fn test_parse_union_with_multiple_fields() {
    // One variant with three comma-separated fields, one with none.
    let source = r#"
union Report {
  Data { op: Int, delta: Int, total: Int }
  Empty
}

: main ( -- ) ;
"#;

    let mut parser = Parser::new(source);
    let program = parser.parse().unwrap();

    assert_eq!(program.unions.len(), 1);
    let report = &program.unions[0];
    assert_eq!(report.name, "Report");
    assert_eq!(report.variants.len(), 2);

    // `Data` keeps its three fields in the order they were written.
    let data = &report.variants[0];
    assert_eq!(data.name, "Data");
    assert_eq!(data.fields.len(), 3);
    for (index, field_name) in ["op", "delta", "total"].iter().enumerate() {
        assert_eq!(data.fields[index].name, *field_name);
    }

    // `Empty` is written without braces and ends up with no fields.
    let empty = &report.variants[1];
    assert_eq!(empty.name, "Empty");
    assert_eq!(empty.fields.len(), 0);
}
2660
#[test]
fn test_parse_union_lowercase_name_error() {
    // The union is named `message` (lowercase initial). Parsing is expected
    // to fail with an error that mentions "uppercase".
    let source = r#"
union message {
  Get { }
}
"#;

    let mut parser = Parser::new(source);
    let outcome = parser.parse();
    assert!(outcome.is_err());

    let message = outcome.unwrap_err();
    assert!(message.contains("uppercase"));
}
2674
#[test]
fn test_parse_union_empty_error() {
    // A union body with no variants is expected to be rejected, and the
    // error text should say that at least one variant is needed.
    let source = r#"
union Message {
}
"#;

    let mut parser = Parser::new(source);
    let outcome = parser.parse();
    assert!(outcome.is_err());

    let message = outcome.unwrap_err();
    assert!(message.contains("at least one variant"));
}
2687
#[test]
fn test_parse_union_duplicate_variant_error() {
    // `Get` is declared twice inside the same union.
    let source = r#"
union Message {
  Get { x: Int }
  Get { y: String }
}
"#;

    let mut parser = Parser::new(source);
    let outcome = parser.parse();
    assert!(outcome.is_err());

    // The error should name both the problem and the offending variant.
    let message = outcome.unwrap_err();
    for needle in ["Duplicate variant name", "Get"].iter() {
        assert!(message.contains(*needle));
    }
}
2704
#[test]
fn test_parse_union_duplicate_field_error() {
    // Field `x` appears twice inside the single `Record` variant.
    let source = r#"
union Data {
  Record { x: Int, x: String }
}
"#;

    let mut parser = Parser::new(source);
    let outcome = parser.parse();
    assert!(outcome.is_err());

    // The error should name both the problem and the offending field.
    let message = outcome.unwrap_err();
    for needle in ["Duplicate field name", "x"].iter() {
        assert!(message.contains(*needle));
    }
}
2720
#[test]
fn test_parse_simple_match() {
    // A `match` with two bare-variant arms, each followed by one word.
    let source = r#"
: handle ( -- )
  match
    Get -> send-response
    Set -> process-set
  end
;
"#;

    let mut parser = Parser::new(source);
    let program = parser.parse().unwrap();

    assert_eq!(program.words.len(), 1);
    let body = &program.words[0].body;
    assert_eq!(body.len(), 1);

    // The word's only statement must be the match itself.
    let arms = match &body[0] {
        Statement::Match { arms, .. } => arms,
        _ => panic!("Expected Match statement"),
    };
    assert_eq!(arms.len(), 2);

    // Arms are checked in source order: `Get ->` first, then `Set ->`.
    for (index, expected) in ["Get", "Set"].iter().enumerate() {
        let arm = &arms[index];
        match &arm.pattern {
            Pattern::Variant(name) => assert_eq!(name, *expected),
            _ => panic!("Expected Variant pattern"),
        }
        assert_eq!(arm.body.len(), 1);
    }
}
2759
#[test]
fn test_parse_match_with_bindings() {
    // Arms whose patterns pull fields out with `>name` bindings.
    let source = r#"
: handle ( -- )
  match
    Get { >chan } -> chan send-response
    Report { >delta >total } -> delta total process
  end
;
"#;

    let mut parser = Parser::new(source);
    let program = parser.parse().unwrap();

    assert_eq!(program.words.len(), 1);

    let arms = match &program.words[0].body[0] {
        Statement::Match { arms, .. } => arms,
        _ => panic!("Expected Match statement"),
    };
    assert_eq!(arms.len(), 2);

    // (variant name, expected binding names) per arm, in source order.
    // The bindings are expected to be stored without the leading `>`.
    let expected = [
        ("Get", &["chan"][..]),
        ("Report", &["delta", "total"][..]),
    ];
    for (index, (variant, names)) in expected.iter().enumerate() {
        match &arms[index].pattern {
            Pattern::VariantWithBindings { name, bindings } => {
                assert_eq!(name, *variant);
                assert_eq!(bindings.len(), names.len());
                for (slot, wanted) in names.iter().enumerate() {
                    assert_eq!(bindings[slot], *wanted);
                }
            }
            _ => panic!("Expected VariantWithBindings pattern"),
        }
    }
}
2804
#[test]
fn test_parse_match_bindings_require_prefix() {
    // `{ chan }` is the old binding form with no `>` prefix; it must be
    // rejected rather than silently accepted.
    let source = r#"
: handle ( -- )
  match
    Get { chan } -> chan send-response
  end
;
"#;

    let mut parser = Parser::new(source);
    let outcome = parser.parse();
    assert!(outcome.is_err());

    // The message should show the `>chan` spelling and mention
    // "stack extraction".
    let message = outcome.unwrap_err();
    for needle in [">chan", "stack extraction"].iter() {
        assert!(message.contains(*needle));
    }
}
2823
#[test]
fn test_parse_match_with_body_statements() {
    // Arm bodies may hold several statements; each arm must keep its own.
    let source = r#"
: handle ( -- )
  match
    Get -> 1 2 add send-response
    Set -> process-value store
  end
;
"#;

    let mut parser = Parser::new(source);
    let program = parser.parse().unwrap();

    let arms = match &program.words[0].body[0] {
        Statement::Match { arms, .. } => arms,
        _ => panic!("Expected Match statement"),
    };

    // `Get` arm: 1, 2, add, send-response.
    let get_body = &arms[0].body;
    assert_eq!(get_body.len(), 4);
    assert_eq!(get_body[0], Statement::IntLiteral(1));
    assert_eq!(get_body[1], Statement::IntLiteral(2));
    let third_is_add =
        matches!(&get_body[2], Statement::WordCall { name, .. } if name == "add");
    assert!(third_is_add);

    // `Set` arm: process-value, store.
    let set_body = &arms[1].body;
    assert_eq!(set_body.len(), 2);
}
2854
#[test]
fn test_parse_match_empty_error() {
    // `match` immediately followed by `end` has no arms and is expected
    // to be rejected.
    let source = r#"
: handle ( -- )
  match
  end
;
"#;

    let mut parser = Parser::new(source);
    let outcome = parser.parse();
    assert!(outcome.is_err());

    let message = outcome.unwrap_err();
    assert!(message.contains("at least one arm"));
}
2869
#[test]
fn test_parse_symbol_literal() {
    // `:hello` inside a word body should parse as a symbol statement.
    let source = r#"
: main ( -- )
    :hello drop
;
"#;

    let mut parser = Parser::new(source);
    let program = parser.parse().unwrap();
    assert_eq!(program.words.len(), 1);

    // Two statements are expected: the symbol, then `drop`.
    let body = &program.words[0].body;
    assert_eq!(body.len(), 2);

    // The symbol's stored name is compared without the leading colon.
    if let Statement::Symbol(name) = &body[0] {
        assert_eq!(name, "hello");
    } else {
        panic!("Expected Symbol statement, got {:?}", body[0]);
    }
}
2890
#[test]
fn test_parse_symbol_with_hyphen() {
    // A hyphen inside a symbol name should be kept as part of the name.
    let source = r#"
: main ( -- )
    :hello-world drop
;
"#;

    let mut parser = Parser::new(source);
    let program = parser.parse().unwrap();

    let first_statement = &program.words[0].body[0];
    if let Statement::Symbol(name) = first_statement {
        assert_eq!(name, "hello-world");
    } else {
        panic!("Expected Symbol statement");
    }
}
2907
#[test]
fn test_parse_symbol_starting_with_digit_fails() {
    // `:123abc` begins with a digit right after the colon and is expected
    // to be rejected.
    let source = r#"
: main ( -- )
    :123abc drop
;
"#;

    let mut parser = Parser::new(source);
    let outcome = parser.parse();
    assert!(outcome.is_err());

    let message = outcome.unwrap_err();
    assert!(message.contains("cannot start with a digit"));
}
2921
#[test]
fn test_parse_symbol_with_invalid_char_fails() {
    // `@` inside a symbol name is expected to be rejected.
    let source = r#"
: main ( -- )
    :hello@world drop
;
"#;

    let mut parser = Parser::new(source);
    let outcome = parser.parse();
    assert!(outcome.is_err());

    let message = outcome.unwrap_err();
    assert!(message.contains("invalid character"));
}
2935
#[test]
fn test_parse_symbol_special_chars_allowed() {
    // `?` and `!` are accepted inside symbol names.
    let source = r#"
: main ( -- )
    :empty? drop
    :save! drop
;
"#;

    let mut parser = Parser::new(source);
    let program = parser.parse().unwrap();

    // Each symbol is followed by a `drop`, so the symbols are the
    // statements at positions 0 and 2 of the body.
    let body = &program.words[0].body;
    let expected: [(usize, &str); 2] = [(0, "empty?"), (2, "save!")];
    for (position, symbol_name) in expected.iter() {
        match &body[*position] {
            Statement::Symbol(name) => assert_eq!(name, *symbol_name),
            _ => panic!("Expected Symbol statement"),
        }
    }
}
2958}