seqc/
parser.rs

1//! Simple parser for Seq syntax
2//!
3//! Syntax:
4//! ```text
5//! : word-name ( stack-effect )
6//!   statement1
7//!   statement2
8//!   ... ;
9//! ```
10
11use crate::ast::{
12    Include, MatchArm, Pattern, Program, SourceLocation, Statement, UnionDef, UnionField,
13    UnionVariant, WordDef,
14};
15use crate::types::{Effect, SideEffect, StackType, Type};
16
/// A lexical token paired with the position where it starts in the source.
#[derive(Debug, Clone)]
pub struct Token {
    /// The token's text
    pub text: String,
    /// Line number (0-indexed for LSP compatibility)
    pub line: usize,
    /// Column number (0-indexed)
    pub column: usize,
}

impl Token {
    /// Build a token from its text and its 0-indexed line/column.
    fn new(text: String, line: usize, column: usize) -> Self {
        Self { text, line, column }
    }
}

/// A token equals a string slice when its text matches; the position is ignored.
impl PartialEq<str> for Token {
    fn eq(&self, other: &str) -> bool {
        self.text.as_str() == other
    }
}

/// Allows `token == "literal"` without dereferencing the literal.
impl PartialEq<&str> for Token {
    fn eq(&self, other: &&str) -> bool {
        self.text.as_str() == *other
    }
}
44
/// Parser state for Seq source: the token list plus bookkeeping that is
/// carried across word definitions.
pub struct Parser {
    /// Tokens produced by `tokenize` for the source handed to `Parser::new`
    tokens: Vec<Token>,
    /// Cursor position, initialised to 0 (presumably an index into `tokens`)
    pos: usize,
    /// Counter for assigning unique IDs to quotations
    /// Used by the typechecker to track inferred types
    next_quotation_id: usize,
    /// Pending lint annotations collected from `# seq:allow(lint-id)` comments
    pending_allowed_lints: Vec<String>,
}
54
55impl Parser {
56    pub fn new(source: &str) -> Self {
57        let tokens = tokenize(source);
58        Parser {
59            tokens,
60            pos: 0,
61            next_quotation_id: 0,
62            pending_allowed_lints: Vec::new(),
63        }
64    }
65
66    pub fn parse(&mut self) -> Result<Program, String> {
67        let mut program = Program::new();
68
69        // Check for unclosed string error from tokenizer
70        if let Some(error_token) = self.tokens.iter().find(|t| *t == "<<<UNCLOSED_STRING>>>") {
71            return Err(format!(
72                "Unclosed string literal at line {}, column {} - missing closing quote",
73                error_token.line + 1, // 1-indexed for user display
74                error_token.column + 1
75            ));
76        }
77
78        while !self.is_at_end() {
79            self.skip_comments();
80            if self.is_at_end() {
81                break;
82            }
83
84            // Check for include statement
85            if self.check("include") {
86                let include = self.parse_include()?;
87                program.includes.push(include);
88                continue;
89            }
90
91            // Check for union definition
92            if self.check("union") {
93                let union_def = self.parse_union_def()?;
94                program.unions.push(union_def);
95                continue;
96            }
97
98            let word = self.parse_word_def()?;
99            program.words.push(word);
100        }
101
102        Ok(program)
103    }
104
105    /// Parse an include statement:
106    ///   include std:http     -> Include::Std("http")
107    ///   include ffi:readline -> Include::Ffi("readline")
108    ///   include "my-utils"   -> Include::Relative("my-utils")
109    fn parse_include(&mut self) -> Result<Include, String> {
110        self.consume("include");
111
112        let token = self
113            .advance()
114            .ok_or("Expected module name after 'include'")?
115            .clone();
116
117        // Check for std: prefix (tokenizer splits this into "std", ":", "name")
118        if token == "std" {
119            // Expect : token
120            if !self.consume(":") {
121                return Err("Expected ':' after 'std' in include statement".to_string());
122            }
123            // Get the module name
124            let name = self
125                .advance()
126                .ok_or("Expected module name after 'std:'")?
127                .clone();
128            return Ok(Include::Std(name));
129        }
130
131        // Check for ffi: prefix
132        if token == "ffi" {
133            // Expect : token
134            if !self.consume(":") {
135                return Err("Expected ':' after 'ffi' in include statement".to_string());
136            }
137            // Get the library name
138            let name = self
139                .advance()
140                .ok_or("Expected library name after 'ffi:'")?
141                .clone();
142            return Ok(Include::Ffi(name));
143        }
144
145        // Check for quoted string (relative path)
146        if token.starts_with('"') && token.ends_with('"') {
147            let path = token.trim_start_matches('"').trim_end_matches('"');
148            return Ok(Include::Relative(path.to_string()));
149        }
150
151        Err(format!(
152            "Invalid include syntax '{}'. Use 'include std:name', 'include ffi:lib', or 'include \"path\"'",
153            token
154        ))
155    }
156
157    /// Parse a union type definition:
158    ///   union Message {
159    ///     Get { response-chan: Int }
160    ///     Increment { response-chan: Int }
161    ///     Report { op: Int, delta: Int, total: Int }
162    ///   }
163    fn parse_union_def(&mut self) -> Result<UnionDef, String> {
164        // Capture start line from 'union' token
165        let start_line = self.current_token().map(|t| t.line).unwrap_or(0);
166
167        // Consume 'union' keyword
168        self.consume("union");
169
170        // Get union name (must start with uppercase)
171        let name = self
172            .advance()
173            .ok_or("Expected union name after 'union'")?
174            .clone();
175
176        if !name
177            .chars()
178            .next()
179            .map(|c| c.is_uppercase())
180            .unwrap_or(false)
181        {
182            return Err(format!(
183                "Union name '{}' must start with an uppercase letter",
184                name
185            ));
186        }
187
188        // Skip comments and newlines
189        self.skip_comments();
190
191        // Expect '{'
192        if !self.consume("{") {
193            return Err(format!(
194                "Expected '{{' after union name '{}', got '{}'",
195                name,
196                self.current()
197            ));
198        }
199
200        // Parse variants until '}'
201        let mut variants = Vec::new();
202        loop {
203            self.skip_comments();
204
205            if self.check("}") {
206                break;
207            }
208
209            if self.is_at_end() {
210                return Err(format!("Unexpected end of file in union '{}'", name));
211            }
212
213            variants.push(self.parse_union_variant()?);
214        }
215
216        // Capture end line from '}' token before consuming
217        let end_line = self.current_token().map(|t| t.line).unwrap_or(start_line);
218
219        // Consume '}'
220        self.consume("}");
221
222        if variants.is_empty() {
223            return Err(format!("Union '{}' must have at least one variant", name));
224        }
225
226        // Check for duplicate variant names
227        let mut seen_variants = std::collections::HashSet::new();
228        for variant in &variants {
229            if !seen_variants.insert(&variant.name) {
230                return Err(format!(
231                    "Duplicate variant name '{}' in union '{}'",
232                    variant.name, name
233                ));
234            }
235        }
236
237        Ok(UnionDef {
238            name,
239            variants,
240            source: Some(SourceLocation::span(
241                std::path::PathBuf::new(),
242                start_line,
243                end_line,
244            )),
245        })
246    }
247
248    /// Parse a single union variant:
249    ///   Get { response-chan: Int }
250    ///   or just: Empty (no fields)
251    fn parse_union_variant(&mut self) -> Result<UnionVariant, String> {
252        let start_line = self.current_token().map(|t| t.line).unwrap_or(0);
253
254        // Get variant name (must start with uppercase)
255        let name = self.advance().ok_or("Expected variant name")?.clone();
256
257        if !name
258            .chars()
259            .next()
260            .map(|c| c.is_uppercase())
261            .unwrap_or(false)
262        {
263            return Err(format!(
264                "Variant name '{}' must start with an uppercase letter",
265                name
266            ));
267        }
268
269        self.skip_comments();
270
271        // Check for optional fields
272        let fields = if self.check("{") {
273            self.consume("{");
274            let fields = self.parse_union_fields()?;
275            if !self.consume("}") {
276                return Err(format!("Expected '}}' after variant '{}' fields", name));
277            }
278            fields
279        } else {
280            Vec::new()
281        };
282
283        Ok(UnionVariant {
284            name,
285            fields,
286            source: Some(SourceLocation::new(std::path::PathBuf::new(), start_line)),
287        })
288    }
289
290    /// Parse union fields: name: Type, name: Type, ...
291    fn parse_union_fields(&mut self) -> Result<Vec<UnionField>, String> {
292        let mut fields = Vec::new();
293
294        loop {
295            self.skip_comments();
296
297            if self.check("}") {
298                break;
299            }
300
301            // Get field name
302            let field_name = self.advance().ok_or("Expected field name")?.clone();
303
304            // Expect ':'
305            if !self.consume(":") {
306                return Err(format!(
307                    "Expected ':' after field name '{}', got '{}'",
308                    field_name,
309                    self.current()
310                ));
311            }
312
313            // Get type name
314            let type_name = self
315                .advance()
316                .ok_or("Expected type name after ':'")?
317                .clone();
318
319            fields.push(UnionField {
320                name: field_name,
321                type_name,
322            });
323
324            // Optional comma separator
325            self.skip_comments();
326            self.consume(",");
327        }
328
329        // Check for duplicate field names
330        let mut seen_fields = std::collections::HashSet::new();
331        for field in &fields {
332            if !seen_fields.insert(&field.name) {
333                return Err(format!("Duplicate field name '{}' in variant", field.name));
334            }
335        }
336
337        Ok(fields)
338    }
339
340    fn parse_word_def(&mut self) -> Result<WordDef, String> {
341        // Consume any pending lint annotations collected from comments before this word
342        let allowed_lints = std::mem::take(&mut self.pending_allowed_lints);
343
344        // Capture start line from ':' token
345        let start_line = self.current_token().map(|t| t.line).unwrap_or(0);
346
347        // Expect ':'
348        if !self.consume(":") {
349            return Err(format!(
350                "Expected ':' to start word definition, got '{}'",
351                self.current()
352            ));
353        }
354
355        // Get word name
356        let name = self
357            .advance()
358            .ok_or("Expected word name after ':'")?
359            .clone();
360
361        // Parse stack effect if present: ( ..a Int -- ..a Bool )
362        let effect = if self.check("(") {
363            Some(self.parse_stack_effect()?)
364        } else {
365            None
366        };
367
368        // Parse body until ';'
369        let mut body = Vec::new();
370        while !self.check(";") {
371            if self.is_at_end() {
372                return Err(format!("Unexpected end of file in word '{}'", name));
373            }
374
375            // Skip comments and newlines in body
376            self.skip_comments();
377            if self.check(";") {
378                break;
379            }
380
381            body.push(self.parse_statement()?);
382        }
383
384        // Capture end line from ';' token before consuming
385        let end_line = self.current_token().map(|t| t.line).unwrap_or(start_line);
386
387        // Consume ';'
388        self.consume(";");
389
390        Ok(WordDef {
391            name,
392            effect,
393            body,
394            source: Some(crate::ast::SourceLocation::span(
395                std::path::PathBuf::new(),
396                start_line,
397                end_line,
398            )),
399            allowed_lints,
400        })
401    }
402
403    fn parse_statement(&mut self) -> Result<Statement, String> {
404        use crate::ast::Span;
405        let tok = self.advance_token().ok_or("Unexpected end of file")?;
406        let token = &tok.text;
407        let tok_line = tok.line;
408        let tok_column = tok.column;
409        let tok_len = tok.text.len();
410
411        // Check if it looks like a float literal (contains . or scientific notation)
412        // Must check this BEFORE integer parsing
413        if let Some(f) = is_float_literal(token)
414            .then(|| token.parse::<f64>().ok())
415            .flatten()
416        {
417            return Ok(Statement::FloatLiteral(f));
418        }
419
420        // Try to parse as hex literal (0x or 0X prefix)
421        if let Some(hex) = token
422            .strip_prefix("0x")
423            .or_else(|| token.strip_prefix("0X"))
424        {
425            return i64::from_str_radix(hex, 16)
426                .map(Statement::IntLiteral)
427                .map_err(|_| format!("Invalid hex literal: {}", token));
428        }
429
430        // Try to parse as binary literal (0b or 0B prefix)
431        if let Some(bin) = token
432            .strip_prefix("0b")
433            .or_else(|| token.strip_prefix("0B"))
434        {
435            return i64::from_str_radix(bin, 2)
436                .map(Statement::IntLiteral)
437                .map_err(|_| format!("Invalid binary literal: {}", token));
438        }
439
440        // Try to parse as decimal integer literal
441        if let Ok(n) = token.parse::<i64>() {
442            return Ok(Statement::IntLiteral(n));
443        }
444
445        // Try to parse as boolean literal
446        if token == "true" {
447            return Ok(Statement::BoolLiteral(true));
448        }
449        if token == "false" {
450            return Ok(Statement::BoolLiteral(false));
451        }
452
453        // Try to parse as symbol literal (:foo, :some-name)
454        if token == ":" {
455            // Get the next token as the symbol name
456            let name_tok = self
457                .advance_token()
458                .ok_or("Expected symbol name after ':', got end of input")?;
459            let name = &name_tok.text;
460            // Validate symbol name (identifier-like, kebab-case allowed)
461            if name.is_empty() {
462                return Err("Symbol name cannot be empty".to_string());
463            }
464            if name.starts_with(|c: char| c.is_ascii_digit()) {
465                return Err(format!(
466                    "Symbol name cannot start with a digit: ':{}'\n  Hint: Symbol names must start with a letter",
467                    name
468                ));
469            }
470            if let Some(bad_char) = name.chars().find(|c| {
471                !c.is_alphanumeric()
472                    && *c != '-'
473                    && *c != '_'
474                    && *c != '.'
475                    && *c != '?'
476                    && *c != '!'
477            }) {
478                return Err(format!(
479                    "Symbol name contains invalid character '{}': ':{}'\n  Hint: Allowed: letters, digits, - _ . ? !",
480                    bad_char, name
481                ));
482            }
483            return Ok(Statement::Symbol(name.clone()));
484        }
485
486        // Try to parse as string literal
487        if token.starts_with('"') {
488            // Validate token has at least opening and closing quotes
489            if token.len() < 2 || !token.ends_with('"') {
490                return Err(format!("Malformed string literal: {}", token));
491            }
492            // Strip exactly one quote from each end (not all quotes, which would
493            // incorrectly handle escaped quotes at string boundaries like "hello\"")
494            let raw = &token[1..token.len() - 1];
495            let unescaped = unescape_string(raw)?;
496            return Ok(Statement::StringLiteral(unescaped));
497        }
498
499        // Check for conditional
500        if token == "if" {
501            return self.parse_if();
502        }
503
504        // Check for quotation
505        if token == "[" {
506            return self.parse_quotation(tok_line, tok_column);
507        }
508
509        // Check for match expression
510        if token == "match" {
511            return self.parse_match();
512        }
513
514        // Otherwise it's a word call - preserve source span for precise diagnostics
515        Ok(Statement::WordCall {
516            name: token.to_string(),
517            span: Some(Span::new(tok_line, tok_column, tok_len)),
518        })
519    }
520
521    fn parse_if(&mut self) -> Result<Statement, String> {
522        let mut then_branch = Vec::new();
523
524        // Parse then branch until 'else' or 'then'
525        loop {
526            if self.is_at_end() {
527                return Err("Unexpected end of file in 'if' statement".to_string());
528            }
529
530            // Skip comments and newlines
531            self.skip_comments();
532
533            if self.check("else") {
534                self.advance();
535                // Parse else branch
536                break;
537            }
538
539            if self.check("then") {
540                self.advance();
541                // End of if without else
542                return Ok(Statement::If {
543                    then_branch,
544                    else_branch: None,
545                });
546            }
547
548            then_branch.push(self.parse_statement()?);
549        }
550
551        // Parse else branch until 'then'
552        let mut else_branch = Vec::new();
553        loop {
554            if self.is_at_end() {
555                return Err("Unexpected end of file in 'else' branch".to_string());
556            }
557
558            // Skip comments and newlines
559            self.skip_comments();
560
561            if self.check("then") {
562                self.advance();
563                return Ok(Statement::If {
564                    then_branch,
565                    else_branch: Some(else_branch),
566                });
567            }
568
569            else_branch.push(self.parse_statement()?);
570        }
571    }
572
573    fn parse_quotation(
574        &mut self,
575        start_line: usize,
576        start_column: usize,
577    ) -> Result<Statement, String> {
578        use crate::ast::QuotationSpan;
579        let mut body = Vec::new();
580
581        // Parse statements until ']'
582        loop {
583            if self.is_at_end() {
584                return Err("Unexpected end of file in quotation".to_string());
585            }
586
587            // Skip comments and newlines
588            self.skip_comments();
589
590            if self.check("]") {
591                let end_tok = self.advance_token().unwrap();
592                let end_line = end_tok.line;
593                let end_column = end_tok.column + 1; // exclusive
594                let id = self.next_quotation_id;
595                self.next_quotation_id += 1;
596                // Span from '[' to ']' inclusive
597                let span = QuotationSpan::new(start_line, start_column, end_line, end_column);
598                return Ok(Statement::Quotation {
599                    id,
600                    body,
601                    span: Some(span),
602                });
603            }
604
605            body.push(self.parse_statement()?);
606        }
607    }
608
609    /// Parse a match expression:
610    ///   match
611    ///     Get -> send-response
612    ///     Increment -> do-increment send-response
613    ///     Report -> aggregate-add
614    ///   end
615    fn parse_match(&mut self) -> Result<Statement, String> {
616        let mut arms = Vec::new();
617
618        loop {
619            self.skip_comments();
620
621            // Check for 'end' to terminate match
622            if self.check("end") {
623                self.advance();
624                break;
625            }
626
627            if self.is_at_end() {
628                return Err("Unexpected end of file in match expression".to_string());
629            }
630
631            arms.push(self.parse_match_arm()?);
632        }
633
634        if arms.is_empty() {
635            return Err("Match expression must have at least one arm".to_string());
636        }
637
638        Ok(Statement::Match { arms })
639    }
640
641    /// Parse a single match arm:
642    ///   Get -> send-response
643    ///   or with bindings:
644    ///   Get { chan } -> chan send-response
645    fn parse_match_arm(&mut self) -> Result<MatchArm, String> {
646        // Get variant name
647        let variant_name = self
648            .advance()
649            .ok_or("Expected variant name in match arm")?
650            .clone();
651
652        self.skip_comments();
653
654        // Check for optional bindings: { field1 field2 }
655        let pattern = if self.check("{") {
656            self.consume("{");
657            let mut bindings = Vec::new();
658
659            loop {
660                self.skip_comments();
661
662                if self.check("}") {
663                    break;
664                }
665
666                if self.is_at_end() {
667                    return Err(format!(
668                        "Unexpected end of file in match arm bindings for '{}'",
669                        variant_name
670                    ));
671                }
672
673                let token = self.advance().ok_or("Expected binding name")?.clone();
674
675                // Require > prefix to make clear these are stack extractions, not variables
676                if let Some(field_name) = token.strip_prefix('>') {
677                    if field_name.is_empty() {
678                        return Err(format!(
679                            "Expected field name after '>' in match bindings for '{}'",
680                            variant_name
681                        ));
682                    }
683                    bindings.push(field_name.to_string());
684                } else {
685                    return Err(format!(
686                        "Match bindings must use '>' prefix to indicate stack extraction. \
687                         Use '>{}' instead of '{}' in pattern for '{}'",
688                        token, token, variant_name
689                    ));
690                }
691            }
692
693            self.consume("}");
694            Pattern::VariantWithBindings {
695                name: variant_name,
696                bindings,
697            }
698        } else {
699            Pattern::Variant(variant_name.clone())
700        };
701
702        self.skip_comments();
703
704        // Expect '->' arrow
705        if !self.consume("->") {
706            return Err(format!(
707                "Expected '->' after pattern '{}', got '{}'",
708                match &pattern {
709                    Pattern::Variant(n) => n.clone(),
710                    Pattern::VariantWithBindings { name, .. } => name.clone(),
711                },
712                self.current()
713            ));
714        }
715
716        // Parse body until next pattern or 'end'
717        let mut body = Vec::new();
718        loop {
719            self.skip_comments();
720
721            // Check for end of arm (next pattern starts with uppercase, or 'end')
722            if self.check("end") {
723                break;
724            }
725
726            // Check if next token looks like a match pattern (not just any uppercase word).
727            // A pattern is: UppercaseName followed by '->' or '{'
728            // This prevents confusing 'Make-Get' (constructor call) with a pattern.
729            if let Some(token) = self.current_token()
730                && let Some(first_char) = token.text.chars().next()
731                && first_char.is_uppercase()
732            {
733                // Peek at next token to see if this is a pattern (followed by -> or {)
734                if let Some(next) = self.peek_at(1)
735                    && (next == "->" || next == "{")
736                {
737                    // This is the next pattern
738                    break;
739                }
740                // Otherwise it's just an uppercase word call (like Make-Get), continue parsing body
741            }
742
743            if self.is_at_end() {
744                return Err("Unexpected end of file in match arm body".to_string());
745            }
746
747            body.push(self.parse_statement()?);
748        }
749
750        Ok(MatchArm { pattern, body })
751    }
752
753    /// Parse a stack effect declaration: ( ..a Int -- ..a Bool )
754    /// With optional computational effects: ( ..a Int -- ..a Bool | Yield Int )
755    fn parse_stack_effect(&mut self) -> Result<Effect, String> {
756        // Consume '('
757        if !self.consume("(") {
758            return Err("Expected '(' to start stack effect".to_string());
759        }
760
761        // Parse input stack types (until '--' or ')')
762        let (input_row_var, input_types) =
763            self.parse_type_list_until(&["--", ")"], "stack effect inputs", 0)?;
764
765        // Consume '--'
766        if !self.consume("--") {
767            return Err("Expected '--' separator in stack effect".to_string());
768        }
769
770        // Parse output stack types (until ')' or '|')
771        let (output_row_var, output_types) =
772            self.parse_type_list_until(&[")", "|"], "stack effect outputs", 0)?;
773
774        // Parse optional computational effects after '|'
775        let effects = if self.consume("|") {
776            self.parse_effect_annotations()?
777        } else {
778            Vec::new()
779        };
780
781        // Consume ')'
782        if !self.consume(")") {
783            return Err("Expected ')' to end stack effect".to_string());
784        }
785
786        // Build input and output StackTypes
787        let inputs = self.build_stack_type(input_row_var, input_types);
788        let outputs = self.build_stack_type(output_row_var, output_types);
789
790        Ok(Effect::with_effects(inputs, outputs, effects))
791    }
792
793    /// Parse computational effect annotations after '|'
794    /// Example: | Yield Int
795    fn parse_effect_annotations(&mut self) -> Result<Vec<SideEffect>, String> {
796        let mut effects = Vec::new();
797
798        // Parse effects until we hit ')'
799        while let Some(token) = self.peek_at(0) {
800            if token == ")" {
801                break;
802            }
803
804            match token {
805                "Yield" => {
806                    self.advance(); // consume "Yield"
807                    // Parse the yield type
808                    if let Some(type_token) = self.current_token() {
809                        if type_token.text == ")" {
810                            return Err("Expected type after 'Yield'".to_string());
811                        }
812                        let type_token = type_token.clone();
813                        self.advance();
814                        let yield_type = self.parse_type(&type_token)?;
815                        effects.push(SideEffect::Yield(Box::new(yield_type)));
816                    } else {
817                        return Err("Expected type after 'Yield'".to_string());
818                    }
819                }
820                _ => {
821                    return Err(format!("Unknown effect '{}'. Expected 'Yield'", token));
822                }
823            }
824        }
825
826        if effects.is_empty() {
827            return Err("Expected at least one effect after '|'".to_string());
828        }
829
830        Ok(effects)
831    }
832
833    /// Parse a single type token into a Type
834    fn parse_type(&self, token: &Token) -> Result<Type, String> {
835        match token.text.as_str() {
836            "Int" => Ok(Type::Int),
837            "Float" => Ok(Type::Float),
838            "Bool" => Ok(Type::Bool),
839            "String" => Ok(Type::String),
840            // Reject 'Quotation' - it looks like a type but would be silently treated as a type variable.
841            // Users must use explicit effect syntax like [Int -- Int] instead.
842            "Quotation" => Err(format!(
843                "'Quotation' is not a valid type at line {}, column {}. Use explicit quotation syntax like [Int -- Int] or [ -- ] instead.",
844                token.line + 1,
845                token.column + 1
846            )),
847            _ => {
848                // Check if it's a type variable (starts with uppercase)
849                if let Some(first_char) = token.text.chars().next() {
850                    if first_char.is_uppercase() {
851                        Ok(Type::Var(token.text.to_string()))
852                    } else {
853                        Err(format!(
854                            "Unknown type: '{}' at line {}, column {}. Expected Int, Bool, String, Closure, or a type variable (uppercase)",
855                            token.text.escape_default(),
856                            token.line + 1, // 1-indexed for user display
857                            token.column + 1
858                        ))
859                    }
860                } else {
861                    Err(format!(
862                        "Invalid type: '{}' at line {}, column {}",
863                        token.text.escape_default(),
864                        token.line + 1,
865                        token.column + 1
866                    ))
867                }
868            }
869        }
870    }
871
872    /// Validate row variable name
873    /// Row variables must start with a lowercase letter and contain only alphanumeric characters
874    fn validate_row_var_name(&self, name: &str) -> Result<(), String> {
875        if name.is_empty() {
876            return Err("Row variable must have a name after '..'".to_string());
877        }
878
879        // Must start with lowercase letter
880        let first_char = name.chars().next().unwrap();
881        if !first_char.is_ascii_lowercase() {
882            return Err(format!(
883                "Row variable '..{}' must start with a lowercase letter (a-z)",
884                name
885            ));
886        }
887
888        // Rest must be alphanumeric or underscore
889        for ch in name.chars() {
890            if !ch.is_alphanumeric() && ch != '_' {
891                return Err(format!(
892                    "Row variable '..{}' can only contain letters, numbers, and underscores",
893                    name
894                ));
895            }
896        }
897
898        // Check for reserved keywords (type names that might confuse users)
899        match name {
900            "Int" | "Bool" | "String" => {
901                return Err(format!(
902                    "Row variable '..{}' cannot use type name as identifier",
903                    name
904                ));
905            }
906            _ => {}
907        }
908
909        Ok(())
910    }
911
912    /// Parse a list of types until one of the given terminators is reached
913    /// Returns (optional row variable, list of types)
914    /// Used by both parse_stack_effect and parse_quotation_type
915    ///
916    /// depth: Current nesting depth for quotation types (0 at top level)
917    fn parse_type_list_until(
918        &mut self,
919        terminators: &[&str],
920        context: &str,
921        depth: usize,
922    ) -> Result<(Option<String>, Vec<Type>), String> {
923        const MAX_QUOTATION_DEPTH: usize = 32;
924
925        if depth > MAX_QUOTATION_DEPTH {
926            return Err(format!(
927                "Quotation type nesting exceeds maximum depth of {} (possible deeply nested types or DOS attack)",
928                MAX_QUOTATION_DEPTH
929            ));
930        }
931
932        let mut types = Vec::new();
933        let mut row_var = None;
934
935        while !terminators.iter().any(|t| self.check(t)) {
936            // Skip comments and blank lines within type lists
937            self.skip_comments();
938
939            // Re-check terminators after skipping comments
940            if terminators.iter().any(|t| self.check(t)) {
941                break;
942            }
943
944            if self.is_at_end() {
945                return Err(format!(
946                    "Unexpected end while parsing {} - expected one of: {}",
947                    context,
948                    terminators.join(", ")
949                ));
950            }
951
952            let token = self
953                .advance_token()
954                .ok_or_else(|| format!("Unexpected end in {}", context))?
955                .clone();
956
957            // Check for row variable: ..name
958            if token.text.starts_with("..") {
959                let var_name = token.text.trim_start_matches("..").to_string();
960                self.validate_row_var_name(&var_name)?;
961                row_var = Some(var_name);
962            } else if token.text == "Closure" {
963                // Closure type: Closure[effect]
964                if !self.consume("[") {
965                    return Err("Expected '[' after 'Closure' in type signature".to_string());
966                }
967                let effect_type = self.parse_quotation_type(depth)?;
968                match effect_type {
969                    Type::Quotation(effect) => {
970                        types.push(Type::Closure {
971                            effect,
972                            captures: Vec::new(), // Filled in by type checker
973                        });
974                    }
975                    _ => unreachable!("parse_quotation_type should return Quotation"),
976                }
977            } else if token.text == "[" {
978                // Nested quotation type
979                types.push(self.parse_quotation_type(depth)?);
980            } else {
981                // Parse as concrete type
982                types.push(self.parse_type(&token)?);
983            }
984        }
985
986        Ok((row_var, types))
987    }
988
989    /// Parse a quotation type: [inputs -- outputs]
990    /// Note: The opening '[' has already been consumed
991    ///
992    /// depth: Current nesting depth (incremented for each nested quotation)
993    fn parse_quotation_type(&mut self, depth: usize) -> Result<Type, String> {
994        // Parse input stack types (until '--' or ']')
995        let (input_row_var, input_types) =
996            self.parse_type_list_until(&["--", "]"], "quotation type inputs", depth + 1)?;
997
998        // Require '--' separator for clarity
999        if !self.consume("--") {
1000            // Check if user closed with ] without separator
1001            if self.check("]") {
1002                return Err(
1003                    "Quotation types require '--' separator. Did you mean '[Int -- ]' or '[ -- Int]'?"
1004                        .to_string(),
1005                );
1006            }
1007            return Err("Expected '--' separator in quotation type".to_string());
1008        }
1009
1010        // Parse output stack types (until ']')
1011        let (output_row_var, output_types) =
1012            self.parse_type_list_until(&["]"], "quotation type outputs", depth + 1)?;
1013
1014        // Consume ']'
1015        if !self.consume("]") {
1016            return Err("Expected ']' to end quotation type".to_string());
1017        }
1018
1019        // Build input and output StackTypes
1020        let inputs = self.build_stack_type(input_row_var, input_types);
1021        let outputs = self.build_stack_type(output_row_var, output_types);
1022
1023        Ok(Type::Quotation(Box::new(Effect::new(inputs, outputs))))
1024    }
1025
1026    /// Build a StackType from an optional row variable and a list of types
1027    /// Example: row_var="a", types=[Int, Bool] => RowVar("a") with Int on top of Bool
1028    ///
1029    /// IMPORTANT: ALL stack effects are implicitly row-polymorphic in concatenative languages.
1030    /// This means:
1031    ///   ( -- )        becomes  ( ..rest -- ..rest )       - no-op, preserves stack
1032    ///   ( -- Int )    becomes  ( ..rest -- ..rest Int )   - pushes Int
1033    ///   ( Int -- )    becomes  ( ..rest Int -- ..rest )   - consumes Int
1034    ///   ( Int -- Int) becomes  ( ..rest Int -- ..rest Int ) - transforms top
1035    fn build_stack_type(&self, row_var: Option<String>, types: Vec<Type>) -> StackType {
1036        // Always use row polymorphism - this is fundamental to concatenative semantics
1037        let base = match row_var {
1038            Some(name) => StackType::RowVar(name),
1039            None => StackType::RowVar("rest".to_string()),
1040        };
1041
1042        // Push types onto the stack (bottom to top order)
1043        types.into_iter().fold(base, |stack, ty| stack.push(ty))
1044    }
1045
1046    fn skip_comments(&mut self) {
1047        loop {
1048            if self.check("#") {
1049                self.advance(); // consume #
1050
1051                // Collect all tokens until newline to reconstruct the comment text
1052                let mut comment_parts: Vec<String> = Vec::new();
1053                while !self.is_at_end() && self.current() != "\n" {
1054                    comment_parts.push(self.current().to_string());
1055                    self.advance();
1056                }
1057                if !self.is_at_end() {
1058                    self.advance(); // skip newline
1059                }
1060
1061                // Join parts and check for seq:allow annotation
1062                // Format: # seq:allow(lint-id) -> parts = ["seq", ":", "allow", "(", "lint-id", ")"]
1063                let comment = comment_parts.join("");
1064                if let Some(lint_id) = comment
1065                    .strip_prefix("seq:allow(")
1066                    .and_then(|s| s.strip_suffix(")"))
1067                {
1068                    self.pending_allowed_lints.push(lint_id.to_string());
1069                }
1070            } else if self.check("\n") {
1071                // Skip blank lines
1072                self.advance();
1073            } else {
1074                break;
1075            }
1076        }
1077    }
1078
1079    fn check(&self, expected: &str) -> bool {
1080        if self.is_at_end() {
1081            return false;
1082        }
1083        self.current() == expected
1084    }
1085
1086    fn consume(&mut self, expected: &str) -> bool {
1087        if self.check(expected) {
1088            self.advance();
1089            true
1090        } else {
1091            false
1092        }
1093    }
1094
1095    /// Get the text of the current token
1096    fn current(&self) -> &str {
1097        if self.is_at_end() {
1098            ""
1099        } else {
1100            &self.tokens[self.pos].text
1101        }
1102    }
1103
1104    /// Get the full current token with position info
1105    fn current_token(&self) -> Option<&Token> {
1106        if self.is_at_end() {
1107            None
1108        } else {
1109            Some(&self.tokens[self.pos])
1110        }
1111    }
1112
1113    /// Peek at a token N positions ahead without consuming
1114    fn peek_at(&self, n: usize) -> Option<&str> {
1115        let idx = self.pos + n;
1116        if idx < self.tokens.len() {
1117            Some(&self.tokens[idx].text)
1118        } else {
1119            None
1120        }
1121    }
1122
1123    /// Advance and return the token text (for compatibility with existing code)
1124    fn advance(&mut self) -> Option<&String> {
1125        if self.is_at_end() {
1126            None
1127        } else {
1128            let token = &self.tokens[self.pos];
1129            self.pos += 1;
1130            Some(&token.text)
1131        }
1132    }
1133
1134    /// Advance and return the full token with position info
1135    fn advance_token(&mut self) -> Option<&Token> {
1136        if self.is_at_end() {
1137            None
1138        } else {
1139            let token = &self.tokens[self.pos];
1140            self.pos += 1;
1141            Some(token)
1142        }
1143    }
1144
1145    fn is_at_end(&self) -> bool {
1146        self.pos >= self.tokens.len()
1147    }
1148}
1149
/// Check if a token looks like a float literal
///
/// After an optional leading minus sign, the token must contain at least one
/// ASCII digit and either:
/// - A decimal point: `3.14`, `.5`, `5.`
/// - Scientific notation: `1e10`, `1E-5`, `1.5e3`
///
/// This is only a cheap pre-filter: it does not prove the token is a well-formed
/// float (for example `0xE` also passes), so the token still has to be parsed.
///
/// This check must happen BEFORE integer parsing to avoid
/// parsing "5" in "5.0" as an integer.
fn is_float_literal(token: &str) -> bool {
    // Skip leading minus sign for negative numbers
    let s = token.strip_prefix('-').unwrap_or(token);

    // Must have at least one digit; this rules out plain identifiers such as
    // `write_line` that merely happen to contain an 'e' or a '.'
    if !s.chars().any(|c| c.is_ascii_digit()) {
        return false;
    }

    // Check for decimal point or scientific notation
    s.contains(|c: char| matches!(c, '.' | 'e' | 'E'))
}
1170
/// Process escape sequences in a string literal
///
/// Supported escape sequences:
/// - `\"` -> `"`  (quote)
/// - `\\` -> `\`  (backslash)
/// - `\n` -> newline
/// - `\r` -> carriage return
/// - `\t` -> tab
/// - `\xNN` -> Unicode code point U+00NN (exactly two hex digits, 00-FF)
///
/// # Note on `\xNN` encoding
///
/// The `\xNN` escape creates a Unicode code point U+00NN, not a raw byte.
/// For values 0x00-0x7F (ASCII), this maps directly to the byte value.
/// For values 0x80-0xFF (Latin-1 Supplement), the character is stored as
/// a multi-byte UTF-8 sequence. For example:
/// - `\x41` -> 'A' (1 byte in UTF-8)
/// - `\x1b` -> ESC (1 byte in UTF-8, used for ANSI terminal codes)
/// - `\xFF` -> 'ÿ' (U+00FF, 2 bytes in UTF-8: 0xC3 0xBF)
///
/// # Errors
/// Returns an error for an unknown escape sequence, a trailing backslash, a `\x`
/// followed by fewer than two characters, or a `\x` followed by characters that
/// are not both hex digits.
fn unescape_string(s: &str) -> Result<String, String> {
    // The output is never longer (in bytes) than the input for ASCII escapes, so
    // the input length is a good initial capacity
    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars();

    while let Some(ch) = chars.next() {
        if ch != '\\' {
            result.push(ch);
            continue;
        }

        match chars.next() {
            Some('"') => result.push('"'),
            Some('\\') => result.push('\\'),
            Some('n') => result.push('\n'),
            Some('r') => result.push('\r'),
            Some('t') => result.push('\t'),
            Some('x') => {
                // Hex escape: \xNN
                let hex1 = chars.next().ok_or_else(|| {
                    "Incomplete hex escape sequence '\\x' - expected 2 hex digits".to_string()
                })?;
                let hex2 = chars.next().ok_or_else(|| {
                    format!(
                        "Incomplete hex escape sequence '\\x{}' - expected 2 hex digits",
                        hex1
                    )
                })?;

                // Validate each character on its own: a radix parser over the
                // two-character string would also accept a leading '+' (e.g. `\x+1`)
                let decoded = hex1
                    .to_digit(16)
                    .zip(hex2.to_digit(16))
                    .and_then(|(hi, lo)| char::from_u32(hi * 16 + lo));

                match decoded {
                    Some(c) => result.push(c),
                    None => {
                        return Err(format!(
                            "Invalid hex escape sequence '\\x{}{}' - expected 2 hex digits (00-FF)",
                            hex1, hex2
                        ));
                    }
                }
            }
            Some(c) => {
                return Err(format!(
                    "Unknown escape sequence '\\{}' in string literal. Supported: \\\" \\\\ \\n \\r \\t \\xNN",
                    c
                ));
            }
            None => {
                return Err("String ends with incomplete escape sequence '\\'".to_string());
            }
        }
    }

    Ok(result)
}
1248
1249fn tokenize(source: &str) -> Vec<Token> {
1250    let mut tokens = Vec::new();
1251    let mut current = String::new();
1252    let mut current_start_line = 0;
1253    let mut current_start_col = 0;
1254    let mut in_string = false;
1255    let mut prev_was_backslash = false;
1256
1257    // Track current position (0-indexed)
1258    let mut line = 0;
1259    let mut col = 0;
1260
1261    for ch in source.chars() {
1262        if in_string {
1263            current.push(ch);
1264            if ch == '"' && !prev_was_backslash {
1265                // Unescaped quote ends the string
1266                in_string = false;
1267                tokens.push(Token::new(
1268                    current.clone(),
1269                    current_start_line,
1270                    current_start_col,
1271                ));
1272                current.clear();
1273                prev_was_backslash = false;
1274            } else if ch == '\\' && !prev_was_backslash {
1275                // Start of escape sequence
1276                prev_was_backslash = true;
1277            } else {
1278                // Regular character or escaped character
1279                prev_was_backslash = false;
1280            }
1281            // Track newlines inside strings
1282            if ch == '\n' {
1283                line += 1;
1284                col = 0;
1285            } else {
1286                col += 1;
1287            }
1288        } else if ch == '"' {
1289            if !current.is_empty() {
1290                tokens.push(Token::new(
1291                    current.clone(),
1292                    current_start_line,
1293                    current_start_col,
1294                ));
1295                current.clear();
1296            }
1297            in_string = true;
1298            current_start_line = line;
1299            current_start_col = col;
1300            current.push(ch);
1301            prev_was_backslash = false;
1302            col += 1;
1303        } else if ch.is_whitespace() {
1304            if !current.is_empty() {
1305                tokens.push(Token::new(
1306                    current.clone(),
1307                    current_start_line,
1308                    current_start_col,
1309                ));
1310                current.clear();
1311            }
1312            // Preserve newlines for comment handling
1313            if ch == '\n' {
1314                tokens.push(Token::new("\n".to_string(), line, col));
1315                line += 1;
1316                col = 0;
1317            } else {
1318                col += 1;
1319            }
1320        } else if "():;[]{},".contains(ch) {
1321            if !current.is_empty() {
1322                tokens.push(Token::new(
1323                    current.clone(),
1324                    current_start_line,
1325                    current_start_col,
1326                ));
1327                current.clear();
1328            }
1329            tokens.push(Token::new(ch.to_string(), line, col));
1330            col += 1;
1331        } else {
1332            if current.is_empty() {
1333                current_start_line = line;
1334                current_start_col = col;
1335            }
1336            current.push(ch);
1337            col += 1;
1338        }
1339    }
1340
1341    // Check for unclosed string literal
1342    if in_string {
1343        // Return error by adding a special error token
1344        // The parser will handle this as a parse error
1345        tokens.push(Token::new(
1346            "<<<UNCLOSED_STRING>>>".to_string(),
1347            current_start_line,
1348            current_start_col,
1349        ));
1350    } else if !current.is_empty() {
1351        tokens.push(Token::new(current, current_start_line, current_start_col));
1352    }
1353
1354    tokens
1355}
1356
1357#[cfg(test)]
1358mod tests {
1359    use super::*;
1360
1361    #[test]
1362    fn test_parse_hello_world() {
1363        let source = r#"
1364: main ( -- )
1365  "Hello, World!" write_line ;
1366"#;
1367
1368        let mut parser = Parser::new(source);
1369        let program = parser.parse().unwrap();
1370
1371        assert_eq!(program.words.len(), 1);
1372        assert_eq!(program.words[0].name, "main");
1373        assert_eq!(program.words[0].body.len(), 2);
1374
1375        match &program.words[0].body[0] {
1376            Statement::StringLiteral(s) => assert_eq!(s, "Hello, World!"),
1377            _ => panic!("Expected StringLiteral"),
1378        }
1379
1380        match &program.words[0].body[1] {
1381            Statement::WordCall { name, .. } => assert_eq!(name, "write_line"),
1382            _ => panic!("Expected WordCall"),
1383        }
1384    }
1385
1386    #[test]
1387    fn test_parse_with_numbers() {
1388        let source = ": add-example ( -- ) 2 3 add ;";
1389
1390        let mut parser = Parser::new(source);
1391        let program = parser.parse().unwrap();
1392
1393        assert_eq!(program.words[0].body.len(), 3);
1394        assert_eq!(program.words[0].body[0], Statement::IntLiteral(2));
1395        assert_eq!(program.words[0].body[1], Statement::IntLiteral(3));
1396        assert!(matches!(
1397            &program.words[0].body[2],
1398            Statement::WordCall { name, .. } if name == "add"
1399        ));
1400    }
1401
1402    #[test]
1403    fn test_parse_hex_literals() {
1404        let source = ": test ( -- ) 0xFF 0x10 0X1A ;";
1405        let mut parser = Parser::new(source);
1406        let program = parser.parse().unwrap();
1407
1408        assert_eq!(program.words[0].body[0], Statement::IntLiteral(255));
1409        assert_eq!(program.words[0].body[1], Statement::IntLiteral(16));
1410        assert_eq!(program.words[0].body[2], Statement::IntLiteral(26));
1411    }
1412
1413    #[test]
1414    fn test_parse_binary_literals() {
1415        let source = ": test ( -- ) 0b1010 0B1111 0b0 ;";
1416        let mut parser = Parser::new(source);
1417        let program = parser.parse().unwrap();
1418
1419        assert_eq!(program.words[0].body[0], Statement::IntLiteral(10));
1420        assert_eq!(program.words[0].body[1], Statement::IntLiteral(15));
1421        assert_eq!(program.words[0].body[2], Statement::IntLiteral(0));
1422    }
1423
1424    #[test]
1425    fn test_parse_invalid_hex_literal() {
1426        let source = ": test ( -- ) 0xGG ;";
1427        let mut parser = Parser::new(source);
1428        let err = parser.parse().unwrap_err();
1429        assert!(err.contains("Invalid hex literal"));
1430    }
1431
1432    #[test]
1433    fn test_parse_invalid_binary_literal() {
1434        let source = ": test ( -- ) 0b123 ;";
1435        let mut parser = Parser::new(source);
1436        let err = parser.parse().unwrap_err();
1437        assert!(err.contains("Invalid binary literal"));
1438    }
1439
1440    #[test]
1441    fn test_parse_escaped_quotes() {
1442        let source = r#": main ( -- ) "Say \"hello\" there" write_line ;"#;
1443
1444        let mut parser = Parser::new(source);
1445        let program = parser.parse().unwrap();
1446
1447        assert_eq!(program.words.len(), 1);
1448        assert_eq!(program.words[0].body.len(), 2);
1449
1450        match &program.words[0].body[0] {
1451            // Escape sequences should be processed: \" becomes actual quote
1452            Statement::StringLiteral(s) => assert_eq!(s, "Say \"hello\" there"),
1453            _ => panic!("Expected StringLiteral with escaped quotes"),
1454        }
1455    }
1456
1457    /// Regression test for issue #117: escaped quote at end of string
1458    /// Previously failed with "String ends with incomplete escape sequence"
1459    #[test]
1460    fn test_escaped_quote_at_end_of_string() {
1461        let source = r#": main ( -- ) "hello\"" io.write-line ;"#;
1462
1463        let mut parser = Parser::new(source);
1464        let program = parser.parse().unwrap();
1465
1466        assert_eq!(program.words.len(), 1);
1467        match &program.words[0].body[0] {
1468            Statement::StringLiteral(s) => assert_eq!(s, "hello\""),
1469            _ => panic!("Expected StringLiteral ending with escaped quote"),
1470        }
1471    }
1472
1473    /// Test escaped quote at start of string (boundary case)
1474    #[test]
1475    fn test_escaped_quote_at_start_of_string() {
1476        let source = r#": main ( -- ) "\"hello" io.write-line ;"#;
1477
1478        let mut parser = Parser::new(source);
1479        let program = parser.parse().unwrap();
1480
1481        match &program.words[0].body[0] {
1482            Statement::StringLiteral(s) => assert_eq!(s, "\"hello"),
1483            _ => panic!("Expected StringLiteral starting with escaped quote"),
1484        }
1485    }
1486
1487    #[test]
1488    fn test_escape_sequences() {
1489        let source = r#": main ( -- ) "Line 1\nLine 2\tTabbed" write_line ;"#;
1490
1491        let mut parser = Parser::new(source);
1492        let program = parser.parse().unwrap();
1493
1494        match &program.words[0].body[0] {
1495            Statement::StringLiteral(s) => assert_eq!(s, "Line 1\nLine 2\tTabbed"),
1496            _ => panic!("Expected StringLiteral"),
1497        }
1498    }
1499
1500    #[test]
1501    fn test_unknown_escape_sequence() {
1502        let source = r#": main ( -- ) "Bad \q sequence" write_line ;"#;
1503
1504        let mut parser = Parser::new(source);
1505        let result = parser.parse();
1506
1507        assert!(result.is_err());
1508        assert!(result.unwrap_err().contains("Unknown escape sequence"));
1509    }
1510
1511    #[test]
1512    fn test_hex_escape_sequence() {
1513        // \x1b is ESC (27), \x41 is 'A' (65)
1514        let source = r#": main ( -- ) "\x1b[2K\x41" io.write-line ;"#;
1515
1516        let mut parser = Parser::new(source);
1517        let program = parser.parse().unwrap();
1518
1519        match &program.words[0].body[0] {
1520            Statement::StringLiteral(s) => {
1521                assert_eq!(s.len(), 5); // ESC [ 2 K A
1522                assert_eq!(s.as_bytes()[0], 0x1b); // ESC
1523                assert_eq!(s.as_bytes()[4], 0x41); // 'A'
1524            }
1525            _ => panic!("Expected StringLiteral"),
1526        }
1527    }
1528
1529    #[test]
1530    fn test_hex_escape_null_byte() {
1531        let source = r#": main ( -- ) "before\x00after" io.write-line ;"#;
1532
1533        let mut parser = Parser::new(source);
1534        let program = parser.parse().unwrap();
1535
1536        match &program.words[0].body[0] {
1537            Statement::StringLiteral(s) => {
1538                assert_eq!(s.len(), 12); // "before" + NUL + "after"
1539                assert_eq!(s.as_bytes()[6], 0x00);
1540            }
1541            _ => panic!("Expected StringLiteral"),
1542        }
1543    }
1544
1545    #[test]
1546    fn test_hex_escape_uppercase() {
1547        // Both uppercase and lowercase hex digits should work
1548        // Note: Values > 0x7F become Unicode code points (U+00NN), multi-byte in UTF-8
1549        let source = r#": main ( -- ) "\x41\x42\x4F" io.write-line ;"#;
1550
1551        let mut parser = Parser::new(source);
1552        let program = parser.parse().unwrap();
1553
1554        match &program.words[0].body[0] {
1555            Statement::StringLiteral(s) => {
1556                assert_eq!(s, "ABO"); // 0x41='A', 0x42='B', 0x4F='O'
1557            }
1558            _ => panic!("Expected StringLiteral"),
1559        }
1560    }
1561
1562    #[test]
1563    fn test_hex_escape_high_bytes() {
1564        // Values > 0x7F become Unicode code points (Latin-1), which are multi-byte in UTF-8
1565        let source = r#": main ( -- ) "\xFF" io.write-line ;"#;
1566
1567        let mut parser = Parser::new(source);
1568        let program = parser.parse().unwrap();
1569
1570        match &program.words[0].body[0] {
1571            Statement::StringLiteral(s) => {
1572                // \xFF becomes U+00FF (ÿ), which is 2 bytes in UTF-8: C3 BF
1573                assert_eq!(s, "\u{00FF}");
1574                assert_eq!(s.chars().next().unwrap(), 'ÿ');
1575            }
1576            _ => panic!("Expected StringLiteral"),
1577        }
1578    }
1579
1580    #[test]
1581    fn test_hex_escape_incomplete() {
1582        // \x with only one hex digit
1583        let source = r#": main ( -- ) "\x1" io.write-line ;"#;
1584
1585        let mut parser = Parser::new(source);
1586        let result = parser.parse();
1587
1588        assert!(result.is_err());
1589        assert!(result.unwrap_err().contains("Incomplete hex escape"));
1590    }
1591
1592    #[test]
1593    fn test_hex_escape_invalid_digits() {
1594        // \xGG is not valid hex
1595        let source = r#": main ( -- ) "\xGG" io.write-line ;"#;
1596
1597        let mut parser = Parser::new(source);
1598        let result = parser.parse();
1599
1600        assert!(result.is_err());
1601        assert!(result.unwrap_err().contains("Invalid hex escape"));
1602    }
1603
1604    #[test]
1605    fn test_hex_escape_at_end_of_string() {
1606        // \x at end of string with no digits
1607        let source = r#": main ( -- ) "test\x" io.write-line ;"#;
1608
1609        let mut parser = Parser::new(source);
1610        let result = parser.parse();
1611
1612        assert!(result.is_err());
1613        assert!(result.unwrap_err().contains("Incomplete hex escape"));
1614    }
1615
1616    #[test]
1617    fn test_unclosed_string_literal() {
1618        let source = r#": main ( -- ) "unclosed string ;"#;
1619
1620        let mut parser = Parser::new(source);
1621        let result = parser.parse();
1622
1623        assert!(result.is_err());
1624        let err_msg = result.unwrap_err();
1625        assert!(err_msg.contains("Unclosed string literal"));
1626        // Should include position information (line 1, column 15 for the opening quote)
1627        assert!(
1628            err_msg.contains("line 1"),
1629            "Expected line number in error: {}",
1630            err_msg
1631        );
1632        assert!(
1633            err_msg.contains("column 15"),
1634            "Expected column number in error: {}",
1635            err_msg
1636        );
1637    }
1638
1639    #[test]
1640    fn test_multiple_word_definitions() {
1641        let source = r#"
1642: double ( Int -- Int )
1643  2 multiply ;
1644
1645: quadruple ( Int -- Int )
1646  double double ;
1647"#;
1648
1649        let mut parser = Parser::new(source);
1650        let program = parser.parse().unwrap();
1651
1652        assert_eq!(program.words.len(), 2);
1653        assert_eq!(program.words[0].name, "double");
1654        assert_eq!(program.words[1].name, "quadruple");
1655
1656        // Verify stack effects were parsed
1657        assert!(program.words[0].effect.is_some());
1658        assert!(program.words[1].effect.is_some());
1659    }
1660
1661    #[test]
1662    fn test_user_word_calling_user_word() {
1663        let source = r#"
1664: helper ( -- )
1665  "helper called" write_line ;
1666
1667: main ( -- )
1668  helper ;
1669"#;
1670
1671        let mut parser = Parser::new(source);
1672        let program = parser.parse().unwrap();
1673
1674        assert_eq!(program.words.len(), 2);
1675
1676        // Check main calls helper
1677        match &program.words[1].body[0] {
1678            Statement::WordCall { name, .. } => assert_eq!(name, "helper"),
1679            _ => panic!("Expected WordCall to helper"),
1680        }
1681    }
1682
1683    #[test]
1684    fn test_parse_simple_stack_effect() {
1685        // Test: ( Int -- Bool )
1686        // With implicit row polymorphism, this becomes: ( ..rest Int -- ..rest Bool )
1687        let source = ": test ( Int -- Bool ) 1 ;";
1688        let mut parser = Parser::new(source);
1689        let program = parser.parse().unwrap();
1690
1691        assert_eq!(program.words.len(), 1);
1692        let word = &program.words[0];
1693        assert!(word.effect.is_some());
1694
1695        let effect = word.effect.as_ref().unwrap();
1696
1697        // Input: Int on RowVar("rest") (implicit row polymorphism)
1698        assert_eq!(
1699            effect.inputs,
1700            StackType::Cons {
1701                rest: Box::new(StackType::RowVar("rest".to_string())),
1702                top: Type::Int
1703            }
1704        );
1705
1706        // Output: Bool on RowVar("rest") (implicit row polymorphism)
1707        assert_eq!(
1708            effect.outputs,
1709            StackType::Cons {
1710                rest: Box::new(StackType::RowVar("rest".to_string())),
1711                top: Type::Bool
1712            }
1713        );
1714    }
1715
1716    #[test]
1717    fn test_parse_row_polymorphic_stack_effect() {
1718        // Test: ( ..a Int -- ..a Bool )
1719        let source = ": test ( ..a Int -- ..a Bool ) 1 ;";
1720        let mut parser = Parser::new(source);
1721        let program = parser.parse().unwrap();
1722
1723        assert_eq!(program.words.len(), 1);
1724        let word = &program.words[0];
1725        assert!(word.effect.is_some());
1726
1727        let effect = word.effect.as_ref().unwrap();
1728
1729        // Input: Int on RowVar("a")
1730        assert_eq!(
1731            effect.inputs,
1732            StackType::Cons {
1733                rest: Box::new(StackType::RowVar("a".to_string())),
1734                top: Type::Int
1735            }
1736        );
1737
1738        // Output: Bool on RowVar("a")
1739        assert_eq!(
1740            effect.outputs,
1741            StackType::Cons {
1742                rest: Box::new(StackType::RowVar("a".to_string())),
1743                top: Type::Bool
1744            }
1745        );
1746    }
1747
1748    #[test]
1749    fn test_parse_invalid_row_var_starts_with_digit() {
1750        // Test: Row variable cannot start with digit
1751        let source = ": test ( ..123 Int -- ) ;";
1752        let mut parser = Parser::new(source);
1753        let result = parser.parse();
1754
1755        assert!(result.is_err());
1756        let err_msg = result.unwrap_err();
1757        assert!(
1758            err_msg.contains("lowercase letter"),
1759            "Expected error about lowercase letter, got: {}",
1760            err_msg
1761        );
1762    }
1763
1764    #[test]
1765    fn test_parse_invalid_row_var_starts_with_uppercase() {
1766        // Test: Row variable cannot start with uppercase (that's a type variable)
1767        let source = ": test ( ..Int Int -- ) ;";
1768        let mut parser = Parser::new(source);
1769        let result = parser.parse();
1770
1771        assert!(result.is_err());
1772        let err_msg = result.unwrap_err();
1773        assert!(
1774            err_msg.contains("lowercase letter") || err_msg.contains("type name"),
1775            "Expected error about lowercase letter or type name, got: {}",
1776            err_msg
1777        );
1778    }
1779
1780    #[test]
1781    fn test_parse_invalid_row_var_with_special_chars() {
1782        // Test: Row variable cannot contain special characters
1783        let source = ": test ( ..a-b Int -- ) ;";
1784        let mut parser = Parser::new(source);
1785        let result = parser.parse();
1786
1787        assert!(result.is_err());
1788        let err_msg = result.unwrap_err();
1789        assert!(
1790            err_msg.contains("letters, numbers, and underscores")
1791                || err_msg.contains("Unknown type"),
1792            "Expected error about valid characters, got: {}",
1793            err_msg
1794        );
1795    }
1796
1797    #[test]
1798    fn test_parse_valid_row_var_with_underscore() {
1799        // Test: Row variable CAN contain underscore
1800        let source = ": test ( ..my_row Int -- ..my_row Bool ) ;";
1801        let mut parser = Parser::new(source);
1802        let result = parser.parse();
1803
1804        assert!(result.is_ok(), "Should accept row variable with underscore");
1805    }
1806
1807    #[test]
1808    fn test_parse_multiple_types_stack_effect() {
1809        // Test: ( Int String -- Bool )
1810        // With implicit row polymorphism: ( ..rest Int String -- ..rest Bool )
1811        let source = ": test ( Int String -- Bool ) 1 ;";
1812        let mut parser = Parser::new(source);
1813        let program = parser.parse().unwrap();
1814
1815        let effect = program.words[0].effect.as_ref().unwrap();
1816
1817        // Input: String on Int on RowVar("rest")
1818        let (rest, top) = effect.inputs.clone().pop().unwrap();
1819        assert_eq!(top, Type::String);
1820        let (rest2, top2) = rest.pop().unwrap();
1821        assert_eq!(top2, Type::Int);
1822        assert_eq!(rest2, StackType::RowVar("rest".to_string()));
1823
1824        // Output: Bool on RowVar("rest") (implicit row polymorphism)
1825        assert_eq!(
1826            effect.outputs,
1827            StackType::Cons {
1828                rest: Box::new(StackType::RowVar("rest".to_string())),
1829                top: Type::Bool
1830            }
1831        );
1832    }
1833
1834    #[test]
1835    fn test_parse_type_variable() {
1836        // Test: ( ..a T -- ..a T T ) for dup
1837        let source = ": dup ( ..a T -- ..a T T ) ;";
1838        let mut parser = Parser::new(source);
1839        let program = parser.parse().unwrap();
1840
1841        let effect = program.words[0].effect.as_ref().unwrap();
1842
1843        // Input: T on RowVar("a")
1844        assert_eq!(
1845            effect.inputs,
1846            StackType::Cons {
1847                rest: Box::new(StackType::RowVar("a".to_string())),
1848                top: Type::Var("T".to_string())
1849            }
1850        );
1851
1852        // Output: T on T on RowVar("a")
1853        let (rest, top) = effect.outputs.clone().pop().unwrap();
1854        assert_eq!(top, Type::Var("T".to_string()));
1855        let (rest2, top2) = rest.pop().unwrap();
1856        assert_eq!(top2, Type::Var("T".to_string()));
1857        assert_eq!(rest2, StackType::RowVar("a".to_string()));
1858    }
1859
1860    #[test]
1861    fn test_parse_empty_stack_effect() {
1862        // Test: ( -- )
1863        // In concatenative languages, even empty effects are row-polymorphic
1864        // ( -- ) means ( ..rest -- ..rest ) - preserves stack
1865        let source = ": test ( -- ) ;";
1866        let mut parser = Parser::new(source);
1867        let program = parser.parse().unwrap();
1868
1869        let effect = program.words[0].effect.as_ref().unwrap();
1870
1871        // Both inputs and outputs should use the same implicit row variable
1872        assert_eq!(effect.inputs, StackType::RowVar("rest".to_string()));
1873        assert_eq!(effect.outputs, StackType::RowVar("rest".to_string()));
1874    }
1875
1876    #[test]
1877    fn test_parse_invalid_type() {
1878        // Test invalid type (lowercase, not a row var)
1879        let source = ": test ( invalid -- Bool ) ;";
1880        let mut parser = Parser::new(source);
1881        let result = parser.parse();
1882
1883        assert!(result.is_err());
1884        assert!(result.unwrap_err().contains("Unknown type"));
1885    }
1886
1887    #[test]
1888    fn test_parse_unclosed_stack_effect() {
1889        // Test unclosed stack effect - parser tries to parse all tokens until ')' or EOF
1890        // In this case, it encounters "body" which is an invalid type
1891        let source = ": test ( Int -- Bool body ;";
1892        let mut parser = Parser::new(source);
1893        let result = parser.parse();
1894
1895        assert!(result.is_err());
1896        let err_msg = result.unwrap_err();
1897        // Parser will try to parse "body" as a type and fail
1898        assert!(err_msg.contains("Unknown type"));
1899    }
1900
1901    #[test]
1902    fn test_parse_simple_quotation_type() {
1903        // Test: ( [Int -- Int] -- )
1904        let source = ": apply ( [Int -- Int] -- ) ;";
1905        let mut parser = Parser::new(source);
1906        let program = parser.parse().unwrap();
1907
1908        let effect = program.words[0].effect.as_ref().unwrap();
1909
1910        // Input should be: Quotation(Int -- Int) on RowVar("rest")
1911        let (rest, top) = effect.inputs.clone().pop().unwrap();
1912        match top {
1913            Type::Quotation(quot_effect) => {
1914                // Check quotation's input: Int on RowVar("rest")
1915                assert_eq!(
1916                    quot_effect.inputs,
1917                    StackType::Cons {
1918                        rest: Box::new(StackType::RowVar("rest".to_string())),
1919                        top: Type::Int
1920                    }
1921                );
1922                // Check quotation's output: Int on RowVar("rest")
1923                assert_eq!(
1924                    quot_effect.outputs,
1925                    StackType::Cons {
1926                        rest: Box::new(StackType::RowVar("rest".to_string())),
1927                        top: Type::Int
1928                    }
1929                );
1930            }
1931            _ => panic!("Expected Quotation type, got {:?}", top),
1932        }
1933        assert_eq!(rest, StackType::RowVar("rest".to_string()));
1934    }
1935
1936    #[test]
1937    fn test_parse_quotation_type_with_row_vars() {
1938        // Test: ( ..a [..a T -- ..a Bool] -- ..a )
1939        let source = ": test ( ..a [..a T -- ..a Bool] -- ..a ) ;";
1940        let mut parser = Parser::new(source);
1941        let program = parser.parse().unwrap();
1942
1943        let effect = program.words[0].effect.as_ref().unwrap();
1944
1945        // Input: Quotation on RowVar("a")
1946        let (rest, top) = effect.inputs.clone().pop().unwrap();
1947        match top {
1948            Type::Quotation(quot_effect) => {
1949                // Check quotation's input: T on RowVar("a")
1950                let (q_in_rest, q_in_top) = quot_effect.inputs.clone().pop().unwrap();
1951                assert_eq!(q_in_top, Type::Var("T".to_string()));
1952                assert_eq!(q_in_rest, StackType::RowVar("a".to_string()));
1953
1954                // Check quotation's output: Bool on RowVar("a")
1955                let (q_out_rest, q_out_top) = quot_effect.outputs.clone().pop().unwrap();
1956                assert_eq!(q_out_top, Type::Bool);
1957                assert_eq!(q_out_rest, StackType::RowVar("a".to_string()));
1958            }
1959            _ => panic!("Expected Quotation type, got {:?}", top),
1960        }
1961        assert_eq!(rest, StackType::RowVar("a".to_string()));
1962    }
1963
1964    #[test]
1965    fn test_parse_nested_quotation_type() {
1966        // Test: ( [[Int -- Int] -- Bool] -- )
1967        let source = ": nested ( [[Int -- Int] -- Bool] -- ) ;";
1968        let mut parser = Parser::new(source);
1969        let program = parser.parse().unwrap();
1970
1971        let effect = program.words[0].effect.as_ref().unwrap();
1972
1973        // Input: Quotation([Int -- Int] -- Bool) on RowVar("rest")
1974        let (_, top) = effect.inputs.clone().pop().unwrap();
1975        match top {
1976            Type::Quotation(outer_effect) => {
1977                // Outer quotation input: [Int -- Int] on RowVar("rest")
1978                let (_, outer_in_top) = outer_effect.inputs.clone().pop().unwrap();
1979                match outer_in_top {
1980                    Type::Quotation(inner_effect) => {
1981                        // Inner quotation: Int -- Int
1982                        assert!(matches!(
1983                            inner_effect.inputs.clone().pop().unwrap().1,
1984                            Type::Int
1985                        ));
1986                        assert!(matches!(
1987                            inner_effect.outputs.clone().pop().unwrap().1,
1988                            Type::Int
1989                        ));
1990                    }
1991                    _ => panic!("Expected nested Quotation type"),
1992                }
1993
1994                // Outer quotation output: Bool
1995                let (_, outer_out_top) = outer_effect.outputs.clone().pop().unwrap();
1996                assert_eq!(outer_out_top, Type::Bool);
1997            }
1998            _ => panic!("Expected Quotation type"),
1999        }
2000    }
2001
2002    #[test]
2003    fn test_parse_deeply_nested_quotation_type_exceeds_limit() {
2004        // Test: Deeply nested quotation types should fail with max depth error
2005        // Build a quotation type nested 35 levels deep (exceeds MAX_QUOTATION_DEPTH = 32)
2006        let mut source = String::from(": deep ( ");
2007
2008        // Build opening brackets: [[[[[[...
2009        for _ in 0..35 {
2010            source.push_str("[ -- ");
2011        }
2012
2013        source.push_str("Int");
2014
2015        // Build closing brackets: ...]]]]]]
2016        for _ in 0..35 {
2017            source.push_str(" ]");
2018        }
2019
2020        source.push_str(" -- ) ;");
2021
2022        let mut parser = Parser::new(&source);
2023        let result = parser.parse();
2024
2025        // Should fail with depth limit error
2026        assert!(result.is_err());
2027        let err_msg = result.unwrap_err();
2028        assert!(
2029            err_msg.contains("depth") || err_msg.contains("32"),
2030            "Expected depth limit error, got: {}",
2031            err_msg
2032        );
2033    }
2034
2035    #[test]
2036    fn test_parse_empty_quotation_type() {
2037        // Test: ( [ -- ] -- )
2038        // An empty quotation type is also row-polymorphic: [ ..rest -- ..rest ]
2039        let source = ": empty-quot ( [ -- ] -- ) ;";
2040        let mut parser = Parser::new(source);
2041        let program = parser.parse().unwrap();
2042
2043        let effect = program.words[0].effect.as_ref().unwrap();
2044
2045        let (_, top) = effect.inputs.clone().pop().unwrap();
2046        match top {
2047            Type::Quotation(quot_effect) => {
2048                // Empty quotation preserves the stack (row-polymorphic)
2049                assert_eq!(quot_effect.inputs, StackType::RowVar("rest".to_string()));
2050                assert_eq!(quot_effect.outputs, StackType::RowVar("rest".to_string()));
2051            }
2052            _ => panic!("Expected Quotation type"),
2053        }
2054    }
2055
2056    #[test]
2057    fn test_parse_quotation_type_in_output() {
2058        // Test: ( -- [Int -- Int] )
2059        let source = ": maker ( -- [Int -- Int] ) ;";
2060        let mut parser = Parser::new(source);
2061        let program = parser.parse().unwrap();
2062
2063        let effect = program.words[0].effect.as_ref().unwrap();
2064
2065        // Output should be: Quotation(Int -- Int) on RowVar("rest")
2066        let (_, top) = effect.outputs.clone().pop().unwrap();
2067        match top {
2068            Type::Quotation(quot_effect) => {
2069                assert!(matches!(
2070                    quot_effect.inputs.clone().pop().unwrap().1,
2071                    Type::Int
2072                ));
2073                assert!(matches!(
2074                    quot_effect.outputs.clone().pop().unwrap().1,
2075                    Type::Int
2076                ));
2077            }
2078            _ => panic!("Expected Quotation type"),
2079        }
2080    }
2081
2082    #[test]
2083    fn test_parse_unclosed_quotation_type() {
2084        // Test: ( [Int -- Int -- )  (missing ])
2085        let source = ": broken ( [Int -- Int -- ) ;";
2086        let mut parser = Parser::new(source);
2087        let result = parser.parse();
2088
2089        assert!(result.is_err());
2090        let err_msg = result.unwrap_err();
2091        // Parser might error with various messages depending on where it fails
2092        // It should at least indicate a parsing problem
2093        assert!(
2094            err_msg.contains("Unclosed")
2095                || err_msg.contains("Expected")
2096                || err_msg.contains("Unexpected"),
2097            "Got error: {}",
2098            err_msg
2099        );
2100    }
2101
2102    #[test]
2103    fn test_parse_multiple_quotation_types() {
2104        // Test: ( [Int -- Int] [String -- Bool] -- )
2105        let source = ": multi ( [Int -- Int] [String -- Bool] -- ) ;";
2106        let mut parser = Parser::new(source);
2107        let program = parser.parse().unwrap();
2108
2109        let effect = program.words[0].effect.as_ref().unwrap();
2110
2111        // Pop second quotation (String -- Bool)
2112        let (rest, top) = effect.inputs.clone().pop().unwrap();
2113        match top {
2114            Type::Quotation(quot_effect) => {
2115                assert!(matches!(
2116                    quot_effect.inputs.clone().pop().unwrap().1,
2117                    Type::String
2118                ));
2119                assert!(matches!(
2120                    quot_effect.outputs.clone().pop().unwrap().1,
2121                    Type::Bool
2122                ));
2123            }
2124            _ => panic!("Expected Quotation type"),
2125        }
2126
2127        // Pop first quotation (Int -- Int)
2128        let (_, top2) = rest.pop().unwrap();
2129        match top2 {
2130            Type::Quotation(quot_effect) => {
2131                assert!(matches!(
2132                    quot_effect.inputs.clone().pop().unwrap().1,
2133                    Type::Int
2134                ));
2135                assert!(matches!(
2136                    quot_effect.outputs.clone().pop().unwrap().1,
2137                    Type::Int
2138                ));
2139            }
2140            _ => panic!("Expected Quotation type"),
2141        }
2142    }
2143
2144    #[test]
2145    fn test_parse_quotation_type_without_separator() {
2146        // Test: ( [Int] -- ) should be REJECTED
2147        //
2148        // Design decision: The '--' separator is REQUIRED for clarity.
2149        // [Int] looks like a list type in most languages, not a consumer function.
2150        // This would confuse users.
2151        //
2152        // Require explicit syntax:
2153        // - `[Int -- ]` for quotation that consumes Int and produces nothing
2154        // - `[ -- Int]` for quotation that produces Int
2155        // - `[Int -- Int]` for transformation
2156        let source = ": consumer ( [Int] -- ) ;";
2157        let mut parser = Parser::new(source);
2158        let result = parser.parse();
2159
2160        // Should fail with helpful error message
2161        assert!(result.is_err());
2162        let err_msg = result.unwrap_err();
2163        assert!(
2164            err_msg.contains("require") && err_msg.contains("--"),
2165            "Expected error about missing '--' separator, got: {}",
2166            err_msg
2167        );
2168    }
2169
2170    #[test]
2171    fn test_parse_bare_quotation_type_rejected() {
2172        // Test: ( Int Quotation -- Int ) should be REJECTED
2173        //
2174        // 'Quotation' looks like a type name but would be silently treated as a
2175        // type variable without this check. Users must use explicit effect syntax.
2176        let source = ": apply-twice ( Int Quotation -- Int ) ;";
2177        let mut parser = Parser::new(source);
2178        let result = parser.parse();
2179
2180        assert!(result.is_err());
2181        let err_msg = result.unwrap_err();
2182        assert!(
2183            err_msg.contains("Quotation") && err_msg.contains("not a valid type"),
2184            "Expected error about 'Quotation' not being valid, got: {}",
2185            err_msg
2186        );
2187        assert!(
2188            err_msg.contains("[Int -- Int]") || err_msg.contains("[ -- ]"),
2189            "Expected error to suggest explicit syntax, got: {}",
2190            err_msg
2191        );
2192    }
2193
2194    #[test]
2195    fn test_parse_no_stack_effect() {
2196        // Test word without stack effect (should still work)
2197        let source = ": test 1 2 add ;";
2198        let mut parser = Parser::new(source);
2199        let program = parser.parse().unwrap();
2200
2201        assert_eq!(program.words.len(), 1);
2202        assert!(program.words[0].effect.is_none());
2203    }
2204
2205    #[test]
2206    fn test_parse_simple_quotation() {
2207        let source = r#"
2208: test ( -- Quot )
2209  [ 1 add ] ;
2210"#;
2211
2212        let mut parser = Parser::new(source);
2213        let program = parser.parse().unwrap();
2214
2215        assert_eq!(program.words.len(), 1);
2216        assert_eq!(program.words[0].name, "test");
2217        assert_eq!(program.words[0].body.len(), 1);
2218
2219        match &program.words[0].body[0] {
2220            Statement::Quotation { body, .. } => {
2221                assert_eq!(body.len(), 2);
2222                assert_eq!(body[0], Statement::IntLiteral(1));
2223                assert!(matches!(&body[1], Statement::WordCall { name, .. } if name == "add"));
2224            }
2225            _ => panic!("Expected Quotation statement"),
2226        }
2227    }
2228
2229    #[test]
2230    fn test_parse_empty_quotation() {
2231        let source = ": test [ ] ;";
2232
2233        let mut parser = Parser::new(source);
2234        let program = parser.parse().unwrap();
2235
2236        assert_eq!(program.words.len(), 1);
2237
2238        match &program.words[0].body[0] {
2239            Statement::Quotation { body, .. } => {
2240                assert_eq!(body.len(), 0);
2241            }
2242            _ => panic!("Expected Quotation statement"),
2243        }
2244    }
2245
2246    #[test]
2247    fn test_parse_quotation_with_call() {
2248        let source = r#"
2249: test ( -- )
2250  5 [ 1 add ] call ;
2251"#;
2252
2253        let mut parser = Parser::new(source);
2254        let program = parser.parse().unwrap();
2255
2256        assert_eq!(program.words.len(), 1);
2257        assert_eq!(program.words[0].body.len(), 3);
2258
2259        assert_eq!(program.words[0].body[0], Statement::IntLiteral(5));
2260
2261        match &program.words[0].body[1] {
2262            Statement::Quotation { body, .. } => {
2263                assert_eq!(body.len(), 2);
2264            }
2265            _ => panic!("Expected Quotation"),
2266        }
2267
2268        assert!(matches!(
2269            &program.words[0].body[2],
2270            Statement::WordCall { name, .. } if name == "call"
2271        ));
2272    }
2273
2274    #[test]
2275    fn test_parse_nested_quotation() {
2276        let source = ": test [ [ 1 add ] call ] ;";
2277
2278        let mut parser = Parser::new(source);
2279        let program = parser.parse().unwrap();
2280
2281        assert_eq!(program.words.len(), 1);
2282
2283        match &program.words[0].body[0] {
2284            Statement::Quotation {
2285                body: outer_body, ..
2286            } => {
2287                assert_eq!(outer_body.len(), 2);
2288
2289                match &outer_body[0] {
2290                    Statement::Quotation {
2291                        body: inner_body, ..
2292                    } => {
2293                        assert_eq!(inner_body.len(), 2);
2294                        assert_eq!(inner_body[0], Statement::IntLiteral(1));
2295                        assert!(
2296                            matches!(&inner_body[1], Statement::WordCall { name, .. } if name == "add")
2297                        );
2298                    }
2299                    _ => panic!("Expected nested Quotation"),
2300                }
2301
2302                assert!(
2303                    matches!(&outer_body[1], Statement::WordCall { name, .. } if name == "call")
2304                );
2305            }
2306            _ => panic!("Expected Quotation"),
2307        }
2308    }
2309
2310    #[test]
2311    fn test_parse_while_with_quotations() {
2312        let source = r#"
2313: countdown ( Int -- )
2314  [ dup 0 > ] [ 1 subtract ] while drop ;
2315"#;
2316
2317        let mut parser = Parser::new(source);
2318        let program = parser.parse().unwrap();
2319
2320        assert_eq!(program.words.len(), 1);
2321        assert_eq!(program.words[0].body.len(), 4);
2322
2323        // First quotation: [ dup 0 > ]
2324        match &program.words[0].body[0] {
2325            Statement::Quotation { body: pred, .. } => {
2326                assert_eq!(pred.len(), 3);
2327                assert!(matches!(&pred[0], Statement::WordCall { name, .. } if name == "dup"));
2328                assert_eq!(pred[1], Statement::IntLiteral(0));
2329                assert!(matches!(&pred[2], Statement::WordCall { name, .. } if name == ">"));
2330            }
2331            _ => panic!("Expected predicate quotation"),
2332        }
2333
2334        // Second quotation: [ 1 subtract ]
2335        match &program.words[0].body[1] {
2336            Statement::Quotation { body, .. } => {
2337                assert_eq!(body.len(), 2);
2338                assert_eq!(body[0], Statement::IntLiteral(1));
2339                assert!(matches!(&body[1], Statement::WordCall { name, .. } if name == "subtract"));
2340            }
2341            _ => panic!("Expected body quotation"),
2342        }
2343
2344        // while call
2345        assert!(matches!(
2346            &program.words[0].body[2],
2347            Statement::WordCall { name, .. } if name == "while"
2348        ));
2349
2350        // drop
2351        assert!(matches!(
2352            &program.words[0].body[3],
2353            Statement::WordCall { name, .. } if name == "drop"
2354        ));
2355    }
2356
2357    #[test]
2358    fn test_parse_simple_closure_type() {
2359        // Test: ( Int -- Closure[Int -- Int] )
2360        let source = ": make-adder ( Int -- Closure[Int -- Int] ) ;";
2361        let mut parser = Parser::new(source);
2362        let program = parser.parse().unwrap();
2363
2364        assert_eq!(program.words.len(), 1);
2365        let word = &program.words[0];
2366        assert!(word.effect.is_some());
2367
2368        let effect = word.effect.as_ref().unwrap();
2369
2370        // Input: Int on RowVar("rest")
2371        let (input_rest, input_top) = effect.inputs.clone().pop().unwrap();
2372        assert_eq!(input_top, Type::Int);
2373        assert_eq!(input_rest, StackType::RowVar("rest".to_string()));
2374
2375        // Output: Closure[Int -- Int] on RowVar("rest")
2376        let (output_rest, output_top) = effect.outputs.clone().pop().unwrap();
2377        match output_top {
2378            Type::Closure { effect, captures } => {
2379                // Closure effect: Int -> Int
2380                assert_eq!(
2381                    effect.inputs,
2382                    StackType::Cons {
2383                        rest: Box::new(StackType::RowVar("rest".to_string())),
2384                        top: Type::Int
2385                    }
2386                );
2387                assert_eq!(
2388                    effect.outputs,
2389                    StackType::Cons {
2390                        rest: Box::new(StackType::RowVar("rest".to_string())),
2391                        top: Type::Int
2392                    }
2393                );
2394                // Captures should be empty (filled in by type checker)
2395                assert_eq!(captures.len(), 0);
2396            }
2397            _ => panic!("Expected Closure type, got {:?}", output_top),
2398        }
2399        assert_eq!(output_rest, StackType::RowVar("rest".to_string()));
2400    }
2401
2402    #[test]
2403    fn test_parse_closure_type_with_row_vars() {
2404        // Test: ( ..a Config -- ..a Closure[Request -- Response] )
2405        let source = ": make-handler ( ..a Config -- ..a Closure[Request -- Response] ) ;";
2406        let mut parser = Parser::new(source);
2407        let program = parser.parse().unwrap();
2408
2409        let effect = program.words[0].effect.as_ref().unwrap();
2410
2411        // Output: Closure on RowVar("a")
2412        let (rest, top) = effect.outputs.clone().pop().unwrap();
2413        match top {
2414            Type::Closure { effect, .. } => {
2415                // Closure effect: Request -> Response
2416                let (_, in_top) = effect.inputs.clone().pop().unwrap();
2417                assert_eq!(in_top, Type::Var("Request".to_string()));
2418                let (_, out_top) = effect.outputs.clone().pop().unwrap();
2419                assert_eq!(out_top, Type::Var("Response".to_string()));
2420            }
2421            _ => panic!("Expected Closure type"),
2422        }
2423        assert_eq!(rest, StackType::RowVar("a".to_string()));
2424    }
2425
2426    #[test]
2427    fn test_parse_closure_type_missing_bracket() {
2428        // Test: ( Int -- Closure ) should fail
2429        let source = ": broken ( Int -- Closure ) ;";
2430        let mut parser = Parser::new(source);
2431        let result = parser.parse();
2432
2433        assert!(result.is_err());
2434        let err_msg = result.unwrap_err();
2435        assert!(
2436            err_msg.contains("[") && err_msg.contains("Closure"),
2437            "Expected error about missing '[' after Closure, got: {}",
2438            err_msg
2439        );
2440    }
2441
2442    #[test]
2443    fn test_parse_closure_type_in_input() {
2444        // Test: ( Closure[Int -- Int] -- )
2445        let source = ": apply-closure ( Closure[Int -- Int] -- ) ;";
2446        let mut parser = Parser::new(source);
2447        let program = parser.parse().unwrap();
2448
2449        let effect = program.words[0].effect.as_ref().unwrap();
2450
2451        // Input: Closure[Int -- Int] on RowVar("rest")
2452        let (_, top) = effect.inputs.clone().pop().unwrap();
2453        match top {
2454            Type::Closure { effect, .. } => {
2455                // Verify closure effect
2456                assert!(matches!(effect.inputs.clone().pop().unwrap().1, Type::Int));
2457                assert!(matches!(effect.outputs.clone().pop().unwrap().1, Type::Int));
2458            }
2459            _ => panic!("Expected Closure type in input"),
2460        }
2461    }
2462
2463    // Tests for token position tracking
2464
2465    #[test]
2466    fn test_token_position_single_line() {
2467        // Test token positions on a single line
2468        let source = ": main ( -- ) ;";
2469        let tokens = tokenize(source);
2470
2471        // : is at line 0, column 0
2472        assert_eq!(tokens[0].text, ":");
2473        assert_eq!(tokens[0].line, 0);
2474        assert_eq!(tokens[0].column, 0);
2475
2476        // main is at line 0, column 2
2477        assert_eq!(tokens[1].text, "main");
2478        assert_eq!(tokens[1].line, 0);
2479        assert_eq!(tokens[1].column, 2);
2480
2481        // ( is at line 0, column 7
2482        assert_eq!(tokens[2].text, "(");
2483        assert_eq!(tokens[2].line, 0);
2484        assert_eq!(tokens[2].column, 7);
2485    }
2486
2487    #[test]
2488    fn test_token_position_multiline() {
2489        // Test token positions across multiple lines
2490        let source = ": main ( -- )\n  42\n;";
2491        let tokens = tokenize(source);
2492
2493        // Find the 42 token (after the newline)
2494        let token_42 = tokens.iter().find(|t| t.text == "42").unwrap();
2495        assert_eq!(token_42.line, 1);
2496        assert_eq!(token_42.column, 2); // After 2 spaces of indentation
2497
2498        // Find the ; token (on line 2)
2499        let token_semi = tokens.iter().find(|t| t.text == ";").unwrap();
2500        assert_eq!(token_semi.line, 2);
2501        assert_eq!(token_semi.column, 0);
2502    }
2503
2504    #[test]
2505    fn test_word_def_source_location_span() {
2506        // Test that word definitions capture correct start and end lines
2507        let source = r#": helper ( -- )
2508  "hello"
2509  write_line
2510;
2511
2512: main ( -- )
2513  helper
2514;"#;
2515
2516        let mut parser = Parser::new(source);
2517        let program = parser.parse().unwrap();
2518
2519        assert_eq!(program.words.len(), 2);
2520
2521        // First word: helper spans lines 0-3
2522        let helper = &program.words[0];
2523        assert_eq!(helper.name, "helper");
2524        let helper_source = helper.source.as_ref().unwrap();
2525        assert_eq!(helper_source.start_line, 0);
2526        assert_eq!(helper_source.end_line, 3);
2527
2528        // Second word: main spans lines 5-7
2529        let main_word = &program.words[1];
2530        assert_eq!(main_word.name, "main");
2531        let main_source = main_word.source.as_ref().unwrap();
2532        assert_eq!(main_source.start_line, 5);
2533        assert_eq!(main_source.end_line, 7);
2534    }
2535
2536    #[test]
2537    fn test_token_position_string_with_newline() {
2538        // Test that newlines inside strings are tracked correctly
2539        let source = "\"line1\\nline2\"";
2540        let tokens = tokenize(source);
2541
2542        // The string token should start at line 0, column 0
2543        assert_eq!(tokens.len(), 1);
2544        assert_eq!(tokens[0].line, 0);
2545        assert_eq!(tokens[0].column, 0);
2546    }
2547
2548    // ============================================================================
2549    //                         ADT PARSING TESTS
2550    // ============================================================================
2551
2552    #[test]
2553    fn test_parse_simple_union() {
2554        let source = r#"
2555union Message {
2556  Get { response-chan: Int }
2557  Set { value: Int }
2558}
2559
2560: main ( -- ) ;
2561"#;
2562
2563        let mut parser = Parser::new(source);
2564        let program = parser.parse().unwrap();
2565
2566        assert_eq!(program.unions.len(), 1);
2567        let union_def = &program.unions[0];
2568        assert_eq!(union_def.name, "Message");
2569        assert_eq!(union_def.variants.len(), 2);
2570
2571        // Check first variant
2572        assert_eq!(union_def.variants[0].name, "Get");
2573        assert_eq!(union_def.variants[0].fields.len(), 1);
2574        assert_eq!(union_def.variants[0].fields[0].name, "response-chan");
2575        assert_eq!(union_def.variants[0].fields[0].type_name, "Int");
2576
2577        // Check second variant
2578        assert_eq!(union_def.variants[1].name, "Set");
2579        assert_eq!(union_def.variants[1].fields.len(), 1);
2580        assert_eq!(union_def.variants[1].fields[0].name, "value");
2581        assert_eq!(union_def.variants[1].fields[0].type_name, "Int");
2582    }
2583
2584    #[test]
2585    fn test_parse_union_with_multiple_fields() {
2586        let source = r#"
2587union Report {
2588  Data { op: Int, delta: Int, total: Int }
2589  Empty
2590}
2591
2592: main ( -- ) ;
2593"#;
2594
2595        let mut parser = Parser::new(source);
2596        let program = parser.parse().unwrap();
2597
2598        assert_eq!(program.unions.len(), 1);
2599        let union_def = &program.unions[0];
2600        assert_eq!(union_def.name, "Report");
2601        assert_eq!(union_def.variants.len(), 2);
2602
2603        // Check Data variant with 3 fields
2604        let data_variant = &union_def.variants[0];
2605        assert_eq!(data_variant.name, "Data");
2606        assert_eq!(data_variant.fields.len(), 3);
2607        assert_eq!(data_variant.fields[0].name, "op");
2608        assert_eq!(data_variant.fields[1].name, "delta");
2609        assert_eq!(data_variant.fields[2].name, "total");
2610
2611        // Check Empty variant with no fields
2612        let empty_variant = &union_def.variants[1];
2613        assert_eq!(empty_variant.name, "Empty");
2614        assert_eq!(empty_variant.fields.len(), 0);
2615    }
2616
2617    #[test]
2618    fn test_parse_union_lowercase_name_error() {
2619        let source = r#"
2620union message {
2621  Get { }
2622}
2623"#;
2624
2625        let mut parser = Parser::new(source);
2626        let result = parser.parse();
2627        assert!(result.is_err());
2628        assert!(result.unwrap_err().contains("uppercase"));
2629    }
2630
2631    #[test]
2632    fn test_parse_union_empty_error() {
2633        let source = r#"
2634union Message {
2635}
2636"#;
2637
2638        let mut parser = Parser::new(source);
2639        let result = parser.parse();
2640        assert!(result.is_err());
2641        assert!(result.unwrap_err().contains("at least one variant"));
2642    }
2643
2644    #[test]
2645    fn test_parse_union_duplicate_variant_error() {
2646        let source = r#"
2647union Message {
2648  Get { x: Int }
2649  Get { y: String }
2650}
2651"#;
2652
2653        let mut parser = Parser::new(source);
2654        let result = parser.parse();
2655        assert!(result.is_err());
2656        let err = result.unwrap_err();
2657        assert!(err.contains("Duplicate variant name"));
2658        assert!(err.contains("Get"));
2659    }
2660
2661    #[test]
2662    fn test_parse_union_duplicate_field_error() {
2663        let source = r#"
2664union Data {
2665  Record { x: Int, x: String }
2666}
2667"#;
2668
2669        let mut parser = Parser::new(source);
2670        let result = parser.parse();
2671        assert!(result.is_err());
2672        let err = result.unwrap_err();
2673        assert!(err.contains("Duplicate field name"));
2674        assert!(err.contains("x"));
2675    }
2676
2677    #[test]
2678    fn test_parse_simple_match() {
2679        let source = r#"
2680: handle ( -- )
2681  match
2682    Get -> send-response
2683    Set -> process-set
2684  end
2685;
2686"#;
2687
2688        let mut parser = Parser::new(source);
2689        let program = parser.parse().unwrap();
2690
2691        assert_eq!(program.words.len(), 1);
2692        assert_eq!(program.words[0].body.len(), 1);
2693
2694        match &program.words[0].body[0] {
2695            Statement::Match { arms } => {
2696                assert_eq!(arms.len(), 2);
2697
2698                // First arm: Get ->
2699                match &arms[0].pattern {
2700                    Pattern::Variant(name) => assert_eq!(name, "Get"),
2701                    _ => panic!("Expected Variant pattern"),
2702                }
2703                assert_eq!(arms[0].body.len(), 1);
2704
2705                // Second arm: Set ->
2706                match &arms[1].pattern {
2707                    Pattern::Variant(name) => assert_eq!(name, "Set"),
2708                    _ => panic!("Expected Variant pattern"),
2709                }
2710                assert_eq!(arms[1].body.len(), 1);
2711            }
2712            _ => panic!("Expected Match statement"),
2713        }
2714    }
2715
2716    #[test]
2717    fn test_parse_match_with_bindings() {
2718        let source = r#"
2719: handle ( -- )
2720  match
2721    Get { >chan } -> chan send-response
2722    Report { >delta >total } -> delta total process
2723  end
2724;
2725"#;
2726
2727        let mut parser = Parser::new(source);
2728        let program = parser.parse().unwrap();
2729
2730        assert_eq!(program.words.len(), 1);
2731
2732        match &program.words[0].body[0] {
2733            Statement::Match { arms } => {
2734                assert_eq!(arms.len(), 2);
2735
2736                // First arm: Get { chan } ->
2737                match &arms[0].pattern {
2738                    Pattern::VariantWithBindings { name, bindings } => {
2739                        assert_eq!(name, "Get");
2740                        assert_eq!(bindings.len(), 1);
2741                        assert_eq!(bindings[0], "chan");
2742                    }
2743                    _ => panic!("Expected VariantWithBindings pattern"),
2744                }
2745
2746                // Second arm: Report { delta total } ->
2747                match &arms[1].pattern {
2748                    Pattern::VariantWithBindings { name, bindings } => {
2749                        assert_eq!(name, "Report");
2750                        assert_eq!(bindings.len(), 2);
2751                        assert_eq!(bindings[0], "delta");
2752                        assert_eq!(bindings[1], "total");
2753                    }
2754                    _ => panic!("Expected VariantWithBindings pattern"),
2755                }
2756            }
2757            _ => panic!("Expected Match statement"),
2758        }
2759    }
2760
2761    #[test]
2762    fn test_parse_match_bindings_require_prefix() {
2763        // Old syntax without > prefix should error
2764        let source = r#"
2765: handle ( -- )
2766  match
2767    Get { chan } -> chan send-response
2768  end
2769;
2770"#;
2771
2772        let mut parser = Parser::new(source);
2773        let result = parser.parse();
2774        assert!(result.is_err());
2775        let err = result.unwrap_err();
2776        assert!(err.contains(">chan"));
2777        assert!(err.contains("stack extraction"));
2778    }
2779
2780    #[test]
2781    fn test_parse_match_with_body_statements() {
2782        let source = r#"
2783: handle ( -- )
2784  match
2785    Get -> 1 2 add send-response
2786    Set -> process-value store
2787  end
2788;
2789"#;
2790
2791        let mut parser = Parser::new(source);
2792        let program = parser.parse().unwrap();
2793
2794        match &program.words[0].body[0] {
2795            Statement::Match { arms } => {
2796                // Get arm has 4 statements: 1, 2, add, send-response
2797                assert_eq!(arms[0].body.len(), 4);
2798                assert_eq!(arms[0].body[0], Statement::IntLiteral(1));
2799                assert_eq!(arms[0].body[1], Statement::IntLiteral(2));
2800                assert!(
2801                    matches!(&arms[0].body[2], Statement::WordCall { name, .. } if name == "add")
2802                );
2803
2804                // Set arm has 2 statements: process-value, store
2805                assert_eq!(arms[1].body.len(), 2);
2806            }
2807            _ => panic!("Expected Match statement"),
2808        }
2809    }
2810
2811    #[test]
2812    fn test_parse_match_empty_error() {
2813        let source = r#"
2814: handle ( -- )
2815  match
2816  end
2817;
2818"#;
2819
2820        let mut parser = Parser::new(source);
2821        let result = parser.parse();
2822        assert!(result.is_err());
2823        assert!(result.unwrap_err().contains("at least one arm"));
2824    }
2825
2826    #[test]
2827    fn test_parse_symbol_literal() {
2828        let source = r#"
2829: main ( -- )
2830    :hello drop
2831;
2832"#;
2833
2834        let mut parser = Parser::new(source);
2835        let program = parser.parse().unwrap();
2836        assert_eq!(program.words.len(), 1);
2837
2838        let main = &program.words[0];
2839        assert_eq!(main.body.len(), 2);
2840
2841        match &main.body[0] {
2842            Statement::Symbol(name) => assert_eq!(name, "hello"),
2843            _ => panic!("Expected Symbol statement, got {:?}", main.body[0]),
2844        }
2845    }
2846
2847    #[test]
2848    fn test_parse_symbol_with_hyphen() {
2849        let source = r#"
2850: main ( -- )
2851    :hello-world drop
2852;
2853"#;
2854
2855        let mut parser = Parser::new(source);
2856        let program = parser.parse().unwrap();
2857
2858        match &program.words[0].body[0] {
2859            Statement::Symbol(name) => assert_eq!(name, "hello-world"),
2860            _ => panic!("Expected Symbol statement"),
2861        }
2862    }
2863
2864    #[test]
2865    fn test_parse_symbol_starting_with_digit_fails() {
2866        let source = r#"
2867: main ( -- )
2868    :123abc drop
2869;
2870"#;
2871
2872        let mut parser = Parser::new(source);
2873        let result = parser.parse();
2874        assert!(result.is_err());
2875        assert!(result.unwrap_err().contains("cannot start with a digit"));
2876    }
2877
2878    #[test]
2879    fn test_parse_symbol_with_invalid_char_fails() {
2880        let source = r#"
2881: main ( -- )
2882    :hello@world drop
2883;
2884"#;
2885
2886        let mut parser = Parser::new(source);
2887        let result = parser.parse();
2888        assert!(result.is_err());
2889        assert!(result.unwrap_err().contains("invalid character"));
2890    }
2891
2892    #[test]
2893    fn test_parse_symbol_special_chars_allowed() {
2894        // Test that ? and ! are allowed in symbol names
2895        let source = r#"
2896: main ( -- )
2897    :empty? drop
2898    :save! drop
2899;
2900"#;
2901
2902        let mut parser = Parser::new(source);
2903        let program = parser.parse().unwrap();
2904
2905        match &program.words[0].body[0] {
2906            Statement::Symbol(name) => assert_eq!(name, "empty?"),
2907            _ => panic!("Expected Symbol statement"),
2908        }
2909        match &program.words[0].body[2] {
2910            Statement::Symbol(name) => assert_eq!(name, "save!"),
2911            _ => panic!("Expected Symbol statement"),
2912        }
2913    }
2914}