seqc/
parser.rs

1//! Simple parser for Seq syntax
2//!
3//! Syntax:
4//! ```text
5//! : word-name ( stack-effect )
6//!   statement1
7//!   statement2
8//!   ... ;
9//! ```
10
11use crate::ast::{
12    Include, MatchArm, Pattern, Program, SourceLocation, Statement, UnionDef, UnionField,
13    UnionVariant, WordDef,
14};
15use crate::types::{Effect, SideEffect, StackType, Type};
16
/// A lexical token paired with the position where it starts in the source.
#[derive(Debug, Clone)]
pub struct Token {
    /// The token's text
    pub text: String,
    /// Line number (0-indexed for LSP compatibility)
    pub line: usize,
    /// Column number (0-indexed)
    pub column: usize,
}

impl Token {
    /// Build a token from its text and its 0-indexed line/column.
    fn new(text: String, line: usize, column: usize) -> Self {
        Self { text, line, column }
    }
}

/// Compare a token's text against a `&str` (e.g. `token == "union"`).
impl PartialEq<&str> for Token {
    fn eq(&self, other: &&str) -> bool {
        self.text.as_str() == *other
    }
}

/// Compare a token's text against a `str`; position is ignored.
impl PartialEq<str> for Token {
    fn eq(&self, other: &str) -> bool {
        self.text.as_str() == other
    }
}
44
45pub struct Parser {
46    tokens: Vec<Token>,
47    pos: usize,
48    /// Counter for assigning unique IDs to quotations
49    /// Used by the typechecker to track inferred types
50    next_quotation_id: usize,
51    /// Pending lint annotations collected from `# seq:allow(lint-id)` comments
52    pending_allowed_lints: Vec<String>,
53}
54
55impl Parser {
56    pub fn new(source: &str) -> Self {
57        let tokens = tokenize(source);
58        Parser {
59            tokens,
60            pos: 0,
61            next_quotation_id: 0,
62            pending_allowed_lints: Vec::new(),
63        }
64    }
65
66    pub fn parse(&mut self) -> Result<Program, String> {
67        let mut program = Program::new();
68
69        // Check for unclosed string error from tokenizer
70        if let Some(error_token) = self.tokens.iter().find(|t| *t == "<<<UNCLOSED_STRING>>>") {
71            return Err(format!(
72                "Unclosed string literal at line {}, column {} - missing closing quote",
73                error_token.line + 1, // 1-indexed for user display
74                error_token.column + 1
75            ));
76        }
77
78        while !self.is_at_end() {
79            self.skip_comments();
80            if self.is_at_end() {
81                break;
82            }
83
84            // Check for include statement
85            if self.check("include") {
86                let include = self.parse_include()?;
87                program.includes.push(include);
88                continue;
89            }
90
91            // Check for union definition
92            if self.check("union") {
93                let union_def = self.parse_union_def()?;
94                program.unions.push(union_def);
95                continue;
96            }
97
98            let word = self.parse_word_def()?;
99            program.words.push(word);
100        }
101
102        Ok(program)
103    }
104
105    /// Parse an include statement:
106    ///   include std:http     -> Include::Std("http")
107    ///   include ffi:readline -> Include::Ffi("readline")
108    ///   include "my-utils"   -> Include::Relative("my-utils")
109    fn parse_include(&mut self) -> Result<Include, String> {
110        self.consume("include");
111
112        let token = self
113            .advance()
114            .ok_or("Expected module name after 'include'")?
115            .clone();
116
117        // Check for std: prefix (tokenizer splits this into "std", ":", "name")
118        if token == "std" {
119            // Expect : token
120            if !self.consume(":") {
121                return Err("Expected ':' after 'std' in include statement".to_string());
122            }
123            // Get the module name
124            let name = self
125                .advance()
126                .ok_or("Expected module name after 'std:'")?
127                .clone();
128            return Ok(Include::Std(name));
129        }
130
131        // Check for ffi: prefix
132        if token == "ffi" {
133            // Expect : token
134            if !self.consume(":") {
135                return Err("Expected ':' after 'ffi' in include statement".to_string());
136            }
137            // Get the library name
138            let name = self
139                .advance()
140                .ok_or("Expected library name after 'ffi:'")?
141                .clone();
142            return Ok(Include::Ffi(name));
143        }
144
145        // Check for quoted string (relative path)
146        if token.starts_with('"') && token.ends_with('"') {
147            let path = token.trim_start_matches('"').trim_end_matches('"');
148            return Ok(Include::Relative(path.to_string()));
149        }
150
151        Err(format!(
152            "Invalid include syntax '{}'. Use 'include std:name', 'include ffi:lib', or 'include \"path\"'",
153            token
154        ))
155    }
156
157    /// Parse a union type definition:
158    ///   union Message {
159    ///     Get { response-chan: Int }
160    ///     Increment { response-chan: Int }
161    ///     Report { op: Int, delta: Int, total: Int }
162    ///   }
163    fn parse_union_def(&mut self) -> Result<UnionDef, String> {
164        // Capture start line from 'union' token
165        let start_line = self.current_token().map(|t| t.line).unwrap_or(0);
166
167        // Consume 'union' keyword
168        self.consume("union");
169
170        // Get union name (must start with uppercase)
171        let name = self
172            .advance()
173            .ok_or("Expected union name after 'union'")?
174            .clone();
175
176        if !name
177            .chars()
178            .next()
179            .map(|c| c.is_uppercase())
180            .unwrap_or(false)
181        {
182            return Err(format!(
183                "Union name '{}' must start with an uppercase letter",
184                name
185            ));
186        }
187
188        // Skip comments and newlines
189        self.skip_comments();
190
191        // Expect '{'
192        if !self.consume("{") {
193            return Err(format!(
194                "Expected '{{' after union name '{}', got '{}'",
195                name,
196                self.current()
197            ));
198        }
199
200        // Parse variants until '}'
201        let mut variants = Vec::new();
202        loop {
203            self.skip_comments();
204
205            if self.check("}") {
206                break;
207            }
208
209            if self.is_at_end() {
210                return Err(format!("Unexpected end of file in union '{}'", name));
211            }
212
213            variants.push(self.parse_union_variant()?);
214        }
215
216        // Capture end line from '}' token before consuming
217        let end_line = self.current_token().map(|t| t.line).unwrap_or(start_line);
218
219        // Consume '}'
220        self.consume("}");
221
222        if variants.is_empty() {
223            return Err(format!("Union '{}' must have at least one variant", name));
224        }
225
226        // Check for duplicate variant names
227        let mut seen_variants = std::collections::HashSet::new();
228        for variant in &variants {
229            if !seen_variants.insert(&variant.name) {
230                return Err(format!(
231                    "Duplicate variant name '{}' in union '{}'",
232                    variant.name, name
233                ));
234            }
235        }
236
237        Ok(UnionDef {
238            name,
239            variants,
240            source: Some(SourceLocation::span(
241                std::path::PathBuf::new(),
242                start_line,
243                end_line,
244            )),
245        })
246    }
247
248    /// Parse a single union variant:
249    ///   Get { response-chan: Int }
250    ///   or just: Empty (no fields)
251    fn parse_union_variant(&mut self) -> Result<UnionVariant, String> {
252        let start_line = self.current_token().map(|t| t.line).unwrap_or(0);
253
254        // Get variant name (must start with uppercase)
255        let name = self.advance().ok_or("Expected variant name")?.clone();
256
257        if !name
258            .chars()
259            .next()
260            .map(|c| c.is_uppercase())
261            .unwrap_or(false)
262        {
263            return Err(format!(
264                "Variant name '{}' must start with an uppercase letter",
265                name
266            ));
267        }
268
269        self.skip_comments();
270
271        // Check for optional fields
272        let fields = if self.check("{") {
273            self.consume("{");
274            let fields = self.parse_union_fields()?;
275            if !self.consume("}") {
276                return Err(format!("Expected '}}' after variant '{}' fields", name));
277            }
278            fields
279        } else {
280            Vec::new()
281        };
282
283        Ok(UnionVariant {
284            name,
285            fields,
286            source: Some(SourceLocation::new(std::path::PathBuf::new(), start_line)),
287        })
288    }
289
290    /// Parse union fields: name: Type, name: Type, ...
291    fn parse_union_fields(&mut self) -> Result<Vec<UnionField>, String> {
292        let mut fields = Vec::new();
293
294        loop {
295            self.skip_comments();
296
297            if self.check("}") {
298                break;
299            }
300
301            // Get field name
302            let field_name = self.advance().ok_or("Expected field name")?.clone();
303
304            // Expect ':'
305            if !self.consume(":") {
306                return Err(format!(
307                    "Expected ':' after field name '{}', got '{}'",
308                    field_name,
309                    self.current()
310                ));
311            }
312
313            // Get type name
314            let type_name = self
315                .advance()
316                .ok_or("Expected type name after ':'")?
317                .clone();
318
319            fields.push(UnionField {
320                name: field_name,
321                type_name,
322            });
323
324            // Optional comma separator
325            self.skip_comments();
326            self.consume(",");
327        }
328
329        // Check for duplicate field names
330        let mut seen_fields = std::collections::HashSet::new();
331        for field in &fields {
332            if !seen_fields.insert(&field.name) {
333                return Err(format!("Duplicate field name '{}' in variant", field.name));
334            }
335        }
336
337        Ok(fields)
338    }
339
340    fn parse_word_def(&mut self) -> Result<WordDef, String> {
341        // Consume any pending lint annotations collected from comments before this word
342        let allowed_lints = std::mem::take(&mut self.pending_allowed_lints);
343
344        // Capture start line from ':' token
345        let start_line = self.current_token().map(|t| t.line).unwrap_or(0);
346
347        // Expect ':'
348        if !self.consume(":") {
349            return Err(format!(
350                "Expected ':' to start word definition, got '{}'",
351                self.current()
352            ));
353        }
354
355        // Get word name
356        let name = self
357            .advance()
358            .ok_or("Expected word name after ':'")?
359            .clone();
360
361        // Parse stack effect if present: ( ..a Int -- ..a Bool )
362        let effect = if self.check("(") {
363            Some(self.parse_stack_effect()?)
364        } else {
365            None
366        };
367
368        // Parse body until ';'
369        let mut body = Vec::new();
370        while !self.check(";") {
371            if self.is_at_end() {
372                return Err(format!("Unexpected end of file in word '{}'", name));
373            }
374
375            // Skip comments and newlines in body
376            self.skip_comments();
377            if self.check(";") {
378                break;
379            }
380
381            body.push(self.parse_statement()?);
382        }
383
384        // Capture end line from ';' token before consuming
385        let end_line = self.current_token().map(|t| t.line).unwrap_or(start_line);
386
387        // Consume ';'
388        self.consume(";");
389
390        Ok(WordDef {
391            name,
392            effect,
393            body,
394            source: Some(crate::ast::SourceLocation::span(
395                std::path::PathBuf::new(),
396                start_line,
397                end_line,
398            )),
399            allowed_lints,
400        })
401    }
402
403    fn parse_statement(&mut self) -> Result<Statement, String> {
404        use crate::ast::Span;
405        let tok = self.advance_token().ok_or("Unexpected end of file")?;
406        let token = &tok.text;
407        let tok_line = tok.line;
408        let tok_column = tok.column;
409        let tok_len = tok.text.len();
410
411        // Check if it looks like a float literal (contains . or scientific notation)
412        // Must check this BEFORE integer parsing
413        if let Some(f) = is_float_literal(token)
414            .then(|| token.parse::<f64>().ok())
415            .flatten()
416        {
417            return Ok(Statement::FloatLiteral(f));
418        }
419
420        // Try to parse as hex literal (0x or 0X prefix)
421        if let Some(hex) = token
422            .strip_prefix("0x")
423            .or_else(|| token.strip_prefix("0X"))
424        {
425            return i64::from_str_radix(hex, 16)
426                .map(Statement::IntLiteral)
427                .map_err(|_| format!("Invalid hex literal: {}", token));
428        }
429
430        // Try to parse as binary literal (0b or 0B prefix)
431        if let Some(bin) = token
432            .strip_prefix("0b")
433            .or_else(|| token.strip_prefix("0B"))
434        {
435            return i64::from_str_radix(bin, 2)
436                .map(Statement::IntLiteral)
437                .map_err(|_| format!("Invalid binary literal: {}", token));
438        }
439
440        // Try to parse as decimal integer literal
441        if let Ok(n) = token.parse::<i64>() {
442            return Ok(Statement::IntLiteral(n));
443        }
444
445        // Try to parse as boolean literal
446        if token == "true" {
447            return Ok(Statement::BoolLiteral(true));
448        }
449        if token == "false" {
450            return Ok(Statement::BoolLiteral(false));
451        }
452
453        // Try to parse as symbol literal (:foo, :some-name)
454        if token == ":" {
455            // Get the next token as the symbol name
456            let name_tok = self
457                .advance_token()
458                .ok_or("Expected symbol name after ':', got end of input")?;
459            let name = &name_tok.text;
460            // Validate symbol name (identifier-like, kebab-case allowed)
461            if name.is_empty() {
462                return Err("Symbol name cannot be empty".to_string());
463            }
464            if name.starts_with(|c: char| c.is_ascii_digit()) {
465                return Err(format!(
466                    "Symbol name cannot start with a digit: ':{}'\n  Hint: Symbol names must start with a letter",
467                    name
468                ));
469            }
470            if let Some(bad_char) = name.chars().find(|c| {
471                !c.is_alphanumeric()
472                    && *c != '-'
473                    && *c != '_'
474                    && *c != '.'
475                    && *c != '?'
476                    && *c != '!'
477            }) {
478                return Err(format!(
479                    "Symbol name contains invalid character '{}': ':{}'\n  Hint: Allowed: letters, digits, - _ . ? !",
480                    bad_char, name
481                ));
482            }
483            return Ok(Statement::Symbol(name.clone()));
484        }
485
486        // Try to parse as string literal
487        if token.starts_with('"') {
488            // Validate token has at least opening and closing quotes
489            if token.len() < 2 || !token.ends_with('"') {
490                return Err(format!("Malformed string literal: {}", token));
491            }
492            // Strip exactly one quote from each end (not all quotes, which would
493            // incorrectly handle escaped quotes at string boundaries like "hello\"")
494            let raw = &token[1..token.len() - 1];
495            let unescaped = unescape_string(raw)?;
496            return Ok(Statement::StringLiteral(unescaped));
497        }
498
499        // Check for conditional
500        if token == "if" {
501            return self.parse_if();
502        }
503
504        // Check for quotation
505        if token == "[" {
506            return self.parse_quotation(tok_line, tok_column);
507        }
508
509        // Check for match expression
510        if token == "match" {
511            return self.parse_match();
512        }
513
514        // Otherwise it's a word call - preserve source span for precise diagnostics
515        Ok(Statement::WordCall {
516            name: token.to_string(),
517            span: Some(Span::new(tok_line, tok_column, tok_len)),
518        })
519    }
520
521    fn parse_if(&mut self) -> Result<Statement, String> {
522        let mut then_branch = Vec::new();
523
524        // Parse then branch until 'else' or 'then'
525        loop {
526            if self.is_at_end() {
527                return Err("Unexpected end of file in 'if' statement".to_string());
528            }
529
530            // Skip comments and newlines
531            self.skip_comments();
532
533            if self.check("else") {
534                self.advance();
535                // Parse else branch
536                break;
537            }
538
539            if self.check("then") {
540                self.advance();
541                // End of if without else
542                return Ok(Statement::If {
543                    then_branch,
544                    else_branch: None,
545                });
546            }
547
548            then_branch.push(self.parse_statement()?);
549        }
550
551        // Parse else branch until 'then'
552        let mut else_branch = Vec::new();
553        loop {
554            if self.is_at_end() {
555                return Err("Unexpected end of file in 'else' branch".to_string());
556            }
557
558            // Skip comments and newlines
559            self.skip_comments();
560
561            if self.check("then") {
562                self.advance();
563                return Ok(Statement::If {
564                    then_branch,
565                    else_branch: Some(else_branch),
566                });
567            }
568
569            else_branch.push(self.parse_statement()?);
570        }
571    }
572
573    fn parse_quotation(
574        &mut self,
575        start_line: usize,
576        start_column: usize,
577    ) -> Result<Statement, String> {
578        use crate::ast::QuotationSpan;
579        let mut body = Vec::new();
580
581        // Parse statements until ']'
582        loop {
583            if self.is_at_end() {
584                return Err("Unexpected end of file in quotation".to_string());
585            }
586
587            // Skip comments and newlines
588            self.skip_comments();
589
590            if self.check("]") {
591                let end_tok = self.advance_token().unwrap();
592                let end_line = end_tok.line;
593                let end_column = end_tok.column + 1; // exclusive
594                let id = self.next_quotation_id;
595                self.next_quotation_id += 1;
596                // Span from '[' to ']' inclusive
597                let span = QuotationSpan::new(start_line, start_column, end_line, end_column);
598                return Ok(Statement::Quotation {
599                    id,
600                    body,
601                    span: Some(span),
602                });
603            }
604
605            body.push(self.parse_statement()?);
606        }
607    }
608
609    /// Parse a match expression:
610    ///   match
611    ///     Get -> send-response
612    ///     Increment -> do-increment send-response
613    ///     Report -> aggregate-add
614    ///   end
615    fn parse_match(&mut self) -> Result<Statement, String> {
616        let mut arms = Vec::new();
617
618        loop {
619            self.skip_comments();
620
621            // Check for 'end' to terminate match
622            if self.check("end") {
623                self.advance();
624                break;
625            }
626
627            if self.is_at_end() {
628                return Err("Unexpected end of file in match expression".to_string());
629            }
630
631            arms.push(self.parse_match_arm()?);
632        }
633
634        if arms.is_empty() {
635            return Err("Match expression must have at least one arm".to_string());
636        }
637
638        Ok(Statement::Match { arms })
639    }
640
641    /// Parse a single match arm:
642    ///   Get -> send-response
643    ///   or with bindings:
644    ///   Get { chan } -> chan send-response
645    fn parse_match_arm(&mut self) -> Result<MatchArm, String> {
646        // Get variant name
647        let variant_name = self
648            .advance()
649            .ok_or("Expected variant name in match arm")?
650            .clone();
651
652        self.skip_comments();
653
654        // Check for optional bindings: { field1 field2 }
655        let pattern = if self.check("{") {
656            self.consume("{");
657            let mut bindings = Vec::new();
658
659            loop {
660                self.skip_comments();
661
662                if self.check("}") {
663                    break;
664                }
665
666                if self.is_at_end() {
667                    return Err(format!(
668                        "Unexpected end of file in match arm bindings for '{}'",
669                        variant_name
670                    ));
671                }
672
673                let token = self.advance().ok_or("Expected binding name")?.clone();
674
675                // Require > prefix to make clear these are stack extractions, not variables
676                if let Some(field_name) = token.strip_prefix('>') {
677                    if field_name.is_empty() {
678                        return Err(format!(
679                            "Expected field name after '>' in match bindings for '{}'",
680                            variant_name
681                        ));
682                    }
683                    bindings.push(field_name.to_string());
684                } else {
685                    return Err(format!(
686                        "Match bindings must use '>' prefix to indicate stack extraction. \
687                         Use '>{}' instead of '{}' in pattern for '{}'",
688                        token, token, variant_name
689                    ));
690                }
691            }
692
693            self.consume("}");
694            Pattern::VariantWithBindings {
695                name: variant_name,
696                bindings,
697            }
698        } else {
699            Pattern::Variant(variant_name.clone())
700        };
701
702        self.skip_comments();
703
704        // Expect '->' arrow
705        if !self.consume("->") {
706            return Err(format!(
707                "Expected '->' after pattern '{}', got '{}'",
708                match &pattern {
709                    Pattern::Variant(n) => n.clone(),
710                    Pattern::VariantWithBindings { name, .. } => name.clone(),
711                },
712                self.current()
713            ));
714        }
715
716        // Parse body until next pattern or 'end'
717        let mut body = Vec::new();
718        loop {
719            self.skip_comments();
720
721            // Check for end of arm (next pattern starts with uppercase, or 'end')
722            if self.check("end") {
723                break;
724            }
725
726            // Check if next token looks like a match pattern (not just any uppercase word).
727            // A pattern is: UppercaseName followed by '->' or '{'
728            // This prevents confusing 'Make-Get' (constructor call) with a pattern.
729            if let Some(token) = self.current_token()
730                && let Some(first_char) = token.text.chars().next()
731                && first_char.is_uppercase()
732            {
733                // Peek at next token to see if this is a pattern (followed by -> or {)
734                if let Some(next) = self.peek_at(1)
735                    && (next == "->" || next == "{")
736                {
737                    // This is the next pattern
738                    break;
739                }
740                // Otherwise it's just an uppercase word call (like Make-Get), continue parsing body
741            }
742
743            if self.is_at_end() {
744                return Err("Unexpected end of file in match arm body".to_string());
745            }
746
747            body.push(self.parse_statement()?);
748        }
749
750        Ok(MatchArm { pattern, body })
751    }
752
753    /// Parse a stack effect declaration: ( ..a Int -- ..a Bool )
754    /// With optional computational effects: ( ..a Int -- ..a Bool | Yield Int )
755    fn parse_stack_effect(&mut self) -> Result<Effect, String> {
756        // Consume '('
757        if !self.consume("(") {
758            return Err("Expected '(' to start stack effect".to_string());
759        }
760
761        // Parse input stack types (until '--' or ')')
762        let (input_row_var, input_types) =
763            self.parse_type_list_until(&["--", ")"], "stack effect inputs", 0)?;
764
765        // Consume '--'
766        if !self.consume("--") {
767            return Err("Expected '--' separator in stack effect".to_string());
768        }
769
770        // Parse output stack types (until ')' or '|')
771        let (output_row_var, output_types) =
772            self.parse_type_list_until(&[")", "|"], "stack effect outputs", 0)?;
773
774        // Parse optional computational effects after '|'
775        let effects = if self.consume("|") {
776            self.parse_effect_annotations()?
777        } else {
778            Vec::new()
779        };
780
781        // Consume ')'
782        if !self.consume(")") {
783            return Err("Expected ')' to end stack effect".to_string());
784        }
785
786        // Build input and output StackTypes
787        let inputs = self.build_stack_type(input_row_var, input_types);
788        let outputs = self.build_stack_type(output_row_var, output_types);
789
790        Ok(Effect::with_effects(inputs, outputs, effects))
791    }
792
793    /// Parse computational effect annotations after '|'
794    /// Example: | Yield Int
795    fn parse_effect_annotations(&mut self) -> Result<Vec<SideEffect>, String> {
796        let mut effects = Vec::new();
797
798        // Parse effects until we hit ')'
799        while let Some(token) = self.peek_at(0) {
800            if token == ")" {
801                break;
802            }
803
804            match token {
805                "Yield" => {
806                    self.advance(); // consume "Yield"
807                    // Parse the yield type
808                    if let Some(type_token) = self.current_token() {
809                        if type_token.text == ")" {
810                            return Err("Expected type after 'Yield'".to_string());
811                        }
812                        let type_token = type_token.clone();
813                        self.advance();
814                        let yield_type = self.parse_type(&type_token)?;
815                        effects.push(SideEffect::Yield(Box::new(yield_type)));
816                    } else {
817                        return Err("Expected type after 'Yield'".to_string());
818                    }
819                }
820                _ => {
821                    return Err(format!("Unknown effect '{}'. Expected 'Yield'", token));
822                }
823            }
824        }
825
826        if effects.is_empty() {
827            return Err("Expected at least one effect after '|'".to_string());
828        }
829
830        Ok(effects)
831    }
832
833    /// Parse a single type token into a Type
834    fn parse_type(&self, token: &Token) -> Result<Type, String> {
835        match token.text.as_str() {
836            "Int" => Ok(Type::Int),
837            "Float" => Ok(Type::Float),
838            "Bool" => Ok(Type::Bool),
839            "String" => Ok(Type::String),
840            // Reject 'Quotation' - it looks like a type but would be silently treated as a type variable.
841            // Users must use explicit effect syntax like [Int -- Int] instead.
842            "Quotation" => Err(format!(
843                "'Quotation' is not a valid type at line {}, column {}. Use explicit quotation syntax like [Int -- Int] or [ -- ] instead.",
844                token.line + 1,
845                token.column + 1
846            )),
847            _ => {
848                // Check if it's a type variable (starts with uppercase)
849                if let Some(first_char) = token.text.chars().next() {
850                    if first_char.is_uppercase() {
851                        Ok(Type::Var(token.text.to_string()))
852                    } else {
853                        Err(format!(
854                            "Unknown type: '{}' at line {}, column {}. Expected Int, Bool, String, Closure, or a type variable (uppercase)",
855                            token.text.escape_default(),
856                            token.line + 1, // 1-indexed for user display
857                            token.column + 1
858                        ))
859                    }
860                } else {
861                    Err(format!(
862                        "Invalid type: '{}' at line {}, column {}",
863                        token.text.escape_default(),
864                        token.line + 1,
865                        token.column + 1
866                    ))
867                }
868            }
869        }
870    }
871
872    /// Validate row variable name
873    /// Row variables must start with a lowercase letter and contain only alphanumeric characters
874    fn validate_row_var_name(&self, name: &str) -> Result<(), String> {
875        if name.is_empty() {
876            return Err("Row variable must have a name after '..'".to_string());
877        }
878
879        // Must start with lowercase letter
880        let first_char = name.chars().next().unwrap();
881        if !first_char.is_ascii_lowercase() {
882            return Err(format!(
883                "Row variable '..{}' must start with a lowercase letter (a-z)",
884                name
885            ));
886        }
887
888        // Rest must be alphanumeric or underscore
889        for ch in name.chars() {
890            if !ch.is_alphanumeric() && ch != '_' {
891                return Err(format!(
892                    "Row variable '..{}' can only contain letters, numbers, and underscores",
893                    name
894                ));
895            }
896        }
897
898        // Check for reserved keywords (type names that might confuse users)
899        match name {
900            "Int" | "Bool" | "String" => {
901                return Err(format!(
902                    "Row variable '..{}' cannot use type name as identifier",
903                    name
904                ));
905            }
906            _ => {}
907        }
908
909        Ok(())
910    }
911
912    /// Parse a list of types until one of the given terminators is reached
913    /// Returns (optional row variable, list of types)
914    /// Used by both parse_stack_effect and parse_quotation_type
915    ///
916    /// depth: Current nesting depth for quotation types (0 at top level)
917    fn parse_type_list_until(
918        &mut self,
919        terminators: &[&str],
920        context: &str,
921        depth: usize,
922    ) -> Result<(Option<String>, Vec<Type>), String> {
923        const MAX_QUOTATION_DEPTH: usize = 32;
924
925        if depth > MAX_QUOTATION_DEPTH {
926            return Err(format!(
927                "Quotation type nesting exceeds maximum depth of {} (possible deeply nested types or DOS attack)",
928                MAX_QUOTATION_DEPTH
929            ));
930        }
931
932        let mut types = Vec::new();
933        let mut row_var = None;
934
935        while !terminators.iter().any(|t| self.check(t)) {
936            // Skip comments and blank lines within type lists
937            self.skip_comments();
938
939            // Re-check terminators after skipping comments
940            if terminators.iter().any(|t| self.check(t)) {
941                break;
942            }
943
944            if self.is_at_end() {
945                return Err(format!(
946                    "Unexpected end while parsing {} - expected one of: {}",
947                    context,
948                    terminators.join(", ")
949                ));
950            }
951
952            let token = self
953                .advance_token()
954                .ok_or_else(|| format!("Unexpected end in {}", context))?
955                .clone();
956
957            // Check for row variable: ..name
958            if token.text.starts_with("..") {
959                let var_name = token.text.trim_start_matches("..").to_string();
960                self.validate_row_var_name(&var_name)?;
961                row_var = Some(var_name);
962            } else if token.text == "Closure" {
963                // Closure type: Closure[effect]
964                if !self.consume("[") {
965                    return Err("Expected '[' after 'Closure' in type signature".to_string());
966                }
967                let effect_type = self.parse_quotation_type(depth)?;
968                match effect_type {
969                    Type::Quotation(effect) => {
970                        types.push(Type::Closure {
971                            effect,
972                            captures: Vec::new(), // Filled in by type checker
973                        });
974                    }
975                    _ => unreachable!("parse_quotation_type should return Quotation"),
976                }
977            } else if token.text == "[" {
978                // Nested quotation type
979                types.push(self.parse_quotation_type(depth)?);
980            } else {
981                // Parse as concrete type
982                types.push(self.parse_type(&token)?);
983            }
984        }
985
986        Ok((row_var, types))
987    }
988
989    /// Parse a quotation type: [inputs -- outputs]
990    /// Note: The opening '[' has already been consumed
991    ///
992    /// depth: Current nesting depth (incremented for each nested quotation)
993    fn parse_quotation_type(&mut self, depth: usize) -> Result<Type, String> {
994        // Parse input stack types (until '--' or ']')
995        let (input_row_var, input_types) =
996            self.parse_type_list_until(&["--", "]"], "quotation type inputs", depth + 1)?;
997
998        // Require '--' separator for clarity
999        if !self.consume("--") {
1000            // Check if user closed with ] without separator
1001            if self.check("]") {
1002                return Err(
1003                    "Quotation types require '--' separator. Did you mean '[Int -- ]' or '[ -- Int]'?"
1004                        .to_string(),
1005                );
1006            }
1007            return Err("Expected '--' separator in quotation type".to_string());
1008        }
1009
1010        // Parse output stack types (until ']')
1011        let (output_row_var, output_types) =
1012            self.parse_type_list_until(&["]"], "quotation type outputs", depth + 1)?;
1013
1014        // Consume ']'
1015        if !self.consume("]") {
1016            return Err("Expected ']' to end quotation type".to_string());
1017        }
1018
1019        // Build input and output StackTypes
1020        let inputs = self.build_stack_type(input_row_var, input_types);
1021        let outputs = self.build_stack_type(output_row_var, output_types);
1022
1023        Ok(Type::Quotation(Box::new(Effect::new(inputs, outputs))))
1024    }
1025
1026    /// Build a StackType from an optional row variable and a list of types
1027    /// Example: row_var="a", types=[Int, Bool] => RowVar("a") with Int on top of Bool
1028    ///
1029    /// IMPORTANT: ALL stack effects are implicitly row-polymorphic in concatenative languages.
1030    /// This means:
1031    ///   ( -- )        becomes  ( ..rest -- ..rest )       - no-op, preserves stack
1032    ///   ( -- Int )    becomes  ( ..rest -- ..rest Int )   - pushes Int
1033    ///   ( Int -- )    becomes  ( ..rest Int -- ..rest )   - consumes Int
1034    ///   ( Int -- Int) becomes  ( ..rest Int -- ..rest Int ) - transforms top
1035    fn build_stack_type(&self, row_var: Option<String>, types: Vec<Type>) -> StackType {
1036        // Always use row polymorphism - this is fundamental to concatenative semantics
1037        let base = match row_var {
1038            Some(name) => StackType::RowVar(name),
1039            None => StackType::RowVar("rest".to_string()),
1040        };
1041
1042        // Push types onto the stack (bottom to top order)
1043        types.into_iter().fold(base, |stack, ty| stack.push(ty))
1044    }
1045
1046    fn skip_comments(&mut self) {
1047        loop {
1048            // Check for comment: either standalone "#" or token starting with "#"
1049            // The latter handles shebangs like "#!/usr/bin/env seqc"
1050            let is_comment = if self.is_at_end() {
1051                false
1052            } else {
1053                let tok = self.current();
1054                tok == "#" || tok.starts_with("#!")
1055            };
1056
1057            if is_comment {
1058                self.advance(); // consume # or shebang token
1059
1060                // Collect all tokens until newline to reconstruct the comment text
1061                let mut comment_parts: Vec<String> = Vec::new();
1062                while !self.is_at_end() && self.current() != "\n" {
1063                    comment_parts.push(self.current().to_string());
1064                    self.advance();
1065                }
1066                if !self.is_at_end() {
1067                    self.advance(); // skip newline
1068                }
1069
1070                // Join parts and check for seq:allow annotation
1071                // Format: # seq:allow(lint-id) -> parts = ["seq", ":", "allow", "(", "lint-id", ")"]
1072                let comment = comment_parts.join("");
1073                if let Some(lint_id) = comment
1074                    .strip_prefix("seq:allow(")
1075                    .and_then(|s| s.strip_suffix(")"))
1076                {
1077                    self.pending_allowed_lints.push(lint_id.to_string());
1078                }
1079            } else if self.check("\n") {
1080                // Skip blank lines
1081                self.advance();
1082            } else {
1083                break;
1084            }
1085        }
1086    }
1087
1088    fn check(&self, expected: &str) -> bool {
1089        if self.is_at_end() {
1090            return false;
1091        }
1092        self.current() == expected
1093    }
1094
1095    fn consume(&mut self, expected: &str) -> bool {
1096        if self.check(expected) {
1097            self.advance();
1098            true
1099        } else {
1100            false
1101        }
1102    }
1103
1104    /// Get the text of the current token
1105    fn current(&self) -> &str {
1106        if self.is_at_end() {
1107            ""
1108        } else {
1109            &self.tokens[self.pos].text
1110        }
1111    }
1112
1113    /// Get the full current token with position info
1114    fn current_token(&self) -> Option<&Token> {
1115        if self.is_at_end() {
1116            None
1117        } else {
1118            Some(&self.tokens[self.pos])
1119        }
1120    }
1121
1122    /// Peek at a token N positions ahead without consuming
1123    fn peek_at(&self, n: usize) -> Option<&str> {
1124        let idx = self.pos + n;
1125        if idx < self.tokens.len() {
1126            Some(&self.tokens[idx].text)
1127        } else {
1128            None
1129        }
1130    }
1131
1132    /// Advance and return the token text (for compatibility with existing code)
1133    fn advance(&mut self) -> Option<&String> {
1134        if self.is_at_end() {
1135            None
1136        } else {
1137            let token = &self.tokens[self.pos];
1138            self.pos += 1;
1139            Some(&token.text)
1140        }
1141    }
1142
1143    /// Advance and return the full token with position info
1144    fn advance_token(&mut self) -> Option<&Token> {
1145        if self.is_at_end() {
1146            None
1147        } else {
1148            let token = &self.tokens[self.pos];
1149            self.pos += 1;
1150            Some(token)
1151        }
1152    }
1153
1154    fn is_at_end(&self) -> bool {
1155        self.pos >= self.tokens.len()
1156    }
1157}
1158
/// Check if a token looks like a float literal
///
/// After an optional leading `-`, the token must contain at least one ASCII
/// digit AND either:
/// - A decimal point: `3.14`, `.5`, `5.`
/// - An exponent marker: `1e10`, `1E-5`, `1.5e3`
///
/// This is only a cheap pre-filter, not a validator: tokens such as `1.2.3`
/// still pass and must be rejected by the real float parse. Requiring a digit
/// keeps ordinary words that merely contain `e` or `.` (for example
/// `write_line` or `io.write-line`) from being treated as float candidates.
///
/// This check must happen BEFORE integer parsing to avoid
/// parsing "5" in "5.0" as an integer.
fn is_float_literal(token: &str) -> bool {
    // Skip leading minus sign for negative numbers
    let unsigned = token.strip_prefix('-').unwrap_or(token);

    // Must have at least one digit (this also rejects "" and "-")
    let has_digit = unsigned.chars().any(|c| c.is_ascii_digit());

    // Decimal point or scientific notation
    has_digit && unsigned.contains(|c: char| matches!(c, '.' | 'e' | 'E'))
}
1179
/// Process escape sequences in a string literal
///
/// Supported escape sequences:
/// - `\"` -> `"`  (quote)
/// - `\\` -> `\`  (backslash)
/// - `\n` -> newline
/// - `\r` -> carriage return
/// - `\t` -> tab
/// - `\xNN` -> Unicode code point U+00NN (exactly two hex digits, 00-FF)
///
/// # Note on `\xNN` encoding
///
/// The `\xNN` escape creates a Unicode code point U+00NN, not a raw byte.
/// For values 0x00-0x7F (ASCII), this maps directly to the byte value.
/// For values 0x80-0xFF (Latin-1 Supplement), the character is stored as
/// a multi-byte UTF-8 sequence. For example:
/// - `\x41` -> 'A' (1 byte in UTF-8)
/// - `\x1b` -> ESC (1 byte in UTF-8, used for ANSI terminal codes)
/// - `\xFF` -> 'ÿ' (U+00FF, 2 bytes in UTF-8: 0xC3 0xBF)
///
/// # Errors
/// Returns an error when:
/// - an unknown escape sequence is encountered,
/// - the input ends right after a backslash,
/// - `\x` is followed by fewer than two characters, or
/// - either character after `\x` is not a hex digit (`0-9`, `a-f`, `A-F`).
fn unescape_string(s: &str) -> Result<String, String> {
    // The output is never longer than the input, so one allocation suffices.
    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars();

    while let Some(ch) = chars.next() {
        if ch != '\\' {
            result.push(ch);
            continue;
        }

        match chars.next() {
            Some('"') => result.push('"'),
            Some('\\') => result.push('\\'),
            Some('n') => result.push('\n'),
            Some('r') => result.push('\r'),
            Some('t') => result.push('\t'),
            Some('x') => {
                // Hex escape: \xNN
                let hex1 = chars.next().ok_or_else(|| {
                    "Incomplete hex escape sequence '\\x' - expected 2 hex digits".to_string()
                })?;
                let hex2 = chars.next().ok_or_else(|| {
                    format!(
                        "Incomplete hex escape sequence '\\x{}' - expected 2 hex digits",
                        hex1
                    )
                })?;

                // Validate each digit on its own. Parsing the pair with
                // `u8::from_str_radix` would also accept a leading '+' sign,
                // so `\x+5` would wrongly decode to U+0005.
                let code_point = match (hex1.to_digit(16), hex2.to_digit(16)) {
                    (Some(high), Some(low)) => Some(high * 16 + low),
                    _ => None,
                };
                // Two hex digits give at most 0xFF, which is always a valid char.
                let decoded = code_point.and_then(char::from_u32).ok_or_else(|| {
                    format!(
                        "Invalid hex escape sequence '\\x{}{}' - expected 2 hex digits (00-FF)",
                        hex1, hex2
                    )
                })?;

                result.push(decoded);
            }
            Some(c) => {
                return Err(format!(
                    "Unknown escape sequence '\\{}' in string literal. \
                     Supported: \\\" \\\\ \\n \\r \\t \\xNN",
                    c
                ));
            }
            None => {
                return Err("String ends with incomplete escape sequence '\\'".to_string());
            }
        }
    }

    Ok(result)
}
1257
1258fn tokenize(source: &str) -> Vec<Token> {
1259    let mut tokens = Vec::new();
1260    let mut current = String::new();
1261    let mut current_start_line = 0;
1262    let mut current_start_col = 0;
1263    let mut in_string = false;
1264    let mut prev_was_backslash = false;
1265
1266    // Track current position (0-indexed)
1267    let mut line = 0;
1268    let mut col = 0;
1269
1270    for ch in source.chars() {
1271        if in_string {
1272            current.push(ch);
1273            if ch == '"' && !prev_was_backslash {
1274                // Unescaped quote ends the string
1275                in_string = false;
1276                tokens.push(Token::new(
1277                    current.clone(),
1278                    current_start_line,
1279                    current_start_col,
1280                ));
1281                current.clear();
1282                prev_was_backslash = false;
1283            } else if ch == '\\' && !prev_was_backslash {
1284                // Start of escape sequence
1285                prev_was_backslash = true;
1286            } else {
1287                // Regular character or escaped character
1288                prev_was_backslash = false;
1289            }
1290            // Track newlines inside strings
1291            if ch == '\n' {
1292                line += 1;
1293                col = 0;
1294            } else {
1295                col += 1;
1296            }
1297        } else if ch == '"' {
1298            if !current.is_empty() {
1299                tokens.push(Token::new(
1300                    current.clone(),
1301                    current_start_line,
1302                    current_start_col,
1303                ));
1304                current.clear();
1305            }
1306            in_string = true;
1307            current_start_line = line;
1308            current_start_col = col;
1309            current.push(ch);
1310            prev_was_backslash = false;
1311            col += 1;
1312        } else if ch.is_whitespace() {
1313            if !current.is_empty() {
1314                tokens.push(Token::new(
1315                    current.clone(),
1316                    current_start_line,
1317                    current_start_col,
1318                ));
1319                current.clear();
1320            }
1321            // Preserve newlines for comment handling
1322            if ch == '\n' {
1323                tokens.push(Token::new("\n".to_string(), line, col));
1324                line += 1;
1325                col = 0;
1326            } else {
1327                col += 1;
1328            }
1329        } else if "():;[]{},".contains(ch) {
1330            if !current.is_empty() {
1331                tokens.push(Token::new(
1332                    current.clone(),
1333                    current_start_line,
1334                    current_start_col,
1335                ));
1336                current.clear();
1337            }
1338            tokens.push(Token::new(ch.to_string(), line, col));
1339            col += 1;
1340        } else {
1341            if current.is_empty() {
1342                current_start_line = line;
1343                current_start_col = col;
1344            }
1345            current.push(ch);
1346            col += 1;
1347        }
1348    }
1349
1350    // Check for unclosed string literal
1351    if in_string {
1352        // Return error by adding a special error token
1353        // The parser will handle this as a parse error
1354        tokens.push(Token::new(
1355            "<<<UNCLOSED_STRING>>>".to_string(),
1356            current_start_line,
1357            current_start_col,
1358        ));
1359    } else if !current.is_empty() {
1360        tokens.push(Token::new(current, current_start_line, current_start_col));
1361    }
1362
1363    tokens
1364}
1365
1366#[cfg(test)]
1367mod tests {
1368    use super::*;
1369
1370    #[test]
1371    fn test_parse_hello_world() {
1372        let source = r#"
1373: main ( -- )
1374  "Hello, World!" write_line ;
1375"#;
1376
1377        let mut parser = Parser::new(source);
1378        let program = parser.parse().unwrap();
1379
1380        assert_eq!(program.words.len(), 1);
1381        assert_eq!(program.words[0].name, "main");
1382        assert_eq!(program.words[0].body.len(), 2);
1383
1384        match &program.words[0].body[0] {
1385            Statement::StringLiteral(s) => assert_eq!(s, "Hello, World!"),
1386            _ => panic!("Expected StringLiteral"),
1387        }
1388
1389        match &program.words[0].body[1] {
1390            Statement::WordCall { name, .. } => assert_eq!(name, "write_line"),
1391            _ => panic!("Expected WordCall"),
1392        }
1393    }
1394
1395    #[test]
1396    fn test_parse_with_numbers() {
1397        let source = ": add-example ( -- ) 2 3 add ;";
1398
1399        let mut parser = Parser::new(source);
1400        let program = parser.parse().unwrap();
1401
1402        assert_eq!(program.words[0].body.len(), 3);
1403        assert_eq!(program.words[0].body[0], Statement::IntLiteral(2));
1404        assert_eq!(program.words[0].body[1], Statement::IntLiteral(3));
1405        assert!(matches!(
1406            &program.words[0].body[2],
1407            Statement::WordCall { name, .. } if name == "add"
1408        ));
1409    }
1410
1411    #[test]
1412    fn test_parse_hex_literals() {
1413        let source = ": test ( -- ) 0xFF 0x10 0X1A ;";
1414        let mut parser = Parser::new(source);
1415        let program = parser.parse().unwrap();
1416
1417        assert_eq!(program.words[0].body[0], Statement::IntLiteral(255));
1418        assert_eq!(program.words[0].body[1], Statement::IntLiteral(16));
1419        assert_eq!(program.words[0].body[2], Statement::IntLiteral(26));
1420    }
1421
1422    #[test]
1423    fn test_parse_binary_literals() {
1424        let source = ": test ( -- ) 0b1010 0B1111 0b0 ;";
1425        let mut parser = Parser::new(source);
1426        let program = parser.parse().unwrap();
1427
1428        assert_eq!(program.words[0].body[0], Statement::IntLiteral(10));
1429        assert_eq!(program.words[0].body[1], Statement::IntLiteral(15));
1430        assert_eq!(program.words[0].body[2], Statement::IntLiteral(0));
1431    }
1432
1433    #[test]
1434    fn test_parse_invalid_hex_literal() {
1435        let source = ": test ( -- ) 0xGG ;";
1436        let mut parser = Parser::new(source);
1437        let err = parser.parse().unwrap_err();
1438        assert!(err.contains("Invalid hex literal"));
1439    }
1440
1441    #[test]
1442    fn test_parse_invalid_binary_literal() {
1443        let source = ": test ( -- ) 0b123 ;";
1444        let mut parser = Parser::new(source);
1445        let err = parser.parse().unwrap_err();
1446        assert!(err.contains("Invalid binary literal"));
1447    }
1448
1449    #[test]
1450    fn test_parse_escaped_quotes() {
1451        let source = r#": main ( -- ) "Say \"hello\" there" write_line ;"#;
1452
1453        let mut parser = Parser::new(source);
1454        let program = parser.parse().unwrap();
1455
1456        assert_eq!(program.words.len(), 1);
1457        assert_eq!(program.words[0].body.len(), 2);
1458
1459        match &program.words[0].body[0] {
1460            // Escape sequences should be processed: \" becomes actual quote
1461            Statement::StringLiteral(s) => assert_eq!(s, "Say \"hello\" there"),
1462            _ => panic!("Expected StringLiteral with escaped quotes"),
1463        }
1464    }
1465
1466    /// Regression test for issue #117: escaped quote at end of string
1467    /// Previously failed with "String ends with incomplete escape sequence"
1468    #[test]
1469    fn test_escaped_quote_at_end_of_string() {
1470        let source = r#": main ( -- ) "hello\"" io.write-line ;"#;
1471
1472        let mut parser = Parser::new(source);
1473        let program = parser.parse().unwrap();
1474
1475        assert_eq!(program.words.len(), 1);
1476        match &program.words[0].body[0] {
1477            Statement::StringLiteral(s) => assert_eq!(s, "hello\""),
1478            _ => panic!("Expected StringLiteral ending with escaped quote"),
1479        }
1480    }
1481
1482    /// Test escaped quote at start of string (boundary case)
1483    #[test]
1484    fn test_escaped_quote_at_start_of_string() {
1485        let source = r#": main ( -- ) "\"hello" io.write-line ;"#;
1486
1487        let mut parser = Parser::new(source);
1488        let program = parser.parse().unwrap();
1489
1490        match &program.words[0].body[0] {
1491            Statement::StringLiteral(s) => assert_eq!(s, "\"hello"),
1492            _ => panic!("Expected StringLiteral starting with escaped quote"),
1493        }
1494    }
1495
1496    #[test]
1497    fn test_escape_sequences() {
1498        let source = r#": main ( -- ) "Line 1\nLine 2\tTabbed" write_line ;"#;
1499
1500        let mut parser = Parser::new(source);
1501        let program = parser.parse().unwrap();
1502
1503        match &program.words[0].body[0] {
1504            Statement::StringLiteral(s) => assert_eq!(s, "Line 1\nLine 2\tTabbed"),
1505            _ => panic!("Expected StringLiteral"),
1506        }
1507    }
1508
1509    #[test]
1510    fn test_unknown_escape_sequence() {
1511        let source = r#": main ( -- ) "Bad \q sequence" write_line ;"#;
1512
1513        let mut parser = Parser::new(source);
1514        let result = parser.parse();
1515
1516        assert!(result.is_err());
1517        assert!(result.unwrap_err().contains("Unknown escape sequence"));
1518    }
1519
1520    #[test]
1521    fn test_hex_escape_sequence() {
1522        // \x1b is ESC (27), \x41 is 'A' (65)
1523        let source = r#": main ( -- ) "\x1b[2K\x41" io.write-line ;"#;
1524
1525        let mut parser = Parser::new(source);
1526        let program = parser.parse().unwrap();
1527
1528        match &program.words[0].body[0] {
1529            Statement::StringLiteral(s) => {
1530                assert_eq!(s.len(), 5); // ESC [ 2 K A
1531                assert_eq!(s.as_bytes()[0], 0x1b); // ESC
1532                assert_eq!(s.as_bytes()[4], 0x41); // 'A'
1533            }
1534            _ => panic!("Expected StringLiteral"),
1535        }
1536    }
1537
1538    #[test]
1539    fn test_hex_escape_null_byte() {
1540        let source = r#": main ( -- ) "before\x00after" io.write-line ;"#;
1541
1542        let mut parser = Parser::new(source);
1543        let program = parser.parse().unwrap();
1544
1545        match &program.words[0].body[0] {
1546            Statement::StringLiteral(s) => {
1547                assert_eq!(s.len(), 12); // "before" + NUL + "after"
1548                assert_eq!(s.as_bytes()[6], 0x00);
1549            }
1550            _ => panic!("Expected StringLiteral"),
1551        }
1552    }
1553
1554    #[test]
1555    fn test_hex_escape_uppercase() {
1556        // Both uppercase and lowercase hex digits should work
1557        // Note: Values > 0x7F become Unicode code points (U+00NN), multi-byte in UTF-8
1558        let source = r#": main ( -- ) "\x41\x42\x4F" io.write-line ;"#;
1559
1560        let mut parser = Parser::new(source);
1561        let program = parser.parse().unwrap();
1562
1563        match &program.words[0].body[0] {
1564            Statement::StringLiteral(s) => {
1565                assert_eq!(s, "ABO"); // 0x41='A', 0x42='B', 0x4F='O'
1566            }
1567            _ => panic!("Expected StringLiteral"),
1568        }
1569    }
1570
1571    #[test]
1572    fn test_hex_escape_high_bytes() {
1573        // Values > 0x7F become Unicode code points (Latin-1), which are multi-byte in UTF-8
1574        let source = r#": main ( -- ) "\xFF" io.write-line ;"#;
1575
1576        let mut parser = Parser::new(source);
1577        let program = parser.parse().unwrap();
1578
1579        match &program.words[0].body[0] {
1580            Statement::StringLiteral(s) => {
1581                // \xFF becomes U+00FF (ÿ), which is 2 bytes in UTF-8: C3 BF
1582                assert_eq!(s, "\u{00FF}");
1583                assert_eq!(s.chars().next().unwrap(), 'ÿ');
1584            }
1585            _ => panic!("Expected StringLiteral"),
1586        }
1587    }
1588
1589    #[test]
1590    fn test_hex_escape_incomplete() {
1591        // \x with only one hex digit
1592        let source = r#": main ( -- ) "\x1" io.write-line ;"#;
1593
1594        let mut parser = Parser::new(source);
1595        let result = parser.parse();
1596
1597        assert!(result.is_err());
1598        assert!(result.unwrap_err().contains("Incomplete hex escape"));
1599    }
1600
1601    #[test]
1602    fn test_hex_escape_invalid_digits() {
1603        // \xGG is not valid hex
1604        let source = r#": main ( -- ) "\xGG" io.write-line ;"#;
1605
1606        let mut parser = Parser::new(source);
1607        let result = parser.parse();
1608
1609        assert!(result.is_err());
1610        assert!(result.unwrap_err().contains("Invalid hex escape"));
1611    }
1612
1613    #[test]
1614    fn test_hex_escape_at_end_of_string() {
1615        // \x at end of string with no digits
1616        let source = r#": main ( -- ) "test\x" io.write-line ;"#;
1617
1618        let mut parser = Parser::new(source);
1619        let result = parser.parse();
1620
1621        assert!(result.is_err());
1622        assert!(result.unwrap_err().contains("Incomplete hex escape"));
1623    }
1624
1625    #[test]
1626    fn test_unclosed_string_literal() {
1627        let source = r#": main ( -- ) "unclosed string ;"#;
1628
1629        let mut parser = Parser::new(source);
1630        let result = parser.parse();
1631
1632        assert!(result.is_err());
1633        let err_msg = result.unwrap_err();
1634        assert!(err_msg.contains("Unclosed string literal"));
1635        // Should include position information (line 1, column 15 for the opening quote)
1636        assert!(
1637            err_msg.contains("line 1"),
1638            "Expected line number in error: {}",
1639            err_msg
1640        );
1641        assert!(
1642            err_msg.contains("column 15"),
1643            "Expected column number in error: {}",
1644            err_msg
1645        );
1646    }
1647
1648    #[test]
1649    fn test_multiple_word_definitions() {
1650        let source = r#"
1651: double ( Int -- Int )
1652  2 multiply ;
1653
1654: quadruple ( Int -- Int )
1655  double double ;
1656"#;
1657
1658        let mut parser = Parser::new(source);
1659        let program = parser.parse().unwrap();
1660
1661        assert_eq!(program.words.len(), 2);
1662        assert_eq!(program.words[0].name, "double");
1663        assert_eq!(program.words[1].name, "quadruple");
1664
1665        // Verify stack effects were parsed
1666        assert!(program.words[0].effect.is_some());
1667        assert!(program.words[1].effect.is_some());
1668    }
1669
1670    #[test]
1671    fn test_user_word_calling_user_word() {
1672        let source = r#"
1673: helper ( -- )
1674  "helper called" write_line ;
1675
1676: main ( -- )
1677  helper ;
1678"#;
1679
1680        let mut parser = Parser::new(source);
1681        let program = parser.parse().unwrap();
1682
1683        assert_eq!(program.words.len(), 2);
1684
1685        // Check main calls helper
1686        match &program.words[1].body[0] {
1687            Statement::WordCall { name, .. } => assert_eq!(name, "helper"),
1688            _ => panic!("Expected WordCall to helper"),
1689        }
1690    }
1691
1692    #[test]
1693    fn test_parse_simple_stack_effect() {
1694        // Test: ( Int -- Bool )
1695        // With implicit row polymorphism, this becomes: ( ..rest Int -- ..rest Bool )
1696        let source = ": test ( Int -- Bool ) 1 ;";
1697        let mut parser = Parser::new(source);
1698        let program = parser.parse().unwrap();
1699
1700        assert_eq!(program.words.len(), 1);
1701        let word = &program.words[0];
1702        assert!(word.effect.is_some());
1703
1704        let effect = word.effect.as_ref().unwrap();
1705
1706        // Input: Int on RowVar("rest") (implicit row polymorphism)
1707        assert_eq!(
1708            effect.inputs,
1709            StackType::Cons {
1710                rest: Box::new(StackType::RowVar("rest".to_string())),
1711                top: Type::Int
1712            }
1713        );
1714
1715        // Output: Bool on RowVar("rest") (implicit row polymorphism)
1716        assert_eq!(
1717            effect.outputs,
1718            StackType::Cons {
1719                rest: Box::new(StackType::RowVar("rest".to_string())),
1720                top: Type::Bool
1721            }
1722        );
1723    }
1724
1725    #[test]
1726    fn test_parse_row_polymorphic_stack_effect() {
1727        // Test: ( ..a Int -- ..a Bool )
1728        let source = ": test ( ..a Int -- ..a Bool ) 1 ;";
1729        let mut parser = Parser::new(source);
1730        let program = parser.parse().unwrap();
1731
1732        assert_eq!(program.words.len(), 1);
1733        let word = &program.words[0];
1734        assert!(word.effect.is_some());
1735
1736        let effect = word.effect.as_ref().unwrap();
1737
1738        // Input: Int on RowVar("a")
1739        assert_eq!(
1740            effect.inputs,
1741            StackType::Cons {
1742                rest: Box::new(StackType::RowVar("a".to_string())),
1743                top: Type::Int
1744            }
1745        );
1746
1747        // Output: Bool on RowVar("a")
1748        assert_eq!(
1749            effect.outputs,
1750            StackType::Cons {
1751                rest: Box::new(StackType::RowVar("a".to_string())),
1752                top: Type::Bool
1753            }
1754        );
1755    }
1756
1757    #[test]
1758    fn test_parse_invalid_row_var_starts_with_digit() {
1759        // Test: Row variable cannot start with digit
1760        let source = ": test ( ..123 Int -- ) ;";
1761        let mut parser = Parser::new(source);
1762        let result = parser.parse();
1763
1764        assert!(result.is_err());
1765        let err_msg = result.unwrap_err();
1766        assert!(
1767            err_msg.contains("lowercase letter"),
1768            "Expected error about lowercase letter, got: {}",
1769            err_msg
1770        );
1771    }
1772
1773    #[test]
1774    fn test_parse_invalid_row_var_starts_with_uppercase() {
1775        // Test: Row variable cannot start with uppercase (that's a type variable)
1776        let source = ": test ( ..Int Int -- ) ;";
1777        let mut parser = Parser::new(source);
1778        let result = parser.parse();
1779
1780        assert!(result.is_err());
1781        let err_msg = result.unwrap_err();
1782        assert!(
1783            err_msg.contains("lowercase letter") || err_msg.contains("type name"),
1784            "Expected error about lowercase letter or type name, got: {}",
1785            err_msg
1786        );
1787    }
1788
1789    #[test]
1790    fn test_parse_invalid_row_var_with_special_chars() {
1791        // Test: Row variable cannot contain special characters
1792        let source = ": test ( ..a-b Int -- ) ;";
1793        let mut parser = Parser::new(source);
1794        let result = parser.parse();
1795
1796        assert!(result.is_err());
1797        let err_msg = result.unwrap_err();
1798        assert!(
1799            err_msg.contains("letters, numbers, and underscores")
1800                || err_msg.contains("Unknown type"),
1801            "Expected error about valid characters, got: {}",
1802            err_msg
1803        );
1804    }
1805
1806    #[test]
1807    fn test_parse_valid_row_var_with_underscore() {
1808        // Test: Row variable CAN contain underscore
1809        let source = ": test ( ..my_row Int -- ..my_row Bool ) ;";
1810        let mut parser = Parser::new(source);
1811        let result = parser.parse();
1812
1813        assert!(result.is_ok(), "Should accept row variable with underscore");
1814    }
1815
1816    #[test]
1817    fn test_parse_multiple_types_stack_effect() {
1818        // Test: ( Int String -- Bool )
1819        // With implicit row polymorphism: ( ..rest Int String -- ..rest Bool )
1820        let source = ": test ( Int String -- Bool ) 1 ;";
1821        let mut parser = Parser::new(source);
1822        let program = parser.parse().unwrap();
1823
1824        let effect = program.words[0].effect.as_ref().unwrap();
1825
1826        // Input: String on Int on RowVar("rest")
1827        let (rest, top) = effect.inputs.clone().pop().unwrap();
1828        assert_eq!(top, Type::String);
1829        let (rest2, top2) = rest.pop().unwrap();
1830        assert_eq!(top2, Type::Int);
1831        assert_eq!(rest2, StackType::RowVar("rest".to_string()));
1832
1833        // Output: Bool on RowVar("rest") (implicit row polymorphism)
1834        assert_eq!(
1835            effect.outputs,
1836            StackType::Cons {
1837                rest: Box::new(StackType::RowVar("rest".to_string())),
1838                top: Type::Bool
1839            }
1840        );
1841    }
1842
1843    #[test]
1844    fn test_parse_type_variable() {
1845        // Test: ( ..a T -- ..a T T ) for dup
1846        let source = ": dup ( ..a T -- ..a T T ) ;";
1847        let mut parser = Parser::new(source);
1848        let program = parser.parse().unwrap();
1849
1850        let effect = program.words[0].effect.as_ref().unwrap();
1851
1852        // Input: T on RowVar("a")
1853        assert_eq!(
1854            effect.inputs,
1855            StackType::Cons {
1856                rest: Box::new(StackType::RowVar("a".to_string())),
1857                top: Type::Var("T".to_string())
1858            }
1859        );
1860
1861        // Output: T on T on RowVar("a")
1862        let (rest, top) = effect.outputs.clone().pop().unwrap();
1863        assert_eq!(top, Type::Var("T".to_string()));
1864        let (rest2, top2) = rest.pop().unwrap();
1865        assert_eq!(top2, Type::Var("T".to_string()));
1866        assert_eq!(rest2, StackType::RowVar("a".to_string()));
1867    }
1868
1869    #[test]
1870    fn test_parse_empty_stack_effect() {
1871        // Test: ( -- )
1872        // In concatenative languages, even empty effects are row-polymorphic
1873        // ( -- ) means ( ..rest -- ..rest ) - preserves stack
1874        let source = ": test ( -- ) ;";
1875        let mut parser = Parser::new(source);
1876        let program = parser.parse().unwrap();
1877
1878        let effect = program.words[0].effect.as_ref().unwrap();
1879
1880        // Both inputs and outputs should use the same implicit row variable
1881        assert_eq!(effect.inputs, StackType::RowVar("rest".to_string()));
1882        assert_eq!(effect.outputs, StackType::RowVar("rest".to_string()));
1883    }
1884
1885    #[test]
1886    fn test_parse_invalid_type() {
1887        // Test invalid type (lowercase, not a row var)
1888        let source = ": test ( invalid -- Bool ) ;";
1889        let mut parser = Parser::new(source);
1890        let result = parser.parse();
1891
1892        assert!(result.is_err());
1893        assert!(result.unwrap_err().contains("Unknown type"));
1894    }
1895
1896    #[test]
1897    fn test_parse_unclosed_stack_effect() {
1898        // Test unclosed stack effect - parser tries to parse all tokens until ')' or EOF
1899        // In this case, it encounters "body" which is an invalid type
1900        let source = ": test ( Int -- Bool body ;";
1901        let mut parser = Parser::new(source);
1902        let result = parser.parse();
1903
1904        assert!(result.is_err());
1905        let err_msg = result.unwrap_err();
1906        // Parser will try to parse "body" as a type and fail
1907        assert!(err_msg.contains("Unknown type"));
1908    }
1909
1910    #[test]
1911    fn test_parse_simple_quotation_type() {
1912        // Test: ( [Int -- Int] -- )
1913        let source = ": apply ( [Int -- Int] -- ) ;";
1914        let mut parser = Parser::new(source);
1915        let program = parser.parse().unwrap();
1916
1917        let effect = program.words[0].effect.as_ref().unwrap();
1918
1919        // Input should be: Quotation(Int -- Int) on RowVar("rest")
1920        let (rest, top) = effect.inputs.clone().pop().unwrap();
1921        match top {
1922            Type::Quotation(quot_effect) => {
1923                // Check quotation's input: Int on RowVar("rest")
1924                assert_eq!(
1925                    quot_effect.inputs,
1926                    StackType::Cons {
1927                        rest: Box::new(StackType::RowVar("rest".to_string())),
1928                        top: Type::Int
1929                    }
1930                );
1931                // Check quotation's output: Int on RowVar("rest")
1932                assert_eq!(
1933                    quot_effect.outputs,
1934                    StackType::Cons {
1935                        rest: Box::new(StackType::RowVar("rest".to_string())),
1936                        top: Type::Int
1937                    }
1938                );
1939            }
1940            _ => panic!("Expected Quotation type, got {:?}", top),
1941        }
1942        assert_eq!(rest, StackType::RowVar("rest".to_string()));
1943    }
1944
1945    #[test]
1946    fn test_parse_quotation_type_with_row_vars() {
1947        // Test: ( ..a [..a T -- ..a Bool] -- ..a )
1948        let source = ": test ( ..a [..a T -- ..a Bool] -- ..a ) ;";
1949        let mut parser = Parser::new(source);
1950        let program = parser.parse().unwrap();
1951
1952        let effect = program.words[0].effect.as_ref().unwrap();
1953
1954        // Input: Quotation on RowVar("a")
1955        let (rest, top) = effect.inputs.clone().pop().unwrap();
1956        match top {
1957            Type::Quotation(quot_effect) => {
1958                // Check quotation's input: T on RowVar("a")
1959                let (q_in_rest, q_in_top) = quot_effect.inputs.clone().pop().unwrap();
1960                assert_eq!(q_in_top, Type::Var("T".to_string()));
1961                assert_eq!(q_in_rest, StackType::RowVar("a".to_string()));
1962
1963                // Check quotation's output: Bool on RowVar("a")
1964                let (q_out_rest, q_out_top) = quot_effect.outputs.clone().pop().unwrap();
1965                assert_eq!(q_out_top, Type::Bool);
1966                assert_eq!(q_out_rest, StackType::RowVar("a".to_string()));
1967            }
1968            _ => panic!("Expected Quotation type, got {:?}", top),
1969        }
1970        assert_eq!(rest, StackType::RowVar("a".to_string()));
1971    }
1972
1973    #[test]
1974    fn test_parse_nested_quotation_type() {
1975        // Test: ( [[Int -- Int] -- Bool] -- )
1976        let source = ": nested ( [[Int -- Int] -- Bool] -- ) ;";
1977        let mut parser = Parser::new(source);
1978        let program = parser.parse().unwrap();
1979
1980        let effect = program.words[0].effect.as_ref().unwrap();
1981
1982        // Input: Quotation([Int -- Int] -- Bool) on RowVar("rest")
1983        let (_, top) = effect.inputs.clone().pop().unwrap();
1984        match top {
1985            Type::Quotation(outer_effect) => {
1986                // Outer quotation input: [Int -- Int] on RowVar("rest")
1987                let (_, outer_in_top) = outer_effect.inputs.clone().pop().unwrap();
1988                match outer_in_top {
1989                    Type::Quotation(inner_effect) => {
1990                        // Inner quotation: Int -- Int
1991                        assert!(matches!(
1992                            inner_effect.inputs.clone().pop().unwrap().1,
1993                            Type::Int
1994                        ));
1995                        assert!(matches!(
1996                            inner_effect.outputs.clone().pop().unwrap().1,
1997                            Type::Int
1998                        ));
1999                    }
2000                    _ => panic!("Expected nested Quotation type"),
2001                }
2002
2003                // Outer quotation output: Bool
2004                let (_, outer_out_top) = outer_effect.outputs.clone().pop().unwrap();
2005                assert_eq!(outer_out_top, Type::Bool);
2006            }
2007            _ => panic!("Expected Quotation type"),
2008        }
2009    }
2010
2011    #[test]
2012    fn test_parse_deeply_nested_quotation_type_exceeds_limit() {
2013        // Test: Deeply nested quotation types should fail with max depth error
2014        // Build a quotation type nested 35 levels deep (exceeds MAX_QUOTATION_DEPTH = 32)
2015        let mut source = String::from(": deep ( ");
2016
2017        // Build opening brackets: [[[[[[...
2018        for _ in 0..35 {
2019            source.push_str("[ -- ");
2020        }
2021
2022        source.push_str("Int");
2023
2024        // Build closing brackets: ...]]]]]]
2025        for _ in 0..35 {
2026            source.push_str(" ]");
2027        }
2028
2029        source.push_str(" -- ) ;");
2030
2031        let mut parser = Parser::new(&source);
2032        let result = parser.parse();
2033
2034        // Should fail with depth limit error
2035        assert!(result.is_err());
2036        let err_msg = result.unwrap_err();
2037        assert!(
2038            err_msg.contains("depth") || err_msg.contains("32"),
2039            "Expected depth limit error, got: {}",
2040            err_msg
2041        );
2042    }
2043
2044    #[test]
2045    fn test_parse_empty_quotation_type() {
2046        // Test: ( [ -- ] -- )
2047        // An empty quotation type is also row-polymorphic: [ ..rest -- ..rest ]
2048        let source = ": empty-quot ( [ -- ] -- ) ;";
2049        let mut parser = Parser::new(source);
2050        let program = parser.parse().unwrap();
2051
2052        let effect = program.words[0].effect.as_ref().unwrap();
2053
2054        let (_, top) = effect.inputs.clone().pop().unwrap();
2055        match top {
2056            Type::Quotation(quot_effect) => {
2057                // Empty quotation preserves the stack (row-polymorphic)
2058                assert_eq!(quot_effect.inputs, StackType::RowVar("rest".to_string()));
2059                assert_eq!(quot_effect.outputs, StackType::RowVar("rest".to_string()));
2060            }
2061            _ => panic!("Expected Quotation type"),
2062        }
2063    }
2064
2065    #[test]
2066    fn test_parse_quotation_type_in_output() {
2067        // Test: ( -- [Int -- Int] )
2068        let source = ": maker ( -- [Int -- Int] ) ;";
2069        let mut parser = Parser::new(source);
2070        let program = parser.parse().unwrap();
2071
2072        let effect = program.words[0].effect.as_ref().unwrap();
2073
2074        // Output should be: Quotation(Int -- Int) on RowVar("rest")
2075        let (_, top) = effect.outputs.clone().pop().unwrap();
2076        match top {
2077            Type::Quotation(quot_effect) => {
2078                assert!(matches!(
2079                    quot_effect.inputs.clone().pop().unwrap().1,
2080                    Type::Int
2081                ));
2082                assert!(matches!(
2083                    quot_effect.outputs.clone().pop().unwrap().1,
2084                    Type::Int
2085                ));
2086            }
2087            _ => panic!("Expected Quotation type"),
2088        }
2089    }
2090
2091    #[test]
2092    fn test_parse_unclosed_quotation_type() {
2093        // Test: ( [Int -- Int -- )  (missing ])
2094        let source = ": broken ( [Int -- Int -- ) ;";
2095        let mut parser = Parser::new(source);
2096        let result = parser.parse();
2097
2098        assert!(result.is_err());
2099        let err_msg = result.unwrap_err();
2100        // Parser might error with various messages depending on where it fails
2101        // It should at least indicate a parsing problem
2102        assert!(
2103            err_msg.contains("Unclosed")
2104                || err_msg.contains("Expected")
2105                || err_msg.contains("Unexpected"),
2106            "Got error: {}",
2107            err_msg
2108        );
2109    }
2110
2111    #[test]
2112    fn test_parse_multiple_quotation_types() {
2113        // Test: ( [Int -- Int] [String -- Bool] -- )
2114        let source = ": multi ( [Int -- Int] [String -- Bool] -- ) ;";
2115        let mut parser = Parser::new(source);
2116        let program = parser.parse().unwrap();
2117
2118        let effect = program.words[0].effect.as_ref().unwrap();
2119
2120        // Pop second quotation (String -- Bool)
2121        let (rest, top) = effect.inputs.clone().pop().unwrap();
2122        match top {
2123            Type::Quotation(quot_effect) => {
2124                assert!(matches!(
2125                    quot_effect.inputs.clone().pop().unwrap().1,
2126                    Type::String
2127                ));
2128                assert!(matches!(
2129                    quot_effect.outputs.clone().pop().unwrap().1,
2130                    Type::Bool
2131                ));
2132            }
2133            _ => panic!("Expected Quotation type"),
2134        }
2135
2136        // Pop first quotation (Int -- Int)
2137        let (_, top2) = rest.pop().unwrap();
2138        match top2 {
2139            Type::Quotation(quot_effect) => {
2140                assert!(matches!(
2141                    quot_effect.inputs.clone().pop().unwrap().1,
2142                    Type::Int
2143                ));
2144                assert!(matches!(
2145                    quot_effect.outputs.clone().pop().unwrap().1,
2146                    Type::Int
2147                ));
2148            }
2149            _ => panic!("Expected Quotation type"),
2150        }
2151    }
2152
2153    #[test]
2154    fn test_parse_quotation_type_without_separator() {
2155        // Test: ( [Int] -- ) should be REJECTED
2156        //
2157        // Design decision: The '--' separator is REQUIRED for clarity.
2158        // [Int] looks like a list type in most languages, not a consumer function.
2159        // This would confuse users.
2160        //
2161        // Require explicit syntax:
2162        // - `[Int -- ]` for quotation that consumes Int and produces nothing
2163        // - `[ -- Int]` for quotation that produces Int
2164        // - `[Int -- Int]` for transformation
2165        let source = ": consumer ( [Int] -- ) ;";
2166        let mut parser = Parser::new(source);
2167        let result = parser.parse();
2168
2169        // Should fail with helpful error message
2170        assert!(result.is_err());
2171        let err_msg = result.unwrap_err();
2172        assert!(
2173            err_msg.contains("require") && err_msg.contains("--"),
2174            "Expected error about missing '--' separator, got: {}",
2175            err_msg
2176        );
2177    }
2178
2179    #[test]
2180    fn test_parse_bare_quotation_type_rejected() {
2181        // Test: ( Int Quotation -- Int ) should be REJECTED
2182        //
2183        // 'Quotation' looks like a type name but would be silently treated as a
2184        // type variable without this check. Users must use explicit effect syntax.
2185        let source = ": apply-twice ( Int Quotation -- Int ) ;";
2186        let mut parser = Parser::new(source);
2187        let result = parser.parse();
2188
2189        assert!(result.is_err());
2190        let err_msg = result.unwrap_err();
2191        assert!(
2192            err_msg.contains("Quotation") && err_msg.contains("not a valid type"),
2193            "Expected error about 'Quotation' not being valid, got: {}",
2194            err_msg
2195        );
2196        assert!(
2197            err_msg.contains("[Int -- Int]") || err_msg.contains("[ -- ]"),
2198            "Expected error to suggest explicit syntax, got: {}",
2199            err_msg
2200        );
2201    }
2202
2203    #[test]
2204    fn test_parse_no_stack_effect() {
2205        // Test word without stack effect (should still work)
2206        let source = ": test 1 2 add ;";
2207        let mut parser = Parser::new(source);
2208        let program = parser.parse().unwrap();
2209
2210        assert_eq!(program.words.len(), 1);
2211        assert!(program.words[0].effect.is_none());
2212    }
2213
2214    #[test]
2215    fn test_parse_simple_quotation() {
2216        let source = r#"
2217: test ( -- Quot )
2218  [ 1 add ] ;
2219"#;
2220
2221        let mut parser = Parser::new(source);
2222        let program = parser.parse().unwrap();
2223
2224        assert_eq!(program.words.len(), 1);
2225        assert_eq!(program.words[0].name, "test");
2226        assert_eq!(program.words[0].body.len(), 1);
2227
2228        match &program.words[0].body[0] {
2229            Statement::Quotation { body, .. } => {
2230                assert_eq!(body.len(), 2);
2231                assert_eq!(body[0], Statement::IntLiteral(1));
2232                assert!(matches!(&body[1], Statement::WordCall { name, .. } if name == "add"));
2233            }
2234            _ => panic!("Expected Quotation statement"),
2235        }
2236    }
2237
2238    #[test]
2239    fn test_parse_empty_quotation() {
2240        let source = ": test [ ] ;";
2241
2242        let mut parser = Parser::new(source);
2243        let program = parser.parse().unwrap();
2244
2245        assert_eq!(program.words.len(), 1);
2246
2247        match &program.words[0].body[0] {
2248            Statement::Quotation { body, .. } => {
2249                assert_eq!(body.len(), 0);
2250            }
2251            _ => panic!("Expected Quotation statement"),
2252        }
2253    }
2254
2255    #[test]
2256    fn test_parse_quotation_with_call() {
2257        let source = r#"
2258: test ( -- )
2259  5 [ 1 add ] call ;
2260"#;
2261
2262        let mut parser = Parser::new(source);
2263        let program = parser.parse().unwrap();
2264
2265        assert_eq!(program.words.len(), 1);
2266        assert_eq!(program.words[0].body.len(), 3);
2267
2268        assert_eq!(program.words[0].body[0], Statement::IntLiteral(5));
2269
2270        match &program.words[0].body[1] {
2271            Statement::Quotation { body, .. } => {
2272                assert_eq!(body.len(), 2);
2273            }
2274            _ => panic!("Expected Quotation"),
2275        }
2276
2277        assert!(matches!(
2278            &program.words[0].body[2],
2279            Statement::WordCall { name, .. } if name == "call"
2280        ));
2281    }
2282
2283    #[test]
2284    fn test_parse_nested_quotation() {
2285        let source = ": test [ [ 1 add ] call ] ;";
2286
2287        let mut parser = Parser::new(source);
2288        let program = parser.parse().unwrap();
2289
2290        assert_eq!(program.words.len(), 1);
2291
2292        match &program.words[0].body[0] {
2293            Statement::Quotation {
2294                body: outer_body, ..
2295            } => {
2296                assert_eq!(outer_body.len(), 2);
2297
2298                match &outer_body[0] {
2299                    Statement::Quotation {
2300                        body: inner_body, ..
2301                    } => {
2302                        assert_eq!(inner_body.len(), 2);
2303                        assert_eq!(inner_body[0], Statement::IntLiteral(1));
2304                        assert!(
2305                            matches!(&inner_body[1], Statement::WordCall { name, .. } if name == "add")
2306                        );
2307                    }
2308                    _ => panic!("Expected nested Quotation"),
2309                }
2310
2311                assert!(
2312                    matches!(&outer_body[1], Statement::WordCall { name, .. } if name == "call")
2313                );
2314            }
2315            _ => panic!("Expected Quotation"),
2316        }
2317    }
2318
2319    #[test]
2320    fn test_parse_while_with_quotations() {
2321        let source = r#"
2322: countdown ( Int -- )
2323  [ dup 0 > ] [ 1 subtract ] while drop ;
2324"#;
2325
2326        let mut parser = Parser::new(source);
2327        let program = parser.parse().unwrap();
2328
2329        assert_eq!(program.words.len(), 1);
2330        assert_eq!(program.words[0].body.len(), 4);
2331
2332        // First quotation: [ dup 0 > ]
2333        match &program.words[0].body[0] {
2334            Statement::Quotation { body: pred, .. } => {
2335                assert_eq!(pred.len(), 3);
2336                assert!(matches!(&pred[0], Statement::WordCall { name, .. } if name == "dup"));
2337                assert_eq!(pred[1], Statement::IntLiteral(0));
2338                assert!(matches!(&pred[2], Statement::WordCall { name, .. } if name == ">"));
2339            }
2340            _ => panic!("Expected predicate quotation"),
2341        }
2342
2343        // Second quotation: [ 1 subtract ]
2344        match &program.words[0].body[1] {
2345            Statement::Quotation { body, .. } => {
2346                assert_eq!(body.len(), 2);
2347                assert_eq!(body[0], Statement::IntLiteral(1));
2348                assert!(matches!(&body[1], Statement::WordCall { name, .. } if name == "subtract"));
2349            }
2350            _ => panic!("Expected body quotation"),
2351        }
2352
2353        // while call
2354        assert!(matches!(
2355            &program.words[0].body[2],
2356            Statement::WordCall { name, .. } if name == "while"
2357        ));
2358
2359        // drop
2360        assert!(matches!(
2361            &program.words[0].body[3],
2362            Statement::WordCall { name, .. } if name == "drop"
2363        ));
2364    }
2365
2366    #[test]
2367    fn test_parse_simple_closure_type() {
2368        // Test: ( Int -- Closure[Int -- Int] )
2369        let source = ": make-adder ( Int -- Closure[Int -- Int] ) ;";
2370        let mut parser = Parser::new(source);
2371        let program = parser.parse().unwrap();
2372
2373        assert_eq!(program.words.len(), 1);
2374        let word = &program.words[0];
2375        assert!(word.effect.is_some());
2376
2377        let effect = word.effect.as_ref().unwrap();
2378
2379        // Input: Int on RowVar("rest")
2380        let (input_rest, input_top) = effect.inputs.clone().pop().unwrap();
2381        assert_eq!(input_top, Type::Int);
2382        assert_eq!(input_rest, StackType::RowVar("rest".to_string()));
2383
2384        // Output: Closure[Int -- Int] on RowVar("rest")
2385        let (output_rest, output_top) = effect.outputs.clone().pop().unwrap();
2386        match output_top {
2387            Type::Closure { effect, captures } => {
2388                // Closure effect: Int -> Int
2389                assert_eq!(
2390                    effect.inputs,
2391                    StackType::Cons {
2392                        rest: Box::new(StackType::RowVar("rest".to_string())),
2393                        top: Type::Int
2394                    }
2395                );
2396                assert_eq!(
2397                    effect.outputs,
2398                    StackType::Cons {
2399                        rest: Box::new(StackType::RowVar("rest".to_string())),
2400                        top: Type::Int
2401                    }
2402                );
2403                // Captures should be empty (filled in by type checker)
2404                assert_eq!(captures.len(), 0);
2405            }
2406            _ => panic!("Expected Closure type, got {:?}", output_top),
2407        }
2408        assert_eq!(output_rest, StackType::RowVar("rest".to_string()));
2409    }
2410
2411    #[test]
2412    fn test_parse_closure_type_with_row_vars() {
2413        // Test: ( ..a Config -- ..a Closure[Request -- Response] )
2414        let source = ": make-handler ( ..a Config -- ..a Closure[Request -- Response] ) ;";
2415        let mut parser = Parser::new(source);
2416        let program = parser.parse().unwrap();
2417
2418        let effect = program.words[0].effect.as_ref().unwrap();
2419
2420        // Output: Closure on RowVar("a")
2421        let (rest, top) = effect.outputs.clone().pop().unwrap();
2422        match top {
2423            Type::Closure { effect, .. } => {
2424                // Closure effect: Request -> Response
2425                let (_, in_top) = effect.inputs.clone().pop().unwrap();
2426                assert_eq!(in_top, Type::Var("Request".to_string()));
2427                let (_, out_top) = effect.outputs.clone().pop().unwrap();
2428                assert_eq!(out_top, Type::Var("Response".to_string()));
2429            }
2430            _ => panic!("Expected Closure type"),
2431        }
2432        assert_eq!(rest, StackType::RowVar("a".to_string()));
2433    }
2434
2435    #[test]
2436    fn test_parse_closure_type_missing_bracket() {
2437        // Test: ( Int -- Closure ) should fail
2438        let source = ": broken ( Int -- Closure ) ;";
2439        let mut parser = Parser::new(source);
2440        let result = parser.parse();
2441
2442        assert!(result.is_err());
2443        let err_msg = result.unwrap_err();
2444        assert!(
2445            err_msg.contains("[") && err_msg.contains("Closure"),
2446            "Expected error about missing '[' after Closure, got: {}",
2447            err_msg
2448        );
2449    }
2450
2451    #[test]
2452    fn test_parse_closure_type_in_input() {
2453        // Test: ( Closure[Int -- Int] -- )
2454        let source = ": apply-closure ( Closure[Int -- Int] -- ) ;";
2455        let mut parser = Parser::new(source);
2456        let program = parser.parse().unwrap();
2457
2458        let effect = program.words[0].effect.as_ref().unwrap();
2459
2460        // Input: Closure[Int -- Int] on RowVar("rest")
2461        let (_, top) = effect.inputs.clone().pop().unwrap();
2462        match top {
2463            Type::Closure { effect, .. } => {
2464                // Verify closure effect
2465                assert!(matches!(effect.inputs.clone().pop().unwrap().1, Type::Int));
2466                assert!(matches!(effect.outputs.clone().pop().unwrap().1, Type::Int));
2467            }
2468            _ => panic!("Expected Closure type in input"),
2469        }
2470    }
2471
2472    // Tests for token position tracking
2473
2474    #[test]
2475    fn test_token_position_single_line() {
2476        // Test token positions on a single line
2477        let source = ": main ( -- ) ;";
2478        let tokens = tokenize(source);
2479
2480        // : is at line 0, column 0
2481        assert_eq!(tokens[0].text, ":");
2482        assert_eq!(tokens[0].line, 0);
2483        assert_eq!(tokens[0].column, 0);
2484
2485        // main is at line 0, column 2
2486        assert_eq!(tokens[1].text, "main");
2487        assert_eq!(tokens[1].line, 0);
2488        assert_eq!(tokens[1].column, 2);
2489
2490        // ( is at line 0, column 7
2491        assert_eq!(tokens[2].text, "(");
2492        assert_eq!(tokens[2].line, 0);
2493        assert_eq!(tokens[2].column, 7);
2494    }
2495
2496    #[test]
2497    fn test_token_position_multiline() {
2498        // Test token positions across multiple lines
2499        let source = ": main ( -- )\n  42\n;";
2500        let tokens = tokenize(source);
2501
2502        // Find the 42 token (after the newline)
2503        let token_42 = tokens.iter().find(|t| t.text == "42").unwrap();
2504        assert_eq!(token_42.line, 1);
2505        assert_eq!(token_42.column, 2); // After 2 spaces of indentation
2506
2507        // Find the ; token (on line 2)
2508        let token_semi = tokens.iter().find(|t| t.text == ";").unwrap();
2509        assert_eq!(token_semi.line, 2);
2510        assert_eq!(token_semi.column, 0);
2511    }
2512
2513    #[test]
2514    fn test_word_def_source_location_span() {
2515        // Test that word definitions capture correct start and end lines
2516        let source = r#": helper ( -- )
2517  "hello"
2518  write_line
2519;
2520
2521: main ( -- )
2522  helper
2523;"#;
2524
2525        let mut parser = Parser::new(source);
2526        let program = parser.parse().unwrap();
2527
2528        assert_eq!(program.words.len(), 2);
2529
2530        // First word: helper spans lines 0-3
2531        let helper = &program.words[0];
2532        assert_eq!(helper.name, "helper");
2533        let helper_source = helper.source.as_ref().unwrap();
2534        assert_eq!(helper_source.start_line, 0);
2535        assert_eq!(helper_source.end_line, 3);
2536
2537        // Second word: main spans lines 5-7
2538        let main_word = &program.words[1];
2539        assert_eq!(main_word.name, "main");
2540        let main_source = main_word.source.as_ref().unwrap();
2541        assert_eq!(main_source.start_line, 5);
2542        assert_eq!(main_source.end_line, 7);
2543    }
2544
2545    #[test]
2546    fn test_token_position_string_with_newline() {
2547        // Test that newlines inside strings are tracked correctly
2548        let source = "\"line1\\nline2\"";
2549        let tokens = tokenize(source);
2550
2551        // The string token should start at line 0, column 0
2552        assert_eq!(tokens.len(), 1);
2553        assert_eq!(tokens[0].line, 0);
2554        assert_eq!(tokens[0].column, 0);
2555    }
2556
2557    // ============================================================================
2558    //                         ADT PARSING TESTS
2559    // ============================================================================
2560
#[test]
fn test_parse_simple_union() {
    // One union with two single-field variants, followed by a trivial word.
    let source = r#"
union Message {
  Get { response-chan: Int }
  Set { value: Int }
}

: main ( -- ) ;
"#;

    let program = Parser::new(source).parse().unwrap();

    assert_eq!(program.unions.len(), 1);
    let message = &program.unions[0];
    assert_eq!(message.name, "Message");
    assert_eq!(message.variants.len(), 2);

    // Variants are checked in declaration order; each one carries exactly
    // one `Int` field with the listed name.
    let expected = [("Get", "response-chan"), ("Set", "value")];
    for (variant, (variant_name, field_name)) in message.variants.iter().zip(expected) {
        assert_eq!(variant.name, variant_name);
        assert_eq!(variant.fields.len(), 1);
        assert_eq!(variant.fields[0].name, field_name);
        assert_eq!(variant.fields[0].type_name, "Int");
    }
}
2592
#[test]
fn test_parse_union_with_multiple_fields() {
    // `Data` has three comma-separated fields; `Empty` has no field block at all.
    let source = r#"
union Report {
  Data { op: Int, delta: Int, total: Int }
  Empty
}

: main ( -- ) ;
"#;

    let program = Parser::new(source).parse().unwrap();

    assert_eq!(program.unions.len(), 1);
    let report = &program.unions[0];
    assert_eq!(report.name, "Report");
    assert_eq!(report.variants.len(), 2);

    // Data variant: three fields, in declaration order.
    let data = &report.variants[0];
    assert_eq!(data.name, "Data");
    assert_eq!(data.fields.len(), 3);
    for (field, expected_name) in data.fields.iter().zip(["op", "delta", "total"]) {
        assert_eq!(field.name, expected_name);
    }

    // Empty variant: a bare name yields an empty field list.
    let empty = &report.variants[1];
    assert_eq!(empty.name, "Empty");
    assert!(empty.fields.is_empty());
}
2625
#[test]
fn test_parse_union_lowercase_name_error() {
    // The union name `message` starts with a lowercase letter.
    let source = r#"
union message {
  Get { }
}
"#;

    // `unwrap_err` fails the test if parsing unexpectedly succeeds.
    let message = Parser::new(source).parse().unwrap_err();
    assert!(message.contains("uppercase"));
}
2639
#[test]
fn test_parse_union_empty_error() {
    // A union body with no variants between the braces.
    let source = r#"
union Message {
}
"#;

    // `unwrap_err` fails the test if parsing unexpectedly succeeds.
    let message = Parser::new(source).parse().unwrap_err();
    assert!(message.contains("at least one variant"));
}
2652
#[test]
fn test_parse_union_duplicate_variant_error() {
    // `Get` is declared twice inside the same union.
    let source = r#"
union Message {
  Get { x: Int }
  Get { y: String }
}
"#;

    // `unwrap_err` fails the test if parsing unexpectedly succeeds.
    let message = Parser::new(source).parse().unwrap_err();

    // The error must describe the problem and name the offending variant.
    for needle in ["Duplicate variant name", "Get"] {
        assert!(message.contains(needle));
    }
}
2669
#[test]
fn test_parse_union_duplicate_field_error() {
    // Field `x` appears twice within a single variant.
    let source = r#"
union Data {
  Record { x: Int, x: String }
}
"#;

    // `unwrap_err` fails the test if parsing unexpectedly succeeds.
    let message = Parser::new(source).parse().unwrap_err();

    // The error must describe the problem and mention the field name.
    for needle in ["Duplicate field name", "x"] {
        assert!(message.contains(needle));
    }
}
2685
#[test]
fn test_parse_simple_match() {
    // Two arms with bare variant patterns, each followed by a single word.
    let source = r#"
: handle ( -- )
  match
    Get -> send-response
    Set -> process-set
  end
;
"#;

    let program = Parser::new(source).parse().unwrap();

    assert_eq!(program.words.len(), 1);
    let body = &program.words[0].body;
    assert_eq!(body.len(), 1);

    // The word body is a single `match` statement; pull out its arms.
    let arms = match &body[0] {
        Statement::Match { arms } => arms,
        _ => panic!("Expected Match statement"),
    };
    assert_eq!(arms.len(), 2);

    // Arms appear in source order: `Get ->` then `Set ->`, one statement each.
    for (arm, expected_name) in arms.iter().zip(["Get", "Set"]) {
        match &arm.pattern {
            Pattern::Variant(name) => assert_eq!(name, expected_name),
            _ => panic!("Expected Variant pattern"),
        }
        assert_eq!(arm.body.len(), 1);
    }
}
2724
#[test]
fn test_parse_match_with_bindings() {
    // Patterns use `>name` inside braces; the parsed bindings are expected
    // without the `>` prefix.
    let source = r#"
: handle ( -- )
  match
    Get { >chan } -> chan send-response
    Report { >delta >total } -> delta total process
  end
;
"#;

    let program = Parser::new(source).parse().unwrap();

    assert_eq!(program.words.len(), 1);

    let arms = match &program.words[0].body[0] {
        Statement::Match { arms } => arms,
        _ => panic!("Expected Match statement"),
    };
    assert_eq!(arms.len(), 2);

    // First arm: Get { >chan } ->
    if let Pattern::VariantWithBindings { name, bindings } = &arms[0].pattern {
        assert_eq!(name, "Get");
        assert_eq!(bindings.len(), 1);
        assert_eq!(bindings[0], "chan");
    } else {
        panic!("Expected VariantWithBindings pattern");
    }

    // Second arm: Report { >delta >total } ->
    if let Pattern::VariantWithBindings { name, bindings } = &arms[1].pattern {
        assert_eq!(name, "Report");
        assert_eq!(bindings.len(), 2);
        assert_eq!(bindings[0], "delta");
        assert_eq!(bindings[1], "total");
    } else {
        panic!("Expected VariantWithBindings pattern");
    }
}
2769
#[test]
fn test_parse_match_bindings_require_prefix() {
    // A binding written without the `>` prefix (the old syntax) must be rejected.
    let source = r#"
: handle ( -- )
  match
    Get { chan } -> chan send-response
  end
;
"#;

    // `unwrap_err` fails the test if parsing unexpectedly succeeds.
    let message = Parser::new(source).parse().unwrap_err();

    // The error should show the corrected spelling and mention stack extraction.
    for needle in [">chan", "stack extraction"] {
        assert!(message.contains(needle));
    }
}
2788
#[test]
fn test_parse_match_with_body_statements() {
    // Arm bodies may hold several statements, mixing literals and word calls.
    let source = r#"
: handle ( -- )
  match
    Get -> 1 2 add send-response
    Set -> process-value store
  end
;
"#;

    let program = Parser::new(source).parse().unwrap();

    let arms = match &program.words[0].body[0] {
        Statement::Match { arms } => arms,
        _ => panic!("Expected Match statement"),
    };

    // Get arm has 4 statements: 1, 2, add, send-response
    let get_body = &arms[0].body;
    assert_eq!(get_body.len(), 4);
    assert_eq!(get_body[0], Statement::IntLiteral(1));
    assert_eq!(get_body[1], Statement::IntLiteral(2));
    assert!(matches!(&get_body[2], Statement::WordCall { name, .. } if name == "add"));

    // Set arm has 2 statements: process-value, store
    assert_eq!(arms[1].body.len(), 2);
}
2819
#[test]
fn test_parse_match_empty_error() {
    // `match` immediately followed by `end`: no arms at all.
    let source = r#"
: handle ( -- )
  match
  end
;
"#;

    // `unwrap_err` fails the test if parsing unexpectedly succeeds.
    let message = Parser::new(source).parse().unwrap_err();
    assert!(message.contains("at least one arm"));
}
2834
#[test]
fn test_parse_symbol_literal() {
    // `:hello` inside a word body is a symbol literal; the stored name is
    // expected without the leading colon.
    let source = r#"
: main ( -- )
    :hello drop
;
"#;

    let program = Parser::new(source).parse().unwrap();
    assert_eq!(program.words.len(), 1);

    // Body is the symbol followed by `drop`.
    let body = &program.words[0].body;
    assert_eq!(body.len(), 2);

    let first = &body[0];
    if let Statement::Symbol(name) = first {
        assert_eq!(name, "hello");
    } else {
        panic!("Expected Symbol statement, got {:?}", first);
    }
}
2855
#[test]
fn test_parse_symbol_with_hyphen() {
    // Hyphens are accepted inside a symbol name.
    let source = r#"
: main ( -- )
    :hello-world drop
;
"#;

    let program = Parser::new(source).parse().unwrap();

    if let Statement::Symbol(name) = &program.words[0].body[0] {
        assert_eq!(name, "hello-world");
    } else {
        panic!("Expected Symbol statement");
    }
}
2872
#[test]
fn test_parse_symbol_starting_with_digit_fails() {
    // `:123abc` begins with a digit right after the colon.
    let source = r#"
: main ( -- )
    :123abc drop
;
"#;

    // `unwrap_err` fails the test if parsing unexpectedly succeeds.
    let message = Parser::new(source).parse().unwrap_err();
    assert!(message.contains("cannot start with a digit"));
}
2886
#[test]
fn test_parse_symbol_with_invalid_char_fails() {
    // `@` is not permitted inside a symbol name.
    let source = r#"
: main ( -- )
    :hello@world drop
;
"#;

    // `unwrap_err` fails the test if parsing unexpectedly succeeds.
    let message = Parser::new(source).parse().unwrap_err();
    assert!(message.contains("invalid character"));
}
2900
#[test]
fn test_parse_symbol_special_chars_allowed() {
    // `?` and `!` are both valid inside symbol names.
    let source = r#"
: main ( -- )
    :empty? drop
    :save! drop
;
"#;

    let program = Parser::new(source).parse().unwrap();
    let body = &program.words[0].body;

    // The symbols sit at body positions 0 and 2; a `drop` follows each one.
    for (index, expected) in [(0, "empty?"), (2, "save!")] {
        match &body[index] {
            Statement::Symbol(name) => assert_eq!(name, expected),
            _ => panic!("Expected Symbol statement"),
        }
    }
}
2923}