Skip to main content

seqc/
parser.rs

//! Simple parser for Seq syntax
//!
//! Syntax:
//! ```text
//! : word-name ( stack-effect )
//!   statement1
//!   statement2
//!   ... ;
//! ```
10
11use crate::ast::{
12    Include, MatchArm, Pattern, Program, SourceLocation, Span, Statement, UnionDef, UnionField,
13    UnionVariant, WordDef,
14};
15use crate::types::{Effect, SideEffect, StackType, Type};
16
/// A single token together with the position at which it appears in the source.
#[derive(Debug, Clone)]
pub struct Token {
    /// Text of the token
    pub text: String,
    /// Line number (0-indexed for LSP compatibility)
    pub line: usize,
    /// Column number (0-indexed)
    pub column: usize,
}
26
27impl Token {
28    fn new(text: String, line: usize, column: usize) -> Self {
29        Token { text, line, column }
30    }
31}
32
33impl PartialEq<&str> for Token {
34    fn eq(&self, other: &&str) -> bool {
35        self.text == *other
36    }
37}
38
39impl PartialEq<str> for Token {
40    fn eq(&self, other: &str) -> bool {
41        self.text == other
42    }
43}
44
45pub struct Parser {
46    tokens: Vec<Token>,
47    pos: usize,
48    /// Counter for assigning unique IDs to quotations
49    /// Used by the typechecker to track inferred types
50    next_quotation_id: usize,
51    /// Pending lint annotations collected from `# seq:allow(lint-id)` comments
52    pending_allowed_lints: Vec<String>,
53}
54
55impl Parser {
56    pub fn new(source: &str) -> Self {
57        let tokens = tokenize(source);
58        Parser {
59            tokens,
60            pos: 0,
61            next_quotation_id: 0,
62            pending_allowed_lints: Vec::new(),
63        }
64    }
65
66    pub fn parse(&mut self) -> Result<Program, String> {
67        let mut program = Program::new();
68
69        // Check for unclosed string error from tokenizer
70        if let Some(error_token) = self.tokens.iter().find(|t| *t == "<<<UNCLOSED_STRING>>>") {
71            return Err(format!(
72                "Unclosed string literal at line {}, column {} - missing closing quote",
73                error_token.line + 1, // 1-indexed for user display
74                error_token.column + 1
75            ));
76        }
77
78        while !self.is_at_end() {
79            self.skip_comments();
80            if self.is_at_end() {
81                break;
82            }
83
84            // Check for include statement
85            if self.check("include") {
86                let include = self.parse_include()?;
87                program.includes.push(include);
88                continue;
89            }
90
91            // Check for union definition
92            if self.check("union") {
93                let union_def = self.parse_union_def()?;
94                program.unions.push(union_def);
95                continue;
96            }
97
98            let word = self.parse_word_def()?;
99            program.words.push(word);
100        }
101
102        Ok(program)
103    }
104
105    /// Parse an include statement:
106    ///   include std:http     -> Include::Std("http")
107    ///   include ffi:readline -> Include::Ffi("readline")
108    ///   include "my-utils"   -> Include::Relative("my-utils")
109    fn parse_include(&mut self) -> Result<Include, String> {
110        self.consume("include");
111
112        let token = self
113            .advance()
114            .ok_or("Expected module name after 'include'")?
115            .clone();
116
117        // Check for std: prefix (tokenizer splits this into "std", ":", "name")
118        if token == "std" {
119            // Expect : token
120            if !self.consume(":") {
121                return Err("Expected ':' after 'std' in include statement".to_string());
122            }
123            // Get the module name
124            let name = self
125                .advance()
126                .ok_or("Expected module name after 'std:'")?
127                .clone();
128            return Ok(Include::Std(name));
129        }
130
131        // Check for ffi: prefix
132        if token == "ffi" {
133            // Expect : token
134            if !self.consume(":") {
135                return Err("Expected ':' after 'ffi' in include statement".to_string());
136            }
137            // Get the library name
138            let name = self
139                .advance()
140                .ok_or("Expected library name after 'ffi:'")?
141                .clone();
142            return Ok(Include::Ffi(name));
143        }
144
145        // Check for quoted string (relative path)
146        if token.starts_with('"') && token.ends_with('"') {
147            let path = token.trim_start_matches('"').trim_end_matches('"');
148            return Ok(Include::Relative(path.to_string()));
149        }
150
151        Err(format!(
152            "Invalid include syntax '{}'. Use 'include std:name', 'include ffi:lib', or 'include \"path\"'",
153            token
154        ))
155    }
156
157    /// Parse a union type definition:
158    ///   union Message {
159    ///     Get { response-chan: Int }
160    ///     Increment { response-chan: Int }
161    ///     Report { op: Int, delta: Int, total: Int }
162    ///   }
163    fn parse_union_def(&mut self) -> Result<UnionDef, String> {
164        // Capture start line from 'union' token
165        let start_line = self.current_token().map(|t| t.line).unwrap_or(0);
166
167        // Consume 'union' keyword
168        self.consume("union");
169
170        // Get union name (must start with uppercase)
171        let name = self
172            .advance()
173            .ok_or("Expected union name after 'union'")?
174            .clone();
175
176        if !name
177            .chars()
178            .next()
179            .map(|c| c.is_uppercase())
180            .unwrap_or(false)
181        {
182            return Err(format!(
183                "Union name '{}' must start with an uppercase letter",
184                name
185            ));
186        }
187
188        // Skip comments and newlines
189        self.skip_comments();
190
191        // Expect '{'
192        if !self.consume("{") {
193            return Err(format!(
194                "Expected '{{' after union name '{}', got '{}'",
195                name,
196                self.current()
197            ));
198        }
199
200        // Parse variants until '}'
201        let mut variants = Vec::new();
202        loop {
203            self.skip_comments();
204
205            if self.check("}") {
206                break;
207            }
208
209            if self.is_at_end() {
210                return Err(format!("Unexpected end of file in union '{}'", name));
211            }
212
213            variants.push(self.parse_union_variant()?);
214        }
215
216        // Capture end line from '}' token before consuming
217        let end_line = self.current_token().map(|t| t.line).unwrap_or(start_line);
218
219        // Consume '}'
220        self.consume("}");
221
222        if variants.is_empty() {
223            return Err(format!("Union '{}' must have at least one variant", name));
224        }
225
226        // Check for duplicate variant names
227        let mut seen_variants = std::collections::HashSet::new();
228        for variant in &variants {
229            if !seen_variants.insert(&variant.name) {
230                return Err(format!(
231                    "Duplicate variant name '{}' in union '{}'",
232                    variant.name, name
233                ));
234            }
235        }
236
237        Ok(UnionDef {
238            name,
239            variants,
240            source: Some(SourceLocation::span(
241                std::path::PathBuf::new(),
242                start_line,
243                end_line,
244            )),
245        })
246    }
247
248    /// Parse a single union variant:
249    ///   Get { response-chan: Int }
250    ///   or just: Empty (no fields)
251    fn parse_union_variant(&mut self) -> Result<UnionVariant, String> {
252        let start_line = self.current_token().map(|t| t.line).unwrap_or(0);
253
254        // Get variant name (must start with uppercase)
255        let name = self.advance().ok_or("Expected variant name")?.clone();
256
257        if !name
258            .chars()
259            .next()
260            .map(|c| c.is_uppercase())
261            .unwrap_or(false)
262        {
263            return Err(format!(
264                "Variant name '{}' must start with an uppercase letter",
265                name
266            ));
267        }
268
269        self.skip_comments();
270
271        // Check for optional fields
272        let fields = if self.check("{") {
273            self.consume("{");
274            let fields = self.parse_union_fields()?;
275            if !self.consume("}") {
276                return Err(format!("Expected '}}' after variant '{}' fields", name));
277            }
278            fields
279        } else {
280            Vec::new()
281        };
282
283        Ok(UnionVariant {
284            name,
285            fields,
286            source: Some(SourceLocation::new(std::path::PathBuf::new(), start_line)),
287        })
288    }
289
290    /// Parse union fields: name: Type, name: Type, ...
291    fn parse_union_fields(&mut self) -> Result<Vec<UnionField>, String> {
292        let mut fields = Vec::new();
293
294        loop {
295            self.skip_comments();
296
297            if self.check("}") {
298                break;
299            }
300
301            // Get field name
302            let field_name = self.advance().ok_or("Expected field name")?.clone();
303
304            // Expect ':'
305            if !self.consume(":") {
306                return Err(format!(
307                    "Expected ':' after field name '{}', got '{}'",
308                    field_name,
309                    self.current()
310                ));
311            }
312
313            // Get type name
314            let type_name = self
315                .advance()
316                .ok_or("Expected type name after ':'")?
317                .clone();
318
319            fields.push(UnionField {
320                name: field_name,
321                type_name,
322            });
323
324            // Optional comma separator
325            self.skip_comments();
326            self.consume(",");
327        }
328
329        // Check for duplicate field names
330        let mut seen_fields = std::collections::HashSet::new();
331        for field in &fields {
332            if !seen_fields.insert(&field.name) {
333                return Err(format!("Duplicate field name '{}' in variant", field.name));
334            }
335        }
336
337        Ok(fields)
338    }
339
340    fn parse_word_def(&mut self) -> Result<WordDef, String> {
341        // Consume any pending lint annotations collected from comments before this word
342        let allowed_lints = std::mem::take(&mut self.pending_allowed_lints);
343
344        // Capture start line from ':' token
345        let start_line = self.current_token().map(|t| t.line).unwrap_or(0);
346
347        // Expect ':'
348        if !self.consume(":") {
349            return Err(format!(
350                "Expected ':' to start word definition, got '{}'",
351                self.current()
352            ));
353        }
354
355        // Get word name
356        let name = self
357            .advance()
358            .ok_or("Expected word name after ':'")?
359            .clone();
360
361        // Parse stack effect if present: ( ..a Int -- ..a Bool )
362        let effect = if self.check("(") {
363            Some(self.parse_stack_effect()?)
364        } else {
365            None
366        };
367
368        // Parse body until ';'
369        let mut body = Vec::new();
370        while !self.check(";") {
371            if self.is_at_end() {
372                return Err(format!("Unexpected end of file in word '{}'", name));
373            }
374
375            // Skip comments and newlines in body
376            self.skip_comments();
377            if self.check(";") {
378                break;
379            }
380
381            body.push(self.parse_statement()?);
382        }
383
384        // Capture end line from ';' token before consuming
385        let end_line = self.current_token().map(|t| t.line).unwrap_or(start_line);
386
387        // Consume ';'
388        self.consume(";");
389
390        Ok(WordDef {
391            name,
392            effect,
393            body,
394            source: Some(crate::ast::SourceLocation::span(
395                std::path::PathBuf::new(),
396                start_line,
397                end_line,
398            )),
399            allowed_lints,
400        })
401    }
402
403    fn parse_statement(&mut self) -> Result<Statement, String> {
404        use crate::ast::Span;
405        let tok = self.advance_token().ok_or("Unexpected end of file")?;
406        let token = &tok.text;
407        let tok_line = tok.line;
408        let tok_column = tok.column;
409        let tok_len = tok.text.len();
410
411        // Check if it looks like a float literal (contains . or scientific notation)
412        // Must check this BEFORE integer parsing
413        if let Some(f) = is_float_literal(token)
414            .then(|| token.parse::<f64>().ok())
415            .flatten()
416        {
417            return Ok(Statement::FloatLiteral(f));
418        }
419
420        // Try to parse as hex literal (0x or 0X prefix)
421        if let Some(hex) = token
422            .strip_prefix("0x")
423            .or_else(|| token.strip_prefix("0X"))
424        {
425            return i64::from_str_radix(hex, 16)
426                .map(Statement::IntLiteral)
427                .map_err(|_| format!("Invalid hex literal: {}", token));
428        }
429
430        // Try to parse as binary literal (0b or 0B prefix)
431        if let Some(bin) = token
432            .strip_prefix("0b")
433            .or_else(|| token.strip_prefix("0B"))
434        {
435            return i64::from_str_radix(bin, 2)
436                .map(Statement::IntLiteral)
437                .map_err(|_| format!("Invalid binary literal: {}", token));
438        }
439
440        // Try to parse as decimal integer literal
441        if let Ok(n) = token.parse::<i64>() {
442            return Ok(Statement::IntLiteral(n));
443        }
444
445        // Try to parse as boolean literal
446        if token == "true" {
447            return Ok(Statement::BoolLiteral(true));
448        }
449        if token == "false" {
450            return Ok(Statement::BoolLiteral(false));
451        }
452
453        // Try to parse as symbol literal (:foo, :some-name)
454        if token == ":" {
455            // Get the next token as the symbol name
456            let name_tok = self
457                .advance_token()
458                .ok_or("Expected symbol name after ':', got end of input")?;
459            let name = &name_tok.text;
460            // Validate symbol name (identifier-like, kebab-case allowed)
461            if name.is_empty() {
462                return Err("Symbol name cannot be empty".to_string());
463            }
464            if name.starts_with(|c: char| c.is_ascii_digit()) {
465                return Err(format!(
466                    "Symbol name cannot start with a digit: ':{}'\n  Hint: Symbol names must start with a letter",
467                    name
468                ));
469            }
470            if let Some(bad_char) = name.chars().find(|c| {
471                !c.is_alphanumeric()
472                    && *c != '-'
473                    && *c != '_'
474                    && *c != '.'
475                    && *c != '?'
476                    && *c != '!'
477            }) {
478                return Err(format!(
479                    "Symbol name contains invalid character '{}': ':{}'\n  Hint: Allowed: letters, digits, - _ . ? !",
480                    bad_char, name
481                ));
482            }
483            return Ok(Statement::Symbol(name.clone()));
484        }
485
486        // Try to parse as string literal
487        if token.starts_with('"') {
488            // Validate token has at least opening and closing quotes
489            if token.len() < 2 || !token.ends_with('"') {
490                return Err(format!("Malformed string literal: {}", token));
491            }
492            // Strip exactly one quote from each end (not all quotes, which would
493            // incorrectly handle escaped quotes at string boundaries like "hello\"")
494            let raw = &token[1..token.len() - 1];
495            let unescaped = unescape_string(raw)?;
496            return Ok(Statement::StringLiteral(unescaped));
497        }
498
499        // Check for conditional
500        if token == "if" {
501            return self.parse_if(tok_line, tok_column);
502        }
503
504        // Check for quotation
505        if token == "[" {
506            return self.parse_quotation(tok_line, tok_column);
507        }
508
509        // Check for match expression
510        if token == "match" {
511            return self.parse_match(tok_line, tok_column);
512        }
513
514        // Otherwise it's a word call - preserve source span for precise diagnostics
515        Ok(Statement::WordCall {
516            name: token.to_string(),
517            span: Some(Span::new(tok_line, tok_column, tok_len)),
518        })
519    }
520
521    fn parse_if(&mut self, start_line: usize, start_column: usize) -> Result<Statement, String> {
522        let mut then_branch = Vec::new();
523
524        // Parse then branch until 'else' or 'then'
525        loop {
526            if self.is_at_end() {
527                return Err("Unexpected end of file in 'if' statement".to_string());
528            }
529
530            // Skip comments and newlines
531            self.skip_comments();
532
533            if self.check("else") {
534                self.advance();
535                // Parse else branch
536                break;
537            }
538
539            if self.check("then") {
540                self.advance();
541                // End of if without else
542                return Ok(Statement::If {
543                    then_branch,
544                    else_branch: None,
545                    span: Some(Span::new(start_line, start_column, "if".len())),
546                });
547            }
548
549            then_branch.push(self.parse_statement()?);
550        }
551
552        // Parse else branch until 'then'
553        let mut else_branch = Vec::new();
554        loop {
555            if self.is_at_end() {
556                return Err("Unexpected end of file in 'else' branch".to_string());
557            }
558
559            // Skip comments and newlines
560            self.skip_comments();
561
562            if self.check("then") {
563                self.advance();
564                return Ok(Statement::If {
565                    then_branch,
566                    else_branch: Some(else_branch),
567                    span: Some(Span::new(start_line, start_column, "if".len())),
568                });
569            }
570
571            else_branch.push(self.parse_statement()?);
572        }
573    }
574
575    fn parse_quotation(
576        &mut self,
577        start_line: usize,
578        start_column: usize,
579    ) -> Result<Statement, String> {
580        use crate::ast::QuotationSpan;
581        let mut body = Vec::new();
582
583        // Parse statements until ']'
584        loop {
585            if self.is_at_end() {
586                return Err("Unexpected end of file in quotation".to_string());
587            }
588
589            // Skip comments and newlines
590            self.skip_comments();
591
592            if self.check("]") {
593                let end_tok = self.advance_token().unwrap();
594                let end_line = end_tok.line;
595                let end_column = end_tok.column + 1; // exclusive
596                let id = self.next_quotation_id;
597                self.next_quotation_id += 1;
598                // Span from '[' to ']' inclusive
599                let span = QuotationSpan::new(start_line, start_column, end_line, end_column);
600                return Ok(Statement::Quotation {
601                    id,
602                    body,
603                    span: Some(span),
604                });
605            }
606
607            body.push(self.parse_statement()?);
608        }
609    }
610
611    /// Parse a match expression:
612    ///   match
613    ///     Get -> send-response
614    ///     Increment -> do-increment send-response
615    ///     Report -> aggregate-add
616    ///   end
617    fn parse_match(&mut self, start_line: usize, start_column: usize) -> Result<Statement, String> {
618        let mut arms = Vec::new();
619
620        loop {
621            self.skip_comments();
622
623            // Check for 'end' to terminate match
624            if self.check("end") {
625                self.advance();
626                break;
627            }
628
629            if self.is_at_end() {
630                return Err("Unexpected end of file in match expression".to_string());
631            }
632
633            arms.push(self.parse_match_arm()?);
634        }
635
636        if arms.is_empty() {
637            return Err("Match expression must have at least one arm".to_string());
638        }
639
640        Ok(Statement::Match {
641            arms,
642            span: Some(Span::new(start_line, start_column, "match".len())),
643        })
644    }
645
646    /// Parse a single match arm:
647    ///   Get -> send-response
648    ///   or with bindings:
649    ///   Get { chan } -> chan send-response
650    fn parse_match_arm(&mut self) -> Result<MatchArm, String> {
651        // Get variant name with position info
652        let variant_token = self
653            .advance_token()
654            .ok_or("Expected variant name in match arm")?;
655        let variant_name = variant_token.text.clone();
656        let arm_line = variant_token.line;
657        let arm_column = variant_token.column;
658        let arm_length = variant_name.len();
659
660        self.skip_comments();
661
662        // Check for optional bindings: { field1 field2 }
663        let pattern = if self.check("{") {
664            self.consume("{");
665            let mut bindings = Vec::new();
666
667            loop {
668                self.skip_comments();
669
670                if self.check("}") {
671                    break;
672                }
673
674                if self.is_at_end() {
675                    return Err(format!(
676                        "Unexpected end of file in match arm bindings for '{}'",
677                        variant_name
678                    ));
679                }
680
681                let token = self.advance().ok_or("Expected binding name")?.clone();
682
683                // Require > prefix to make clear these are stack extractions, not variables
684                if let Some(field_name) = token.strip_prefix('>') {
685                    if field_name.is_empty() {
686                        return Err(format!(
687                            "Expected field name after '>' in match bindings for '{}'",
688                            variant_name
689                        ));
690                    }
691                    bindings.push(field_name.to_string());
692                } else {
693                    return Err(format!(
694                        "Match bindings must use '>' prefix to indicate stack extraction. \
695                         Use '>{}' instead of '{}' in pattern for '{}'",
696                        token, token, variant_name
697                    ));
698                }
699            }
700
701            self.consume("}");
702            Pattern::VariantWithBindings {
703                name: variant_name,
704                bindings,
705            }
706        } else {
707            Pattern::Variant(variant_name.clone())
708        };
709
710        self.skip_comments();
711
712        // Expect '->' arrow
713        if !self.consume("->") {
714            return Err(format!(
715                "Expected '->' after pattern '{}', got '{}'",
716                match &pattern {
717                    Pattern::Variant(n) => n.clone(),
718                    Pattern::VariantWithBindings { name, .. } => name.clone(),
719                },
720                self.current()
721            ));
722        }
723
724        // Parse body until next pattern or 'end'
725        let mut body = Vec::new();
726        loop {
727            self.skip_comments();
728
729            // Check for end of arm (next pattern starts with uppercase, or 'end')
730            if self.check("end") {
731                break;
732            }
733
734            // Check if next token looks like a match pattern (not just any uppercase word).
735            // A pattern is: UppercaseName followed by '->' or '{'
736            // This prevents confusing 'Make-Get' (constructor call) with a pattern.
737            if let Some(token) = self.current_token()
738                && let Some(first_char) = token.text.chars().next()
739                && first_char.is_uppercase()
740            {
741                // Peek at next token to see if this is a pattern (followed by -> or {)
742                if let Some(next) = self.peek_at(1)
743                    && (next == "->" || next == "{")
744                {
745                    // This is the next pattern
746                    break;
747                }
748                // Otherwise it's just an uppercase word call (like Make-Get), continue parsing body
749            }
750
751            if self.is_at_end() {
752                return Err("Unexpected end of file in match arm body".to_string());
753            }
754
755            body.push(self.parse_statement()?);
756        }
757
758        Ok(MatchArm {
759            pattern,
760            body,
761            span: Some(Span::new(arm_line, arm_column, arm_length)),
762        })
763    }
764
765    /// Parse a stack effect declaration: ( ..a Int -- ..a Bool )
766    /// With optional computational effects: ( ..a Int -- ..a Bool | Yield Int )
767    fn parse_stack_effect(&mut self) -> Result<Effect, String> {
768        // Consume '('
769        if !self.consume("(") {
770            return Err("Expected '(' to start stack effect".to_string());
771        }
772
773        // Parse input stack types (until '--' or ')')
774        let (input_row_var, input_types) =
775            self.parse_type_list_until(&["--", ")"], "stack effect inputs", 0)?;
776
777        // Consume '--'
778        if !self.consume("--") {
779            return Err("Expected '--' separator in stack effect".to_string());
780        }
781
782        // Parse output stack types (until ')' or '|')
783        let (output_row_var, output_types) =
784            self.parse_type_list_until(&[")", "|"], "stack effect outputs", 0)?;
785
786        // Parse optional computational effects after '|'
787        let effects = if self.consume("|") {
788            self.parse_effect_annotations()?
789        } else {
790            Vec::new()
791        };
792
793        // Consume ')'
794        if !self.consume(")") {
795            return Err("Expected ')' to end stack effect".to_string());
796        }
797
798        // Build input and output StackTypes
799        let inputs = self.build_stack_type(input_row_var, input_types);
800        let outputs = self.build_stack_type(output_row_var, output_types);
801
802        Ok(Effect::with_effects(inputs, outputs, effects))
803    }
804
805    /// Parse computational effect annotations after '|'
806    /// Example: | Yield Int
807    fn parse_effect_annotations(&mut self) -> Result<Vec<SideEffect>, String> {
808        let mut effects = Vec::new();
809
810        // Parse effects until we hit ')'
811        while let Some(token) = self.peek_at(0) {
812            if token == ")" {
813                break;
814            }
815
816            match token {
817                "Yield" => {
818                    self.advance(); // consume "Yield"
819                    // Parse the yield type
820                    if let Some(type_token) = self.current_token() {
821                        if type_token.text == ")" {
822                            return Err("Expected type after 'Yield'".to_string());
823                        }
824                        let type_token = type_token.clone();
825                        self.advance();
826                        let yield_type = self.parse_type(&type_token)?;
827                        effects.push(SideEffect::Yield(Box::new(yield_type)));
828                    } else {
829                        return Err("Expected type after 'Yield'".to_string());
830                    }
831                }
832                _ => {
833                    return Err(format!("Unknown effect '{}'. Expected 'Yield'", token));
834                }
835            }
836        }
837
838        if effects.is_empty() {
839            return Err("Expected at least one effect after '|'".to_string());
840        }
841
842        Ok(effects)
843    }
844
845    /// Parse a single type token into a Type
846    fn parse_type(&self, token: &Token) -> Result<Type, String> {
847        match token.text.as_str() {
848            "Int" => Ok(Type::Int),
849            "Float" => Ok(Type::Float),
850            "Bool" => Ok(Type::Bool),
851            "String" => Ok(Type::String),
852            // Reject 'Quotation' - it looks like a type but would be silently treated as a type variable.
853            // Users must use explicit effect syntax like [Int -- Int] instead.
854            "Quotation" => Err(format!(
855                "'Quotation' is not a valid type at line {}, column {}. Use explicit quotation syntax like [Int -- Int] or [ -- ] instead.",
856                token.line + 1,
857                token.column + 1
858            )),
859            _ => {
860                // Check if it's a type variable (starts with uppercase)
861                if let Some(first_char) = token.text.chars().next() {
862                    if first_char.is_uppercase() {
863                        Ok(Type::Var(token.text.to_string()))
864                    } else {
865                        Err(format!(
866                            "Unknown type: '{}' at line {}, column {}. Expected Int, Bool, String, Closure, or a type variable (uppercase)",
867                            token.text.escape_default(),
868                            token.line + 1, // 1-indexed for user display
869                            token.column + 1
870                        ))
871                    }
872                } else {
873                    Err(format!(
874                        "Invalid type: '{}' at line {}, column {}",
875                        token.text.escape_default(),
876                        token.line + 1,
877                        token.column + 1
878                    ))
879                }
880            }
881        }
882    }
883
884    /// Validate row variable name
885    /// Row variables must start with a lowercase letter and contain only alphanumeric characters
886    fn validate_row_var_name(&self, name: &str) -> Result<(), String> {
887        if name.is_empty() {
888            return Err("Row variable must have a name after '..'".to_string());
889        }
890
891        // Must start with lowercase letter
892        let first_char = name.chars().next().unwrap();
893        if !first_char.is_ascii_lowercase() {
894            return Err(format!(
895                "Row variable '..{}' must start with a lowercase letter (a-z)",
896                name
897            ));
898        }
899
900        // Rest must be alphanumeric or underscore
901        for ch in name.chars() {
902            if !ch.is_alphanumeric() && ch != '_' {
903                return Err(format!(
904                    "Row variable '..{}' can only contain letters, numbers, and underscores",
905                    name
906                ));
907            }
908        }
909
910        // Check for reserved keywords (type names that might confuse users)
911        match name {
912            "Int" | "Bool" | "String" => {
913                return Err(format!(
914                    "Row variable '..{}' cannot use type name as identifier",
915                    name
916                ));
917            }
918            _ => {}
919        }
920
921        Ok(())
922    }
923
924    /// Parse a list of types until one of the given terminators is reached
925    /// Returns (optional row variable, list of types)
926    /// Used by both parse_stack_effect and parse_quotation_type
927    ///
928    /// depth: Current nesting depth for quotation types (0 at top level)
929    fn parse_type_list_until(
930        &mut self,
931        terminators: &[&str],
932        context: &str,
933        depth: usize,
934    ) -> Result<(Option<String>, Vec<Type>), String> {
935        const MAX_QUOTATION_DEPTH: usize = 32;
936
937        if depth > MAX_QUOTATION_DEPTH {
938            return Err(format!(
939                "Quotation type nesting exceeds maximum depth of {} (possible deeply nested types or DOS attack)",
940                MAX_QUOTATION_DEPTH
941            ));
942        }
943
944        let mut types = Vec::new();
945        let mut row_var = None;
946
947        while !terminators.iter().any(|t| self.check(t)) {
948            // Skip comments and blank lines within type lists
949            self.skip_comments();
950
951            // Re-check terminators after skipping comments
952            if terminators.iter().any(|t| self.check(t)) {
953                break;
954            }
955
956            if self.is_at_end() {
957                return Err(format!(
958                    "Unexpected end while parsing {} - expected one of: {}",
959                    context,
960                    terminators.join(", ")
961                ));
962            }
963
964            let token = self
965                .advance_token()
966                .ok_or_else(|| format!("Unexpected end in {}", context))?
967                .clone();
968
969            // Check for row variable: ..name
970            if token.text.starts_with("..") {
971                let var_name = token.text.trim_start_matches("..").to_string();
972                self.validate_row_var_name(&var_name)?;
973                row_var = Some(var_name);
974            } else if token.text == "Closure" {
975                // Closure type: Closure[effect]
976                if !self.consume("[") {
977                    return Err("Expected '[' after 'Closure' in type signature".to_string());
978                }
979                let effect_type = self.parse_quotation_type(depth)?;
980                match effect_type {
981                    Type::Quotation(effect) => {
982                        types.push(Type::Closure {
983                            effect,
984                            captures: Vec::new(), // Filled in by type checker
985                        });
986                    }
987                    _ => unreachable!("parse_quotation_type should return Quotation"),
988                }
989            } else if token.text == "[" {
990                // Nested quotation type
991                types.push(self.parse_quotation_type(depth)?);
992            } else {
993                // Parse as concrete type
994                types.push(self.parse_type(&token)?);
995            }
996        }
997
998        Ok((row_var, types))
999    }
1000
1001    /// Parse a quotation type: [inputs -- outputs]
1002    /// Note: The opening '[' has already been consumed
1003    ///
1004    /// depth: Current nesting depth (incremented for each nested quotation)
1005    fn parse_quotation_type(&mut self, depth: usize) -> Result<Type, String> {
1006        // Parse input stack types (until '--' or ']')
1007        let (input_row_var, input_types) =
1008            self.parse_type_list_until(&["--", "]"], "quotation type inputs", depth + 1)?;
1009
1010        // Require '--' separator for clarity
1011        if !self.consume("--") {
1012            // Check if user closed with ] without separator
1013            if self.check("]") {
1014                return Err(
1015                    "Quotation types require '--' separator. Did you mean '[Int -- ]' or '[ -- Int]'?"
1016                        .to_string(),
1017                );
1018            }
1019            return Err("Expected '--' separator in quotation type".to_string());
1020        }
1021
1022        // Parse output stack types (until ']')
1023        let (output_row_var, output_types) =
1024            self.parse_type_list_until(&["]"], "quotation type outputs", depth + 1)?;
1025
1026        // Consume ']'
1027        if !self.consume("]") {
1028            return Err("Expected ']' to end quotation type".to_string());
1029        }
1030
1031        // Build input and output StackTypes
1032        let inputs = self.build_stack_type(input_row_var, input_types);
1033        let outputs = self.build_stack_type(output_row_var, output_types);
1034
1035        Ok(Type::Quotation(Box::new(Effect::new(inputs, outputs))))
1036    }
1037
1038    /// Build a StackType from an optional row variable and a list of types
1039    /// Example: row_var="a", types=[Int, Bool] => RowVar("a") with Int on top of Bool
1040    ///
1041    /// IMPORTANT: ALL stack effects are implicitly row-polymorphic in concatenative languages.
1042    /// This means:
1043    ///   ( -- )        becomes  ( ..rest -- ..rest )       - no-op, preserves stack
1044    ///   ( -- Int )    becomes  ( ..rest -- ..rest Int )   - pushes Int
1045    ///   ( Int -- )    becomes  ( ..rest Int -- ..rest )   - consumes Int
1046    ///   ( Int -- Int) becomes  ( ..rest Int -- ..rest Int ) - transforms top
1047    fn build_stack_type(&self, row_var: Option<String>, types: Vec<Type>) -> StackType {
1048        // Always use row polymorphism - this is fundamental to concatenative semantics
1049        let base = match row_var {
1050            Some(name) => StackType::RowVar(name),
1051            None => StackType::RowVar("rest".to_string()),
1052        };
1053
1054        // Push types onto the stack (bottom to top order)
1055        types.into_iter().fold(base, |stack, ty| stack.push(ty))
1056    }
1057
1058    fn skip_comments(&mut self) {
1059        loop {
1060            // Check for comment: either standalone "#" or token starting with "#"
1061            // The latter handles shebangs like "#!/usr/bin/env seqc"
1062            let is_comment = if self.is_at_end() {
1063                false
1064            } else {
1065                let tok = self.current();
1066                tok == "#" || tok.starts_with("#!")
1067            };
1068
1069            if is_comment {
1070                self.advance(); // consume # or shebang token
1071
1072                // Collect all tokens until newline to reconstruct the comment text
1073                let mut comment_parts: Vec<String> = Vec::new();
1074                while !self.is_at_end() && self.current() != "\n" {
1075                    comment_parts.push(self.current().to_string());
1076                    self.advance();
1077                }
1078                if !self.is_at_end() {
1079                    self.advance(); // skip newline
1080                }
1081
1082                // Join parts and check for seq:allow annotation
1083                // Format: # seq:allow(lint-id) -> parts = ["seq", ":", "allow", "(", "lint-id", ")"]
1084                let comment = comment_parts.join("");
1085                if let Some(lint_id) = comment
1086                    .strip_prefix("seq:allow(")
1087                    .and_then(|s| s.strip_suffix(")"))
1088                {
1089                    self.pending_allowed_lints.push(lint_id.to_string());
1090                }
1091            } else if self.check("\n") {
1092                // Skip blank lines
1093                self.advance();
1094            } else {
1095                break;
1096            }
1097        }
1098    }
1099
1100    fn check(&self, expected: &str) -> bool {
1101        if self.is_at_end() {
1102            return false;
1103        }
1104        self.current() == expected
1105    }
1106
1107    fn consume(&mut self, expected: &str) -> bool {
1108        if self.check(expected) {
1109            self.advance();
1110            true
1111        } else {
1112            false
1113        }
1114    }
1115
1116    /// Get the text of the current token
1117    fn current(&self) -> &str {
1118        if self.is_at_end() {
1119            ""
1120        } else {
1121            &self.tokens[self.pos].text
1122        }
1123    }
1124
1125    /// Get the full current token with position info
1126    fn current_token(&self) -> Option<&Token> {
1127        if self.is_at_end() {
1128            None
1129        } else {
1130            Some(&self.tokens[self.pos])
1131        }
1132    }
1133
1134    /// Peek at a token N positions ahead without consuming
1135    fn peek_at(&self, n: usize) -> Option<&str> {
1136        let idx = self.pos + n;
1137        if idx < self.tokens.len() {
1138            Some(&self.tokens[idx].text)
1139        } else {
1140            None
1141        }
1142    }
1143
1144    /// Advance and return the token text (for compatibility with existing code)
1145    fn advance(&mut self) -> Option<&String> {
1146        if self.is_at_end() {
1147            None
1148        } else {
1149            let token = &self.tokens[self.pos];
1150            self.pos += 1;
1151            Some(&token.text)
1152        }
1153    }
1154
1155    /// Advance and return the full token with position info
1156    fn advance_token(&mut self) -> Option<&Token> {
1157        if self.is_at_end() {
1158            None
1159        } else {
1160            let token = &self.tokens[self.pos];
1161            self.pos += 1;
1162            Some(token)
1163        }
1164    }
1165
1166    fn is_at_end(&self) -> bool {
1167        self.pos >= self.tokens.len()
1168    }
1169}
1170
/// Check if a token looks like a float literal
///
/// A token looks like a float when, after an optional leading `-`, it contains
/// at least one ASCII digit AND either:
/// - A decimal point: `3.14`, `.5`, `5.`
/// - Scientific notation: `1e10`, `1E-5`, `1.5e3`
///
/// This is only a cheap pre-filter, not a full validation: tokens such as
/// `1.2.3` still pass, so a `true` result does not guarantee a valid float.
///
/// This check must happen BEFORE integer parsing to avoid
/// parsing "5" in "5.0" as an integer.
fn is_float_literal(token: &str) -> bool {
    // Skip leading minus sign for negative numbers
    let s = token.strip_prefix('-').unwrap_or(token);

    // Must have at least one digit; this also rejects the empty string and plain
    // words that merely contain an 'e' or a '.' (e.g. `write_line`, `io.write-line`)
    let has_digit = s.bytes().any(|b| b.is_ascii_digit());

    // Decimal point or scientific-notation exponent marker
    let has_float_marker = s.chars().any(|c| matches!(c, '.' | 'e' | 'E'));

    has_digit && has_float_marker
}
1191
/// Process escape sequences in a string literal
///
/// Supported escape sequences:
/// - `\"` -> `"`  (quote)
/// - `\\` -> `\`  (backslash)
/// - `\n` -> newline
/// - `\r` -> carriage return
/// - `\t` -> tab
/// - `\xNN` -> Unicode code point U+00NN (exactly two hex digits, 00-FF)
///
/// # Note on `\xNN` encoding
///
/// The `\xNN` escape creates a Unicode code point U+00NN, not a raw byte.
/// For values 0x00-0x7F (ASCII), this maps directly to the byte value.
/// For values 0x80-0xFF (Latin-1 Supplement), the character is stored as
/// a multi-byte UTF-8 sequence. For example:
/// - `\x41` -> 'A' (1 byte in UTF-8)
/// - `\x1b` -> ESC (1 byte in UTF-8, used for ANSI terminal codes)
/// - `\xFF` -> 'ÿ' (U+00FF, 2 bytes in UTF-8: 0xC3 0xBF)
///
/// # Errors
/// Returns an error if an unknown escape sequence is encountered, if a `\x`
/// escape is not followed by exactly two hex digits, or if the input ends
/// in the middle of an escape sequence.
fn unescape_string(s: &str) -> Result<String, String> {
    // Every escape sequence produces fewer bytes than it consumes, so the
    // input length is an upper bound for the output length.
    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars();

    while let Some(ch) = chars.next() {
        if ch != '\\' {
            result.push(ch);
            continue;
        }

        match chars.next() {
            Some('"') => result.push('"'),
            Some('\\') => result.push('\\'),
            Some('n') => result.push('\n'),
            Some('r') => result.push('\r'),
            Some('t') => result.push('\t'),
            Some('x') => {
                // Hex escape: \xNN
                let hex1 = chars.next().ok_or_else(|| {
                    "Incomplete hex escape sequence '\\x' - expected 2 hex digits".to_string()
                })?;
                let hex2 = chars.next().ok_or_else(|| {
                    format!(
                        "Incomplete hex escape sequence '\\x{}' - expected 2 hex digits",
                        hex1
                    )
                })?;

                // Validate each character individually. Integer parsing via
                // `from_str_radix` would also accept a leading '+' sign, letting
                // malformed escapes such as `\x+F` slip through.
                let byte_val = match (hex1.to_digit(16), hex2.to_digit(16)) {
                    // Two hex digits combine to at most 0xFF, so the cast cannot truncate
                    (Some(hi), Some(lo)) => (hi * 16 + lo) as u8,
                    _ => {
                        return Err(format!(
                            "Invalid hex escape sequence '\\x{}{}' - expected 2 hex digits (00-FF)",
                            hex1, hex2
                        ));
                    }
                };

                result.push(char::from(byte_val));
            }
            Some(c) => {
                return Err(format!(
                    "Unknown escape sequence '\\{}' in string literal. \
                     Supported: \\\" \\\\ \\n \\r \\t \\xNN",
                    c
                ));
            }
            None => {
                return Err("String ends with incomplete escape sequence '\\'".to_string());
            }
        }
    }

    Ok(result)
}
1269
1270fn tokenize(source: &str) -> Vec<Token> {
1271    let mut tokens = Vec::new();
1272    let mut current = String::new();
1273    let mut current_start_line = 0;
1274    let mut current_start_col = 0;
1275    let mut in_string = false;
1276    let mut prev_was_backslash = false;
1277
1278    // Track current position (0-indexed)
1279    let mut line = 0;
1280    let mut col = 0;
1281
1282    for ch in source.chars() {
1283        if in_string {
1284            current.push(ch);
1285            if ch == '"' && !prev_was_backslash {
1286                // Unescaped quote ends the string
1287                in_string = false;
1288                tokens.push(Token::new(
1289                    current.clone(),
1290                    current_start_line,
1291                    current_start_col,
1292                ));
1293                current.clear();
1294                prev_was_backslash = false;
1295            } else if ch == '\\' && !prev_was_backslash {
1296                // Start of escape sequence
1297                prev_was_backslash = true;
1298            } else {
1299                // Regular character or escaped character
1300                prev_was_backslash = false;
1301            }
1302            // Track newlines inside strings
1303            if ch == '\n' {
1304                line += 1;
1305                col = 0;
1306            } else {
1307                col += 1;
1308            }
1309        } else if ch == '"' {
1310            if !current.is_empty() {
1311                tokens.push(Token::new(
1312                    current.clone(),
1313                    current_start_line,
1314                    current_start_col,
1315                ));
1316                current.clear();
1317            }
1318            in_string = true;
1319            current_start_line = line;
1320            current_start_col = col;
1321            current.push(ch);
1322            prev_was_backslash = false;
1323            col += 1;
1324        } else if ch.is_whitespace() {
1325            if !current.is_empty() {
1326                tokens.push(Token::new(
1327                    current.clone(),
1328                    current_start_line,
1329                    current_start_col,
1330                ));
1331                current.clear();
1332            }
1333            // Preserve newlines for comment handling
1334            if ch == '\n' {
1335                tokens.push(Token::new("\n".to_string(), line, col));
1336                line += 1;
1337                col = 0;
1338            } else {
1339                col += 1;
1340            }
1341        } else if "():;[]{},".contains(ch) {
1342            if !current.is_empty() {
1343                tokens.push(Token::new(
1344                    current.clone(),
1345                    current_start_line,
1346                    current_start_col,
1347                ));
1348                current.clear();
1349            }
1350            tokens.push(Token::new(ch.to_string(), line, col));
1351            col += 1;
1352        } else {
1353            if current.is_empty() {
1354                current_start_line = line;
1355                current_start_col = col;
1356            }
1357            current.push(ch);
1358            col += 1;
1359        }
1360    }
1361
1362    // Check for unclosed string literal
1363    if in_string {
1364        // Return error by adding a special error token
1365        // The parser will handle this as a parse error
1366        tokens.push(Token::new(
1367            "<<<UNCLOSED_STRING>>>".to_string(),
1368            current_start_line,
1369            current_start_col,
1370        ));
1371    } else if !current.is_empty() {
1372        tokens.push(Token::new(current, current_start_line, current_start_col));
1373    }
1374
1375    tokens
1376}
1377
1378#[cfg(test)]
1379mod tests {
1380    use super::*;
1381
1382    #[test]
1383    fn test_parse_hello_world() {
1384        let source = r#"
1385: main ( -- )
1386  "Hello, World!" write_line ;
1387"#;
1388
1389        let mut parser = Parser::new(source);
1390        let program = parser.parse().unwrap();
1391
1392        assert_eq!(program.words.len(), 1);
1393        assert_eq!(program.words[0].name, "main");
1394        assert_eq!(program.words[0].body.len(), 2);
1395
1396        match &program.words[0].body[0] {
1397            Statement::StringLiteral(s) => assert_eq!(s, "Hello, World!"),
1398            _ => panic!("Expected StringLiteral"),
1399        }
1400
1401        match &program.words[0].body[1] {
1402            Statement::WordCall { name, .. } => assert_eq!(name, "write_line"),
1403            _ => panic!("Expected WordCall"),
1404        }
1405    }
1406
1407    #[test]
1408    fn test_parse_with_numbers() {
1409        let source = ": add-example ( -- ) 2 3 add ;";
1410
1411        let mut parser = Parser::new(source);
1412        let program = parser.parse().unwrap();
1413
1414        assert_eq!(program.words[0].body.len(), 3);
1415        assert_eq!(program.words[0].body[0], Statement::IntLiteral(2));
1416        assert_eq!(program.words[0].body[1], Statement::IntLiteral(3));
1417        assert!(matches!(
1418            &program.words[0].body[2],
1419            Statement::WordCall { name, .. } if name == "add"
1420        ));
1421    }
1422
1423    #[test]
1424    fn test_parse_hex_literals() {
1425        let source = ": test ( -- ) 0xFF 0x10 0X1A ;";
1426        let mut parser = Parser::new(source);
1427        let program = parser.parse().unwrap();
1428
1429        assert_eq!(program.words[0].body[0], Statement::IntLiteral(255));
1430        assert_eq!(program.words[0].body[1], Statement::IntLiteral(16));
1431        assert_eq!(program.words[0].body[2], Statement::IntLiteral(26));
1432    }
1433
1434    #[test]
1435    fn test_parse_binary_literals() {
1436        let source = ": test ( -- ) 0b1010 0B1111 0b0 ;";
1437        let mut parser = Parser::new(source);
1438        let program = parser.parse().unwrap();
1439
1440        assert_eq!(program.words[0].body[0], Statement::IntLiteral(10));
1441        assert_eq!(program.words[0].body[1], Statement::IntLiteral(15));
1442        assert_eq!(program.words[0].body[2], Statement::IntLiteral(0));
1443    }
1444
1445    #[test]
1446    fn test_parse_invalid_hex_literal() {
1447        let source = ": test ( -- ) 0xGG ;";
1448        let mut parser = Parser::new(source);
1449        let err = parser.parse().unwrap_err();
1450        assert!(err.contains("Invalid hex literal"));
1451    }
1452
1453    #[test]
1454    fn test_parse_invalid_binary_literal() {
1455        let source = ": test ( -- ) 0b123 ;";
1456        let mut parser = Parser::new(source);
1457        let err = parser.parse().unwrap_err();
1458        assert!(err.contains("Invalid binary literal"));
1459    }
1460
1461    #[test]
1462    fn test_parse_escaped_quotes() {
1463        let source = r#": main ( -- ) "Say \"hello\" there" write_line ;"#;
1464
1465        let mut parser = Parser::new(source);
1466        let program = parser.parse().unwrap();
1467
1468        assert_eq!(program.words.len(), 1);
1469        assert_eq!(program.words[0].body.len(), 2);
1470
1471        match &program.words[0].body[0] {
1472            // Escape sequences should be processed: \" becomes actual quote
1473            Statement::StringLiteral(s) => assert_eq!(s, "Say \"hello\" there"),
1474            _ => panic!("Expected StringLiteral with escaped quotes"),
1475        }
1476    }
1477
1478    /// Regression test for issue #117: escaped quote at end of string
1479    /// Previously failed with "String ends with incomplete escape sequence"
1480    #[test]
1481    fn test_escaped_quote_at_end_of_string() {
1482        let source = r#": main ( -- ) "hello\"" io.write-line ;"#;
1483
1484        let mut parser = Parser::new(source);
1485        let program = parser.parse().unwrap();
1486
1487        assert_eq!(program.words.len(), 1);
1488        match &program.words[0].body[0] {
1489            Statement::StringLiteral(s) => assert_eq!(s, "hello\""),
1490            _ => panic!("Expected StringLiteral ending with escaped quote"),
1491        }
1492    }
1493
1494    /// Test escaped quote at start of string (boundary case)
1495    #[test]
1496    fn test_escaped_quote_at_start_of_string() {
1497        let source = r#": main ( -- ) "\"hello" io.write-line ;"#;
1498
1499        let mut parser = Parser::new(source);
1500        let program = parser.parse().unwrap();
1501
1502        match &program.words[0].body[0] {
1503            Statement::StringLiteral(s) => assert_eq!(s, "\"hello"),
1504            _ => panic!("Expected StringLiteral starting with escaped quote"),
1505        }
1506    }
1507
1508    #[test]
1509    fn test_escape_sequences() {
1510        let source = r#": main ( -- ) "Line 1\nLine 2\tTabbed" write_line ;"#;
1511
1512        let mut parser = Parser::new(source);
1513        let program = parser.parse().unwrap();
1514
1515        match &program.words[0].body[0] {
1516            Statement::StringLiteral(s) => assert_eq!(s, "Line 1\nLine 2\tTabbed"),
1517            _ => panic!("Expected StringLiteral"),
1518        }
1519    }
1520
1521    #[test]
1522    fn test_unknown_escape_sequence() {
1523        let source = r#": main ( -- ) "Bad \q sequence" write_line ;"#;
1524
1525        let mut parser = Parser::new(source);
1526        let result = parser.parse();
1527
1528        assert!(result.is_err());
1529        assert!(result.unwrap_err().contains("Unknown escape sequence"));
1530    }
1531
1532    #[test]
1533    fn test_hex_escape_sequence() {
1534        // \x1b is ESC (27), \x41 is 'A' (65)
1535        let source = r#": main ( -- ) "\x1b[2K\x41" io.write-line ;"#;
1536
1537        let mut parser = Parser::new(source);
1538        let program = parser.parse().unwrap();
1539
1540        match &program.words[0].body[0] {
1541            Statement::StringLiteral(s) => {
1542                assert_eq!(s.len(), 5); // ESC [ 2 K A
1543                assert_eq!(s.as_bytes()[0], 0x1b); // ESC
1544                assert_eq!(s.as_bytes()[4], 0x41); // 'A'
1545            }
1546            _ => panic!("Expected StringLiteral"),
1547        }
1548    }
1549
1550    #[test]
1551    fn test_hex_escape_null_byte() {
1552        let source = r#": main ( -- ) "before\x00after" io.write-line ;"#;
1553
1554        let mut parser = Parser::new(source);
1555        let program = parser.parse().unwrap();
1556
1557        match &program.words[0].body[0] {
1558            Statement::StringLiteral(s) => {
1559                assert_eq!(s.len(), 12); // "before" + NUL + "after"
1560                assert_eq!(s.as_bytes()[6], 0x00);
1561            }
1562            _ => panic!("Expected StringLiteral"),
1563        }
1564    }
1565
1566    #[test]
1567    fn test_hex_escape_uppercase() {
1568        // Both uppercase and lowercase hex digits should work
1569        // Note: Values > 0x7F become Unicode code points (U+00NN), multi-byte in UTF-8
1570        let source = r#": main ( -- ) "\x41\x42\x4F" io.write-line ;"#;
1571
1572        let mut parser = Parser::new(source);
1573        let program = parser.parse().unwrap();
1574
1575        match &program.words[0].body[0] {
1576            Statement::StringLiteral(s) => {
1577                assert_eq!(s, "ABO"); // 0x41='A', 0x42='B', 0x4F='O'
1578            }
1579            _ => panic!("Expected StringLiteral"),
1580        }
1581    }
1582
1583    #[test]
1584    fn test_hex_escape_high_bytes() {
1585        // Values > 0x7F become Unicode code points (Latin-1), which are multi-byte in UTF-8
1586        let source = r#": main ( -- ) "\xFF" io.write-line ;"#;
1587
1588        let mut parser = Parser::new(source);
1589        let program = parser.parse().unwrap();
1590
1591        match &program.words[0].body[0] {
1592            Statement::StringLiteral(s) => {
1593                // \xFF becomes U+00FF (ÿ), which is 2 bytes in UTF-8: C3 BF
1594                assert_eq!(s, "\u{00FF}");
1595                assert_eq!(s.chars().next().unwrap(), 'ÿ');
1596            }
1597            _ => panic!("Expected StringLiteral"),
1598        }
1599    }
1600
1601    #[test]
1602    fn test_hex_escape_incomplete() {
1603        // \x with only one hex digit
1604        let source = r#": main ( -- ) "\x1" io.write-line ;"#;
1605
1606        let mut parser = Parser::new(source);
1607        let result = parser.parse();
1608
1609        assert!(result.is_err());
1610        assert!(result.unwrap_err().contains("Incomplete hex escape"));
1611    }
1612
1613    #[test]
1614    fn test_hex_escape_invalid_digits() {
1615        // \xGG is not valid hex
1616        let source = r#": main ( -- ) "\xGG" io.write-line ;"#;
1617
1618        let mut parser = Parser::new(source);
1619        let result = parser.parse();
1620
1621        assert!(result.is_err());
1622        assert!(result.unwrap_err().contains("Invalid hex escape"));
1623    }
1624
1625    #[test]
1626    fn test_hex_escape_at_end_of_string() {
1627        // \x at end of string with no digits
1628        let source = r#": main ( -- ) "test\x" io.write-line ;"#;
1629
1630        let mut parser = Parser::new(source);
1631        let result = parser.parse();
1632
1633        assert!(result.is_err());
1634        assert!(result.unwrap_err().contains("Incomplete hex escape"));
1635    }
1636
1637    #[test]
1638    fn test_unclosed_string_literal() {
1639        let source = r#": main ( -- ) "unclosed string ;"#;
1640
1641        let mut parser = Parser::new(source);
1642        let result = parser.parse();
1643
1644        assert!(result.is_err());
1645        let err_msg = result.unwrap_err();
1646        assert!(err_msg.contains("Unclosed string literal"));
1647        // Should include position information (line 1, column 15 for the opening quote)
1648        assert!(
1649            err_msg.contains("line 1"),
1650            "Expected line number in error: {}",
1651            err_msg
1652        );
1653        assert!(
1654            err_msg.contains("column 15"),
1655            "Expected column number in error: {}",
1656            err_msg
1657        );
1658    }
1659
1660    #[test]
1661    fn test_multiple_word_definitions() {
1662        let source = r#"
1663: double ( Int -- Int )
1664  2 multiply ;
1665
1666: quadruple ( Int -- Int )
1667  double double ;
1668"#;
1669
1670        let mut parser = Parser::new(source);
1671        let program = parser.parse().unwrap();
1672
1673        assert_eq!(program.words.len(), 2);
1674        assert_eq!(program.words[0].name, "double");
1675        assert_eq!(program.words[1].name, "quadruple");
1676
1677        // Verify stack effects were parsed
1678        assert!(program.words[0].effect.is_some());
1679        assert!(program.words[1].effect.is_some());
1680    }
1681
1682    #[test]
1683    fn test_user_word_calling_user_word() {
1684        let source = r#"
1685: helper ( -- )
1686  "helper called" write_line ;
1687
1688: main ( -- )
1689  helper ;
1690"#;
1691
1692        let mut parser = Parser::new(source);
1693        let program = parser.parse().unwrap();
1694
1695        assert_eq!(program.words.len(), 2);
1696
1697        // Check main calls helper
1698        match &program.words[1].body[0] {
1699            Statement::WordCall { name, .. } => assert_eq!(name, "helper"),
1700            _ => panic!("Expected WordCall to helper"),
1701        }
1702    }
1703
1704    #[test]
1705    fn test_parse_simple_stack_effect() {
1706        // Test: ( Int -- Bool )
1707        // With implicit row polymorphism, this becomes: ( ..rest Int -- ..rest Bool )
1708        let source = ": test ( Int -- Bool ) 1 ;";
1709        let mut parser = Parser::new(source);
1710        let program = parser.parse().unwrap();
1711
1712        assert_eq!(program.words.len(), 1);
1713        let word = &program.words[0];
1714        assert!(word.effect.is_some());
1715
1716        let effect = word.effect.as_ref().unwrap();
1717
1718        // Input: Int on RowVar("rest") (implicit row polymorphism)
1719        assert_eq!(
1720            effect.inputs,
1721            StackType::Cons {
1722                rest: Box::new(StackType::RowVar("rest".to_string())),
1723                top: Type::Int
1724            }
1725        );
1726
1727        // Output: Bool on RowVar("rest") (implicit row polymorphism)
1728        assert_eq!(
1729            effect.outputs,
1730            StackType::Cons {
1731                rest: Box::new(StackType::RowVar("rest".to_string())),
1732                top: Type::Bool
1733            }
1734        );
1735    }
1736
1737    #[test]
1738    fn test_parse_row_polymorphic_stack_effect() {
1739        // Test: ( ..a Int -- ..a Bool )
1740        let source = ": test ( ..a Int -- ..a Bool ) 1 ;";
1741        let mut parser = Parser::new(source);
1742        let program = parser.parse().unwrap();
1743
1744        assert_eq!(program.words.len(), 1);
1745        let word = &program.words[0];
1746        assert!(word.effect.is_some());
1747
1748        let effect = word.effect.as_ref().unwrap();
1749
1750        // Input: Int on RowVar("a")
1751        assert_eq!(
1752            effect.inputs,
1753            StackType::Cons {
1754                rest: Box::new(StackType::RowVar("a".to_string())),
1755                top: Type::Int
1756            }
1757        );
1758
1759        // Output: Bool on RowVar("a")
1760        assert_eq!(
1761            effect.outputs,
1762            StackType::Cons {
1763                rest: Box::new(StackType::RowVar("a".to_string())),
1764                top: Type::Bool
1765            }
1766        );
1767    }
1768
1769    #[test]
1770    fn test_parse_invalid_row_var_starts_with_digit() {
1771        // Test: Row variable cannot start with digit
1772        let source = ": test ( ..123 Int -- ) ;";
1773        let mut parser = Parser::new(source);
1774        let result = parser.parse();
1775
1776        assert!(result.is_err());
1777        let err_msg = result.unwrap_err();
1778        assert!(
1779            err_msg.contains("lowercase letter"),
1780            "Expected error about lowercase letter, got: {}",
1781            err_msg
1782        );
1783    }
1784
1785    #[test]
1786    fn test_parse_invalid_row_var_starts_with_uppercase() {
1787        // Test: Row variable cannot start with uppercase (that's a type variable)
1788        let source = ": test ( ..Int Int -- ) ;";
1789        let mut parser = Parser::new(source);
1790        let result = parser.parse();
1791
1792        assert!(result.is_err());
1793        let err_msg = result.unwrap_err();
1794        assert!(
1795            err_msg.contains("lowercase letter") || err_msg.contains("type name"),
1796            "Expected error about lowercase letter or type name, got: {}",
1797            err_msg
1798        );
1799    }
1800
1801    #[test]
1802    fn test_parse_invalid_row_var_with_special_chars() {
1803        // Test: Row variable cannot contain special characters
1804        let source = ": test ( ..a-b Int -- ) ;";
1805        let mut parser = Parser::new(source);
1806        let result = parser.parse();
1807
1808        assert!(result.is_err());
1809        let err_msg = result.unwrap_err();
1810        assert!(
1811            err_msg.contains("letters, numbers, and underscores")
1812                || err_msg.contains("Unknown type"),
1813            "Expected error about valid characters, got: {}",
1814            err_msg
1815        );
1816    }
1817
1818    #[test]
1819    fn test_parse_valid_row_var_with_underscore() {
1820        // Test: Row variable CAN contain underscore
1821        let source = ": test ( ..my_row Int -- ..my_row Bool ) ;";
1822        let mut parser = Parser::new(source);
1823        let result = parser.parse();
1824
1825        assert!(result.is_ok(), "Should accept row variable with underscore");
1826    }
1827
1828    #[test]
1829    fn test_parse_multiple_types_stack_effect() {
1830        // Test: ( Int String -- Bool )
1831        // With implicit row polymorphism: ( ..rest Int String -- ..rest Bool )
1832        let source = ": test ( Int String -- Bool ) 1 ;";
1833        let mut parser = Parser::new(source);
1834        let program = parser.parse().unwrap();
1835
1836        let effect = program.words[0].effect.as_ref().unwrap();
1837
1838        // Input: String on Int on RowVar("rest")
1839        let (rest, top) = effect.inputs.clone().pop().unwrap();
1840        assert_eq!(top, Type::String);
1841        let (rest2, top2) = rest.pop().unwrap();
1842        assert_eq!(top2, Type::Int);
1843        assert_eq!(rest2, StackType::RowVar("rest".to_string()));
1844
1845        // Output: Bool on RowVar("rest") (implicit row polymorphism)
1846        assert_eq!(
1847            effect.outputs,
1848            StackType::Cons {
1849                rest: Box::new(StackType::RowVar("rest".to_string())),
1850                top: Type::Bool
1851            }
1852        );
1853    }
1854
1855    #[test]
1856    fn test_parse_type_variable() {
1857        // Test: ( ..a T -- ..a T T ) for dup
1858        let source = ": dup ( ..a T -- ..a T T ) ;";
1859        let mut parser = Parser::new(source);
1860        let program = parser.parse().unwrap();
1861
1862        let effect = program.words[0].effect.as_ref().unwrap();
1863
1864        // Input: T on RowVar("a")
1865        assert_eq!(
1866            effect.inputs,
1867            StackType::Cons {
1868                rest: Box::new(StackType::RowVar("a".to_string())),
1869                top: Type::Var("T".to_string())
1870            }
1871        );
1872
1873        // Output: T on T on RowVar("a")
1874        let (rest, top) = effect.outputs.clone().pop().unwrap();
1875        assert_eq!(top, Type::Var("T".to_string()));
1876        let (rest2, top2) = rest.pop().unwrap();
1877        assert_eq!(top2, Type::Var("T".to_string()));
1878        assert_eq!(rest2, StackType::RowVar("a".to_string()));
1879    }
1880
1881    #[test]
1882    fn test_parse_empty_stack_effect() {
1883        // Test: ( -- )
1884        // In concatenative languages, even empty effects are row-polymorphic
1885        // ( -- ) means ( ..rest -- ..rest ) - preserves stack
1886        let source = ": test ( -- ) ;";
1887        let mut parser = Parser::new(source);
1888        let program = parser.parse().unwrap();
1889
1890        let effect = program.words[0].effect.as_ref().unwrap();
1891
1892        // Both inputs and outputs should use the same implicit row variable
1893        assert_eq!(effect.inputs, StackType::RowVar("rest".to_string()));
1894        assert_eq!(effect.outputs, StackType::RowVar("rest".to_string()));
1895    }
1896
1897    #[test]
1898    fn test_parse_invalid_type() {
1899        // Test invalid type (lowercase, not a row var)
1900        let source = ": test ( invalid -- Bool ) ;";
1901        let mut parser = Parser::new(source);
1902        let result = parser.parse();
1903
1904        assert!(result.is_err());
1905        assert!(result.unwrap_err().contains("Unknown type"));
1906    }
1907
1908    #[test]
1909    fn test_parse_unclosed_stack_effect() {
1910        // Test unclosed stack effect - parser tries to parse all tokens until ')' or EOF
1911        // In this case, it encounters "body" which is an invalid type
1912        let source = ": test ( Int -- Bool body ;";
1913        let mut parser = Parser::new(source);
1914        let result = parser.parse();
1915
1916        assert!(result.is_err());
1917        let err_msg = result.unwrap_err();
1918        // Parser will try to parse "body" as a type and fail
1919        assert!(err_msg.contains("Unknown type"));
1920    }
1921
1922    #[test]
1923    fn test_parse_simple_quotation_type() {
1924        // Test: ( [Int -- Int] -- )
1925        let source = ": apply ( [Int -- Int] -- ) ;";
1926        let mut parser = Parser::new(source);
1927        let program = parser.parse().unwrap();
1928
1929        let effect = program.words[0].effect.as_ref().unwrap();
1930
1931        // Input should be: Quotation(Int -- Int) on RowVar("rest")
1932        let (rest, top) = effect.inputs.clone().pop().unwrap();
1933        match top {
1934            Type::Quotation(quot_effect) => {
1935                // Check quotation's input: Int on RowVar("rest")
1936                assert_eq!(
1937                    quot_effect.inputs,
1938                    StackType::Cons {
1939                        rest: Box::new(StackType::RowVar("rest".to_string())),
1940                        top: Type::Int
1941                    }
1942                );
1943                // Check quotation's output: Int on RowVar("rest")
1944                assert_eq!(
1945                    quot_effect.outputs,
1946                    StackType::Cons {
1947                        rest: Box::new(StackType::RowVar("rest".to_string())),
1948                        top: Type::Int
1949                    }
1950                );
1951            }
1952            _ => panic!("Expected Quotation type, got {:?}", top),
1953        }
1954        assert_eq!(rest, StackType::RowVar("rest".to_string()));
1955    }
1956
1957    #[test]
1958    fn test_parse_quotation_type_with_row_vars() {
1959        // Test: ( ..a [..a T -- ..a Bool] -- ..a )
1960        let source = ": test ( ..a [..a T -- ..a Bool] -- ..a ) ;";
1961        let mut parser = Parser::new(source);
1962        let program = parser.parse().unwrap();
1963
1964        let effect = program.words[0].effect.as_ref().unwrap();
1965
1966        // Input: Quotation on RowVar("a")
1967        let (rest, top) = effect.inputs.clone().pop().unwrap();
1968        match top {
1969            Type::Quotation(quot_effect) => {
1970                // Check quotation's input: T on RowVar("a")
1971                let (q_in_rest, q_in_top) = quot_effect.inputs.clone().pop().unwrap();
1972                assert_eq!(q_in_top, Type::Var("T".to_string()));
1973                assert_eq!(q_in_rest, StackType::RowVar("a".to_string()));
1974
1975                // Check quotation's output: Bool on RowVar("a")
1976                let (q_out_rest, q_out_top) = quot_effect.outputs.clone().pop().unwrap();
1977                assert_eq!(q_out_top, Type::Bool);
1978                assert_eq!(q_out_rest, StackType::RowVar("a".to_string()));
1979            }
1980            _ => panic!("Expected Quotation type, got {:?}", top),
1981        }
1982        assert_eq!(rest, StackType::RowVar("a".to_string()));
1983    }
1984
1985    #[test]
1986    fn test_parse_nested_quotation_type() {
1987        // Test: ( [[Int -- Int] -- Bool] -- )
1988        let source = ": nested ( [[Int -- Int] -- Bool] -- ) ;";
1989        let mut parser = Parser::new(source);
1990        let program = parser.parse().unwrap();
1991
1992        let effect = program.words[0].effect.as_ref().unwrap();
1993
1994        // Input: Quotation([Int -- Int] -- Bool) on RowVar("rest")
1995        let (_, top) = effect.inputs.clone().pop().unwrap();
1996        match top {
1997            Type::Quotation(outer_effect) => {
1998                // Outer quotation input: [Int -- Int] on RowVar("rest")
1999                let (_, outer_in_top) = outer_effect.inputs.clone().pop().unwrap();
2000                match outer_in_top {
2001                    Type::Quotation(inner_effect) => {
2002                        // Inner quotation: Int -- Int
2003                        assert!(matches!(
2004                            inner_effect.inputs.clone().pop().unwrap().1,
2005                            Type::Int
2006                        ));
2007                        assert!(matches!(
2008                            inner_effect.outputs.clone().pop().unwrap().1,
2009                            Type::Int
2010                        ));
2011                    }
2012                    _ => panic!("Expected nested Quotation type"),
2013                }
2014
2015                // Outer quotation output: Bool
2016                let (_, outer_out_top) = outer_effect.outputs.clone().pop().unwrap();
2017                assert_eq!(outer_out_top, Type::Bool);
2018            }
2019            _ => panic!("Expected Quotation type"),
2020        }
2021    }
2022
2023    #[test]
2024    fn test_parse_deeply_nested_quotation_type_exceeds_limit() {
2025        // Test: Deeply nested quotation types should fail with max depth error
2026        // Build a quotation type nested 35 levels deep (exceeds MAX_QUOTATION_DEPTH = 32)
2027        let mut source = String::from(": deep ( ");
2028
2029        // Build opening brackets: [[[[[[...
2030        for _ in 0..35 {
2031            source.push_str("[ -- ");
2032        }
2033
2034        source.push_str("Int");
2035
2036        // Build closing brackets: ...]]]]]]
2037        for _ in 0..35 {
2038            source.push_str(" ]");
2039        }
2040
2041        source.push_str(" -- ) ;");
2042
2043        let mut parser = Parser::new(&source);
2044        let result = parser.parse();
2045
2046        // Should fail with depth limit error
2047        assert!(result.is_err());
2048        let err_msg = result.unwrap_err();
2049        assert!(
2050            err_msg.contains("depth") || err_msg.contains("32"),
2051            "Expected depth limit error, got: {}",
2052            err_msg
2053        );
2054    }
2055
2056    #[test]
2057    fn test_parse_empty_quotation_type() {
2058        // Test: ( [ -- ] -- )
2059        // An empty quotation type is also row-polymorphic: [ ..rest -- ..rest ]
2060        let source = ": empty-quot ( [ -- ] -- ) ;";
2061        let mut parser = Parser::new(source);
2062        let program = parser.parse().unwrap();
2063
2064        let effect = program.words[0].effect.as_ref().unwrap();
2065
2066        let (_, top) = effect.inputs.clone().pop().unwrap();
2067        match top {
2068            Type::Quotation(quot_effect) => {
2069                // Empty quotation preserves the stack (row-polymorphic)
2070                assert_eq!(quot_effect.inputs, StackType::RowVar("rest".to_string()));
2071                assert_eq!(quot_effect.outputs, StackType::RowVar("rest".to_string()));
2072            }
2073            _ => panic!("Expected Quotation type"),
2074        }
2075    }
2076
2077    #[test]
2078    fn test_parse_quotation_type_in_output() {
2079        // Test: ( -- [Int -- Int] )
2080        let source = ": maker ( -- [Int -- Int] ) ;";
2081        let mut parser = Parser::new(source);
2082        let program = parser.parse().unwrap();
2083
2084        let effect = program.words[0].effect.as_ref().unwrap();
2085
2086        // Output should be: Quotation(Int -- Int) on RowVar("rest")
2087        let (_, top) = effect.outputs.clone().pop().unwrap();
2088        match top {
2089            Type::Quotation(quot_effect) => {
2090                assert!(matches!(
2091                    quot_effect.inputs.clone().pop().unwrap().1,
2092                    Type::Int
2093                ));
2094                assert!(matches!(
2095                    quot_effect.outputs.clone().pop().unwrap().1,
2096                    Type::Int
2097                ));
2098            }
2099            _ => panic!("Expected Quotation type"),
2100        }
2101    }
2102
2103    #[test]
2104    fn test_parse_unclosed_quotation_type() {
2105        // Test: ( [Int -- Int -- )  (missing ])
2106        let source = ": broken ( [Int -- Int -- ) ;";
2107        let mut parser = Parser::new(source);
2108        let result = parser.parse();
2109
2110        assert!(result.is_err());
2111        let err_msg = result.unwrap_err();
2112        // Parser might error with various messages depending on where it fails
2113        // It should at least indicate a parsing problem
2114        assert!(
2115            err_msg.contains("Unclosed")
2116                || err_msg.contains("Expected")
2117                || err_msg.contains("Unexpected"),
2118            "Got error: {}",
2119            err_msg
2120        );
2121    }
2122
2123    #[test]
2124    fn test_parse_multiple_quotation_types() {
2125        // Test: ( [Int -- Int] [String -- Bool] -- )
2126        let source = ": multi ( [Int -- Int] [String -- Bool] -- ) ;";
2127        let mut parser = Parser::new(source);
2128        let program = parser.parse().unwrap();
2129
2130        let effect = program.words[0].effect.as_ref().unwrap();
2131
2132        // Pop second quotation (String -- Bool)
2133        let (rest, top) = effect.inputs.clone().pop().unwrap();
2134        match top {
2135            Type::Quotation(quot_effect) => {
2136                assert!(matches!(
2137                    quot_effect.inputs.clone().pop().unwrap().1,
2138                    Type::String
2139                ));
2140                assert!(matches!(
2141                    quot_effect.outputs.clone().pop().unwrap().1,
2142                    Type::Bool
2143                ));
2144            }
2145            _ => panic!("Expected Quotation type"),
2146        }
2147
2148        // Pop first quotation (Int -- Int)
2149        let (_, top2) = rest.pop().unwrap();
2150        match top2 {
2151            Type::Quotation(quot_effect) => {
2152                assert!(matches!(
2153                    quot_effect.inputs.clone().pop().unwrap().1,
2154                    Type::Int
2155                ));
2156                assert!(matches!(
2157                    quot_effect.outputs.clone().pop().unwrap().1,
2158                    Type::Int
2159                ));
2160            }
2161            _ => panic!("Expected Quotation type"),
2162        }
2163    }
2164
2165    #[test]
2166    fn test_parse_quotation_type_without_separator() {
2167        // Test: ( [Int] -- ) should be REJECTED
2168        //
2169        // Design decision: The '--' separator is REQUIRED for clarity.
2170        // [Int] looks like a list type in most languages, not a consumer function.
2171        // This would confuse users.
2172        //
2173        // Require explicit syntax:
2174        // - `[Int -- ]` for quotation that consumes Int and produces nothing
2175        // - `[ -- Int]` for quotation that produces Int
2176        // - `[Int -- Int]` for transformation
2177        let source = ": consumer ( [Int] -- ) ;";
2178        let mut parser = Parser::new(source);
2179        let result = parser.parse();
2180
2181        // Should fail with helpful error message
2182        assert!(result.is_err());
2183        let err_msg = result.unwrap_err();
2184        assert!(
2185            err_msg.contains("require") && err_msg.contains("--"),
2186            "Expected error about missing '--' separator, got: {}",
2187            err_msg
2188        );
2189    }
2190
2191    #[test]
2192    fn test_parse_bare_quotation_type_rejected() {
2193        // Test: ( Int Quotation -- Int ) should be REJECTED
2194        //
2195        // 'Quotation' looks like a type name but would be silently treated as a
2196        // type variable without this check. Users must use explicit effect syntax.
2197        let source = ": apply-twice ( Int Quotation -- Int ) ;";
2198        let mut parser = Parser::new(source);
2199        let result = parser.parse();
2200
2201        assert!(result.is_err());
2202        let err_msg = result.unwrap_err();
2203        assert!(
2204            err_msg.contains("Quotation") && err_msg.contains("not a valid type"),
2205            "Expected error about 'Quotation' not being valid, got: {}",
2206            err_msg
2207        );
2208        assert!(
2209            err_msg.contains("[Int -- Int]") || err_msg.contains("[ -- ]"),
2210            "Expected error to suggest explicit syntax, got: {}",
2211            err_msg
2212        );
2213    }
2214
2215    #[test]
2216    fn test_parse_no_stack_effect() {
2217        // Test word without stack effect (should still work)
2218        let source = ": test 1 2 add ;";
2219        let mut parser = Parser::new(source);
2220        let program = parser.parse().unwrap();
2221
2222        assert_eq!(program.words.len(), 1);
2223        assert!(program.words[0].effect.is_none());
2224    }
2225
2226    #[test]
2227    fn test_parse_simple_quotation() {
2228        let source = r#"
2229: test ( -- Quot )
2230  [ 1 add ] ;
2231"#;
2232
2233        let mut parser = Parser::new(source);
2234        let program = parser.parse().unwrap();
2235
2236        assert_eq!(program.words.len(), 1);
2237        assert_eq!(program.words[0].name, "test");
2238        assert_eq!(program.words[0].body.len(), 1);
2239
2240        match &program.words[0].body[0] {
2241            Statement::Quotation { body, .. } => {
2242                assert_eq!(body.len(), 2);
2243                assert_eq!(body[0], Statement::IntLiteral(1));
2244                assert!(matches!(&body[1], Statement::WordCall { name, .. } if name == "add"));
2245            }
2246            _ => panic!("Expected Quotation statement"),
2247        }
2248    }
2249
2250    #[test]
2251    fn test_parse_empty_quotation() {
2252        let source = ": test [ ] ;";
2253
2254        let mut parser = Parser::new(source);
2255        let program = parser.parse().unwrap();
2256
2257        assert_eq!(program.words.len(), 1);
2258
2259        match &program.words[0].body[0] {
2260            Statement::Quotation { body, .. } => {
2261                assert_eq!(body.len(), 0);
2262            }
2263            _ => panic!("Expected Quotation statement"),
2264        }
2265    }
2266
2267    #[test]
2268    fn test_parse_quotation_with_call() {
2269        let source = r#"
2270: test ( -- )
2271  5 [ 1 add ] call ;
2272"#;
2273
2274        let mut parser = Parser::new(source);
2275        let program = parser.parse().unwrap();
2276
2277        assert_eq!(program.words.len(), 1);
2278        assert_eq!(program.words[0].body.len(), 3);
2279
2280        assert_eq!(program.words[0].body[0], Statement::IntLiteral(5));
2281
2282        match &program.words[0].body[1] {
2283            Statement::Quotation { body, .. } => {
2284                assert_eq!(body.len(), 2);
2285            }
2286            _ => panic!("Expected Quotation"),
2287        }
2288
2289        assert!(matches!(
2290            &program.words[0].body[2],
2291            Statement::WordCall { name, .. } if name == "call"
2292        ));
2293    }
2294
2295    #[test]
2296    fn test_parse_nested_quotation() {
2297        let source = ": test [ [ 1 add ] call ] ;";
2298
2299        let mut parser = Parser::new(source);
2300        let program = parser.parse().unwrap();
2301
2302        assert_eq!(program.words.len(), 1);
2303
2304        match &program.words[0].body[0] {
2305            Statement::Quotation {
2306                body: outer_body, ..
2307            } => {
2308                assert_eq!(outer_body.len(), 2);
2309
2310                match &outer_body[0] {
2311                    Statement::Quotation {
2312                        body: inner_body, ..
2313                    } => {
2314                        assert_eq!(inner_body.len(), 2);
2315                        assert_eq!(inner_body[0], Statement::IntLiteral(1));
2316                        assert!(
2317                            matches!(&inner_body[1], Statement::WordCall { name, .. } if name == "add")
2318                        );
2319                    }
2320                    _ => panic!("Expected nested Quotation"),
2321                }
2322
2323                assert!(
2324                    matches!(&outer_body[1], Statement::WordCall { name, .. } if name == "call")
2325                );
2326            }
2327            _ => panic!("Expected Quotation"),
2328        }
2329    }
2330
2331    #[test]
2332    fn test_parse_while_with_quotations() {
2333        let source = r#"
2334: countdown ( Int -- )
2335  [ dup 0 > ] [ 1 subtract ] while drop ;
2336"#;
2337
2338        let mut parser = Parser::new(source);
2339        let program = parser.parse().unwrap();
2340
2341        assert_eq!(program.words.len(), 1);
2342        assert_eq!(program.words[0].body.len(), 4);
2343
2344        // First quotation: [ dup 0 > ]
2345        match &program.words[0].body[0] {
2346            Statement::Quotation { body: pred, .. } => {
2347                assert_eq!(pred.len(), 3);
2348                assert!(matches!(&pred[0], Statement::WordCall { name, .. } if name == "dup"));
2349                assert_eq!(pred[1], Statement::IntLiteral(0));
2350                assert!(matches!(&pred[2], Statement::WordCall { name, .. } if name == ">"));
2351            }
2352            _ => panic!("Expected predicate quotation"),
2353        }
2354
2355        // Second quotation: [ 1 subtract ]
2356        match &program.words[0].body[1] {
2357            Statement::Quotation { body, .. } => {
2358                assert_eq!(body.len(), 2);
2359                assert_eq!(body[0], Statement::IntLiteral(1));
2360                assert!(matches!(&body[1], Statement::WordCall { name, .. } if name == "subtract"));
2361            }
2362            _ => panic!("Expected body quotation"),
2363        }
2364
2365        // while call
2366        assert!(matches!(
2367            &program.words[0].body[2],
2368            Statement::WordCall { name, .. } if name == "while"
2369        ));
2370
2371        // drop
2372        assert!(matches!(
2373            &program.words[0].body[3],
2374            Statement::WordCall { name, .. } if name == "drop"
2375        ));
2376    }
2377
2378    #[test]
2379    fn test_parse_simple_closure_type() {
2380        // Test: ( Int -- Closure[Int -- Int] )
2381        let source = ": make-adder ( Int -- Closure[Int -- Int] ) ;";
2382        let mut parser = Parser::new(source);
2383        let program = parser.parse().unwrap();
2384
2385        assert_eq!(program.words.len(), 1);
2386        let word = &program.words[0];
2387        assert!(word.effect.is_some());
2388
2389        let effect = word.effect.as_ref().unwrap();
2390
2391        // Input: Int on RowVar("rest")
2392        let (input_rest, input_top) = effect.inputs.clone().pop().unwrap();
2393        assert_eq!(input_top, Type::Int);
2394        assert_eq!(input_rest, StackType::RowVar("rest".to_string()));
2395
2396        // Output: Closure[Int -- Int] on RowVar("rest")
2397        let (output_rest, output_top) = effect.outputs.clone().pop().unwrap();
2398        match output_top {
2399            Type::Closure { effect, captures } => {
2400                // Closure effect: Int -> Int
2401                assert_eq!(
2402                    effect.inputs,
2403                    StackType::Cons {
2404                        rest: Box::new(StackType::RowVar("rest".to_string())),
2405                        top: Type::Int
2406                    }
2407                );
2408                assert_eq!(
2409                    effect.outputs,
2410                    StackType::Cons {
2411                        rest: Box::new(StackType::RowVar("rest".to_string())),
2412                        top: Type::Int
2413                    }
2414                );
2415                // Captures should be empty (filled in by type checker)
2416                assert_eq!(captures.len(), 0);
2417            }
2418            _ => panic!("Expected Closure type, got {:?}", output_top),
2419        }
2420        assert_eq!(output_rest, StackType::RowVar("rest".to_string()));
2421    }
2422
2423    #[test]
2424    fn test_parse_closure_type_with_row_vars() {
2425        // Test: ( ..a Config -- ..a Closure[Request -- Response] )
2426        let source = ": make-handler ( ..a Config -- ..a Closure[Request -- Response] ) ;";
2427        let mut parser = Parser::new(source);
2428        let program = parser.parse().unwrap();
2429
2430        let effect = program.words[0].effect.as_ref().unwrap();
2431
2432        // Output: Closure on RowVar("a")
2433        let (rest, top) = effect.outputs.clone().pop().unwrap();
2434        match top {
2435            Type::Closure { effect, .. } => {
2436                // Closure effect: Request -> Response
2437                let (_, in_top) = effect.inputs.clone().pop().unwrap();
2438                assert_eq!(in_top, Type::Var("Request".to_string()));
2439                let (_, out_top) = effect.outputs.clone().pop().unwrap();
2440                assert_eq!(out_top, Type::Var("Response".to_string()));
2441            }
2442            _ => panic!("Expected Closure type"),
2443        }
2444        assert_eq!(rest, StackType::RowVar("a".to_string()));
2445    }
2446
2447    #[test]
2448    fn test_parse_closure_type_missing_bracket() {
2449        // Test: ( Int -- Closure ) should fail
2450        let source = ": broken ( Int -- Closure ) ;";
2451        let mut parser = Parser::new(source);
2452        let result = parser.parse();
2453
2454        assert!(result.is_err());
2455        let err_msg = result.unwrap_err();
2456        assert!(
2457            err_msg.contains("[") && err_msg.contains("Closure"),
2458            "Expected error about missing '[' after Closure, got: {}",
2459            err_msg
2460        );
2461    }
2462
2463    #[test]
2464    fn test_parse_closure_type_in_input() {
2465        // Test: ( Closure[Int -- Int] -- )
2466        let source = ": apply-closure ( Closure[Int -- Int] -- ) ;";
2467        let mut parser = Parser::new(source);
2468        let program = parser.parse().unwrap();
2469
2470        let effect = program.words[0].effect.as_ref().unwrap();
2471
2472        // Input: Closure[Int -- Int] on RowVar("rest")
2473        let (_, top) = effect.inputs.clone().pop().unwrap();
2474        match top {
2475            Type::Closure { effect, .. } => {
2476                // Verify closure effect
2477                assert!(matches!(effect.inputs.clone().pop().unwrap().1, Type::Int));
2478                assert!(matches!(effect.outputs.clone().pop().unwrap().1, Type::Int));
2479            }
2480            _ => panic!("Expected Closure type in input"),
2481        }
2482    }
2483
2484    // Tests for token position tracking
2485
2486    #[test]
2487    fn test_token_position_single_line() {
2488        // Test token positions on a single line
2489        let source = ": main ( -- ) ;";
2490        let tokens = tokenize(source);
2491
2492        // : is at line 0, column 0
2493        assert_eq!(tokens[0].text, ":");
2494        assert_eq!(tokens[0].line, 0);
2495        assert_eq!(tokens[0].column, 0);
2496
2497        // main is at line 0, column 2
2498        assert_eq!(tokens[1].text, "main");
2499        assert_eq!(tokens[1].line, 0);
2500        assert_eq!(tokens[1].column, 2);
2501
2502        // ( is at line 0, column 7
2503        assert_eq!(tokens[2].text, "(");
2504        assert_eq!(tokens[2].line, 0);
2505        assert_eq!(tokens[2].column, 7);
2506    }
2507
2508    #[test]
2509    fn test_token_position_multiline() {
2510        // Test token positions across multiple lines
2511        let source = ": main ( -- )\n  42\n;";
2512        let tokens = tokenize(source);
2513
2514        // Find the 42 token (after the newline)
2515        let token_42 = tokens.iter().find(|t| t.text == "42").unwrap();
2516        assert_eq!(token_42.line, 1);
2517        assert_eq!(token_42.column, 2); // After 2 spaces of indentation
2518
2519        // Find the ; token (on line 2)
2520        let token_semi = tokens.iter().find(|t| t.text == ";").unwrap();
2521        assert_eq!(token_semi.line, 2);
2522        assert_eq!(token_semi.column, 0);
2523    }
2524
2525    #[test]
2526    fn test_word_def_source_location_span() {
2527        // Test that word definitions capture correct start and end lines
2528        let source = r#": helper ( -- )
2529  "hello"
2530  write_line
2531;
2532
2533: main ( -- )
2534  helper
2535;"#;
2536
2537        let mut parser = Parser::new(source);
2538        let program = parser.parse().unwrap();
2539
2540        assert_eq!(program.words.len(), 2);
2541
2542        // First word: helper spans lines 0-3
2543        let helper = &program.words[0];
2544        assert_eq!(helper.name, "helper");
2545        let helper_source = helper.source.as_ref().unwrap();
2546        assert_eq!(helper_source.start_line, 0);
2547        assert_eq!(helper_source.end_line, 3);
2548
2549        // Second word: main spans lines 5-7
2550        let main_word = &program.words[1];
2551        assert_eq!(main_word.name, "main");
2552        let main_source = main_word.source.as_ref().unwrap();
2553        assert_eq!(main_source.start_line, 5);
2554        assert_eq!(main_source.end_line, 7);
2555    }
2556
2557    #[test]
2558    fn test_token_position_string_with_newline() {
2559        // Test that newlines inside strings are tracked correctly
2560        let source = "\"line1\\nline2\"";
2561        let tokens = tokenize(source);
2562
2563        // The string token should start at line 0, column 0
2564        assert_eq!(tokens.len(), 1);
2565        assert_eq!(tokens[0].line, 0);
2566        assert_eq!(tokens[0].column, 0);
2567    }
2568
2569    // ============================================================================
2570    //                         ADT PARSING TESTS
2571    // ============================================================================
2572
2573    #[test]
2574    fn test_parse_simple_union() {
2575        let source = r#"
2576union Message {
2577  Get { response-chan: Int }
2578  Set { value: Int }
2579}
2580
2581: main ( -- ) ;
2582"#;
2583
2584        let mut parser = Parser::new(source);
2585        let program = parser.parse().unwrap();
2586
2587        assert_eq!(program.unions.len(), 1);
2588        let union_def = &program.unions[0];
2589        assert_eq!(union_def.name, "Message");
2590        assert_eq!(union_def.variants.len(), 2);
2591
2592        // Check first variant
2593        assert_eq!(union_def.variants[0].name, "Get");
2594        assert_eq!(union_def.variants[0].fields.len(), 1);
2595        assert_eq!(union_def.variants[0].fields[0].name, "response-chan");
2596        assert_eq!(union_def.variants[0].fields[0].type_name, "Int");
2597
2598        // Check second variant
2599        assert_eq!(union_def.variants[1].name, "Set");
2600        assert_eq!(union_def.variants[1].fields.len(), 1);
2601        assert_eq!(union_def.variants[1].fields[0].name, "value");
2602        assert_eq!(union_def.variants[1].fields[0].type_name, "Int");
2603    }
2604
2605    #[test]
2606    fn test_parse_union_with_multiple_fields() {
2607        let source = r#"
2608union Report {
2609  Data { op: Int, delta: Int, total: Int }
2610  Empty
2611}
2612
2613: main ( -- ) ;
2614"#;
2615
2616        let mut parser = Parser::new(source);
2617        let program = parser.parse().unwrap();
2618
2619        assert_eq!(program.unions.len(), 1);
2620        let union_def = &program.unions[0];
2621        assert_eq!(union_def.name, "Report");
2622        assert_eq!(union_def.variants.len(), 2);
2623
2624        // Check Data variant with 3 fields
2625        let data_variant = &union_def.variants[0];
2626        assert_eq!(data_variant.name, "Data");
2627        assert_eq!(data_variant.fields.len(), 3);
2628        assert_eq!(data_variant.fields[0].name, "op");
2629        assert_eq!(data_variant.fields[1].name, "delta");
2630        assert_eq!(data_variant.fields[2].name, "total");
2631
2632        // Check Empty variant with no fields
2633        let empty_variant = &union_def.variants[1];
2634        assert_eq!(empty_variant.name, "Empty");
2635        assert_eq!(empty_variant.fields.len(), 0);
2636    }
2637
2638    #[test]
2639    fn test_parse_union_lowercase_name_error() {
2640        let source = r#"
2641union message {
2642  Get { }
2643}
2644"#;
2645
2646        let mut parser = Parser::new(source);
2647        let result = parser.parse();
2648        assert!(result.is_err());
2649        assert!(result.unwrap_err().contains("uppercase"));
2650    }
2651
2652    #[test]
2653    fn test_parse_union_empty_error() {
2654        let source = r#"
2655union Message {
2656}
2657"#;
2658
2659        let mut parser = Parser::new(source);
2660        let result = parser.parse();
2661        assert!(result.is_err());
2662        assert!(result.unwrap_err().contains("at least one variant"));
2663    }
2664
2665    #[test]
2666    fn test_parse_union_duplicate_variant_error() {
2667        let source = r#"
2668union Message {
2669  Get { x: Int }
2670  Get { y: String }
2671}
2672"#;
2673
2674        let mut parser = Parser::new(source);
2675        let result = parser.parse();
2676        assert!(result.is_err());
2677        let err = result.unwrap_err();
2678        assert!(err.contains("Duplicate variant name"));
2679        assert!(err.contains("Get"));
2680    }
2681
2682    #[test]
2683    fn test_parse_union_duplicate_field_error() {
2684        let source = r#"
2685union Data {
2686  Record { x: Int, x: String }
2687}
2688"#;
2689
2690        let mut parser = Parser::new(source);
2691        let result = parser.parse();
2692        assert!(result.is_err());
2693        let err = result.unwrap_err();
2694        assert!(err.contains("Duplicate field name"));
2695        assert!(err.contains("x"));
2696    }
2697
2698    #[test]
2699    fn test_parse_simple_match() {
2700        let source = r#"
2701: handle ( -- )
2702  match
2703    Get -> send-response
2704    Set -> process-set
2705  end
2706;
2707"#;
2708
2709        let mut parser = Parser::new(source);
2710        let program = parser.parse().unwrap();
2711
2712        assert_eq!(program.words.len(), 1);
2713        assert_eq!(program.words[0].body.len(), 1);
2714
2715        match &program.words[0].body[0] {
2716            Statement::Match { arms, span: _ } => {
2717                assert_eq!(arms.len(), 2);
2718
2719                // First arm: Get ->
2720                match &arms[0].pattern {
2721                    Pattern::Variant(name) => assert_eq!(name, "Get"),
2722                    _ => panic!("Expected Variant pattern"),
2723                }
2724                assert_eq!(arms[0].body.len(), 1);
2725
2726                // Second arm: Set ->
2727                match &arms[1].pattern {
2728                    Pattern::Variant(name) => assert_eq!(name, "Set"),
2729                    _ => panic!("Expected Variant pattern"),
2730                }
2731                assert_eq!(arms[1].body.len(), 1);
2732            }
2733            _ => panic!("Expected Match statement"),
2734        }
2735    }
2736
2737    #[test]
2738    fn test_parse_match_with_bindings() {
2739        let source = r#"
2740: handle ( -- )
2741  match
2742    Get { >chan } -> chan send-response
2743    Report { >delta >total } -> delta total process
2744  end
2745;
2746"#;
2747
2748        let mut parser = Parser::new(source);
2749        let program = parser.parse().unwrap();
2750
2751        assert_eq!(program.words.len(), 1);
2752
2753        match &program.words[0].body[0] {
2754            Statement::Match { arms, span: _ } => {
2755                assert_eq!(arms.len(), 2);
2756
2757                // First arm: Get { chan } ->
2758                match &arms[0].pattern {
2759                    Pattern::VariantWithBindings { name, bindings } => {
2760                        assert_eq!(name, "Get");
2761                        assert_eq!(bindings.len(), 1);
2762                        assert_eq!(bindings[0], "chan");
2763                    }
2764                    _ => panic!("Expected VariantWithBindings pattern"),
2765                }
2766
2767                // Second arm: Report { delta total } ->
2768                match &arms[1].pattern {
2769                    Pattern::VariantWithBindings { name, bindings } => {
2770                        assert_eq!(name, "Report");
2771                        assert_eq!(bindings.len(), 2);
2772                        assert_eq!(bindings[0], "delta");
2773                        assert_eq!(bindings[1], "total");
2774                    }
2775                    _ => panic!("Expected VariantWithBindings pattern"),
2776                }
2777            }
2778            _ => panic!("Expected Match statement"),
2779        }
2780    }
2781
2782    #[test]
2783    fn test_parse_match_bindings_require_prefix() {
2784        // Old syntax without > prefix should error
2785        let source = r#"
2786: handle ( -- )
2787  match
2788    Get { chan } -> chan send-response
2789  end
2790;
2791"#;
2792
2793        let mut parser = Parser::new(source);
2794        let result = parser.parse();
2795        assert!(result.is_err());
2796        let err = result.unwrap_err();
2797        assert!(err.contains(">chan"));
2798        assert!(err.contains("stack extraction"));
2799    }
2800
2801    #[test]
2802    fn test_parse_match_with_body_statements() {
2803        let source = r#"
2804: handle ( -- )
2805  match
2806    Get -> 1 2 add send-response
2807    Set -> process-value store
2808  end
2809;
2810"#;
2811
2812        let mut parser = Parser::new(source);
2813        let program = parser.parse().unwrap();
2814
2815        match &program.words[0].body[0] {
2816            Statement::Match { arms, span: _ } => {
2817                // Get arm has 4 statements: 1, 2, add, send-response
2818                assert_eq!(arms[0].body.len(), 4);
2819                assert_eq!(arms[0].body[0], Statement::IntLiteral(1));
2820                assert_eq!(arms[0].body[1], Statement::IntLiteral(2));
2821                assert!(
2822                    matches!(&arms[0].body[2], Statement::WordCall { name, .. } if name == "add")
2823                );
2824
2825                // Set arm has 2 statements: process-value, store
2826                assert_eq!(arms[1].body.len(), 2);
2827            }
2828            _ => panic!("Expected Match statement"),
2829        }
2830    }
2831
2832    #[test]
2833    fn test_parse_match_empty_error() {
2834        let source = r#"
2835: handle ( -- )
2836  match
2837  end
2838;
2839"#;
2840
2841        let mut parser = Parser::new(source);
2842        let result = parser.parse();
2843        assert!(result.is_err());
2844        assert!(result.unwrap_err().contains("at least one arm"));
2845    }
2846
2847    #[test]
2848    fn test_parse_symbol_literal() {
2849        let source = r#"
2850: main ( -- )
2851    :hello drop
2852;
2853"#;
2854
2855        let mut parser = Parser::new(source);
2856        let program = parser.parse().unwrap();
2857        assert_eq!(program.words.len(), 1);
2858
2859        let main = &program.words[0];
2860        assert_eq!(main.body.len(), 2);
2861
2862        match &main.body[0] {
2863            Statement::Symbol(name) => assert_eq!(name, "hello"),
2864            _ => panic!("Expected Symbol statement, got {:?}", main.body[0]),
2865        }
2866    }
2867
2868    #[test]
2869    fn test_parse_symbol_with_hyphen() {
2870        let source = r#"
2871: main ( -- )
2872    :hello-world drop
2873;
2874"#;
2875
2876        let mut parser = Parser::new(source);
2877        let program = parser.parse().unwrap();
2878
2879        match &program.words[0].body[0] {
2880            Statement::Symbol(name) => assert_eq!(name, "hello-world"),
2881            _ => panic!("Expected Symbol statement"),
2882        }
2883    }
2884
2885    #[test]
2886    fn test_parse_symbol_starting_with_digit_fails() {
2887        let source = r#"
2888: main ( -- )
2889    :123abc drop
2890;
2891"#;
2892
2893        let mut parser = Parser::new(source);
2894        let result = parser.parse();
2895        assert!(result.is_err());
2896        assert!(result.unwrap_err().contains("cannot start with a digit"));
2897    }
2898
2899    #[test]
2900    fn test_parse_symbol_with_invalid_char_fails() {
2901        let source = r#"
2902: main ( -- )
2903    :hello@world drop
2904;
2905"#;
2906
2907        let mut parser = Parser::new(source);
2908        let result = parser.parse();
2909        assert!(result.is_err());
2910        assert!(result.unwrap_err().contains("invalid character"));
2911    }
2912
2913    #[test]
2914    fn test_parse_symbol_special_chars_allowed() {
2915        // Test that ? and ! are allowed in symbol names
2916        let source = r#"
2917: main ( -- )
2918    :empty? drop
2919    :save! drop
2920;
2921"#;
2922
2923        let mut parser = Parser::new(source);
2924        let program = parser.parse().unwrap();
2925
2926        match &program.words[0].body[0] {
2927            Statement::Symbol(name) => assert_eq!(name, "empty?"),
2928            _ => panic!("Expected Symbol statement"),
2929        }
2930        match &program.words[0].body[2] {
2931            Statement::Symbol(name) => assert_eq!(name, "save!"),
2932            _ => panic!("Expected Symbol statement"),
2933        }
2934    }
2935}