seqc/
ast.rs

1//! Abstract Syntax Tree for Seq
2//!
3//! Minimal AST sufficient for hello-world and basic programs.
4//! Will be extended as we add more language features.
5
6use crate::types::{Effect, StackType, Type};
7use std::path::PathBuf;
8
9/// Source location for error reporting and tooling
10#[derive(Debug, Clone, PartialEq)]
11pub struct SourceLocation {
12    pub file: PathBuf,
13    /// Start line (0-indexed for LSP compatibility)
14    pub start_line: usize,
15    /// End line (0-indexed, inclusive)
16    pub end_line: usize,
17}
18
19impl SourceLocation {
20    /// Create a new source location with just a single line (for backward compatibility)
21    pub fn new(file: PathBuf, line: usize) -> Self {
22        SourceLocation {
23            file,
24            start_line: line,
25            end_line: line,
26        }
27    }
28
29    /// Create a source location spanning multiple lines
30    pub fn span(file: PathBuf, start_line: usize, end_line: usize) -> Self {
31        debug_assert!(
32            start_line <= end_line,
33            "SourceLocation: start_line ({}) must be <= end_line ({})",
34            start_line,
35            end_line
36        );
37        SourceLocation {
38            file,
39            start_line,
40            end_line,
41        }
42    }
43
44    /// Get the line number (for backward compatibility, returns start_line)
45    pub fn line(&self) -> usize {
46        self.start_line
47    }
48}
49
50impl std::fmt::Display for SourceLocation {
51    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
52        if self.start_line == self.end_line {
53            write!(f, "{}:{}", self.file.display(), self.start_line + 1)
54        } else {
55            write!(
56                f,
57                "{}:{}-{}",
58                self.file.display(),
59                self.start_line + 1,
60                self.end_line + 1
61            )
62        }
63    }
64}
65
66/// Include statement
67#[derive(Debug, Clone, PartialEq)]
68pub enum Include {
69    /// Standard library include: `include std:http`
70    Std(String),
71    /// Relative path include: `include "my-utils"`
72    Relative(String),
73    /// FFI library include: `include ffi:readline`
74    Ffi(String),
75}
76
77// ============================================================================
78//                     ALGEBRAIC DATA TYPES (ADTs)
79// ============================================================================
80
81/// A field in a union variant
82/// Example: `response-chan: Int`
83#[derive(Debug, Clone, PartialEq)]
84pub struct UnionField {
85    pub name: String,
86    pub type_name: String, // For now, just store the type name as string
87}
88
89/// A variant in a union type
90/// Example: `Get { response-chan: Int }`
91#[derive(Debug, Clone, PartialEq)]
92pub struct UnionVariant {
93    pub name: String,
94    pub fields: Vec<UnionField>,
95    pub source: Option<SourceLocation>,
96}
97
98/// A union type definition
99/// Example:
100/// ```seq
101/// union Message {
102///   Get { response-chan: Int }
103///   Increment { response-chan: Int }
104///   Report { op: Int, delta: Int, total: Int }
105/// }
106/// ```
107#[derive(Debug, Clone, PartialEq)]
108pub struct UnionDef {
109    pub name: String,
110    pub variants: Vec<UnionVariant>,
111    pub source: Option<SourceLocation>,
112}
113
114/// A pattern in a match expression
115/// For Phase 1: just the variant name (stack-based matching)
116/// Later phases will add field bindings: `Get { chan }`
117#[derive(Debug, Clone, PartialEq)]
118pub enum Pattern {
119    /// Match a variant by name, pushing all fields to stack
120    /// Example: `Get ->` pushes response-chan to stack
121    Variant(String),
122
123    /// Match a variant with named field bindings (Phase 5)
124    /// Example: `Get { chan } ->` binds chan to the response-chan field
125    VariantWithBindings { name: String, bindings: Vec<String> },
126}
127
128/// A single arm in a match expression
129#[derive(Debug, Clone, PartialEq)]
130pub struct MatchArm {
131    pub pattern: Pattern,
132    pub body: Vec<Statement>,
133}
134
135#[derive(Debug, Clone, PartialEq)]
136pub struct Program {
137    pub includes: Vec<Include>,
138    pub unions: Vec<UnionDef>,
139    pub words: Vec<WordDef>,
140}
141
142#[derive(Debug, Clone, PartialEq)]
143pub struct WordDef {
144    pub name: String,
145    /// Optional stack effect declaration
146    /// Example: ( ..a Int -- ..a Bool )
147    pub effect: Option<Effect>,
148    pub body: Vec<Statement>,
149    /// Source location for error reporting (collision detection)
150    pub source: Option<SourceLocation>,
151}
152
153/// Source span for a single token or expression
154#[derive(Debug, Clone, PartialEq, Default)]
155pub struct Span {
156    /// Line number (0-indexed)
157    pub line: usize,
158    /// Start column (0-indexed)
159    pub column: usize,
160    /// Length of the span in characters
161    pub length: usize,
162}
163
164impl Span {
165    pub fn new(line: usize, column: usize, length: usize) -> Self {
166        Span {
167            line,
168            column,
169            length,
170        }
171    }
172}
173
174/// Source span for a quotation, supporting multi-line ranges
175#[derive(Debug, Clone, PartialEq, Default)]
176pub struct QuotationSpan {
177    /// Start line (0-indexed)
178    pub start_line: usize,
179    /// Start column (0-indexed)
180    pub start_column: usize,
181    /// End line (0-indexed)
182    pub end_line: usize,
183    /// End column (0-indexed, exclusive)
184    pub end_column: usize,
185}
186
187impl QuotationSpan {
188    pub fn new(start_line: usize, start_column: usize, end_line: usize, end_column: usize) -> Self {
189        QuotationSpan {
190            start_line,
191            start_column,
192            end_line,
193            end_column,
194        }
195    }
196
197    /// Check if a position (line, column) falls within this span
198    pub fn contains(&self, line: usize, column: usize) -> bool {
199        if line < self.start_line || line > self.end_line {
200            return false;
201        }
202        if line == self.start_line && column < self.start_column {
203            return false;
204        }
205        if line == self.end_line && column >= self.end_column {
206            return false;
207        }
208        true
209    }
210}
211
212#[derive(Debug, Clone, PartialEq)]
213pub enum Statement {
214    /// Integer literal: pushes value onto stack
215    IntLiteral(i64),
216
217    /// Floating-point literal: pushes IEEE 754 double onto stack
218    FloatLiteral(f64),
219
220    /// Boolean literal: pushes true/false onto stack
221    BoolLiteral(bool),
222
223    /// String literal: pushes string onto stack
224    StringLiteral(String),
225
226    /// Symbol literal: pushes symbol onto stack
227    /// Syntax: :foo, :some-name, :ok
228    /// Used for dynamic variant construction and SON.
229    /// Note: Symbols are not currently interned (future optimization).
230    Symbol(String),
231
232    /// Word call: calls another word or built-in
233    /// Contains the word name and optional source span for precise diagnostics
234    WordCall { name: String, span: Option<Span> },
235
236    /// Conditional: if/else/then
237    ///
238    /// Pops an integer from the stack (0 = zero, non-zero = non-zero)
239    /// and executes the appropriate branch
240    If {
241        /// Statements to execute when condition is non-zero (the 'then' clause)
242        then_branch: Vec<Statement>,
243        /// Optional statements to execute when condition is zero (the 'else' clause)
244        else_branch: Option<Vec<Statement>>,
245    },
246
247    /// Quotation: [ ... ]
248    ///
249    /// A block of deferred code (quotation/lambda)
250    /// Quotations are first-class values that can be pushed onto the stack
251    /// and executed later with combinators like `call`, `times`, or `while`
252    ///
253    /// The id field is used by the typechecker to track the inferred type
254    /// (Quotation vs Closure) for this quotation. The id is assigned during parsing.
255    /// The span field records the source location for LSP hover support.
256    Quotation {
257        id: usize,
258        body: Vec<Statement>,
259        span: Option<QuotationSpan>,
260    },
261
262    /// Match expression: pattern matching on union types
263    ///
264    /// Pops a union value from the stack and dispatches to the
265    /// appropriate arm based on the variant tag.
266    ///
267    /// Example:
268    /// ```seq
269    /// match
270    ///   Get -> send-response
271    ///   Increment -> do-increment send-response
272    ///   Report -> aggregate-add
273    /// end
274    /// ```
275    Match {
276        /// The match arms in order
277        arms: Vec<MatchArm>,
278    },
279}
280
281impl Program {
282    pub fn new() -> Self {
283        Program {
284            includes: Vec::new(),
285            unions: Vec::new(),
286            words: Vec::new(),
287        }
288    }
289
290    /// Find a union definition by name
291    pub fn find_union(&self, name: &str) -> Option<&UnionDef> {
292        self.unions.iter().find(|u| u.name == name)
293    }
294
295    pub fn find_word(&self, name: &str) -> Option<&WordDef> {
296        self.words.iter().find(|w| w.name == name)
297    }
298
299    /// Validate that all word calls reference either a defined word or a built-in
300    pub fn validate_word_calls(&self) -> Result<(), String> {
301        self.validate_word_calls_with_externals(&[])
302    }
303
304    /// Validate that all word calls reference a defined word, built-in, or external word.
305    ///
306    /// The `external_words` parameter should contain names of words available from
307    /// external sources (e.g., included modules) that should be considered valid.
308    pub fn validate_word_calls_with_externals(
309        &self,
310        external_words: &[&str],
311    ) -> Result<(), String> {
312        // List of known runtime built-ins
313        // IMPORTANT: Keep this in sync with codegen.rs WordCall matching
314        let builtins = [
315            // I/O operations
316            "io.write",
317            "io.write-line",
318            "io.read-line",
319            "io.read-line+",
320            "io.read-n",
321            "int->string",
322            "symbol->string",
323            "string->symbol",
324            // Command-line arguments
325            "args.count",
326            "args.at",
327            // File operations
328            "file.slurp",
329            "file.exists?",
330            "file.for-each-line+",
331            // String operations
332            "string.concat",
333            "string.length",
334            "string.byte-length",
335            "string.char-at",
336            "string.substring",
337            "char->string",
338            "string.find",
339            "string.split",
340            "string.contains",
341            "string.starts-with",
342            "string.empty?",
343            "string.trim",
344            "string.chomp",
345            "string.to-upper",
346            "string.to-lower",
347            "string.equal?",
348            "string.json-escape",
349            "string->int",
350            // Symbol operations
351            "symbol.=",
352            // List operations
353            "list.make",
354            "list.push",
355            "list.get",
356            "list.set",
357            "list.map",
358            "list.filter",
359            "list.fold",
360            "list.each",
361            "list.length",
362            "list.empty?",
363            // Map operations
364            "map.make",
365            "map.get",
366            "map.set",
367            "map.has?",
368            "map.remove",
369            "map.keys",
370            "map.values",
371            "map.size",
372            "map.empty?",
373            // Variant operations
374            "variant.field-count",
375            "variant.tag",
376            "variant.field-at",
377            "variant.append",
378            "variant.last",
379            "variant.init",
380            "variant.make-0",
381            "variant.make-1",
382            "variant.make-2",
383            "variant.make-3",
384            "variant.make-4",
385            // SON wrap aliases
386            "wrap-0",
387            "wrap-1",
388            "wrap-2",
389            "wrap-3",
390            "wrap-4",
391            // Integer arithmetic operations
392            "i.add",
393            "i.subtract",
394            "i.multiply",
395            "i.divide",
396            "i.modulo",
397            // Terse integer arithmetic
398            "i.+",
399            "i.-",
400            "i.*",
401            "i./",
402            "i.%",
403            // Integer comparison operations (return 0 or 1)
404            "i.=",
405            "i.<",
406            "i.>",
407            "i.<=",
408            "i.>=",
409            "i.<>",
410            // Integer comparison operations (verbose form)
411            "i.eq",
412            "i.lt",
413            "i.gt",
414            "i.lte",
415            "i.gte",
416            "i.neq",
417            // Stack operations (simple - no parameters)
418            "dup",
419            "drop",
420            "swap",
421            "over",
422            "rot",
423            "nip",
424            "tuck",
425            "2dup",
426            "3drop",
427            "pick",
428            "roll",
429            // Boolean operations
430            "and",
431            "or",
432            "not",
433            // Bitwise operations
434            "band",
435            "bor",
436            "bxor",
437            "bnot",
438            "shl",
439            "shr",
440            "popcount",
441            "clz",
442            "ctz",
443            "int-bits",
444            // Channel operations
445            "chan.make",
446            "chan.send",
447            "chan.receive",
448            "chan.close",
449            "chan.yield",
450            // Quotation operations
451            "call",
452            "times",
453            "while",
454            "until",
455            "strand.spawn",
456            "strand.weave",
457            "strand.resume",
458            "strand.weave-cancel",
459            "yield",
460            "cond",
461            // TCP operations
462            "tcp.listen",
463            "tcp.accept",
464            "tcp.read",
465            "tcp.write",
466            "tcp.close",
467            // OS operations
468            "os.getenv",
469            "os.home-dir",
470            "os.current-dir",
471            "os.path-exists",
472            "os.path-is-file",
473            "os.path-is-dir",
474            "os.path-join",
475            "os.path-parent",
476            "os.path-filename",
477            "os.exit",
478            "os.name",
479            "os.arch",
480            // Float arithmetic operations (verbose form)
481            "f.add",
482            "f.subtract",
483            "f.multiply",
484            "f.divide",
485            // Float arithmetic operations (terse form)
486            "f.+",
487            "f.-",
488            "f.*",
489            "f./",
490            // Float comparison operations (symbol form)
491            "f.=",
492            "f.<",
493            "f.>",
494            "f.<=",
495            "f.>=",
496            "f.<>",
497            // Float comparison operations (verbose form)
498            "f.eq",
499            "f.lt",
500            "f.gt",
501            "f.lte",
502            "f.gte",
503            "f.neq",
504            // Type conversions
505            "int->float",
506            "float->int",
507            "float->string",
508            "string->float",
509            // Test framework operations
510            "test.init",
511            "test.finish",
512            "test.has-failures",
513            "test.assert",
514            "test.assert-not",
515            "test.assert-eq",
516            "test.assert-eq-str",
517            "test.fail",
518            "test.pass-count",
519            "test.fail-count",
520            // Time operations
521            "time.now",
522            "time.nanos",
523            "time.sleep-ms",
524            // SON serialization
525            "son.dump",
526            "son.dump-pretty",
527            // Stack introspection (for REPL)
528            "stack.dump",
529        ];
530
531        for word in &self.words {
532            self.validate_statements(&word.body, &word.name, &builtins, external_words)?;
533        }
534
535        Ok(())
536    }
537
538    /// Helper to validate word calls in a list of statements (recursively)
539    fn validate_statements(
540        &self,
541        statements: &[Statement],
542        word_name: &str,
543        builtins: &[&str],
544        external_words: &[&str],
545    ) -> Result<(), String> {
546        for statement in statements {
547            match statement {
548                Statement::WordCall { name, .. } => {
549                    // Check if it's a built-in
550                    if builtins.contains(&name.as_str()) {
551                        continue;
552                    }
553                    // Check if it's a user-defined word
554                    if self.find_word(name).is_some() {
555                        continue;
556                    }
557                    // Check if it's an external word (from includes)
558                    if external_words.contains(&name.as_str()) {
559                        continue;
560                    }
561                    // Undefined word!
562                    return Err(format!(
563                        "Undefined word '{}' called in word '{}'. \
564                         Did you forget to define it or misspell a built-in?",
565                        name, word_name
566                    ));
567                }
568                Statement::If {
569                    then_branch,
570                    else_branch,
571                } => {
572                    // Recursively validate both branches
573                    self.validate_statements(then_branch, word_name, builtins, external_words)?;
574                    if let Some(eb) = else_branch {
575                        self.validate_statements(eb, word_name, builtins, external_words)?;
576                    }
577                }
578                Statement::Quotation { body, .. } => {
579                    // Recursively validate quotation body
580                    self.validate_statements(body, word_name, builtins, external_words)?;
581                }
582                Statement::Match { arms } => {
583                    // Recursively validate each match arm's body
584                    for arm in arms {
585                        self.validate_statements(&arm.body, word_name, builtins, external_words)?;
586                    }
587                }
588                _ => {} // Literals don't need validation
589            }
590        }
591        Ok(())
592    }
593
594    /// Generate constructor words for all union definitions
595    ///
596    /// Maximum number of fields a variant can have (limited by runtime support)
597    pub const MAX_VARIANT_FIELDS: usize = 4;
598
599    /// For each union variant, generates a `Make-VariantName` word that:
600    /// 1. Takes the variant's field values from the stack
601    /// 2. Pushes the variant tag (index)
602    /// 3. Calls the appropriate `variant.make-N` builtin
603    ///
604    /// Example: For `union Message { Get { chan: Int } }`
605    /// Generates: `: Make-Get ( Int -- Message ) 0 variant.make-1 ;`
606    ///
607    /// Returns an error if any variant exceeds the maximum field count.
608    pub fn generate_constructors(&mut self) -> Result<(), String> {
609        let mut new_words = Vec::new();
610
611        for union_def in &self.unions {
612            for variant in &union_def.variants {
613                let constructor_name = format!("Make-{}", variant.name);
614                let field_count = variant.fields.len();
615
616                // Check field count limit before generating constructor
617                if field_count > Self::MAX_VARIANT_FIELDS {
618                    return Err(format!(
619                        "Variant '{}' in union '{}' has {} fields, but the maximum is {}. \
620                         Consider grouping fields into nested union types.",
621                        variant.name,
622                        union_def.name,
623                        field_count,
624                        Self::MAX_VARIANT_FIELDS
625                    ));
626                }
627
628                // Build the stack effect: ( field_types... -- UnionType )
629                // Input stack has fields in declaration order
630                let mut input_stack = StackType::RowVar("a".to_string());
631                for field in &variant.fields {
632                    let field_type = parse_type_name(&field.type_name);
633                    input_stack = input_stack.push(field_type);
634                }
635
636                // Output stack has the union type
637                let output_stack =
638                    StackType::RowVar("a".to_string()).push(Type::Union(union_def.name.clone()));
639
640                let effect = Effect::new(input_stack, output_stack);
641
642                // Build the body:
643                // 1. Push the variant name as a symbol (for dynamic matching)
644                // 2. Call variant.make-N which now accepts Symbol tags
645                let body = vec![
646                    Statement::Symbol(variant.name.clone()),
647                    Statement::WordCall {
648                        name: format!("variant.make-{}", field_count),
649                        span: None, // Generated code, no source span
650                    },
651                ];
652
653                new_words.push(WordDef {
654                    name: constructor_name,
655                    effect: Some(effect),
656                    body,
657                    source: variant.source.clone(),
658                });
659            }
660        }
661
662        self.words.extend(new_words);
663        Ok(())
664    }
665}
666
667/// Parse a type name string into a Type
668/// Used by constructor generation to build stack effects
669fn parse_type_name(name: &str) -> Type {
670    match name {
671        "Int" => Type::Int,
672        "Float" => Type::Float,
673        "Bool" => Type::Bool,
674        "String" => Type::String,
675        "Channel" => Type::Channel,
676        other => Type::Union(other.to_string()),
677    }
678}
679
680impl Default for Program {
681    fn default() -> Self {
682        Self::new()
683    }
684}
685
686#[cfg(test)]
687mod tests {
688    use super::*;
689
690    #[test]
691    fn test_validate_builtin_words() {
692        let program = Program {
693            includes: vec![],
694            unions: vec![],
695            words: vec![WordDef {
696                name: "main".to_string(),
697                effect: None,
698                body: vec![
699                    Statement::IntLiteral(2),
700                    Statement::IntLiteral(3),
701                    Statement::WordCall {
702                        name: "i.add".to_string(),
703                        span: None,
704                    },
705                    Statement::WordCall {
706                        name: "io.write-line".to_string(),
707                        span: None,
708                    },
709                ],
710                source: None,
711            }],
712        };
713
714        // Should succeed - i.add and io.write-line are built-ins
715        assert!(program.validate_word_calls().is_ok());
716    }
717
718    #[test]
719    fn test_validate_user_defined_words() {
720        let program = Program {
721            includes: vec![],
722            unions: vec![],
723            words: vec![
724                WordDef {
725                    name: "helper".to_string(),
726                    effect: None,
727                    body: vec![Statement::IntLiteral(42)],
728                    source: None,
729                },
730                WordDef {
731                    name: "main".to_string(),
732                    effect: None,
733                    body: vec![Statement::WordCall {
734                        name: "helper".to_string(),
735                        span: None,
736                    }],
737                    source: None,
738                },
739            ],
740        };
741
742        // Should succeed - helper is defined
743        assert!(program.validate_word_calls().is_ok());
744    }
745
746    #[test]
747    fn test_validate_undefined_word() {
748        let program = Program {
749            includes: vec![],
750            unions: vec![],
751            words: vec![WordDef {
752                name: "main".to_string(),
753                effect: None,
754                body: vec![Statement::WordCall {
755                    name: "undefined_word".to_string(),
756                    span: None,
757                }],
758                source: None,
759            }],
760        };
761
762        // Should fail - undefined_word is not a built-in or user-defined word
763        let result = program.validate_word_calls();
764        assert!(result.is_err());
765        let error = result.unwrap_err();
766        assert!(error.contains("undefined_word"));
767        assert!(error.contains("main"));
768    }
769
770    #[test]
771    fn test_validate_misspelled_builtin() {
772        let program = Program {
773            includes: vec![],
774            unions: vec![],
775            words: vec![WordDef {
776                name: "main".to_string(),
777                effect: None,
778                body: vec![Statement::WordCall {
779                    name: "wrte_line".to_string(),
780                    span: None,
781                }], // typo
782                source: None,
783            }],
784        };
785
786        // Should fail with helpful message
787        let result = program.validate_word_calls();
788        assert!(result.is_err());
789        let error = result.unwrap_err();
790        assert!(error.contains("wrte_line"));
791        assert!(error.contains("misspell"));
792    }
793
794    #[test]
795    fn test_generate_constructors() {
796        let mut program = Program {
797            includes: vec![],
798            unions: vec![UnionDef {
799                name: "Message".to_string(),
800                variants: vec![
801                    UnionVariant {
802                        name: "Get".to_string(),
803                        fields: vec![UnionField {
804                            name: "response-chan".to_string(),
805                            type_name: "Int".to_string(),
806                        }],
807                        source: None,
808                    },
809                    UnionVariant {
810                        name: "Put".to_string(),
811                        fields: vec![
812                            UnionField {
813                                name: "value".to_string(),
814                                type_name: "String".to_string(),
815                            },
816                            UnionField {
817                                name: "response-chan".to_string(),
818                                type_name: "Int".to_string(),
819                            },
820                        ],
821                        source: None,
822                    },
823                ],
824                source: None,
825            }],
826            words: vec![],
827        };
828
829        // Generate constructors
830        program.generate_constructors().unwrap();
831
832        // Should have 2 constructor words
833        assert_eq!(program.words.len(), 2);
834
835        // Check Make-Get constructor
836        let make_get = program
837            .find_word("Make-Get")
838            .expect("Make-Get should exist");
839        assert_eq!(make_get.name, "Make-Get");
840        assert!(make_get.effect.is_some());
841        let effect = make_get.effect.as_ref().unwrap();
842        // Input: ( ..a Int -- )
843        // Output: ( ..a Message -- )
844        assert_eq!(
845            format!("{:?}", effect.outputs),
846            "Cons { rest: RowVar(\"a\"), top: Union(\"Message\") }"
847        );
848
849        // Check Make-Put constructor
850        let make_put = program
851            .find_word("Make-Put")
852            .expect("Make-Put should exist");
853        assert_eq!(make_put.name, "Make-Put");
854        assert!(make_put.effect.is_some());
855
856        // Check the body generates correct code
857        // Make-Get should be: :Get variant.make-1
858        assert_eq!(make_get.body.len(), 2);
859        match &make_get.body[0] {
860            Statement::Symbol(s) if s == "Get" => {}
861            other => panic!("Expected Symbol(\"Get\") for variant tag, got {:?}", other),
862        }
863        match &make_get.body[1] {
864            Statement::WordCall { name, span: None } if name == "variant.make-1" => {}
865            _ => panic!("Expected WordCall(variant.make-1)"),
866        }
867
868        // Make-Put should be: :Put variant.make-2
869        assert_eq!(make_put.body.len(), 2);
870        match &make_put.body[0] {
871            Statement::Symbol(s) if s == "Put" => {}
872            other => panic!("Expected Symbol(\"Put\") for variant tag, got {:?}", other),
873        }
874        match &make_put.body[1] {
875            Statement::WordCall { name, span: None } if name == "variant.make-2" => {}
876            _ => panic!("Expected WordCall(variant.make-2)"),
877        }
878    }
879}