seqc/ast.rs
1//! Abstract Syntax Tree for Seq
2//!
3//! Minimal AST sufficient for hello-world and basic programs.
4//! Will be extended as we add more language features.
5
6use crate::types::{Effect, StackType, Type};
7use std::path::PathBuf;
8
9/// Source location for error reporting and tooling
10#[derive(Debug, Clone, PartialEq)]
11pub struct SourceLocation {
12 pub file: PathBuf,
13 /// Start line (0-indexed for LSP compatibility)
14 pub start_line: usize,
15 /// End line (0-indexed, inclusive)
16 pub end_line: usize,
17}
18
19impl SourceLocation {
20 /// Create a new source location with just a single line (for backward compatibility)
21 pub fn new(file: PathBuf, line: usize) -> Self {
22 SourceLocation {
23 file,
24 start_line: line,
25 end_line: line,
26 }
27 }
28
29 /// Create a source location spanning multiple lines
30 pub fn span(file: PathBuf, start_line: usize, end_line: usize) -> Self {
31 debug_assert!(
32 start_line <= end_line,
33 "SourceLocation: start_line ({}) must be <= end_line ({})",
34 start_line,
35 end_line
36 );
37 SourceLocation {
38 file,
39 start_line,
40 end_line,
41 }
42 }
43
44 /// Get the line number (for backward compatibility, returns start_line)
45 pub fn line(&self) -> usize {
46 self.start_line
47 }
48}
49
50impl std::fmt::Display for SourceLocation {
51 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
52 if self.start_line == self.end_line {
53 write!(f, "{}:{}", self.file.display(), self.start_line + 1)
54 } else {
55 write!(
56 f,
57 "{}:{}-{}",
58 self.file.display(),
59 self.start_line + 1,
60 self.end_line + 1
61 )
62 }
63 }
64}
65
66/// Include statement
67#[derive(Debug, Clone, PartialEq)]
68pub enum Include {
69 /// Standard library include: `include std:http`
70 Std(String),
71 /// Relative path include: `include "my-utils"`
72 Relative(String),
73 /// FFI library include: `include ffi:readline`
74 Ffi(String),
75}
76
77// ============================================================================
78// ALGEBRAIC DATA TYPES (ADTs)
79// ============================================================================
80
81/// A field in a union variant
82/// Example: `response-chan: Int`
83#[derive(Debug, Clone, PartialEq)]
84pub struct UnionField {
85 pub name: String,
86 pub type_name: String, // For now, just store the type name as string
87}
88
89/// A variant in a union type
90/// Example: `Get { response-chan: Int }`
91#[derive(Debug, Clone, PartialEq)]
92pub struct UnionVariant {
93 pub name: String,
94 pub fields: Vec<UnionField>,
95 pub source: Option<SourceLocation>,
96}
97
98/// A union type definition
99/// Example:
100/// ```seq
101/// union Message {
102/// Get { response-chan: Int }
103/// Increment { response-chan: Int }
104/// Report { op: Int, delta: Int, total: Int }
105/// }
106/// ```
107#[derive(Debug, Clone, PartialEq)]
108pub struct UnionDef {
109 pub name: String,
110 pub variants: Vec<UnionVariant>,
111 pub source: Option<SourceLocation>,
112}
113
114/// A pattern in a match expression
115/// For Phase 1: just the variant name (stack-based matching)
116/// Later phases will add field bindings: `Get { chan }`
117#[derive(Debug, Clone, PartialEq)]
118pub enum Pattern {
119 /// Match a variant by name, pushing all fields to stack
120 /// Example: `Get ->` pushes response-chan to stack
121 Variant(String),
122
123 /// Match a variant with named field bindings (Phase 5)
124 /// Example: `Get { chan } ->` binds chan to the response-chan field
125 VariantWithBindings { name: String, bindings: Vec<String> },
126}
127
128/// A single arm in a match expression
129#[derive(Debug, Clone, PartialEq)]
130pub struct MatchArm {
131 pub pattern: Pattern,
132 pub body: Vec<Statement>,
133}
134
135#[derive(Debug, Clone, PartialEq)]
136pub struct Program {
137 pub includes: Vec<Include>,
138 pub unions: Vec<UnionDef>,
139 pub words: Vec<WordDef>,
140}
141
142#[derive(Debug, Clone, PartialEq)]
143pub struct WordDef {
144 pub name: String,
145 /// Optional stack effect declaration
146 /// Example: ( ..a Int -- ..a Bool )
147 pub effect: Option<Effect>,
148 pub body: Vec<Statement>,
149 /// Source location for error reporting (collision detection)
150 pub source: Option<SourceLocation>,
151}
152
153/// Source span for a single token or expression
154#[derive(Debug, Clone, PartialEq, Default)]
155pub struct Span {
156 /// Line number (0-indexed)
157 pub line: usize,
158 /// Start column (0-indexed)
159 pub column: usize,
160 /// Length of the span in characters
161 pub length: usize,
162}
163
164impl Span {
165 pub fn new(line: usize, column: usize, length: usize) -> Self {
166 Span {
167 line,
168 column,
169 length,
170 }
171 }
172}
173
174/// Source span for a quotation, supporting multi-line ranges
175#[derive(Debug, Clone, PartialEq, Default)]
176pub struct QuotationSpan {
177 /// Start line (0-indexed)
178 pub start_line: usize,
179 /// Start column (0-indexed)
180 pub start_column: usize,
181 /// End line (0-indexed)
182 pub end_line: usize,
183 /// End column (0-indexed, exclusive)
184 pub end_column: usize,
185}
186
187impl QuotationSpan {
188 pub fn new(start_line: usize, start_column: usize, end_line: usize, end_column: usize) -> Self {
189 QuotationSpan {
190 start_line,
191 start_column,
192 end_line,
193 end_column,
194 }
195 }
196
197 /// Check if a position (line, column) falls within this span
198 pub fn contains(&self, line: usize, column: usize) -> bool {
199 if line < self.start_line || line > self.end_line {
200 return false;
201 }
202 if line == self.start_line && column < self.start_column {
203 return false;
204 }
205 if line == self.end_line && column >= self.end_column {
206 return false;
207 }
208 true
209 }
210}
211
212#[derive(Debug, Clone, PartialEq)]
213pub enum Statement {
214 /// Integer literal: pushes value onto stack
215 IntLiteral(i64),
216
217 /// Floating-point literal: pushes IEEE 754 double onto stack
218 FloatLiteral(f64),
219
220 /// Boolean literal: pushes true/false onto stack
221 BoolLiteral(bool),
222
223 /// String literal: pushes string onto stack
224 StringLiteral(String),
225
226 /// Symbol literal: pushes symbol onto stack
227 /// Syntax: :foo, :some-name, :ok
228 /// Used for dynamic variant construction and SON.
229 /// Note: Symbols are not currently interned (future optimization).
230 Symbol(String),
231
232 /// Word call: calls another word or built-in
233 /// Contains the word name and optional source span for precise diagnostics
234 WordCall { name: String, span: Option<Span> },
235
236 /// Conditional: if/else/then
237 ///
238 /// Pops an integer from the stack (0 = zero, non-zero = non-zero)
239 /// and executes the appropriate branch
240 If {
241 /// Statements to execute when condition is non-zero (the 'then' clause)
242 then_branch: Vec<Statement>,
243 /// Optional statements to execute when condition is zero (the 'else' clause)
244 else_branch: Option<Vec<Statement>>,
245 },
246
247 /// Quotation: [ ... ]
248 ///
249 /// A block of deferred code (quotation/lambda)
250 /// Quotations are first-class values that can be pushed onto the stack
251 /// and executed later with combinators like `call`, `times`, or `while`
252 ///
253 /// The id field is used by the typechecker to track the inferred type
254 /// (Quotation vs Closure) for this quotation. The id is assigned during parsing.
255 /// The span field records the source location for LSP hover support.
256 Quotation {
257 id: usize,
258 body: Vec<Statement>,
259 span: Option<QuotationSpan>,
260 },
261
262 /// Match expression: pattern matching on union types
263 ///
264 /// Pops a union value from the stack and dispatches to the
265 /// appropriate arm based on the variant tag.
266 ///
267 /// Example:
268 /// ```seq
269 /// match
270 /// Get -> send-response
271 /// Increment -> do-increment send-response
272 /// Report -> aggregate-add
273 /// end
274 /// ```
275 Match {
276 /// The match arms in order
277 arms: Vec<MatchArm>,
278 },
279}
280
281impl Program {
282 pub fn new() -> Self {
283 Program {
284 includes: Vec::new(),
285 unions: Vec::new(),
286 words: Vec::new(),
287 }
288 }
289
290 /// Find a union definition by name
291 pub fn find_union(&self, name: &str) -> Option<&UnionDef> {
292 self.unions.iter().find(|u| u.name == name)
293 }
294
295 pub fn find_word(&self, name: &str) -> Option<&WordDef> {
296 self.words.iter().find(|w| w.name == name)
297 }
298
299 /// Validate that all word calls reference either a defined word or a built-in
300 pub fn validate_word_calls(&self) -> Result<(), String> {
301 self.validate_word_calls_with_externals(&[])
302 }
303
304 /// Validate that all word calls reference a defined word, built-in, or external word.
305 ///
306 /// The `external_words` parameter should contain names of words available from
307 /// external sources (e.g., included modules) that should be considered valid.
308 pub fn validate_word_calls_with_externals(
309 &self,
310 external_words: &[&str],
311 ) -> Result<(), String> {
312 // List of known runtime built-ins
313 // IMPORTANT: Keep this in sync with codegen.rs WordCall matching
314 let builtins = [
315 // I/O operations
316 "io.write",
317 "io.write-line",
318 "io.read-line",
319 "io.read-line+",
320 "io.read-n",
321 "int->string",
322 "symbol->string",
323 "string->symbol",
324 // Command-line arguments
325 "args.count",
326 "args.at",
327 // File operations
328 "file.slurp",
329 "file.exists?",
330 "file.for-each-line+",
331 // String operations
332 "string.concat",
333 "string.length",
334 "string.byte-length",
335 "string.char-at",
336 "string.substring",
337 "char->string",
338 "string.find",
339 "string.split",
340 "string.contains",
341 "string.starts-with",
342 "string.empty?",
343 "string.trim",
344 "string.chomp",
345 "string.to-upper",
346 "string.to-lower",
347 "string.equal?",
348 "string.json-escape",
349 "string->int",
350 // Symbol operations
351 "symbol.=",
352 // List operations
353 "list.make",
354 "list.push",
355 "list.get",
356 "list.set",
357 "list.map",
358 "list.filter",
359 "list.fold",
360 "list.each",
361 "list.length",
362 "list.empty?",
363 // Map operations
364 "map.make",
365 "map.get",
366 "map.set",
367 "map.has?",
368 "map.remove",
369 "map.keys",
370 "map.values",
371 "map.size",
372 "map.empty?",
373 // Variant operations
374 "variant.field-count",
375 "variant.tag",
376 "variant.field-at",
377 "variant.append",
378 "variant.last",
379 "variant.init",
380 "variant.make-0",
381 "variant.make-1",
382 "variant.make-2",
383 "variant.make-3",
384 "variant.make-4",
385 // SON wrap aliases
386 "wrap-0",
387 "wrap-1",
388 "wrap-2",
389 "wrap-3",
390 "wrap-4",
391 // Integer arithmetic operations
392 "i.add",
393 "i.subtract",
394 "i.multiply",
395 "i.divide",
396 "i.modulo",
397 // Terse integer arithmetic
398 "i.+",
399 "i.-",
400 "i.*",
401 "i./",
402 "i.%",
403 // Integer comparison operations (return 0 or 1)
404 "i.=",
405 "i.<",
406 "i.>",
407 "i.<=",
408 "i.>=",
409 "i.<>",
410 // Integer comparison operations (verbose form)
411 "i.eq",
412 "i.lt",
413 "i.gt",
414 "i.lte",
415 "i.gte",
416 "i.neq",
417 // Stack operations (simple - no parameters)
418 "dup",
419 "drop",
420 "swap",
421 "over",
422 "rot",
423 "nip",
424 "tuck",
425 "2dup",
426 "3drop",
427 "pick",
428 "roll",
429 // Boolean operations
430 "and",
431 "or",
432 "not",
433 // Bitwise operations
434 "band",
435 "bor",
436 "bxor",
437 "bnot",
438 "shl",
439 "shr",
440 "popcount",
441 "clz",
442 "ctz",
443 "int-bits",
444 // Channel operations
445 "chan.make",
446 "chan.send",
447 "chan.receive",
448 "chan.close",
449 "chan.yield",
450 // Quotation operations
451 "call",
452 "times",
453 "while",
454 "until",
455 "strand.spawn",
456 "strand.weave",
457 "strand.resume",
458 "strand.weave-cancel",
459 "yield",
460 "cond",
461 // TCP operations
462 "tcp.listen",
463 "tcp.accept",
464 "tcp.read",
465 "tcp.write",
466 "tcp.close",
467 // OS operations
468 "os.getenv",
469 "os.home-dir",
470 "os.current-dir",
471 "os.path-exists",
472 "os.path-is-file",
473 "os.path-is-dir",
474 "os.path-join",
475 "os.path-parent",
476 "os.path-filename",
477 "os.exit",
478 "os.name",
479 "os.arch",
480 // Float arithmetic operations (verbose form)
481 "f.add",
482 "f.subtract",
483 "f.multiply",
484 "f.divide",
485 // Float arithmetic operations (terse form)
486 "f.+",
487 "f.-",
488 "f.*",
489 "f./",
490 // Float comparison operations (symbol form)
491 "f.=",
492 "f.<",
493 "f.>",
494 "f.<=",
495 "f.>=",
496 "f.<>",
497 // Float comparison operations (verbose form)
498 "f.eq",
499 "f.lt",
500 "f.gt",
501 "f.lte",
502 "f.gte",
503 "f.neq",
504 // Type conversions
505 "int->float",
506 "float->int",
507 "float->string",
508 "string->float",
509 // Test framework operations
510 "test.init",
511 "test.finish",
512 "test.has-failures",
513 "test.assert",
514 "test.assert-not",
515 "test.assert-eq",
516 "test.assert-eq-str",
517 "test.fail",
518 "test.pass-count",
519 "test.fail-count",
520 // Time operations
521 "time.now",
522 "time.nanos",
523 "time.sleep-ms",
524 // SON serialization
525 "son.dump",
526 "son.dump-pretty",
527 // Stack introspection (for REPL)
528 "stack.dump",
529 ];
530
531 for word in &self.words {
532 self.validate_statements(&word.body, &word.name, &builtins, external_words)?;
533 }
534
535 Ok(())
536 }
537
538 /// Helper to validate word calls in a list of statements (recursively)
539 fn validate_statements(
540 &self,
541 statements: &[Statement],
542 word_name: &str,
543 builtins: &[&str],
544 external_words: &[&str],
545 ) -> Result<(), String> {
546 for statement in statements {
547 match statement {
548 Statement::WordCall { name, .. } => {
549 // Check if it's a built-in
550 if builtins.contains(&name.as_str()) {
551 continue;
552 }
553 // Check if it's a user-defined word
554 if self.find_word(name).is_some() {
555 continue;
556 }
557 // Check if it's an external word (from includes)
558 if external_words.contains(&name.as_str()) {
559 continue;
560 }
561 // Undefined word!
562 return Err(format!(
563 "Undefined word '{}' called in word '{}'. \
564 Did you forget to define it or misspell a built-in?",
565 name, word_name
566 ));
567 }
568 Statement::If {
569 then_branch,
570 else_branch,
571 } => {
572 // Recursively validate both branches
573 self.validate_statements(then_branch, word_name, builtins, external_words)?;
574 if let Some(eb) = else_branch {
575 self.validate_statements(eb, word_name, builtins, external_words)?;
576 }
577 }
578 Statement::Quotation { body, .. } => {
579 // Recursively validate quotation body
580 self.validate_statements(body, word_name, builtins, external_words)?;
581 }
582 Statement::Match { arms } => {
583 // Recursively validate each match arm's body
584 for arm in arms {
585 self.validate_statements(&arm.body, word_name, builtins, external_words)?;
586 }
587 }
588 _ => {} // Literals don't need validation
589 }
590 }
591 Ok(())
592 }
593
594 /// Generate constructor words for all union definitions
595 ///
596 /// Maximum number of fields a variant can have (limited by runtime support)
597 pub const MAX_VARIANT_FIELDS: usize = 4;
598
599 /// For each union variant, generates a `Make-VariantName` word that:
600 /// 1. Takes the variant's field values from the stack
601 /// 2. Pushes the variant tag (index)
602 /// 3. Calls the appropriate `variant.make-N` builtin
603 ///
604 /// Example: For `union Message { Get { chan: Int } }`
605 /// Generates: `: Make-Get ( Int -- Message ) 0 variant.make-1 ;`
606 ///
607 /// Returns an error if any variant exceeds the maximum field count.
608 pub fn generate_constructors(&mut self) -> Result<(), String> {
609 let mut new_words = Vec::new();
610
611 for union_def in &self.unions {
612 for variant in &union_def.variants {
613 let constructor_name = format!("Make-{}", variant.name);
614 let field_count = variant.fields.len();
615
616 // Check field count limit before generating constructor
617 if field_count > Self::MAX_VARIANT_FIELDS {
618 return Err(format!(
619 "Variant '{}' in union '{}' has {} fields, but the maximum is {}. \
620 Consider grouping fields into nested union types.",
621 variant.name,
622 union_def.name,
623 field_count,
624 Self::MAX_VARIANT_FIELDS
625 ));
626 }
627
628 // Build the stack effect: ( field_types... -- UnionType )
629 // Input stack has fields in declaration order
630 let mut input_stack = StackType::RowVar("a".to_string());
631 for field in &variant.fields {
632 let field_type = parse_type_name(&field.type_name);
633 input_stack = input_stack.push(field_type);
634 }
635
636 // Output stack has the union type
637 let output_stack =
638 StackType::RowVar("a".to_string()).push(Type::Union(union_def.name.clone()));
639
640 let effect = Effect::new(input_stack, output_stack);
641
642 // Build the body:
643 // 1. Push the variant name as a symbol (for dynamic matching)
644 // 2. Call variant.make-N which now accepts Symbol tags
645 let body = vec![
646 Statement::Symbol(variant.name.clone()),
647 Statement::WordCall {
648 name: format!("variant.make-{}", field_count),
649 span: None, // Generated code, no source span
650 },
651 ];
652
653 new_words.push(WordDef {
654 name: constructor_name,
655 effect: Some(effect),
656 body,
657 source: variant.source.clone(),
658 });
659 }
660 }
661
662 self.words.extend(new_words);
663 Ok(())
664 }
665}
666
667/// Parse a type name string into a Type
668/// Used by constructor generation to build stack effects
669fn parse_type_name(name: &str) -> Type {
670 match name {
671 "Int" => Type::Int,
672 "Float" => Type::Float,
673 "Bool" => Type::Bool,
674 "String" => Type::String,
675 "Channel" => Type::Channel,
676 other => Type::Union(other.to_string()),
677 }
678}
679
680impl Default for Program {
681 fn default() -> Self {
682 Self::new()
683 }
684}
685
686#[cfg(test)]
687mod tests {
688 use super::*;
689
690 #[test]
691 fn test_validate_builtin_words() {
692 let program = Program {
693 includes: vec![],
694 unions: vec![],
695 words: vec![WordDef {
696 name: "main".to_string(),
697 effect: None,
698 body: vec![
699 Statement::IntLiteral(2),
700 Statement::IntLiteral(3),
701 Statement::WordCall {
702 name: "i.add".to_string(),
703 span: None,
704 },
705 Statement::WordCall {
706 name: "io.write-line".to_string(),
707 span: None,
708 },
709 ],
710 source: None,
711 }],
712 };
713
714 // Should succeed - i.add and io.write-line are built-ins
715 assert!(program.validate_word_calls().is_ok());
716 }
717
718 #[test]
719 fn test_validate_user_defined_words() {
720 let program = Program {
721 includes: vec![],
722 unions: vec![],
723 words: vec![
724 WordDef {
725 name: "helper".to_string(),
726 effect: None,
727 body: vec![Statement::IntLiteral(42)],
728 source: None,
729 },
730 WordDef {
731 name: "main".to_string(),
732 effect: None,
733 body: vec![Statement::WordCall {
734 name: "helper".to_string(),
735 span: None,
736 }],
737 source: None,
738 },
739 ],
740 };
741
742 // Should succeed - helper is defined
743 assert!(program.validate_word_calls().is_ok());
744 }
745
746 #[test]
747 fn test_validate_undefined_word() {
748 let program = Program {
749 includes: vec![],
750 unions: vec![],
751 words: vec![WordDef {
752 name: "main".to_string(),
753 effect: None,
754 body: vec![Statement::WordCall {
755 name: "undefined_word".to_string(),
756 span: None,
757 }],
758 source: None,
759 }],
760 };
761
762 // Should fail - undefined_word is not a built-in or user-defined word
763 let result = program.validate_word_calls();
764 assert!(result.is_err());
765 let error = result.unwrap_err();
766 assert!(error.contains("undefined_word"));
767 assert!(error.contains("main"));
768 }
769
770 #[test]
771 fn test_validate_misspelled_builtin() {
772 let program = Program {
773 includes: vec![],
774 unions: vec![],
775 words: vec![WordDef {
776 name: "main".to_string(),
777 effect: None,
778 body: vec![Statement::WordCall {
779 name: "wrte_line".to_string(),
780 span: None,
781 }], // typo
782 source: None,
783 }],
784 };
785
786 // Should fail with helpful message
787 let result = program.validate_word_calls();
788 assert!(result.is_err());
789 let error = result.unwrap_err();
790 assert!(error.contains("wrte_line"));
791 assert!(error.contains("misspell"));
792 }
793
794 #[test]
795 fn test_generate_constructors() {
796 let mut program = Program {
797 includes: vec![],
798 unions: vec![UnionDef {
799 name: "Message".to_string(),
800 variants: vec![
801 UnionVariant {
802 name: "Get".to_string(),
803 fields: vec![UnionField {
804 name: "response-chan".to_string(),
805 type_name: "Int".to_string(),
806 }],
807 source: None,
808 },
809 UnionVariant {
810 name: "Put".to_string(),
811 fields: vec![
812 UnionField {
813 name: "value".to_string(),
814 type_name: "String".to_string(),
815 },
816 UnionField {
817 name: "response-chan".to_string(),
818 type_name: "Int".to_string(),
819 },
820 ],
821 source: None,
822 },
823 ],
824 source: None,
825 }],
826 words: vec![],
827 };
828
829 // Generate constructors
830 program.generate_constructors().unwrap();
831
832 // Should have 2 constructor words
833 assert_eq!(program.words.len(), 2);
834
835 // Check Make-Get constructor
836 let make_get = program
837 .find_word("Make-Get")
838 .expect("Make-Get should exist");
839 assert_eq!(make_get.name, "Make-Get");
840 assert!(make_get.effect.is_some());
841 let effect = make_get.effect.as_ref().unwrap();
842 // Input: ( ..a Int -- )
843 // Output: ( ..a Message -- )
844 assert_eq!(
845 format!("{:?}", effect.outputs),
846 "Cons { rest: RowVar(\"a\"), top: Union(\"Message\") }"
847 );
848
849 // Check Make-Put constructor
850 let make_put = program
851 .find_word("Make-Put")
852 .expect("Make-Put should exist");
853 assert_eq!(make_put.name, "Make-Put");
854 assert!(make_put.effect.is_some());
855
856 // Check the body generates correct code
857 // Make-Get should be: :Get variant.make-1
858 assert_eq!(make_get.body.len(), 2);
859 match &make_get.body[0] {
860 Statement::Symbol(s) if s == "Get" => {}
861 other => panic!("Expected Symbol(\"Get\") for variant tag, got {:?}", other),
862 }
863 match &make_get.body[1] {
864 Statement::WordCall { name, span: None } if name == "variant.make-1" => {}
865 _ => panic!("Expected WordCall(variant.make-1)"),
866 }
867
868 // Make-Put should be: :Put variant.make-2
869 assert_eq!(make_put.body.len(), 2);
870 match &make_put.body[0] {
871 Statement::Symbol(s) if s == "Put" => {}
872 other => panic!("Expected Symbol(\"Put\") for variant tag, got {:?}", other),
873 }
874 match &make_put.body[1] {
875 Statement::WordCall { name, span: None } if name == "variant.make-2" => {}
876 _ => panic!("Expected WordCall(variant.make-2)"),
877 }
878 }
879}