seqc/codegen/
mod.rs

1//! LLVM IR Code Generation
2//!
3//! This module generates LLVM IR as text (.ll files) for Seq programs.
4//! The code generation is split into focused submodules for maintainability.
5//!
6//! # Key Concepts
7//!
8//! ## Value Representation
9//!
10//! All Seq values use the `%Value` type, a 40-byte Rust enum with `#[repr(C)]`.
11//! Layout: `{ i64, i64, i64, i64, i64 }` (discriminant + largest variant payload).
12//! This fixed size allows pass-by-value, required for Alpine/musl compatibility.
13//!
14//! ## Calling Conventions
15//!
16//! - **User-defined words**: Use `tailcc` (tail call convention) to enable TCO.
17//!   Each word has two functions: a C-convention wrapper (`seq_word_*`) for
18//!   external calls and a `tailcc` implementation (`seq_word_*_impl`) for
19//!   internal calls that can use `musttail`.
20//!
21//! - **Runtime functions**: Use C convention (`ccc`). Declared in `runtime.rs`.
22//!
23//! - **Quotations**: Use C convention. Quotations are first-class functions that
24//!   may capture their environment (pure quotations capture nothing). They have
25//!   wrapper/impl pairs but currently don't support TCO due to closure complexity.
26//!
27//! ## Virtual Stack Optimization
28//!
29//! The top N values (default 4) are kept in SSA virtual registers instead of
30//! memory. This avoids store/load overhead for common patterns like `2 3 i.+`.
31//! Values are "spilled" to the memory stack at control flow points (if/else,
32//! loops) and function calls. See `virtual_stack.rs` and `VirtualValue` in
33//! `state.rs`.
34//!
35//! ## Tail Call Optimization (TCO)
36//!
37//! Word calls in tail position use LLVM's `musttail` for guaranteed TCO.
38//! A call is in tail position when it's the last operation before return.
39//! TCO is disabled in these contexts:
40//! - Inside `main` (uses C convention for entry point)
41//! - Inside quotations (closure semantics require stack frames)
42//! - Inside closures that capture variables
43//!
44//! ## Quotations and Closures
45//!
46//! Quotations (`[ ... ]`) compile to function pointers pushed onto the stack.
47//! - **Pure quotations**: No captured variables, just a function pointer.
48//! - **Closures**: Capture variables from enclosing scope. The runtime allocates
49//!   a closure struct containing the function pointer and captured values.
50//!
51//! Each quotation generates a wrapper function (C convention, for `call` builtin)
52//! and an impl function. Closure captures are analyzed at compile time by
53//! `capture_analysis.rs`.
54//!
55//! # Module Structure
56//!
57//! - `state.rs`: Core types (CodeGen, VirtualValue, TailPosition)
58//! - `program.rs`: Main entry points (codegen_program*)
59//! - `words.rs`: Word and quotation code generation
60//! - `statements.rs`: Statement dispatch and main function
61//! - `inline/`: Inline operation code generation (no runtime calls)
62//!   - `dispatch.rs`: Main inline dispatch logic
63//!   - `ops.rs`: Individual inline operations
64//! - `control_flow.rs`: If/else, match statements
65//! - `virtual_stack.rs`: Virtual register optimization
66//! - `types.rs`: Type helpers and exhaustiveness checking
67//! - `globals.rs`: String and symbol constants
68//! - `runtime.rs`: Runtime function declarations
69//! - `ffi_wrappers.rs`: FFI wrapper generation
70//! - `platform.rs`: Platform detection
71//! - `error.rs`: Error types
72
73// Submodules
74mod control_flow;
75mod error;
76mod ffi_wrappers;
77mod globals;
78mod inline;
79mod platform;
80mod program;
81mod runtime;
82mod state;
83mod statements;
84mod types;
85mod virtual_stack;
86mod words;
87
88// Public re-exports
89pub use error::CodeGenError;
90pub use platform::{ffi_c_args, ffi_return_type, get_target_triple};
91pub use runtime::{BUILTIN_SYMBOLS, RUNTIME_DECLARATIONS, emit_runtime_decls};
92pub use state::CodeGen;
93
94// Internal re-exports for submodules
95use state::{
96    BranchResult, MAX_VIRTUAL_STACK, QuotationFunctions, TailPosition, UNREACHABLE_PREDECESSOR,
97    VirtualValue, mangle_name,
98};
99
100#[cfg(test)]
101mod tests {
102    use super::*;
103    use crate::ast::{Program, Statement, WordDef};
104    use std::collections::HashMap;
105
106    #[test]
107    fn test_codegen_hello_world() {
        // Smoke test for the default codegen path: a program whose only word is
        // `main`, which pushes a string literal and calls `io.write-line`, must
        // yield IR text containing the expected definitions, runtime calls and
        // string constant. All checks are substring matches on the IR text.
108        let mut codegen = CodeGen::new();
109
110        let program = Program {
111            includes: vec![],
112            unions: vec![],
113            words: vec![WordDef {
114                name: "main".to_string(),
115                effect: None,
116                body: vec![
117                    Statement::StringLiteral("Hello, World!".to_string()),
118                    Statement::WordCall {
119                        name: "io.write-line".to_string(),
120                        span: None,
121                    },
122                ],
123                source: None,
124            }],
125        };
126
        // Both map arguments are left empty; this test supplies no extra
        // per-program information beyond the AST itself.
127        let ir = codegen
128            .codegen_program(&program, HashMap::new(), HashMap::new())
129            .unwrap();
130
        // A C-style `main(argc, argv)` definition must be present.
131        assert!(ir.contains("define i32 @main(i32 %argc, ptr %argv)"));
132        // main uses C calling convention (no tailcc) since it's called from C runtime
133        assert!(ir.contains("define ptr @seq_main(ptr %stack)"));
        // The literal push and the write-line both go through runtime calls.
134        assert!(ir.contains("call ptr @patch_seq_push_string"));
135        assert!(ir.contains("call ptr @patch_seq_write_line"));
        // The string constant appears with a trailing `\00` (NUL) escape.
136        assert!(ir.contains("\"Hello, World!\\00\""));
137    }
138
139    #[test]
140    fn test_codegen_io_write() {
141        // Test io.write (write without newline)
        // Same shape as the hello-world test, but the word called is `io.write`
        // and only the runtime calls and the string constant are checked.
142        let mut codegen = CodeGen::new();
143
144        let program = Program {
145            includes: vec![],
146            unions: vec![],
147            words: vec![WordDef {
148                name: "main".to_string(),
149                effect: None,
150                body: vec![
151                    Statement::StringLiteral("no newline".to_string()),
152                    Statement::WordCall {
153                        name: "io.write".to_string(),
154                        span: None,
155                    },
156                ],
157                source: None,
158            }],
159        };
160
161        let ir = codegen
162            .codegen_program(&program, HashMap::new(), HashMap::new())
163            .unwrap();
164
165        assert!(ir.contains("call ptr @patch_seq_push_string"));
        // NOTE(review): this substring is also a prefix of
        // `call ptr @patch_seq_write_line`, so the assertion would still pass
        // if `io.write` were lowered to the write-line runtime call. Consider
        // matching up to the opening parenthesis -- confirm the emitted call
        // syntax first.
166        assert!(ir.contains("call ptr @patch_seq_write"));
        // The string constant appears with a trailing `\00` (NUL) escape.
167        assert!(ir.contains("\"no newline\\00\""));
168    }
169
170    #[test]
171    fn test_codegen_arithmetic() {
172        // Test inline tagged stack arithmetic with virtual registers (Issue #189)
        // Program under test: `main` pushes 2 and 3, then calls `i.add`.
173        let mut codegen = CodeGen::new();
174
175        let program = Program {
176            includes: vec![],
177            unions: vec![],
178            words: vec![WordDef {
179                name: "main".to_string(),
180                effect: None,
181                body: vec![
182                    Statement::IntLiteral(2),
183                    Statement::IntLiteral(3),
184                    Statement::WordCall {
185                        name: "i.add".to_string(),
186                        span: None,
187                    },
188                ],
189                source: None,
190            }],
191        };
192
193        let ir = codegen
194            .codegen_program(&program, HashMap::new(), HashMap::new())
195            .unwrap();
196
197        // Issue #189: With virtual registers, integers are kept in SSA variables
198        // Using identity add: %n = add i64 0, <value>
199        assert!(ir.contains("add i64 0, 2"), "Should create SSA var for 2");
200        assert!(ir.contains("add i64 0, 3"), "Should create SSA var for 3");
201        // The add operation uses virtual registers directly
        // (matched loosely: any `add i64` whose first operand is an SSA name,
        // which the identity adds above cannot satisfy because their first
        // operand is the constant 0).
202        assert!(ir.contains("add i64 %"), "Should add SSA variables");
203    }
204
205    #[test]
206    fn test_pure_inline_test_mode() {
        // Exercises the generator built by `CodeGen::new_pure_inline_test()`
        // and checks, via substring matches on the IR, which calls must be
        // absent and which inline sequences must be present.
207        let mut codegen = CodeGen::new_pure_inline_test();
208
209        // Simple program: 5 3 i.add (should return 8)
210        let program = Program {
211            includes: vec![],
212            unions: vec![],
213            words: vec![WordDef {
214                name: "main".to_string(),
215                effect: None,
216                body: vec![
217                    Statement::IntLiteral(5),
218                    Statement::IntLiteral(3),
219                    Statement::WordCall {
220                        name: "i.add".to_string(),
221                        span: None,
222                    },
223                ],
224                source: None,
225            }],
226        };
227
228        let ir = codegen
229            .codegen_program(&program, HashMap::new(), HashMap::new())
230            .unwrap();
231
232        // Pure inline test mode should:
233        // 1. NOT CALL the scheduler (declarations are ok, calls are not)
        //    The patterns start with `call`, so a `declare` line for the same
        //    symbol does not trip these negative assertions.
234        assert!(!ir.contains("call void @patch_seq_scheduler_init"));
235        assert!(!ir.contains("call i64 @patch_seq_strand_spawn"));
236
237        // 2. Have main allocate tagged stack and call seq_main directly
238        assert!(ir.contains("call ptr @seq_stack_new_default()"));
239        assert!(ir.contains("call ptr @seq_main(ptr %stack_base)"));
240
241        // 3. Read result from stack and return as exit code
        //    (the i64 result is truncated to the i32 that `main` returns)
242        assert!(ir.contains("trunc i64 %result to i32"));
243        assert!(ir.contains("ret i32 %exit_code"));
244
245        // 4. Use inline push with virtual registers (Issue #189)
246        assert!(!ir.contains("call ptr @patch_seq_push_int"));
247        // Values are kept in SSA variables via identity add
248        assert!(ir.contains("add i64 0, 5"), "Should create SSA var for 5");
249        assert!(ir.contains("add i64 0, 3"), "Should create SSA var for 3");
250
251        // 5. Use inline add with virtual registers (add i64 %, not call patch_seq_add)
252        assert!(!ir.contains("call ptr @patch_seq_add"));
253        assert!(ir.contains("add i64 %"), "Should add SSA variables");
254    }
255
256    #[test]
257    fn test_escape_llvm_string() {
        // Pins the output of `CodeGen::escape_llvm_string` for four inputs.
        // Expected values are raw strings, so each backslash is literal.
        // Plain ASCII passes through unchanged.
258        assert_eq!(CodeGen::escape_llvm_string("hello").unwrap(), "hello");
        // Newline (0x0A) becomes the two-hex-digit escape `\0A`.
259        assert_eq!(CodeGen::escape_llvm_string("a\nb").unwrap(), r"a\0Ab");
        // Tab (0x09) becomes `\09`.
260        assert_eq!(CodeGen::escape_llvm_string("a\tb").unwrap(), r"a\09b");
        // Double quote (0x22) becomes `\22`.
261        assert_eq!(CodeGen::escape_llvm_string("a\"b").unwrap(), r"a\22b");
262    }
263
264    #[test]
265    fn test_external_builtins_declared() {
        // A word registered through `CompilerConfig::with_builtin` must be both
        // declared and called in the generated IR under its configured symbol.
266        use crate::config::{CompilerConfig, ExternalBuiltin};
267
268        let mut codegen = CodeGen::new();
269
        // `main` pushes 42 and then calls the externally provided word.
270        let program = Program {
271            includes: vec![],
272            unions: vec![],
273            words: vec![WordDef {
274                name: "main".to_string(),
275                effect: None,
276                body: vec![
277                    Statement::IntLiteral(42),
278                    Statement::WordCall {
279                        name: "my-external-op".to_string(),
280                        span: None,
281                    },
282                ],
283                source: None,
284            }],
285        };
286
        // Register the Seq word name "my-external-op" with the symbol
        // "test_runtime_my_op"; the assertions below look for that symbol.
287        let config = CompilerConfig::new()
288            .with_builtin(ExternalBuiltin::new("my-external-op", "test_runtime_my_op"));
289
290        let ir = codegen
291            .codegen_program_with_config(&program, HashMap::new(), HashMap::new(), &config)
292            .unwrap();
293
294        // Should declare the external builtin
295        assert!(
296            ir.contains("declare ptr @test_runtime_my_op(ptr)"),
297            "IR should declare external builtin"
298        );
299
300        // Should call the external builtin
301        assert!(
302            ir.contains("call ptr @test_runtime_my_op"),
303            "IR should call external builtin"
304        );
305    }
306
307    #[test]
308    fn test_multiple_external_builtins() {
        // Same check as the single-builtin test, but with two builtins
        // registered on one config: each must get its own declaration and call.
309        use crate::config::{CompilerConfig, ExternalBuiltin};
310
311        let mut codegen = CodeGen::new();
312
        // `main` calls the two external words back to back.
313        let program = Program {
314            includes: vec![],
315            unions: vec![],
316            words: vec![WordDef {
317                name: "main".to_string(),
318                effect: None,
319                body: vec![
320                    Statement::WordCall {
321                        name: "actor-self".to_string(),
322                        span: None,
323                    },
324                    Statement::WordCall {
325                        name: "journal-append".to_string(),
326                        span: None,
327                    },
328                ],
329                source: None,
330            }],
331        };
332
        // Builder calls chain, so both registrations end up on one config.
333        let config = CompilerConfig::new()
334            .with_builtin(ExternalBuiltin::new("actor-self", "seq_actors_self"))
335            .with_builtin(ExternalBuiltin::new(
336                "journal-append",
337                "seq_actors_journal_append",
338            ));
339
340        let ir = codegen
341            .codegen_program_with_config(&program, HashMap::new(), HashMap::new(), &config)
342            .unwrap();
343
344        // Should declare both external builtins
345        assert!(ir.contains("declare ptr @seq_actors_self(ptr)"));
346        assert!(ir.contains("declare ptr @seq_actors_journal_append(ptr)"));
347
348        // Should call both
349        assert!(ir.contains("call ptr @seq_actors_self"));
350        assert!(ir.contains("call ptr @seq_actors_journal_append"));
351    }
352
353    #[test]
354    fn test_external_builtins_with_library_paths() {
        // Config-only test (no codegen is run): the builder methods must record
        // the builtin, the library search path and the library name in the
        // corresponding `CompilerConfig` fields.
355        use crate::config::{CompilerConfig, ExternalBuiltin};
356
357        let config = CompilerConfig::new()
358            .with_builtin(ExternalBuiltin::new("my-op", "runtime_my_op"))
359            .with_library_path("/custom/lib")
360            .with_library("myruntime");
361
        // One entry per builder call, stored as given.
362        assert_eq!(config.external_builtins.len(), 1);
363        assert_eq!(config.library_paths, vec!["/custom/lib"]);
364        assert_eq!(config.libraries, vec!["myruntime"]);
365    }
366
367    #[test]
368    fn test_external_builtin_full_pipeline() {
369        // Test that external builtins work through the full compile pipeline
370        // including parser, AST validation, type checker, and codegen
        // Unlike the AST-based tests, this one starts from Seq source text and
        // goes through `compile_to_ir_with_config`.
371        use crate::compile_to_ir_with_config;
372        use crate::config::{CompilerConfig, ExternalBuiltin};
373
374        let source = r#"
375            : main ( -- Int )
376              42 my-transform
377              0
378            ;
379        "#;
380
        // Register `my-transform` under the symbol `ext_runtime_transform`.
381        let config = CompilerConfig::new().with_builtin(ExternalBuiltin::new(
382            "my-transform",
383            "ext_runtime_transform",
384        ));
385
386        // This should succeed - the external builtin is registered
387        let result = compile_to_ir_with_config(source, &config);
        // The error is rendered into the failure message only; on the success
        // path `result` is still available for the `unwrap` below.
388        assert!(
389            result.is_ok(),
390            "Compilation should succeed: {:?}",
391            result.err()
392        );
393
        // The IR must both declare and call the configured symbol.
394        let ir = result.unwrap();
395        assert!(ir.contains("declare ptr @ext_runtime_transform(ptr)"));
396        assert!(ir.contains("call ptr @ext_runtime_transform"));
397    }
398
399    #[test]
400    fn test_external_builtin_without_config_fails() {
401        // Test that using an external builtin without config fails validation
        // Negative counterpart of the full-pipeline test: the same source shape
        // is compiled with `compile_to_ir`, which takes no config, so the extra
        // word is never registered.
402        use crate::compile_to_ir;
403
404        let source = r#"
405            : main ( -- Int )
406              42 unknown-builtin
407              0
408            ;
409        "#;
410
411        // This should fail - unknown-builtin is not registered
412        let result = compile_to_ir(source);
413        assert!(result.is_err());
        // The error text must name the offending word.
414        assert!(result.unwrap_err().contains("unknown-builtin"));
415    }
416
417    #[test]
418    fn test_match_exhaustiveness_error() {
        // A `match` over the two-variant union `Result` that handles only `Ok`
        // must be rejected, and the error must identify what is missing.
419        use crate::compile_to_ir;
420
421        let source = r#"
422            union Result { Ok { value: Int } Err { msg: String } }
423
424            : handle ( Variant -- Int )
425              match
426                Ok -> drop 1
427                # Missing Err arm!
428              end
429            ;
430
431            : main ( -- ) 42 Make-Ok handle drop ;
432        "#;
433
434        let result = compile_to_ir(source);
435        assert!(result.is_err());
        // The message must carry the diagnostic phrase, the union name, and
        // the name of the unhandled variant.
436        let err = result.unwrap_err();
437        assert!(err.contains("Non-exhaustive match"));
438        assert!(err.contains("Result"));
439        assert!(err.contains("Err"));
440    }
441
442    #[test]
443    fn test_match_exhaustive_compiles() {
        // Positive counterpart of the exhaustiveness-error test: the same
        // source with an arm for every variant of `Result` must compile.
444        use crate::compile_to_ir;
445
446        let source = r#"
447            union Result { Ok { value: Int } Err { msg: String } }
448
449            : handle ( Variant -- Int )
450              match
451                Ok -> drop 1
452                Err -> drop 0
453              end
454            ;
455
456            : main ( -- ) 42 Make-Ok handle drop ;
457        "#;
458
459        let result = compile_to_ir(source);
        // Only success is checked; the generated IR itself is not inspected.
460        assert!(
461            result.is_ok(),
462            "Exhaustive match should compile: {:?}",
463            result
464        );
465    }
466
467    #[test]
468    fn test_codegen_symbol() {
469        // Test symbol literal codegen
        // `main` pushes the symbol `hello`, converts it with `symbol->string`,
        // and prints it with `io.write-line`.
470        let mut codegen = CodeGen::new();
471
472        let program = Program {
473            includes: vec![],
474            unions: vec![],
475            words: vec![WordDef {
476                name: "main".to_string(),
477                effect: None,
478                body: vec![
479                    Statement::Symbol("hello".to_string()),
480                    Statement::WordCall {
481                        name: "symbol->string".to_string(),
482                        span: None,
483                    },
484                    Statement::WordCall {
485                        name: "io.write-line".to_string(),
486                        span: None,
487                    },
488                ],
489                source: None,
490            }],
491        };
492
493        let ir = codegen
494            .codegen_program(&program, HashMap::new(), HashMap::new())
495            .unwrap();
496
        // The literal is pushed through the interned-symbol runtime call and
        // the conversion maps to `patch_seq_symbol_to_string`.
497        assert!(ir.contains("call ptr @patch_seq_push_interned_symbol"));
498        assert!(ir.contains("call ptr @patch_seq_symbol_to_string"));
        // The symbol's text appears as a constant with a trailing `\00` escape.
499        assert!(ir.contains("\"hello\\00\""));
500    }
501
502    #[test]
503    fn test_symbol_interning_dedup() {
504        // Issue #166: Test that duplicate symbol literals share the same global
505        let mut codegen = CodeGen::new();
506
507        let program = Program {
508            includes: vec![],
509            unions: vec![],
510            words: vec![WordDef {
511                name: "main".to_string(),
512                effect: None,
513                body: vec![
514                    // Use :hello twice - should share the same .sym global
515                    Statement::Symbol("hello".to_string()),
516                    Statement::Symbol("hello".to_string()),
517                    Statement::Symbol("world".to_string()), // Different symbol
518                ],
519                source: None,
520            }],
521        };
522
523        let ir = codegen
524            .codegen_program(&program, HashMap::new(), HashMap::new())
525            .unwrap();
526
527        // Should have exactly one .sym global for "hello" and one for "world"
528        // Count occurrences of symbol global definitions (lines starting with @.sym)
        // Lines are trimmed first, so indentation does not matter; a use of
        // `@.sym.N` in the middle of an instruction line is not counted.
529        let sym_defs: Vec<_> = ir
530            .lines()
531            .filter(|l| l.trim().starts_with("@.sym."))
532            .collect();
533
534        // There should be 2 definitions: .sym.0 for "hello" and .sym.1 for "world"
535        assert_eq!(
536            sym_defs.len(),
537            2,
538            "Expected 2 symbol globals, got: {:?}",
539            sym_defs
540        );
541
542        // Verify deduplication: :hello appears twice but .sym.0 is reused
        // This counts every textual occurrence in the whole IR, so the
        // definition line contributes one and each push contributes one.
543        let hello_uses: usize = ir.matches("@.sym.0").count();
544        assert_eq!(
545            hello_uses, 3,
546            "Expected 3 occurrences of .sym.0 (1 def + 2 uses)"
547        );
548
549        // The IR should contain static symbol structure with capacity=0
        // NOTE(review): this is a bare substring check on the whole IR, not
        // tied to the `@.sym.` definition lines; the field meanings come from
        // the assertion message -- confirm against the global's layout.
550        assert!(
551            ir.contains("i64 0, i8 1"),
552            "Symbol global should have capacity=0 and global=1"
553        );
554    }
555
556    #[test]
557    fn test_dup_optimization_for_int() {
558        // Test that dup on Int uses optimized load/store instead of clone_value
559        // This verifies the Issue #186 optimization actually fires
560        let mut codegen = CodeGen::new();
561
562        use crate::types::Type;
563
        // Two words: `test_dup` (the function under inspection) and a `main`
        // that calls it. `test_dup` does `42 dup drop drop`.
564        let program = Program {
565            includes: vec![],
566            unions: vec![],
567            words: vec![
568                WordDef {
569                    name: "test_dup".to_string(),
570                    effect: None,
571                    body: vec![
572                        Statement::IntLiteral(42), // stmt 0: push Int
573                        Statement::WordCall {
574                            // stmt 1: dup
575                            name: "dup".to_string(),
576                            span: None,
577                        },
578                        Statement::WordCall {
579                            name: "drop".to_string(),
580                            span: None,
581                        },
582                        Statement::WordCall {
583                            name: "drop".to_string(),
584                            span: None,
585                        },
586                    ],
587                    source: None,
588                },
589                WordDef {
590                    name: "main".to_string(),
591                    effect: None,
592                    body: vec![Statement::WordCall {
593                        name: "test_dup".to_string(),
594                        span: None,
595                    }],
596                    source: None,
597                },
598            ],
599        };
600
601        // Provide type info: before statement 1 (dup), top of stack is Int
        // The map is keyed by (word name, statement index within that word).
602        let mut statement_types = HashMap::new();
603        statement_types.insert(("test_dup".to_string(), 1), Type::Int);
604
605        let ir = codegen
606            .codegen_program(&program, HashMap::new(), statement_types)
607            .unwrap();
608
609        // Extract just the test_dup function
        // `find` on the sub-slice returns an offset relative to `func_start`;
        // adding `func_start` makes it absolute and `+ 3` (the length of
        // "\n}\n") extends the slice past the closing brace line.
610        let func_start = ir.find("define tailcc ptr @seq_test_dup").unwrap();
611        let func_end = ir[func_start..].find("\n}\n").unwrap() + func_start + 3;
612        let test_dup_fn = &ir[func_start..func_end];
613
614        // The optimized path should use load/store %Value directly
615        assert!(
616            test_dup_fn.contains("load %Value"),
617            "Optimized dup should use 'load %Value', got:\n{}",
618            test_dup_fn
619        );
620        assert!(
621            test_dup_fn.contains("store %Value"),
622            "Optimized dup should use 'store %Value', got:\n{}",
623            test_dup_fn
624        );
625
626        // The optimized path should NOT call clone_value
627        assert!(
628            !test_dup_fn.contains("@patch_seq_clone_value"),
629            "Optimized dup should NOT call clone_value for Int, got:\n{}",
630            test_dup_fn
631        );
632    }
633
634    #[test]
635    fn test_dup_optimization_after_literal() {
636        // Test Issue #195: dup after literal push uses optimized path
637        // Pattern: `42 dup` should be optimized even without type map info
638        let mut codegen = CodeGen::new();
639
        // Same program as the typed dup test (`42 dup drop drop` in
        // `test_dup`, called from `main`); the difference is that no statement
        // type map is supplied below.
640        let program = Program {
641            includes: vec![],
642            unions: vec![],
643            words: vec![
644                WordDef {
645                    name: "test_dup".to_string(),
646                    effect: None,
647                    body: vec![
648                        Statement::IntLiteral(42), // Previous statement is Int literal
649                        Statement::WordCall {
650                            // dup should be optimized
651                            name: "dup".to_string(),
652                            span: None,
653                        },
654                        Statement::WordCall {
655                            name: "drop".to_string(),
656                            span: None,
657                        },
658                        Statement::WordCall {
659                            name: "drop".to_string(),
660                            span: None,
661                        },
662                    ],
663                    source: None,
664                },
665                WordDef {
666                    name: "main".to_string(),
667                    effect: None,
668                    body: vec![Statement::WordCall {
669                        name: "test_dup".to_string(),
670                        span: None,
671                    }],
672                    source: None,
673                },
674            ],
675        };
676
677        // No type info provided - but literal heuristic should optimize
678        let ir = codegen
679            .codegen_program(&program, HashMap::new(), HashMap::new())
680            .unwrap();
681
682        // Extract just the test_dup function
        // The slice runs from the `define` line through the first "\n}\n"
        // after it; `+ 3` is the length of that terminator.
683        let func_start = ir.find("define tailcc ptr @seq_test_dup").unwrap();
684        let func_end = ir[func_start..].find("\n}\n").unwrap() + func_start + 3;
685        let test_dup_fn = &ir[func_start..func_end];
686
687        // With literal heuristic, should use optimized path
688        assert!(
689            test_dup_fn.contains("load %Value"),
690            "Dup after int literal should use optimized load, got:\n{}",
691            test_dup_fn
692        );
693        assert!(
694            test_dup_fn.contains("store %Value"),
695            "Dup after int literal should use optimized store, got:\n{}",
696            test_dup_fn
697        );
        // The runtime clone helper must not be referenced anywhere in the
        // function body.
698        assert!(
699            !test_dup_fn.contains("@patch_seq_clone_value"),
700            "Dup after int literal should NOT call clone_value, got:\n{}",
701            test_dup_fn
702        );
703    }
704
705    #[test]
706    fn test_dup_no_optimization_after_word_call() {
707        // Test that dup after word call (unknown type) uses safe clone_value path
708        let mut codegen = CodeGen::new();
709
        // Three words: `get_value` pushes 42, `test_dup` calls it and then does
        // `dup drop drop`, and `main` calls `test_dup`. The value being
        // duplicated therefore comes from a word call, not a literal in the
        // same body.
710        let program = Program {
711            includes: vec![],
712            unions: vec![],
713            words: vec![
714                WordDef {
715                    name: "get_value".to_string(),
716                    effect: None,
717                    body: vec![Statement::IntLiteral(42)],
718                    source: None,
719                },
720                WordDef {
721                    name: "test_dup".to_string(),
722                    effect: None,
723                    body: vec![
724                        Statement::WordCall {
725                            // Previous statement is word call (unknown type)
726                            name: "get_value".to_string(),
727                            span: None,
728                        },
729                        Statement::WordCall {
730                            // dup should NOT be optimized
731                            name: "dup".to_string(),
732                            span: None,
733                        },
734                        Statement::WordCall {
735                            name: "drop".to_string(),
736                            span: None,
737                        },
738                        Statement::WordCall {
739                            name: "drop".to_string(),
740                            span: None,
741                        },
742                    ],
743                    source: None,
744                },
745                WordDef {
746                    name: "main".to_string(),
747                    effect: None,
748                    body: vec![Statement::WordCall {
749                        name: "test_dup".to_string(),
750                        span: None,
751                    }],
752                    source: None,
753                },
754            ],
755        };
756
757        // No type info provided and no literal before dup
758        let ir = codegen
759            .codegen_program(&program, HashMap::new(), HashMap::new())
760            .unwrap();
761
762        // Extract just the test_dup function
        // The slice runs from the `define` line through the first "\n}\n"
        // after it; `+ 3` is the length of that terminator.
763        let func_start = ir.find("define tailcc ptr @seq_test_dup").unwrap();
764        let func_end = ir[func_start..].find("\n}\n").unwrap() + func_start + 3;
765        let test_dup_fn = &ir[func_start..func_end];
766
767        // Without literal or type info, should call clone_value (safe path)
768        assert!(
769            test_dup_fn.contains("@patch_seq_clone_value"),
770            "Dup after word call should call clone_value, got:\n{}",
771            test_dup_fn
772        );
773    }
774
775    #[test]
776    fn test_roll_constant_optimization() {
777        // Test Issue #192: roll with constant N uses optimized inline code
778        // Pattern: `2 roll` should generate rot-like inline code
779        let mut codegen = CodeGen::new();
780
        // `test_roll` pushes 1, 2, 3, then the literal 2 followed by `roll`,
        // and finally drops the three remaining values; `main` calls it.
781        let program = Program {
782            includes: vec![],
783            unions: vec![],
784            words: vec![
785                WordDef {
786                    name: "test_roll".to_string(),
787                    effect: None,
788                    body: vec![
789                        Statement::IntLiteral(1),
790                        Statement::IntLiteral(2),
791                        Statement::IntLiteral(3),
792                        Statement::IntLiteral(2), // Constant N for roll
793                        Statement::WordCall {
794                            // 2 roll = rot
795                            name: "roll".to_string(),
796                            span: None,
797                        },
798                        Statement::WordCall {
799                            name: "drop".to_string(),
800                            span: None,
801                        },
802                        Statement::WordCall {
803                            name: "drop".to_string(),
804                            span: None,
805                        },
806                        Statement::WordCall {
807                            name: "drop".to_string(),
808                            span: None,
809                        },
810                    ],
811                    source: None,
812                },
813                WordDef {
814                    name: "main".to_string(),
815                    effect: None,
816                    body: vec![Statement::WordCall {
817                        name: "test_roll".to_string(),
818                        span: None,
819                    }],
820                    source: None,
821                },
822            ],
823        };
824
825        let ir = codegen
826            .codegen_program(&program, HashMap::new(), HashMap::new())
827            .unwrap();
828
829        // Extract just the test_roll function
        // The slice runs from the `define` line through the first "\n}\n"
        // after it; `+ 3` is the length of that terminator.
830        let func_start = ir.find("define tailcc ptr @seq_test_roll").unwrap();
831        let func_end = ir[func_start..].find("\n}\n").unwrap() + func_start + 3;
832        let test_roll_fn = &ir[func_start..func_end];
833
834        // With constant N=2, should NOT do dynamic calculation
835        // Should NOT have dynamic add/sub for offset calculation
        // The pattern requires an SSA name as the first operand, so an add
        // whose first operand is a constant (presumably how the literal pushes
        // above are emitted -- TODO confirm) does not trip it.
836        assert!(
837            !test_roll_fn.contains("= add i64 %"),
838            "Constant roll should use constant offset, not dynamic add, got:\n{}",
839            test_roll_fn
840        );
841
842        // Should NOT call memmove for small N (n=2 uses direct loads/stores)
843        assert!(
844            !test_roll_fn.contains("@llvm.memmove"),
845            "2 roll should not use memmove, got:\n{}",
846            test_roll_fn
847        );
848    }
849
850    #[test]
851    fn test_pick_constant_optimization() {
852        // Test Issue #192: pick with constant N uses constant offset
853        // Pattern: `1 pick` should generate code with constant -3 offset
854        let mut codegen = CodeGen::new();
855
856        let program = Program {
857            includes: vec![],
858            unions: vec![],
859            words: vec![
860                WordDef {
861                    name: "test_pick".to_string(),
862                    effect: None,
863                    body: vec![
864                        Statement::IntLiteral(10),
865                        Statement::IntLiteral(20),
866                        Statement::IntLiteral(1), // Constant N for pick
867                        Statement::WordCall {
868                            // 1 pick = over
869                            name: "pick".to_string(),
870                            span: None,
871                        },
872                        Statement::WordCall {
873                            name: "drop".to_string(),
874                            span: None,
875                        },
876                        Statement::WordCall {
877                            name: "drop".to_string(),
878                            span: None,
879                        },
880                        Statement::WordCall {
881                            name: "drop".to_string(),
882                            span: None,
883                        },
884                    ],
885                    source: None,
886                },
887                WordDef {
888                    name: "main".to_string(),
889                    effect: None,
890                    body: vec![Statement::WordCall {
891                        name: "test_pick".to_string(),
892                        span: None,
893                    }],
894                    source: None,
895                },
896            ],
897        };
898
899        let ir = codegen
900            .codegen_program(&program, HashMap::new(), HashMap::new())
901            .unwrap();
902
903        // Extract just the test_pick function
904        let func_start = ir.find("define tailcc ptr @seq_test_pick").unwrap();
905        let func_end = ir[func_start..].find("\n}\n").unwrap() + func_start + 3;
906        let test_pick_fn = &ir[func_start..func_end];
907
908        // With constant N=1, should use constant offset -3
909        // Should NOT have dynamic add/sub for offset calculation
910        assert!(
911            !test_pick_fn.contains("= add i64 %"),
912            "Constant pick should use constant offset, not dynamic add, got:\n{}",
913            test_pick_fn
914        );
915
916        // Should have the constant offset -3 in getelementptr
917        assert!(
918            test_pick_fn.contains("i64 -3"),
919            "1 pick should use offset -3 (-(1+2)), got:\n{}",
920            test_pick_fn
921        );
922    }
923
924    #[test]
925    fn test_small_word_marked_alwaysinline() {
926        // Test Issue #187: Small words get alwaysinline attribute
927        let mut codegen = CodeGen::new();
928
929        let program = Program {
930            includes: vec![],
931            unions: vec![],
932            words: vec![
933                WordDef {
934                    name: "double".to_string(), // Small word: dup i.+
935                    effect: None,
936                    body: vec![
937                        Statement::WordCall {
938                            name: "dup".to_string(),
939                            span: None,
940                        },
941                        Statement::WordCall {
942                            name: "i.+".to_string(),
943                            span: None,
944                        },
945                    ],
946                    source: None,
947                },
948                WordDef {
949                    name: "main".to_string(),
950                    effect: None,
951                    body: vec![
952                        Statement::IntLiteral(21),
953                        Statement::WordCall {
954                            name: "double".to_string(),
955                            span: None,
956                        },
957                    ],
958                    source: None,
959                },
960            ],
961        };
962
963        let ir = codegen
964            .codegen_program(&program, HashMap::new(), HashMap::new())
965            .unwrap();
966
967        // Small word 'double' should have alwaysinline attribute
968        assert!(
969            ir.contains("define tailcc ptr @seq_double(ptr %stack) alwaysinline"),
970            "Small word should have alwaysinline attribute, got:\n{}",
971            ir.lines()
972                .filter(|l| l.contains("define"))
973                .collect::<Vec<_>>()
974                .join("\n")
975        );
976
977        // main should NOT have alwaysinline (uses C calling convention)
978        assert!(
979            ir.contains("define ptr @seq_main(ptr %stack) {"),
980            "main should not have alwaysinline, got:\n{}",
981            ir.lines()
982                .filter(|l| l.contains("define"))
983                .collect::<Vec<_>>()
984                .join("\n")
985        );
986    }
987
988    #[test]
989    fn test_recursive_word_not_inlined() {
990        // Test Issue #187: Recursive words should NOT get alwaysinline
991        let mut codegen = CodeGen::new();
992
993        let program = Program {
994            includes: vec![],
995            unions: vec![],
996            words: vec![
997                WordDef {
998                    name: "countdown".to_string(), // Recursive
999                    effect: None,
1000                    body: vec![
1001                        Statement::WordCall {
1002                            name: "dup".to_string(),
1003                            span: None,
1004                        },
1005                        Statement::If {
1006                            then_branch: vec![
1007                                Statement::IntLiteral(1),
1008                                Statement::WordCall {
1009                                    name: "i.-".to_string(),
1010                                    span: None,
1011                                },
1012                                Statement::WordCall {
1013                                    name: "countdown".to_string(), // Recursive call
1014                                    span: None,
1015                                },
1016                            ],
1017                            else_branch: Some(vec![]),
1018                        },
1019                    ],
1020                    source: None,
1021                },
1022                WordDef {
1023                    name: "main".to_string(),
1024                    effect: None,
1025                    body: vec![
1026                        Statement::IntLiteral(5),
1027                        Statement::WordCall {
1028                            name: "countdown".to_string(),
1029                            span: None,
1030                        },
1031                    ],
1032                    source: None,
1033                },
1034            ],
1035        };
1036
1037        let ir = codegen
1038            .codegen_program(&program, HashMap::new(), HashMap::new())
1039            .unwrap();
1040
1041        // Recursive word should NOT have alwaysinline
1042        assert!(
1043            ir.contains("define tailcc ptr @seq_countdown(ptr %stack) {"),
1044            "Recursive word should NOT have alwaysinline, got:\n{}",
1045            ir.lines()
1046                .filter(|l| l.contains("define"))
1047                .collect::<Vec<_>>()
1048                .join("\n")
1049        );
1050    }
1051
1052    #[test]
1053    fn test_recursive_word_in_match_not_inlined() {
1054        // Test Issue #187: Recursive calls inside match arms should prevent inlining
1055        use crate::ast::{MatchArm, Pattern, UnionDef, UnionVariant};
1056
1057        let mut codegen = CodeGen::new();
1058
1059        let program = Program {
1060            includes: vec![],
1061            unions: vec![UnionDef {
1062                name: "Option".to_string(),
1063                variants: vec![
1064                    UnionVariant {
1065                        name: "Some".to_string(),
1066                        fields: vec![],
1067                        source: None,
1068                    },
1069                    UnionVariant {
1070                        name: "None".to_string(),
1071                        fields: vec![],
1072                        source: None,
1073                    },
1074                ],
1075                source: None,
1076            }],
1077            words: vec![
1078                WordDef {
1079                    name: "process".to_string(), // Recursive in match arm
1080                    effect: None,
1081                    body: vec![Statement::Match {
1082                        arms: vec![
1083                            MatchArm {
1084                                pattern: Pattern::Variant("Some".to_string()),
1085                                body: vec![Statement::WordCall {
1086                                    name: "process".to_string(), // Recursive call
1087                                    span: None,
1088                                }],
1089                            },
1090                            MatchArm {
1091                                pattern: Pattern::Variant("None".to_string()),
1092                                body: vec![],
1093                            },
1094                        ],
1095                    }],
1096                    source: None,
1097                },
1098                WordDef {
1099                    name: "main".to_string(),
1100                    effect: None,
1101                    body: vec![Statement::WordCall {
1102                        name: "process".to_string(),
1103                        span: None,
1104                    }],
1105                    source: None,
1106                },
1107            ],
1108        };
1109
1110        let ir = codegen
1111            .codegen_program(&program, HashMap::new(), HashMap::new())
1112            .unwrap();
1113
1114        // Recursive word (via match arm) should NOT have alwaysinline
1115        assert!(
1116            ir.contains("define tailcc ptr @seq_process(ptr %stack) {"),
1117            "Recursive word in match should NOT have alwaysinline, got:\n{}",
1118            ir.lines()
1119                .filter(|l| l.contains("define"))
1120                .collect::<Vec<_>>()
1121                .join("\n")
1122        );
1123    }
1124}