seqc/codegen/
mod.rs

1//! LLVM IR Code Generation
2//!
3//! This module generates LLVM IR as text (.ll files) for Seq programs.
4//! The code generation is split into focused submodules for maintainability.
5//!
6//! # Key Concepts
7//!
8//! ## Value Representation
9//!
10//! All Seq values use the `%Value` type, a 40-byte Rust enum with `#[repr(C)]`.
11//! Layout: `{ i64, i64, i64, i64, i64 }` (discriminant + largest variant payload).
12//! This fixed size allows pass-by-value, required for Alpine/musl compatibility.
13//!
14//! ## Calling Conventions
15//!
16//! - **User-defined words**: Use `tailcc` (tail call convention) to enable TCO.
17//!   Each word has two functions: a C-convention wrapper (`seq_word_*`) for
18//!   external calls and a `tailcc` implementation (`seq_word_*_impl`) for
19//!   internal calls that can use `musttail`.
20//!
21//! - **Runtime functions**: Use C convention (`ccc`). Declared in `runtime.rs`.
22//!
//! - **Quotations**: Use C convention. Quotations are first-class functions that
//!   may capture their environment (only closures do; pure quotations capture
//!   nothing). They have wrapper/impl pairs but currently don't support TCO due
//!   to closure complexity.
26//!
27//! ## Virtual Stack Optimization
28//!
29//! The top N values (default 4) are kept in SSA virtual registers instead of
30//! memory. This avoids store/load overhead for common patterns like `2 3 i.+`.
31//! Values are "spilled" to the memory stack at control flow points (if/else,
32//! loops) and function calls. See `virtual_stack.rs` and `VirtualValue` in
33//! `state.rs`.
34//!
35//! ## Tail Call Optimization (TCO)
36//!
37//! Word calls in tail position use LLVM's `musttail` for guaranteed TCO.
38//! A call is in tail position when it's the last operation before return.
39//! TCO is disabled in these contexts:
40//! - Inside `main` (uses C convention for entry point)
41//! - Inside quotations (closure semantics require stack frames)
42//! - Inside closures that capture variables
43//!
44//! ## Quotations and Closures
45//!
46//! Quotations (`[ ... ]`) compile to function pointers pushed onto the stack.
47//! - **Pure quotations**: No captured variables, just a function pointer.
48//! - **Closures**: Capture variables from enclosing scope. The runtime allocates
49//!   a closure struct containing the function pointer and captured values.
50//!
51//! Each quotation generates a wrapper function (C convention, for `call` builtin)
52//! and an impl function. Closure captures are analyzed at compile time by
53//! `capture_analysis.rs`.
54//!
55//! # Module Structure
56//!
57//! - `state.rs`: Core types (CodeGen, VirtualValue, TailPosition)
58//! - `program.rs`: Main entry points (codegen_program*)
59//! - `words.rs`: Word and quotation code generation
60//! - `statements.rs`: Statement dispatch and main function
61//! - `inline/`: Inline operation code generation (no runtime calls)
62//!   - `dispatch.rs`: Main inline dispatch logic
63//!   - `ops.rs`: Individual inline operations
64//! - `control_flow.rs`: If/else, match statements
65//! - `virtual_stack.rs`: Virtual register optimization
66//! - `types.rs`: Type helpers and exhaustiveness checking
67//! - `globals.rs`: String and symbol constants
68//! - `runtime.rs`: Runtime function declarations
69//! - `ffi_wrappers.rs`: FFI wrapper generation
70//! - `platform.rs`: Platform detection
71//! - `error.rs`: Error types
72
73// Submodules
74mod control_flow;
75mod error;
76mod ffi_wrappers;
77mod globals;
78mod inline;
79#[cfg(feature = "nanbox")]
80mod inline_nanbox;
81mod platform;
82mod program;
83mod runtime;
84mod state;
85mod statements;
86mod types;
87mod virtual_stack;
88mod words;
89
90// Public re-exports
91pub use error::CodeGenError;
92pub use platform::{ffi_c_args, ffi_return_type, get_target_triple};
93pub use runtime::{BUILTIN_SYMBOLS, RUNTIME_DECLARATIONS, emit_runtime_decls};
94pub use state::CodeGen;
95
96// Internal re-exports for submodules
97use state::{
98    BranchResult, MAX_VIRTUAL_STACK, QuotationFunctions, TailPosition, UNREACHABLE_PREDECESSOR,
99    VirtualValue, mangle_name,
100};
101
102#[cfg(test)]
103mod tests {
104    use super::*;
105    use crate::ast::{Program, Statement, WordDef};
106    use std::collections::HashMap;
107
108    #[test]
109    fn test_codegen_hello_world() {
110        let mut codegen = CodeGen::new();
111
112        let program = Program {
113            includes: vec![],
114            unions: vec![],
115            words: vec![WordDef {
116                name: "main".to_string(),
117                effect: None,
118                body: vec![
119                    Statement::StringLiteral("Hello, World!".to_string()),
120                    Statement::WordCall {
121                        name: "io.write-line".to_string(),
122                        span: None,
123                    },
124                ],
125                source: None,
126            }],
127        };
128
129        let ir = codegen
130            .codegen_program(&program, HashMap::new(), HashMap::new())
131            .unwrap();
132
133        assert!(ir.contains("define i32 @main(i32 %argc, ptr %argv)"));
134        // main uses C calling convention (no tailcc) since it's called from C runtime
135        assert!(ir.contains("define ptr @seq_main(ptr %stack)"));
136        assert!(ir.contains("call ptr @patch_seq_push_string"));
137        assert!(ir.contains("call ptr @patch_seq_write_line"));
138        assert!(ir.contains("\"Hello, World!\\00\""));
139    }
140
141    #[test]
142    fn test_codegen_io_write() {
143        // Test io.write (write without newline)
144        let mut codegen = CodeGen::new();
145
146        let program = Program {
147            includes: vec![],
148            unions: vec![],
149            words: vec![WordDef {
150                name: "main".to_string(),
151                effect: None,
152                body: vec![
153                    Statement::StringLiteral("no newline".to_string()),
154                    Statement::WordCall {
155                        name: "io.write".to_string(),
156                        span: None,
157                    },
158                ],
159                source: None,
160            }],
161        };
162
163        let ir = codegen
164            .codegen_program(&program, HashMap::new(), HashMap::new())
165            .unwrap();
166
167        assert!(ir.contains("call ptr @patch_seq_push_string"));
168        assert!(ir.contains("call ptr @patch_seq_write"));
169        assert!(ir.contains("\"no newline\\00\""));
170    }
171
172    #[test]
173    fn test_codegen_arithmetic() {
174        // Test inline tagged stack arithmetic with virtual registers (Issue #189)
175        let mut codegen = CodeGen::new();
176
177        let program = Program {
178            includes: vec![],
179            unions: vec![],
180            words: vec![WordDef {
181                name: "main".to_string(),
182                effect: None,
183                body: vec![
184                    Statement::IntLiteral(2),
185                    Statement::IntLiteral(3),
186                    Statement::WordCall {
187                        name: "i.add".to_string(),
188                        span: None,
189                    },
190                ],
191                source: None,
192            }],
193        };
194
195        let ir = codegen
196            .codegen_program(&program, HashMap::new(), HashMap::new())
197            .unwrap();
198
199        // Issue #189: With virtual registers, integers are kept in SSA variables
200        // Using identity add: %n = add i64 0, <value>
201        assert!(ir.contains("add i64 0, 2"), "Should create SSA var for 2");
202        assert!(ir.contains("add i64 0, 3"), "Should create SSA var for 3");
203        // The add operation uses virtual registers directly
204        assert!(ir.contains("add i64 %"), "Should add SSA variables");
205    }
206
207    #[test]
208    fn test_pure_inline_test_mode() {
209        let mut codegen = CodeGen::new_pure_inline_test();
210
211        // Simple program: 5 3 add (should return 8)
212        let program = Program {
213            includes: vec![],
214            unions: vec![],
215            words: vec![WordDef {
216                name: "main".to_string(),
217                effect: None,
218                body: vec![
219                    Statement::IntLiteral(5),
220                    Statement::IntLiteral(3),
221                    Statement::WordCall {
222                        name: "i.add".to_string(),
223                        span: None,
224                    },
225                ],
226                source: None,
227            }],
228        };
229
230        let ir = codegen
231            .codegen_program(&program, HashMap::new(), HashMap::new())
232            .unwrap();
233
234        // Pure inline test mode should:
235        // 1. NOT CALL the scheduler (declarations are ok, calls are not)
236        assert!(!ir.contains("call void @patch_seq_scheduler_init"));
237        assert!(!ir.contains("call i64 @patch_seq_strand_spawn"));
238
239        // 2. Have main allocate tagged stack and call seq_main directly
240        assert!(ir.contains("call ptr @seq_stack_new_default()"));
241        assert!(ir.contains("call ptr @seq_main(ptr %stack_base)"));
242
243        // 3. Read result from stack and return as exit code
244        assert!(ir.contains("trunc i64 %result to i32"));
245        assert!(ir.contains("ret i32 %exit_code"));
246
247        // 4. Use inline push with virtual registers (Issue #189)
248        assert!(!ir.contains("call ptr @patch_seq_push_int"));
249        // Values are kept in SSA variables via identity add
250        assert!(ir.contains("add i64 0, 5"), "Should create SSA var for 5");
251        assert!(ir.contains("add i64 0, 3"), "Should create SSA var for 3");
252
253        // 5. Use inline add with virtual registers (add i64 %, not call patch_seq_add)
254        assert!(!ir.contains("call ptr @patch_seq_add"));
255        assert!(ir.contains("add i64 %"), "Should add SSA variables");
256    }
257
258    #[test]
259    fn test_escape_llvm_string() {
260        assert_eq!(CodeGen::escape_llvm_string("hello").unwrap(), "hello");
261        assert_eq!(CodeGen::escape_llvm_string("a\nb").unwrap(), r"a\0Ab");
262        assert_eq!(CodeGen::escape_llvm_string("a\tb").unwrap(), r"a\09b");
263        assert_eq!(CodeGen::escape_llvm_string("a\"b").unwrap(), r"a\22b");
264    }
265
266    #[test]
267    fn test_external_builtins_declared() {
268        use crate::config::{CompilerConfig, ExternalBuiltin};
269
270        let mut codegen = CodeGen::new();
271
272        let program = Program {
273            includes: vec![],
274            unions: vec![],
275            words: vec![WordDef {
276                name: "main".to_string(),
277                effect: None,
278                body: vec![
279                    Statement::IntLiteral(42),
280                    Statement::WordCall {
281                        name: "my-external-op".to_string(),
282                        span: None,
283                    },
284                ],
285                source: None,
286            }],
287        };
288
289        let config = CompilerConfig::new()
290            .with_builtin(ExternalBuiltin::new("my-external-op", "test_runtime_my_op"));
291
292        let ir = codegen
293            .codegen_program_with_config(&program, HashMap::new(), HashMap::new(), &config)
294            .unwrap();
295
296        // Should declare the external builtin
297        assert!(
298            ir.contains("declare ptr @test_runtime_my_op(ptr)"),
299            "IR should declare external builtin"
300        );
301
302        // Should call the external builtin
303        assert!(
304            ir.contains("call ptr @test_runtime_my_op"),
305            "IR should call external builtin"
306        );
307    }
308
309    #[test]
310    fn test_multiple_external_builtins() {
311        use crate::config::{CompilerConfig, ExternalBuiltin};
312
313        let mut codegen = CodeGen::new();
314
315        let program = Program {
316            includes: vec![],
317            unions: vec![],
318            words: vec![WordDef {
319                name: "main".to_string(),
320                effect: None,
321                body: vec![
322                    Statement::WordCall {
323                        name: "actor-self".to_string(),
324                        span: None,
325                    },
326                    Statement::WordCall {
327                        name: "journal-append".to_string(),
328                        span: None,
329                    },
330                ],
331                source: None,
332            }],
333        };
334
335        let config = CompilerConfig::new()
336            .with_builtin(ExternalBuiltin::new("actor-self", "seq_actors_self"))
337            .with_builtin(ExternalBuiltin::new(
338                "journal-append",
339                "seq_actors_journal_append",
340            ));
341
342        let ir = codegen
343            .codegen_program_with_config(&program, HashMap::new(), HashMap::new(), &config)
344            .unwrap();
345
346        // Should declare both external builtins
347        assert!(ir.contains("declare ptr @seq_actors_self(ptr)"));
348        assert!(ir.contains("declare ptr @seq_actors_journal_append(ptr)"));
349
350        // Should call both
351        assert!(ir.contains("call ptr @seq_actors_self"));
352        assert!(ir.contains("call ptr @seq_actors_journal_append"));
353    }
354
355    #[test]
356    fn test_external_builtins_with_library_paths() {
357        use crate::config::{CompilerConfig, ExternalBuiltin};
358
359        let config = CompilerConfig::new()
360            .with_builtin(ExternalBuiltin::new("my-op", "runtime_my_op"))
361            .with_library_path("/custom/lib")
362            .with_library("myruntime");
363
364        assert_eq!(config.external_builtins.len(), 1);
365        assert_eq!(config.library_paths, vec!["/custom/lib"]);
366        assert_eq!(config.libraries, vec!["myruntime"]);
367    }
368
369    #[test]
370    fn test_external_builtin_full_pipeline() {
371        // Test that external builtins work through the full compile pipeline
372        // including parser, AST validation, type checker, and codegen
373        use crate::compile_to_ir_with_config;
374        use crate::config::{CompilerConfig, ExternalBuiltin};
375
376        let source = r#"
377            : main ( -- Int )
378              42 my-transform
379              0
380            ;
381        "#;
382
383        let config = CompilerConfig::new().with_builtin(ExternalBuiltin::new(
384            "my-transform",
385            "ext_runtime_transform",
386        ));
387
388        // This should succeed - the external builtin is registered
389        let result = compile_to_ir_with_config(source, &config);
390        assert!(
391            result.is_ok(),
392            "Compilation should succeed: {:?}",
393            result.err()
394        );
395
396        let ir = result.unwrap();
397        assert!(ir.contains("declare ptr @ext_runtime_transform(ptr)"));
398        assert!(ir.contains("call ptr @ext_runtime_transform"));
399    }
400
401    #[test]
402    fn test_external_builtin_without_config_fails() {
403        // Test that using an external builtin without config fails validation
404        use crate::compile_to_ir;
405
406        let source = r#"
407            : main ( -- Int )
408              42 unknown-builtin
409              0
410            ;
411        "#;
412
413        // This should fail - unknown-builtin is not registered
414        let result = compile_to_ir(source);
415        assert!(result.is_err());
416        assert!(result.unwrap_err().contains("unknown-builtin"));
417    }
418
419    #[test]
420    fn test_match_exhaustiveness_error() {
421        use crate::compile_to_ir;
422
423        let source = r#"
424            union Result { Ok { value: Int } Err { msg: String } }
425
426            : handle ( Variant -- Int )
427              match
428                Ok -> drop 1
429                # Missing Err arm!
430              end
431            ;
432
433            : main ( -- ) 42 Make-Ok handle drop ;
434        "#;
435
436        let result = compile_to_ir(source);
437        assert!(result.is_err());
438        let err = result.unwrap_err();
439        assert!(err.contains("Non-exhaustive match"));
440        assert!(err.contains("Result"));
441        assert!(err.contains("Err"));
442    }
443
444    #[test]
445    fn test_match_exhaustive_compiles() {
446        use crate::compile_to_ir;
447
448        let source = r#"
449            union Result { Ok { value: Int } Err { msg: String } }
450
451            : handle ( Variant -- Int )
452              match
453                Ok -> drop 1
454                Err -> drop 0
455              end
456            ;
457
458            : main ( -- ) 42 Make-Ok handle drop ;
459        "#;
460
461        let result = compile_to_ir(source);
462        assert!(
463            result.is_ok(),
464            "Exhaustive match should compile: {:?}",
465            result
466        );
467    }
468
469    #[test]
470    fn test_codegen_symbol() {
471        // Test symbol literal codegen
472        let mut codegen = CodeGen::new();
473
474        let program = Program {
475            includes: vec![],
476            unions: vec![],
477            words: vec![WordDef {
478                name: "main".to_string(),
479                effect: None,
480                body: vec![
481                    Statement::Symbol("hello".to_string()),
482                    Statement::WordCall {
483                        name: "symbol->string".to_string(),
484                        span: None,
485                    },
486                    Statement::WordCall {
487                        name: "io.write-line".to_string(),
488                        span: None,
489                    },
490                ],
491                source: None,
492            }],
493        };
494
495        let ir = codegen
496            .codegen_program(&program, HashMap::new(), HashMap::new())
497            .unwrap();
498
499        assert!(ir.contains("call ptr @patch_seq_push_interned_symbol"));
500        assert!(ir.contains("call ptr @patch_seq_symbol_to_string"));
501        assert!(ir.contains("\"hello\\00\""));
502    }
503
504    #[test]
505    fn test_symbol_interning_dedup() {
506        // Issue #166: Test that duplicate symbol literals share the same global
507        let mut codegen = CodeGen::new();
508
509        let program = Program {
510            includes: vec![],
511            unions: vec![],
512            words: vec![WordDef {
513                name: "main".to_string(),
514                effect: None,
515                body: vec![
516                    // Use :hello twice - should share the same .sym global
517                    Statement::Symbol("hello".to_string()),
518                    Statement::Symbol("hello".to_string()),
519                    Statement::Symbol("world".to_string()), // Different symbol
520                ],
521                source: None,
522            }],
523        };
524
525        let ir = codegen
526            .codegen_program(&program, HashMap::new(), HashMap::new())
527            .unwrap();
528
529        // Should have exactly one .sym global for "hello" and one for "world"
530        // Count occurrences of symbol global definitions (lines starting with @.sym)
531        let sym_defs: Vec<_> = ir
532            .lines()
533            .filter(|l| l.trim().starts_with("@.sym."))
534            .collect();
535
536        // There should be 2 definitions: .sym.0 for "hello" and .sym.1 for "world"
537        assert_eq!(
538            sym_defs.len(),
539            2,
540            "Expected 2 symbol globals, got: {:?}",
541            sym_defs
542        );
543
544        // Verify deduplication: :hello appears twice but .sym.0 is reused
545        let hello_uses: usize = ir.matches("@.sym.0").count();
546        assert_eq!(
547            hello_uses, 3,
548            "Expected 3 occurrences of .sym.0 (1 def + 2 uses)"
549        );
550
551        // The IR should contain static symbol structure with capacity=0
552        assert!(
553            ir.contains("i64 0, i8 1"),
554            "Symbol global should have capacity=0 and global=1"
555        );
556    }
557
558    #[test]
559    fn test_dup_optimization_for_int() {
560        // Test that dup on Int uses optimized load/store instead of clone_value
561        // This verifies the Issue #186 optimization actually fires
562        let mut codegen = CodeGen::new();
563
564        use crate::types::Type;
565
566        let program = Program {
567            includes: vec![],
568            unions: vec![],
569            words: vec![
570                WordDef {
571                    name: "test_dup".to_string(),
572                    effect: None,
573                    body: vec![
574                        Statement::IntLiteral(42), // stmt 0: push Int
575                        Statement::WordCall {
576                            // stmt 1: dup
577                            name: "dup".to_string(),
578                            span: None,
579                        },
580                        Statement::WordCall {
581                            name: "drop".to_string(),
582                            span: None,
583                        },
584                        Statement::WordCall {
585                            name: "drop".to_string(),
586                            span: None,
587                        },
588                    ],
589                    source: None,
590                },
591                WordDef {
592                    name: "main".to_string(),
593                    effect: None,
594                    body: vec![Statement::WordCall {
595                        name: "test_dup".to_string(),
596                        span: None,
597                    }],
598                    source: None,
599                },
600            ],
601        };
602
603        // Provide type info: before statement 1 (dup), top of stack is Int
604        let mut statement_types = HashMap::new();
605        statement_types.insert(("test_dup".to_string(), 1), Type::Int);
606
607        let ir = codegen
608            .codegen_program(&program, HashMap::new(), statement_types)
609            .unwrap();
610
611        // Extract just the test_dup function
612        let func_start = ir.find("define tailcc ptr @seq_test_dup").unwrap();
613        let func_end = ir[func_start..].find("\n}\n").unwrap() + func_start + 3;
614        let test_dup_fn = &ir[func_start..func_end];
615
616        // The optimized path should use load/store directly (no clone_value call)
617        // In nanbox mode: load i64 / store i64
618        // In non-nanbox mode: load %Value / store %Value
619        #[cfg(not(feature = "nanbox"))]
620        {
621            assert!(
622                test_dup_fn.contains("load %Value"),
623                "Optimized dup should use 'load %Value', got:\n{}",
624                test_dup_fn
625            );
626            assert!(
627                test_dup_fn.contains("store %Value"),
628                "Optimized dup should use 'store %Value', got:\n{}",
629                test_dup_fn
630            );
631        }
632        #[cfg(feature = "nanbox")]
633        {
634            assert!(
635                test_dup_fn.contains("load i64"),
636                "Optimized dup should use 'load i64', got:\n{}",
637                test_dup_fn
638            );
639            assert!(
640                test_dup_fn.contains("store i64"),
641                "Optimized dup should use 'store i64', got:\n{}",
642                test_dup_fn
643            );
644        }
645
646        // The optimized path should NOT call clone_value
647        assert!(
648            !test_dup_fn.contains("@patch_seq_clone_value"),
649            "Optimized dup should NOT call clone_value for Int, got:\n{}",
650            test_dup_fn
651        );
652    }
653
654    #[test]
655    fn test_dup_optimization_after_literal() {
656        // Test Issue #195: dup after literal push uses optimized path
657        // Pattern: `42 dup` should be optimized even without type map info
658        let mut codegen = CodeGen::new();
659
660        let program = Program {
661            includes: vec![],
662            unions: vec![],
663            words: vec![
664                WordDef {
665                    name: "test_dup".to_string(),
666                    effect: None,
667                    body: vec![
668                        Statement::IntLiteral(42), // Previous statement is Int literal
669                        Statement::WordCall {
670                            // dup should be optimized
671                            name: "dup".to_string(),
672                            span: None,
673                        },
674                        Statement::WordCall {
675                            name: "drop".to_string(),
676                            span: None,
677                        },
678                        Statement::WordCall {
679                            name: "drop".to_string(),
680                            span: None,
681                        },
682                    ],
683                    source: None,
684                },
685                WordDef {
686                    name: "main".to_string(),
687                    effect: None,
688                    body: vec![Statement::WordCall {
689                        name: "test_dup".to_string(),
690                        span: None,
691                    }],
692                    source: None,
693                },
694            ],
695        };
696
697        // No type info provided - but literal heuristic should optimize
698        let ir = codegen
699            .codegen_program(&program, HashMap::new(), HashMap::new())
700            .unwrap();
701
702        // Extract just the test_dup function
703        let func_start = ir.find("define tailcc ptr @seq_test_dup").unwrap();
704        let func_end = ir[func_start..].find("\n}\n").unwrap() + func_start + 3;
705        let test_dup_fn = &ir[func_start..func_end];
706
707        // With literal heuristic, should use optimized path
708        #[cfg(not(feature = "nanbox"))]
709        {
710            assert!(
711                test_dup_fn.contains("load %Value"),
712                "Dup after int literal should use optimized load, got:\n{}",
713                test_dup_fn
714            );
715            assert!(
716                test_dup_fn.contains("store %Value"),
717                "Dup after int literal should use optimized store, got:\n{}",
718                test_dup_fn
719            );
720        }
721        #[cfg(feature = "nanbox")]
722        {
723            assert!(
724                test_dup_fn.contains("load i64"),
725                "Dup after int literal should use optimized load i64, got:\n{}",
726                test_dup_fn
727            );
728            assert!(
729                test_dup_fn.contains("store i64"),
730                "Dup after int literal should use optimized store i64, got:\n{}",
731                test_dup_fn
732            );
733        }
734        assert!(
735            !test_dup_fn.contains("@patch_seq_clone_value"),
736            "Dup after int literal should NOT call clone_value, got:\n{}",
737            test_dup_fn
738        );
739    }
740
741    #[test]
742    fn test_dup_no_optimization_after_word_call() {
743        // Test that dup after word call (unknown type) uses safe clone_value path
744        let mut codegen = CodeGen::new();
745
746        let program = Program {
747            includes: vec![],
748            unions: vec![],
749            words: vec![
750                WordDef {
751                    name: "get_value".to_string(),
752                    effect: None,
753                    body: vec![Statement::IntLiteral(42)],
754                    source: None,
755                },
756                WordDef {
757                    name: "test_dup".to_string(),
758                    effect: None,
759                    body: vec![
760                        Statement::WordCall {
761                            // Previous statement is word call (unknown type)
762                            name: "get_value".to_string(),
763                            span: None,
764                        },
765                        Statement::WordCall {
766                            // dup should NOT be optimized
767                            name: "dup".to_string(),
768                            span: None,
769                        },
770                        Statement::WordCall {
771                            name: "drop".to_string(),
772                            span: None,
773                        },
774                        Statement::WordCall {
775                            name: "drop".to_string(),
776                            span: None,
777                        },
778                    ],
779                    source: None,
780                },
781                WordDef {
782                    name: "main".to_string(),
783                    effect: None,
784                    body: vec![Statement::WordCall {
785                        name: "test_dup".to_string(),
786                        span: None,
787                    }],
788                    source: None,
789                },
790            ],
791        };
792
793        // No type info provided and no literal before dup
794        let ir = codegen
795            .codegen_program(&program, HashMap::new(), HashMap::new())
796            .unwrap();
797
798        // Extract just the test_dup function
799        let func_start = ir.find("define tailcc ptr @seq_test_dup").unwrap();
800        let func_end = ir[func_start..].find("\n}\n").unwrap() + func_start + 3;
801        let test_dup_fn = &ir[func_start..func_end];
802
803        // Without literal or type info, should call clone_value (safe path)
804        assert!(
805            test_dup_fn.contains("@patch_seq_clone_value"),
806            "Dup after word call should call clone_value, got:\n{}",
807            test_dup_fn
808        );
809    }
810
811    #[test]
812    fn test_roll_constant_optimization() {
813        // Test Issue #192: roll with constant N uses optimized inline code
814        // Pattern: `2 roll` should generate rot-like inline code
815        let mut codegen = CodeGen::new();
816
817        let program = Program {
818            includes: vec![],
819            unions: vec![],
820            words: vec![
821                WordDef {
822                    name: "test_roll".to_string(),
823                    effect: None,
824                    body: vec![
825                        Statement::IntLiteral(1),
826                        Statement::IntLiteral(2),
827                        Statement::IntLiteral(3),
828                        Statement::IntLiteral(2), // Constant N for roll
829                        Statement::WordCall {
830                            // 2 roll = rot
831                            name: "roll".to_string(),
832                            span: None,
833                        },
834                        Statement::WordCall {
835                            name: "drop".to_string(),
836                            span: None,
837                        },
838                        Statement::WordCall {
839                            name: "drop".to_string(),
840                            span: None,
841                        },
842                        Statement::WordCall {
843                            name: "drop".to_string(),
844                            span: None,
845                        },
846                    ],
847                    source: None,
848                },
849                WordDef {
850                    name: "main".to_string(),
851                    effect: None,
852                    body: vec![Statement::WordCall {
853                        name: "test_roll".to_string(),
854                        span: None,
855                    }],
856                    source: None,
857                },
858            ],
859        };
860
861        let ir = codegen
862            .codegen_program(&program, HashMap::new(), HashMap::new())
863            .unwrap();
864
865        // Extract just the test_roll function
866        let func_start = ir.find("define tailcc ptr @seq_test_roll").unwrap();
867        let func_end = ir[func_start..].find("\n}\n").unwrap() + func_start + 3;
868        let test_roll_fn = &ir[func_start..func_end];
869
870        // With constant N=2, should NOT do dynamic calculation
871        // Should NOT have dynamic add/sub for offset calculation
872        assert!(
873            !test_roll_fn.contains("= add i64 %"),
874            "Constant roll should use constant offset, not dynamic add, got:\n{}",
875            test_roll_fn
876        );
877
878        // Should NOT call memmove for small N (n=2 uses direct loads/stores)
879        assert!(
880            !test_roll_fn.contains("@llvm.memmove"),
881            "2 roll should not use memmove, got:\n{}",
882            test_roll_fn
883        );
884    }
885
886    #[test]
887    fn test_pick_constant_optimization() {
888        // Test Issue #192: pick with constant N uses constant offset
889        // Pattern: `1 pick` should generate code with constant -3 offset
890        let mut codegen = CodeGen::new();
891
892        let program = Program {
893            includes: vec![],
894            unions: vec![],
895            words: vec![
896                WordDef {
897                    name: "test_pick".to_string(),
898                    effect: None,
899                    body: vec![
900                        Statement::IntLiteral(10),
901                        Statement::IntLiteral(20),
902                        Statement::IntLiteral(1), // Constant N for pick
903                        Statement::WordCall {
904                            // 1 pick = over
905                            name: "pick".to_string(),
906                            span: None,
907                        },
908                        Statement::WordCall {
909                            name: "drop".to_string(),
910                            span: None,
911                        },
912                        Statement::WordCall {
913                            name: "drop".to_string(),
914                            span: None,
915                        },
916                        Statement::WordCall {
917                            name: "drop".to_string(),
918                            span: None,
919                        },
920                    ],
921                    source: None,
922                },
923                WordDef {
924                    name: "main".to_string(),
925                    effect: None,
926                    body: vec![Statement::WordCall {
927                        name: "test_pick".to_string(),
928                        span: None,
929                    }],
930                    source: None,
931                },
932            ],
933        };
934
935        let ir = codegen
936            .codegen_program(&program, HashMap::new(), HashMap::new())
937            .unwrap();
938
939        // Extract just the test_pick function
940        let func_start = ir.find("define tailcc ptr @seq_test_pick").unwrap();
941        let func_end = ir[func_start..].find("\n}\n").unwrap() + func_start + 3;
942        let test_pick_fn = &ir[func_start..func_end];
943
944        // With constant N=1, should use constant offset -3
945        // Should NOT have dynamic add/sub for offset calculation
946        assert!(
947            !test_pick_fn.contains("= add i64 %"),
948            "Constant pick should use constant offset, not dynamic add, got:\n{}",
949            test_pick_fn
950        );
951
952        // Should have the constant offset -3 in getelementptr
953        assert!(
954            test_pick_fn.contains("i64 -3"),
955            "1 pick should use offset -3 (-(1+2)), got:\n{}",
956            test_pick_fn
957        );
958    }
959
960    #[test]
961    fn test_small_word_marked_alwaysinline() {
962        // Test Issue #187: Small words get alwaysinline attribute
963        let mut codegen = CodeGen::new();
964
965        let program = Program {
966            includes: vec![],
967            unions: vec![],
968            words: vec![
969                WordDef {
970                    name: "double".to_string(), // Small word: dup i.+
971                    effect: None,
972                    body: vec![
973                        Statement::WordCall {
974                            name: "dup".to_string(),
975                            span: None,
976                        },
977                        Statement::WordCall {
978                            name: "i.+".to_string(),
979                            span: None,
980                        },
981                    ],
982                    source: None,
983                },
984                WordDef {
985                    name: "main".to_string(),
986                    effect: None,
987                    body: vec![
988                        Statement::IntLiteral(21),
989                        Statement::WordCall {
990                            name: "double".to_string(),
991                            span: None,
992                        },
993                    ],
994                    source: None,
995                },
996            ],
997        };
998
999        let ir = codegen
1000            .codegen_program(&program, HashMap::new(), HashMap::new())
1001            .unwrap();
1002
1003        // Small word 'double' should have alwaysinline attribute
1004        assert!(
1005            ir.contains("define tailcc ptr @seq_double(ptr %stack) alwaysinline"),
1006            "Small word should have alwaysinline attribute, got:\n{}",
1007            ir.lines()
1008                .filter(|l| l.contains("define"))
1009                .collect::<Vec<_>>()
1010                .join("\n")
1011        );
1012
1013        // main should NOT have alwaysinline (uses C calling convention)
1014        assert!(
1015            ir.contains("define ptr @seq_main(ptr %stack) {"),
1016            "main should not have alwaysinline, got:\n{}",
1017            ir.lines()
1018                .filter(|l| l.contains("define"))
1019                .collect::<Vec<_>>()
1020                .join("\n")
1021        );
1022    }
1023
1024    #[test]
1025    fn test_recursive_word_not_inlined() {
1026        // Test Issue #187: Recursive words should NOT get alwaysinline
1027        let mut codegen = CodeGen::new();
1028
1029        let program = Program {
1030            includes: vec![],
1031            unions: vec![],
1032            words: vec![
1033                WordDef {
1034                    name: "countdown".to_string(), // Recursive
1035                    effect: None,
1036                    body: vec![
1037                        Statement::WordCall {
1038                            name: "dup".to_string(),
1039                            span: None,
1040                        },
1041                        Statement::If {
1042                            then_branch: vec![
1043                                Statement::IntLiteral(1),
1044                                Statement::WordCall {
1045                                    name: "i.-".to_string(),
1046                                    span: None,
1047                                },
1048                                Statement::WordCall {
1049                                    name: "countdown".to_string(), // Recursive call
1050                                    span: None,
1051                                },
1052                            ],
1053                            else_branch: Some(vec![]),
1054                        },
1055                    ],
1056                    source: None,
1057                },
1058                WordDef {
1059                    name: "main".to_string(),
1060                    effect: None,
1061                    body: vec![
1062                        Statement::IntLiteral(5),
1063                        Statement::WordCall {
1064                            name: "countdown".to_string(),
1065                            span: None,
1066                        },
1067                    ],
1068                    source: None,
1069                },
1070            ],
1071        };
1072
1073        let ir = codegen
1074            .codegen_program(&program, HashMap::new(), HashMap::new())
1075            .unwrap();
1076
1077        // Recursive word should NOT have alwaysinline
1078        assert!(
1079            ir.contains("define tailcc ptr @seq_countdown(ptr %stack) {"),
1080            "Recursive word should NOT have alwaysinline, got:\n{}",
1081            ir.lines()
1082                .filter(|l| l.contains("define"))
1083                .collect::<Vec<_>>()
1084                .join("\n")
1085        );
1086    }
1087
1088    #[test]
1089    fn test_recursive_word_in_match_not_inlined() {
1090        // Test Issue #187: Recursive calls inside match arms should prevent inlining
1091        use crate::ast::{MatchArm, Pattern, UnionDef, UnionVariant};
1092
1093        let mut codegen = CodeGen::new();
1094
1095        let program = Program {
1096            includes: vec![],
1097            unions: vec![UnionDef {
1098                name: "Option".to_string(),
1099                variants: vec![
1100                    UnionVariant {
1101                        name: "Some".to_string(),
1102                        fields: vec![],
1103                        source: None,
1104                    },
1105                    UnionVariant {
1106                        name: "None".to_string(),
1107                        fields: vec![],
1108                        source: None,
1109                    },
1110                ],
1111                source: None,
1112            }],
1113            words: vec![
1114                WordDef {
1115                    name: "process".to_string(), // Recursive in match arm
1116                    effect: None,
1117                    body: vec![Statement::Match {
1118                        arms: vec![
1119                            MatchArm {
1120                                pattern: Pattern::Variant("Some".to_string()),
1121                                body: vec![Statement::WordCall {
1122                                    name: "process".to_string(), // Recursive call
1123                                    span: None,
1124                                }],
1125                            },
1126                            MatchArm {
1127                                pattern: Pattern::Variant("None".to_string()),
1128                                body: vec![],
1129                            },
1130                        ],
1131                    }],
1132                    source: None,
1133                },
1134                WordDef {
1135                    name: "main".to_string(),
1136                    effect: None,
1137                    body: vec![Statement::WordCall {
1138                        name: "process".to_string(),
1139                        span: None,
1140                    }],
1141                    source: None,
1142                },
1143            ],
1144        };
1145
1146        let ir = codegen
1147            .codegen_program(&program, HashMap::new(), HashMap::new())
1148            .unwrap();
1149
1150        // Recursive word (via match arm) should NOT have alwaysinline
1151        assert!(
1152            ir.contains("define tailcc ptr @seq_process(ptr %stack) {"),
1153            "Recursive word in match should NOT have alwaysinline, got:\n{}",
1154            ir.lines()
1155                .filter(|l| l.contains("define"))
1156                .collect::<Vec<_>>()
1157                .join("\n")
1158        );
1159    }
1160}