seqc/codegen/
mod.rs

1//! LLVM IR Code Generation
2//!
3//! This module generates LLVM IR as text (.ll files) for Seq programs.
4//! The code generation is split into focused submodules for maintainability.
5//!
6//! # Key Concepts
7//!
8//! ## Value Representation
9//!
10//! All Seq values use the `%Value` type, a 40-byte Rust enum with `#[repr(C)]`.
11//! Layout: `{ i64, i64, i64, i64, i64 }` (discriminant + largest variant payload).
12//! This fixed size allows pass-by-value, required for Alpine/musl compatibility.
13//!
14//! ## Calling Conventions
15//!
16//! - **User-defined words**: Use `tailcc` (tail call convention) to enable TCO.
17//!   Each word has two functions: a C-convention wrapper (`seq_word_*`) for
18//!   external calls and a `tailcc` implementation (`seq_word_*_impl`) for
19//!   internal calls that can use `musttail`.
20//!
21//! - **Runtime functions**: Use C convention (`ccc`). Declared in `runtime.rs`.
22//!
23//! - **Quotations**: Use C convention. Quotations are first-class functions that
24//!   capture their environment. They have wrapper/impl pairs but currently don't
25//!   support TCO due to closure complexity.
26//!
27//! ## Virtual Stack Optimization
28//!
29//! The top N values (default 4) are kept in SSA virtual registers instead of
30//! memory. This avoids store/load overhead for common patterns like `2 3 i.+`.
31//! Values are "spilled" to the memory stack at control flow points (if/else,
32//! loops) and function calls. See `virtual_stack.rs` and `VirtualValue` in
33//! `state.rs`.
34//!
35//! ## Tail Call Optimization (TCO)
36//!
37//! Word calls in tail position use LLVM's `musttail` for guaranteed TCO.
38//! A call is in tail position when it's the last operation before return.
39//! TCO is disabled in these contexts:
40//! - Inside `main` (uses C convention for entry point)
41//! - Inside quotations (closure semantics require stack frames)
42//! - Inside closures that capture variables
43//!
44//! ## Quotations and Closures
45//!
46//! Quotations (`[ ... ]`) compile to function pointers pushed onto the stack.
47//! - **Pure quotations**: No captured variables, just a function pointer.
48//! - **Closures**: Capture variables from enclosing scope. The runtime allocates
49//!   a closure struct containing the function pointer and captured values.
50//!
51//! Each quotation generates a wrapper function (C convention, for `call` builtin)
52//! and an impl function. Closure captures are analyzed at compile time by
53//! `capture_analysis.rs`.
54//!
55//! # Module Structure
56//!
57//! - `state.rs`: Core types (CodeGen, VirtualValue, TailPosition)
58//! - `program.rs`: Main entry points (codegen_program*)
59//! - `words.rs`: Word and quotation code generation
60//! - `statements.rs`: Statement dispatch and main function
61//! - `inline/`: Inline operation code generation (no runtime calls)
62//!   - `dispatch.rs`: Main inline dispatch logic
63//!   - `ops.rs`: Individual inline operations
64//! - `control_flow.rs`: If/else, match statements
65//! - `virtual_stack.rs`: Virtual register optimization
66//! - `types.rs`: Type helpers and exhaustiveness checking
67//! - `globals.rs`: String and symbol constants
68//! - `runtime.rs`: Runtime function declarations
69//! - `ffi_wrappers.rs`: FFI wrapper generation
70//! - `platform.rs`: Platform detection
71//! - `error.rs`: Error types
72
73// Submodules
74mod control_flow;
75mod error;
76mod ffi_wrappers;
77mod globals;
78mod inline;
79mod platform;
80mod program;
81mod runtime;
82mod state;
83mod statements;
84mod types;
85mod virtual_stack;
86mod words;
87
88// Public re-exports
89pub use error::CodeGenError;
90pub use platform::{ffi_c_args, ffi_return_type, get_target_triple};
91pub use runtime::{BUILTIN_SYMBOLS, RUNTIME_DECLARATIONS, emit_runtime_decls};
92pub use state::CodeGen;
93
94// Internal re-exports for submodules
95use state::{
96    BranchResult, MAX_VIRTUAL_STACK, QuotationFunctions, TailPosition, UNREACHABLE_PREDECESSOR,
97    VirtualValue, mangle_name,
98};
99
100#[cfg(test)]
101mod tests {
102    use super::*;
103    use crate::ast::{Program, Statement, WordDef};
104    use std::collections::HashMap;
105
106    #[test]
107    fn test_codegen_hello_world() {
108        let mut codegen = CodeGen::new();
109
110        let program = Program {
111            includes: vec![],
112            unions: vec![],
113            words: vec![WordDef {
114                name: "main".to_string(),
115                effect: None,
116                body: vec![
117                    Statement::StringLiteral("Hello, World!".to_string()),
118                    Statement::WordCall {
119                        name: "io.write-line".to_string(),
120                        span: None,
121                    },
122                ],
123                source: None,
124                allowed_lints: vec![],
125            }],
126        };
127
128        let ir = codegen
129            .codegen_program(&program, HashMap::new(), HashMap::new())
130            .unwrap();
131
132        assert!(ir.contains("define i32 @main(i32 %argc, ptr %argv)"));
133        // main uses C calling convention (no tailcc) since it's called from C runtime
134        assert!(ir.contains("define ptr @seq_main(ptr %stack)"));
135        assert!(ir.contains("call ptr @patch_seq_push_string"));
136        assert!(ir.contains("call ptr @patch_seq_write_line"));
137        assert!(ir.contains("\"Hello, World!\\00\""));
138    }
139
140    #[test]
141    fn test_codegen_io_write() {
142        // Test io.write (write without newline)
143        let mut codegen = CodeGen::new();
144
145        let program = Program {
146            includes: vec![],
147            unions: vec![],
148            words: vec![WordDef {
149                name: "main".to_string(),
150                effect: None,
151                body: vec![
152                    Statement::StringLiteral("no newline".to_string()),
153                    Statement::WordCall {
154                        name: "io.write".to_string(),
155                        span: None,
156                    },
157                ],
158                source: None,
159                allowed_lints: vec![],
160            }],
161        };
162
163        let ir = codegen
164            .codegen_program(&program, HashMap::new(), HashMap::new())
165            .unwrap();
166
167        assert!(ir.contains("call ptr @patch_seq_push_string"));
168        assert!(ir.contains("call ptr @patch_seq_write"));
169        assert!(ir.contains("\"no newline\\00\""));
170    }
171
172    #[test]
173    fn test_codegen_arithmetic() {
174        // Test inline tagged stack arithmetic with virtual registers (Issue #189)
175        let mut codegen = CodeGen::new();
176
177        let program = Program {
178            includes: vec![],
179            unions: vec![],
180            words: vec![WordDef {
181                name: "main".to_string(),
182                effect: None,
183                body: vec![
184                    Statement::IntLiteral(2),
185                    Statement::IntLiteral(3),
186                    Statement::WordCall {
187                        name: "i.add".to_string(),
188                        span: None,
189                    },
190                ],
191                source: None,
192                allowed_lints: vec![],
193            }],
194        };
195
196        let ir = codegen
197            .codegen_program(&program, HashMap::new(), HashMap::new())
198            .unwrap();
199
200        // Issue #189: With virtual registers, integers are kept in SSA variables
201        // Using identity add: %n = add i64 0, <value>
202        assert!(ir.contains("add i64 0, 2"), "Should create SSA var for 2");
203        assert!(ir.contains("add i64 0, 3"), "Should create SSA var for 3");
204        // The add operation uses virtual registers directly
205        assert!(ir.contains("add i64 %"), "Should add SSA variables");
206    }
207
208    #[test]
209    fn test_pure_inline_test_mode() {
210        let mut codegen = CodeGen::new_pure_inline_test();
211
212        // Simple program: 5 3 add (should return 8)
213        let program = Program {
214            includes: vec![],
215            unions: vec![],
216            words: vec![WordDef {
217                name: "main".to_string(),
218                effect: None,
219                body: vec![
220                    Statement::IntLiteral(5),
221                    Statement::IntLiteral(3),
222                    Statement::WordCall {
223                        name: "i.add".to_string(),
224                        span: None,
225                    },
226                ],
227                source: None,
228                allowed_lints: vec![],
229            }],
230        };
231
232        let ir = codegen
233            .codegen_program(&program, HashMap::new(), HashMap::new())
234            .unwrap();
235
236        // Pure inline test mode should:
237        // 1. NOT CALL the scheduler (declarations are ok, calls are not)
238        assert!(!ir.contains("call void @patch_seq_scheduler_init"));
239        assert!(!ir.contains("call i64 @patch_seq_strand_spawn"));
240
241        // 2. Have main allocate tagged stack and call seq_main directly
242        assert!(ir.contains("call ptr @seq_stack_new_default()"));
243        assert!(ir.contains("call ptr @seq_main(ptr %stack_base)"));
244
245        // 3. Read result from stack and return as exit code
246        assert!(ir.contains("trunc i64 %result to i32"));
247        assert!(ir.contains("ret i32 %exit_code"));
248
249        // 4. Use inline push with virtual registers (Issue #189)
250        assert!(!ir.contains("call ptr @patch_seq_push_int"));
251        // Values are kept in SSA variables via identity add
252        assert!(ir.contains("add i64 0, 5"), "Should create SSA var for 5");
253        assert!(ir.contains("add i64 0, 3"), "Should create SSA var for 3");
254
255        // 5. Use inline add with virtual registers (add i64 %, not call patch_seq_add)
256        assert!(!ir.contains("call ptr @patch_seq_add"));
257        assert!(ir.contains("add i64 %"), "Should add SSA variables");
258    }
259
260    #[test]
261    fn test_escape_llvm_string() {
262        assert_eq!(CodeGen::escape_llvm_string("hello").unwrap(), "hello");
263        assert_eq!(CodeGen::escape_llvm_string("a\nb").unwrap(), r"a\0Ab");
264        assert_eq!(CodeGen::escape_llvm_string("a\tb").unwrap(), r"a\09b");
265        assert_eq!(CodeGen::escape_llvm_string("a\"b").unwrap(), r"a\22b");
266    }
267
268    #[test]
269    #[allow(deprecated)] // Testing codegen in isolation, not full pipeline
270    fn test_external_builtins_declared() {
271        use crate::config::{CompilerConfig, ExternalBuiltin};
272
273        let mut codegen = CodeGen::new();
274
275        let program = Program {
276            includes: vec![],
277            unions: vec![],
278            words: vec![WordDef {
279                name: "main".to_string(),
280                effect: None, // Codegen doesn't check effects
281                body: vec![
282                    Statement::IntLiteral(42),
283                    Statement::WordCall {
284                        name: "my-external-op".to_string(),
285                        span: None,
286                    },
287                ],
288                source: None,
289                allowed_lints: vec![],
290            }],
291        };
292
293        let config = CompilerConfig::new()
294            .with_builtin(ExternalBuiltin::new("my-external-op", "test_runtime_my_op"));
295
296        let ir = codegen
297            .codegen_program_with_config(&program, HashMap::new(), HashMap::new(), &config)
298            .unwrap();
299
300        // Should declare the external builtin
301        assert!(
302            ir.contains("declare ptr @test_runtime_my_op(ptr)"),
303            "IR should declare external builtin"
304        );
305
306        // Should call the external builtin
307        assert!(
308            ir.contains("call ptr @test_runtime_my_op"),
309            "IR should call external builtin"
310        );
311    }
312
313    #[test]
314    #[allow(deprecated)] // Testing codegen in isolation, not full pipeline
315    fn test_multiple_external_builtins() {
316        use crate::config::{CompilerConfig, ExternalBuiltin};
317
318        let mut codegen = CodeGen::new();
319
320        let program = Program {
321            includes: vec![],
322            unions: vec![],
323            words: vec![WordDef {
324                name: "main".to_string(),
325                effect: None, // Codegen doesn't check effects
326                body: vec![
327                    Statement::WordCall {
328                        name: "actor-self".to_string(),
329                        span: None,
330                    },
331                    Statement::WordCall {
332                        name: "journal-append".to_string(),
333                        span: None,
334                    },
335                ],
336                source: None,
337                allowed_lints: vec![],
338            }],
339        };
340
341        let config = CompilerConfig::new()
342            .with_builtin(ExternalBuiltin::new("actor-self", "seq_actors_self"))
343            .with_builtin(ExternalBuiltin::new(
344                "journal-append",
345                "seq_actors_journal_append",
346            ));
347
348        let ir = codegen
349            .codegen_program_with_config(&program, HashMap::new(), HashMap::new(), &config)
350            .unwrap();
351
352        // Should declare both external builtins
353        assert!(ir.contains("declare ptr @seq_actors_self(ptr)"));
354        assert!(ir.contains("declare ptr @seq_actors_journal_append(ptr)"));
355
356        // Should call both
357        assert!(ir.contains("call ptr @seq_actors_self"));
358        assert!(ir.contains("call ptr @seq_actors_journal_append"));
359    }
360
361    #[test]
362    #[allow(deprecated)] // Testing config builder, not full pipeline
363    fn test_external_builtins_with_library_paths() {
364        use crate::config::{CompilerConfig, ExternalBuiltin};
365
366        let config = CompilerConfig::new()
367            .with_builtin(ExternalBuiltin::new("my-op", "runtime_my_op"))
368            .with_library_path("/custom/lib")
369            .with_library("myruntime");
370
371        assert_eq!(config.external_builtins.len(), 1);
372        assert_eq!(config.library_paths, vec!["/custom/lib"]);
373        assert_eq!(config.libraries, vec!["myruntime"]);
374    }
375
376    #[test]
377    fn test_external_builtin_full_pipeline() {
378        // Test that external builtins work through the full compile pipeline
379        // including parser, AST validation, type checker, and codegen
380        use crate::compile_to_ir_with_config;
381        use crate::config::{CompilerConfig, ExternalBuiltin};
382        use crate::types::{Effect, StackType, Type};
383
384        let source = r#"
385            : main ( -- Int )
386              42 my-transform
387              0
388            ;
389        "#;
390
391        // External builtins must have explicit effects (v2.0 requirement)
392        let effect = Effect::new(StackType::singleton(Type::Int), StackType::Empty);
393        let config = CompilerConfig::new().with_builtin(ExternalBuiltin::with_effect(
394            "my-transform",
395            "ext_runtime_transform",
396            effect,
397        ));
398
399        // This should succeed - the external builtin is registered
400        let result = compile_to_ir_with_config(source, &config);
401        assert!(
402            result.is_ok(),
403            "Compilation should succeed: {:?}",
404            result.err()
405        );
406
407        let ir = result.unwrap();
408        assert!(ir.contains("declare ptr @ext_runtime_transform(ptr)"));
409        assert!(ir.contains("call ptr @ext_runtime_transform"));
410    }
411
412    #[test]
413    fn test_external_builtin_without_config_fails() {
414        // Test that using an external builtin without config fails validation
415        use crate::compile_to_ir;
416
417        let source = r#"
418            : main ( -- Int )
419              42 unknown-builtin
420              0
421            ;
422        "#;
423
424        // This should fail - unknown-builtin is not registered
425        let result = compile_to_ir(source);
426        assert!(result.is_err());
427        assert!(result.unwrap_err().contains("unknown-builtin"));
428    }
429
430    #[test]
431    fn test_match_exhaustiveness_error() {
432        use crate::compile_to_ir;
433
434        let source = r#"
435            union Result { Ok { value: Int } Err { msg: String } }
436
437            : handle ( Variant -- Int )
438              match
439                Ok -> drop 1
440                # Missing Err arm!
441              end
442            ;
443
444            : main ( -- ) 42 Make-Ok handle drop ;
445        "#;
446
447        let result = compile_to_ir(source);
448        assert!(result.is_err());
449        let err = result.unwrap_err();
450        assert!(err.contains("Non-exhaustive match"));
451        assert!(err.contains("Result"));
452        assert!(err.contains("Err"));
453    }
454
455    #[test]
456    fn test_match_exhaustive_compiles() {
457        use crate::compile_to_ir;
458
459        let source = r#"
460            union Result { Ok { value: Int } Err { msg: String } }
461
462            : handle ( Variant -- Int )
463              match
464                Ok -> drop 1
465                Err -> drop 0
466              end
467            ;
468
469            : main ( -- ) 42 Make-Ok handle drop ;
470        "#;
471
472        let result = compile_to_ir(source);
473        assert!(
474            result.is_ok(),
475            "Exhaustive match should compile: {:?}",
476            result
477        );
478    }
479
480    #[test]
481    fn test_codegen_symbol() {
482        // Test symbol literal codegen
483        let mut codegen = CodeGen::new();
484
485        let program = Program {
486            includes: vec![],
487            unions: vec![],
488            words: vec![WordDef {
489                name: "main".to_string(),
490                effect: None,
491                body: vec![
492                    Statement::Symbol("hello".to_string()),
493                    Statement::WordCall {
494                        name: "symbol->string".to_string(),
495                        span: None,
496                    },
497                    Statement::WordCall {
498                        name: "io.write-line".to_string(),
499                        span: None,
500                    },
501                ],
502                source: None,
503                allowed_lints: vec![],
504            }],
505        };
506
507        let ir = codegen
508            .codegen_program(&program, HashMap::new(), HashMap::new())
509            .unwrap();
510
511        assert!(ir.contains("call ptr @patch_seq_push_interned_symbol"));
512        assert!(ir.contains("call ptr @patch_seq_symbol_to_string"));
513        assert!(ir.contains("\"hello\\00\""));
514    }
515
516    #[test]
517    fn test_symbol_interning_dedup() {
518        // Issue #166: Test that duplicate symbol literals share the same global
519        let mut codegen = CodeGen::new();
520
521        let program = Program {
522            includes: vec![],
523            unions: vec![],
524            words: vec![WordDef {
525                name: "main".to_string(),
526                effect: None,
527                body: vec![
528                    // Use :hello twice - should share the same .sym global
529                    Statement::Symbol("hello".to_string()),
530                    Statement::Symbol("hello".to_string()),
531                    Statement::Symbol("world".to_string()), // Different symbol
532                ],
533                source: None,
534                allowed_lints: vec![],
535            }],
536        };
537
538        let ir = codegen
539            .codegen_program(&program, HashMap::new(), HashMap::new())
540            .unwrap();
541
542        // Should have exactly one .sym global for "hello" and one for "world"
543        // Count occurrences of symbol global definitions (lines starting with @.sym)
544        let sym_defs: Vec<_> = ir
545            .lines()
546            .filter(|l| l.trim().starts_with("@.sym."))
547            .collect();
548
549        // There should be 2 definitions: .sym.0 for "hello" and .sym.1 for "world"
550        assert_eq!(
551            sym_defs.len(),
552            2,
553            "Expected 2 symbol globals, got: {:?}",
554            sym_defs
555        );
556
557        // Verify deduplication: :hello appears twice but .sym.0 is reused
558        let hello_uses: usize = ir.matches("@.sym.0").count();
559        assert_eq!(
560            hello_uses, 3,
561            "Expected 3 occurrences of .sym.0 (1 def + 2 uses)"
562        );
563
564        // The IR should contain static symbol structure with capacity=0
565        assert!(
566            ir.contains("i64 0, i8 1"),
567            "Symbol global should have capacity=0 and global=1"
568        );
569    }
570
571    #[test]
572    fn test_dup_optimization_for_int() {
573        // Test that dup on Int uses optimized load/store instead of clone_value
574        // This verifies the Issue #186 optimization actually fires
575        let mut codegen = CodeGen::new();
576
577        use crate::types::Type;
578
579        let program = Program {
580            includes: vec![],
581            unions: vec![],
582            words: vec![
583                WordDef {
584                    name: "test_dup".to_string(),
585                    effect: None,
586                    body: vec![
587                        Statement::IntLiteral(42), // stmt 0: push Int
588                        Statement::WordCall {
589                            // stmt 1: dup
590                            name: "dup".to_string(),
591                            span: None,
592                        },
593                        Statement::WordCall {
594                            name: "drop".to_string(),
595                            span: None,
596                        },
597                        Statement::WordCall {
598                            name: "drop".to_string(),
599                            span: None,
600                        },
601                    ],
602                    source: None,
603                    allowed_lints: vec![],
604                },
605                WordDef {
606                    name: "main".to_string(),
607                    effect: None,
608                    body: vec![Statement::WordCall {
609                        name: "test_dup".to_string(),
610                        span: None,
611                    }],
612                    source: None,
613                    allowed_lints: vec![],
614                },
615            ],
616        };
617
618        // Provide type info: before statement 1 (dup), top of stack is Int
619        let mut statement_types = HashMap::new();
620        statement_types.insert(("test_dup".to_string(), 1), Type::Int);
621
622        let ir = codegen
623            .codegen_program(&program, HashMap::new(), statement_types)
624            .unwrap();
625
626        // Extract just the test_dup function
627        let func_start = ir.find("define tailcc ptr @seq_test_dup").unwrap();
628        let func_end = ir[func_start..].find("\n}\n").unwrap() + func_start + 3;
629        let test_dup_fn = &ir[func_start..func_end];
630
631        // The optimized path should use load/store directly (no clone_value call)
632        assert!(
633            test_dup_fn.contains("load %Value"),
634            "Optimized dup should use 'load %Value', got:\n{}",
635            test_dup_fn
636        );
637        assert!(
638            test_dup_fn.contains("store %Value"),
639            "Optimized dup should use 'store %Value', got:\n{}",
640            test_dup_fn
641        );
642
643        // The optimized path should NOT call clone_value
644        assert!(
645            !test_dup_fn.contains("@patch_seq_clone_value"),
646            "Optimized dup should NOT call clone_value for Int, got:\n{}",
647            test_dup_fn
648        );
649    }
650
651    #[test]
652    fn test_dup_optimization_after_literal() {
653        // Test Issue #195: dup after literal push uses optimized path
654        // Pattern: `42 dup` should be optimized even without type map info
655        let mut codegen = CodeGen::new();
656
657        let program = Program {
658            includes: vec![],
659            unions: vec![],
660            words: vec![
661                WordDef {
662                    name: "test_dup".to_string(),
663                    effect: None,
664                    body: vec![
665                        Statement::IntLiteral(42), // Previous statement is Int literal
666                        Statement::WordCall {
667                            // dup should be optimized
668                            name: "dup".to_string(),
669                            span: None,
670                        },
671                        Statement::WordCall {
672                            name: "drop".to_string(),
673                            span: None,
674                        },
675                        Statement::WordCall {
676                            name: "drop".to_string(),
677                            span: None,
678                        },
679                    ],
680                    source: None,
681                    allowed_lints: vec![],
682                },
683                WordDef {
684                    name: "main".to_string(),
685                    effect: None,
686                    body: vec![Statement::WordCall {
687                        name: "test_dup".to_string(),
688                        span: None,
689                    }],
690                    source: None,
691                    allowed_lints: vec![],
692                },
693            ],
694        };
695
696        // No type info provided - but literal heuristic should optimize
697        let ir = codegen
698            .codegen_program(&program, HashMap::new(), HashMap::new())
699            .unwrap();
700
701        // Extract just the test_dup function
702        let func_start = ir.find("define tailcc ptr @seq_test_dup").unwrap();
703        let func_end = ir[func_start..].find("\n}\n").unwrap() + func_start + 3;
704        let test_dup_fn = &ir[func_start..func_end];
705
706        // With literal heuristic, should use optimized path
707        assert!(
708            test_dup_fn.contains("load %Value"),
709            "Dup after int literal should use optimized load, got:\n{}",
710            test_dup_fn
711        );
712        assert!(
713            test_dup_fn.contains("store %Value"),
714            "Dup after int literal should use optimized store, got:\n{}",
715            test_dup_fn
716        );
717        assert!(
718            !test_dup_fn.contains("@patch_seq_clone_value"),
719            "Dup after int literal should NOT call clone_value, got:\n{}",
720            test_dup_fn
721        );
722    }
723
724    #[test]
725    fn test_dup_no_optimization_after_word_call() {
726        // Test that dup after word call (unknown type) uses safe clone_value path
727        let mut codegen = CodeGen::new();
728
729        let program = Program {
730            includes: vec![],
731            unions: vec![],
732            words: vec![
733                WordDef {
734                    name: "get_value".to_string(),
735                    effect: None,
736                    body: vec![Statement::IntLiteral(42)],
737                    source: None,
738                    allowed_lints: vec![],
739                },
740                WordDef {
741                    name: "test_dup".to_string(),
742                    effect: None,
743                    body: vec![
744                        Statement::WordCall {
745                            // Previous statement is word call (unknown type)
746                            name: "get_value".to_string(),
747                            span: None,
748                        },
749                        Statement::WordCall {
750                            // dup should NOT be optimized
751                            name: "dup".to_string(),
752                            span: None,
753                        },
754                        Statement::WordCall {
755                            name: "drop".to_string(),
756                            span: None,
757                        },
758                        Statement::WordCall {
759                            name: "drop".to_string(),
760                            span: None,
761                        },
762                    ],
763                    source: None,
764                    allowed_lints: vec![],
765                },
766                WordDef {
767                    name: "main".to_string(),
768                    effect: None,
769                    body: vec![Statement::WordCall {
770                        name: "test_dup".to_string(),
771                        span: None,
772                    }],
773                    source: None,
774                    allowed_lints: vec![],
775                },
776            ],
777        };
778
779        // No type info provided and no literal before dup
780        let ir = codegen
781            .codegen_program(&program, HashMap::new(), HashMap::new())
782            .unwrap();
783
784        // Extract just the test_dup function
785        let func_start = ir.find("define tailcc ptr @seq_test_dup").unwrap();
786        let func_end = ir[func_start..].find("\n}\n").unwrap() + func_start + 3;
787        let test_dup_fn = &ir[func_start..func_end];
788
789        // Without literal or type info, should call clone_value (safe path)
790        assert!(
791            test_dup_fn.contains("@patch_seq_clone_value"),
792            "Dup after word call should call clone_value, got:\n{}",
793            test_dup_fn
794        );
795    }
796
797    #[test]
798    fn test_roll_constant_optimization() {
799        // Test Issue #192: roll with constant N uses optimized inline code
800        // Pattern: `2 roll` should generate rot-like inline code
801        let mut codegen = CodeGen::new();
802
803        let program = Program {
804            includes: vec![],
805            unions: vec![],
806            words: vec![
807                WordDef {
808                    name: "test_roll".to_string(),
809                    effect: None,
810                    body: vec![
811                        Statement::IntLiteral(1),
812                        Statement::IntLiteral(2),
813                        Statement::IntLiteral(3),
814                        Statement::IntLiteral(2), // Constant N for roll
815                        Statement::WordCall {
816                            // 2 roll = rot
817                            name: "roll".to_string(),
818                            span: None,
819                        },
820                        Statement::WordCall {
821                            name: "drop".to_string(),
822                            span: None,
823                        },
824                        Statement::WordCall {
825                            name: "drop".to_string(),
826                            span: None,
827                        },
828                        Statement::WordCall {
829                            name: "drop".to_string(),
830                            span: None,
831                        },
832                    ],
833                    source: None,
834                    allowed_lints: vec![],
835                },
836                WordDef {
837                    name: "main".to_string(),
838                    effect: None,
839                    body: vec![Statement::WordCall {
840                        name: "test_roll".to_string(),
841                        span: None,
842                    }],
843                    source: None,
844                    allowed_lints: vec![],
845                },
846            ],
847        };
848
849        let ir = codegen
850            .codegen_program(&program, HashMap::new(), HashMap::new())
851            .unwrap();
852
853        // Extract just the test_roll function
854        let func_start = ir.find("define tailcc ptr @seq_test_roll").unwrap();
855        let func_end = ir[func_start..].find("\n}\n").unwrap() + func_start + 3;
856        let test_roll_fn = &ir[func_start..func_end];
857
858        // With constant N=2, should NOT do dynamic calculation
859        // Should NOT have dynamic add/sub for offset calculation
860        assert!(
861            !test_roll_fn.contains("= add i64 %"),
862            "Constant roll should use constant offset, not dynamic add, got:\n{}",
863            test_roll_fn
864        );
865
866        // Should NOT call memmove for small N (n=2 uses direct loads/stores)
867        assert!(
868            !test_roll_fn.contains("@llvm.memmove"),
869            "2 roll should not use memmove, got:\n{}",
870            test_roll_fn
871        );
872    }
873
874    #[test]
875    fn test_pick_constant_optimization() {
876        // Test Issue #192: pick with constant N uses constant offset
877        // Pattern: `1 pick` should generate code with constant -3 offset
878        let mut codegen = CodeGen::new();
879
880        let program = Program {
881            includes: vec![],
882            unions: vec![],
883            words: vec![
884                WordDef {
885                    name: "test_pick".to_string(),
886                    effect: None,
887                    body: vec![
888                        Statement::IntLiteral(10),
889                        Statement::IntLiteral(20),
890                        Statement::IntLiteral(1), // Constant N for pick
891                        Statement::WordCall {
892                            // 1 pick = over
893                            name: "pick".to_string(),
894                            span: None,
895                        },
896                        Statement::WordCall {
897                            name: "drop".to_string(),
898                            span: None,
899                        },
900                        Statement::WordCall {
901                            name: "drop".to_string(),
902                            span: None,
903                        },
904                        Statement::WordCall {
905                            name: "drop".to_string(),
906                            span: None,
907                        },
908                    ],
909                    source: None,
910                    allowed_lints: vec![],
911                },
912                WordDef {
913                    name: "main".to_string(),
914                    effect: None,
915                    body: vec![Statement::WordCall {
916                        name: "test_pick".to_string(),
917                        span: None,
918                    }],
919                    source: None,
920                    allowed_lints: vec![],
921                },
922            ],
923        };
924
925        let ir = codegen
926            .codegen_program(&program, HashMap::new(), HashMap::new())
927            .unwrap();
928
929        // Extract just the test_pick function
930        let func_start = ir.find("define tailcc ptr @seq_test_pick").unwrap();
931        let func_end = ir[func_start..].find("\n}\n").unwrap() + func_start + 3;
932        let test_pick_fn = &ir[func_start..func_end];
933
934        // With constant N=1, should use constant offset -3
935        // Should NOT have dynamic add/sub for offset calculation
936        assert!(
937            !test_pick_fn.contains("= add i64 %"),
938            "Constant pick should use constant offset, not dynamic add, got:\n{}",
939            test_pick_fn
940        );
941
942        // Should have the constant offset -3 in getelementptr
943        assert!(
944            test_pick_fn.contains("i64 -3"),
945            "1 pick should use offset -3 (-(1+2)), got:\n{}",
946            test_pick_fn
947        );
948    }
949
950    #[test]
951    fn test_small_word_marked_alwaysinline() {
952        // Test Issue #187: Small words get alwaysinline attribute
953        let mut codegen = CodeGen::new();
954
955        let program = Program {
956            includes: vec![],
957            unions: vec![],
958            words: vec![
959                WordDef {
960                    name: "double".to_string(), // Small word: dup i.+
961                    effect: None,
962                    body: vec![
963                        Statement::WordCall {
964                            name: "dup".to_string(),
965                            span: None,
966                        },
967                        Statement::WordCall {
968                            name: "i.+".to_string(),
969                            span: None,
970                        },
971                    ],
972                    source: None,
973                    allowed_lints: vec![],
974                },
975                WordDef {
976                    name: "main".to_string(),
977                    effect: None,
978                    body: vec![
979                        Statement::IntLiteral(21),
980                        Statement::WordCall {
981                            name: "double".to_string(),
982                            span: None,
983                        },
984                    ],
985                    source: None,
986                    allowed_lints: vec![],
987                },
988            ],
989        };
990
991        let ir = codegen
992            .codegen_program(&program, HashMap::new(), HashMap::new())
993            .unwrap();
994
995        // Small word 'double' should have alwaysinline attribute
996        assert!(
997            ir.contains("define tailcc ptr @seq_double(ptr %stack) alwaysinline"),
998            "Small word should have alwaysinline attribute, got:\n{}",
999            ir.lines()
1000                .filter(|l| l.contains("define"))
1001                .collect::<Vec<_>>()
1002                .join("\n")
1003        );
1004
1005        // main should NOT have alwaysinline (uses C calling convention)
1006        assert!(
1007            ir.contains("define ptr @seq_main(ptr %stack) {"),
1008            "main should not have alwaysinline, got:\n{}",
1009            ir.lines()
1010                .filter(|l| l.contains("define"))
1011                .collect::<Vec<_>>()
1012                .join("\n")
1013        );
1014    }
1015
1016    #[test]
1017    fn test_recursive_word_not_inlined() {
1018        // Test Issue #187: Recursive words should NOT get alwaysinline
1019        let mut codegen = CodeGen::new();
1020
1021        let program = Program {
1022            includes: vec![],
1023            unions: vec![],
1024            words: vec![
1025                WordDef {
1026                    name: "countdown".to_string(), // Recursive
1027                    effect: None,
1028                    body: vec![
1029                        Statement::WordCall {
1030                            name: "dup".to_string(),
1031                            span: None,
1032                        },
1033                        Statement::If {
1034                            then_branch: vec![
1035                                Statement::IntLiteral(1),
1036                                Statement::WordCall {
1037                                    name: "i.-".to_string(),
1038                                    span: None,
1039                                },
1040                                Statement::WordCall {
1041                                    name: "countdown".to_string(), // Recursive call
1042                                    span: None,
1043                                },
1044                            ],
1045                            else_branch: Some(vec![]),
1046                        },
1047                    ],
1048                    source: None,
1049                    allowed_lints: vec![],
1050                },
1051                WordDef {
1052                    name: "main".to_string(),
1053                    effect: None,
1054                    body: vec![
1055                        Statement::IntLiteral(5),
1056                        Statement::WordCall {
1057                            name: "countdown".to_string(),
1058                            span: None,
1059                        },
1060                    ],
1061                    source: None,
1062                    allowed_lints: vec![],
1063                },
1064            ],
1065        };
1066
1067        let ir = codegen
1068            .codegen_program(&program, HashMap::new(), HashMap::new())
1069            .unwrap();
1070
1071        // Recursive word should NOT have alwaysinline
1072        assert!(
1073            ir.contains("define tailcc ptr @seq_countdown(ptr %stack) {"),
1074            "Recursive word should NOT have alwaysinline, got:\n{}",
1075            ir.lines()
1076                .filter(|l| l.contains("define"))
1077                .collect::<Vec<_>>()
1078                .join("\n")
1079        );
1080    }
1081
1082    #[test]
1083    fn test_recursive_word_in_match_not_inlined() {
1084        // Test Issue #187: Recursive calls inside match arms should prevent inlining
1085        use crate::ast::{MatchArm, Pattern, UnionDef, UnionVariant};
1086
1087        let mut codegen = CodeGen::new();
1088
1089        let program = Program {
1090            includes: vec![],
1091            unions: vec![UnionDef {
1092                name: "Option".to_string(),
1093                variants: vec![
1094                    UnionVariant {
1095                        name: "Some".to_string(),
1096                        fields: vec![],
1097                        source: None,
1098                    },
1099                    UnionVariant {
1100                        name: "None".to_string(),
1101                        fields: vec![],
1102                        source: None,
1103                    },
1104                ],
1105                source: None,
1106            }],
1107            words: vec![
1108                WordDef {
1109                    name: "process".to_string(), // Recursive in match arm
1110                    effect: None,
1111                    body: vec![Statement::Match {
1112                        arms: vec![
1113                            MatchArm {
1114                                pattern: Pattern::Variant("Some".to_string()),
1115                                body: vec![Statement::WordCall {
1116                                    name: "process".to_string(), // Recursive call
1117                                    span: None,
1118                                }],
1119                            },
1120                            MatchArm {
1121                                pattern: Pattern::Variant("None".to_string()),
1122                                body: vec![],
1123                            },
1124                        ],
1125                    }],
1126                    source: None,
1127                    allowed_lints: vec![],
1128                },
1129                WordDef {
1130                    name: "main".to_string(),
1131                    effect: None,
1132                    body: vec![Statement::WordCall {
1133                        name: "process".to_string(),
1134                        span: None,
1135                    }],
1136                    source: None,
1137                    allowed_lints: vec![],
1138                },
1139            ],
1140        };
1141
1142        let ir = codegen
1143            .codegen_program(&program, HashMap::new(), HashMap::new())
1144            .unwrap();
1145
1146        // Recursive word (via match arm) should NOT have alwaysinline
1147        assert!(
1148            ir.contains("define tailcc ptr @seq_process(ptr %stack) {"),
1149            "Recursive word in match should NOT have alwaysinline, got:\n{}",
1150            ir.lines()
1151                .filter(|l| l.contains("define"))
1152                .collect::<Vec<_>>()
1153                .join("\n")
1154        );
1155    }
1156}