seqc/codegen/
state.rs

1//! CodeGen State and Core Types
2//!
3//! This module contains the CodeGen struct definition and core types
4//! used across the code generation modules.
5
6use crate::ast::UnionDef;
7use crate::ffi::FfiBindings;
8use crate::types::Type;
9use std::collections::HashMap;
10
/// Sentinel value for unreachable predecessors in phi nodes.
///
/// Used when a branch ends with a tail call (which emits `ret` directly).
/// `BranchResult::predecessor` holds either a real block label or this value.
pub(super) const UNREACHABLE_PREDECESSOR: &str = "unreachable";
14
/// Maximum number of values to keep in virtual registers (Issue #189).
///
/// Values beyond this limit are spilled to memory.
///
/// Tuned for common patterns:
/// - Binary ops need 2 values (`a b i.+`)
/// - Dup patterns need 3 values (`a dup i.* b i.+`)
/// - Complex expressions may use 4 (`a b i.+ c d i.* i.-`)
///
/// Larger values increase register pressure with diminishing returns,
/// as most operations trigger spills (control flow, function calls, etc.).
// NOTE(review): presumably the cap applied to `CodeGen::virtual_stack` — confirm at the push sites.
pub(super) const MAX_VIRTUAL_STACK: usize = 4;
26
/// Tracks whether a statement is in tail position.
///
/// A statement is in tail position when its result is directly returned
/// from the function without further processing. For tail calls, we can
/// use LLVM's `musttail` to guarantee tail call optimization.
///
/// The type is `Copy`, so it can be passed by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(super) enum TailPosition {
    /// This is the last operation before return - can use `musttail`
    Tail,
    /// More operations follow - use a regular call
    NonTail,
}
39
/// Result of generating code for an if-statement branch.
///
/// Bundles the branch's final stack variable, whether the branch ended in a
/// tail call, and the predecessor label to use in the phi node.
pub(super) struct BranchResult {
    /// The stack variable after executing the branch
    pub stack_var: String,
    /// Whether the branch emitted a tail call (and thus a `ret`)
    pub emitted_tail_call: bool,
    /// The predecessor block label for the phi node (or `UNREACHABLE_PREDECESSOR`)
    pub predecessor: String,
}
49
50/// Mangle a Seq word name into a valid LLVM IR identifier.
51///
52/// LLVM IR identifiers can contain: letters, digits, underscores, dollars, periods.
53/// Seq words can contain: letters, digits, hyphens, question marks, arrows, etc.
54///
55/// We escape special characters using underscore-based encoding:
56/// - `-` (hyphen) -> `_` (hyphens not valid in LLVM IR identifiers)
57/// - `?` -> `_Q_` (question)
58/// - `>` -> `_GT_` (greater than, for ->)
59/// - `<` -> `_LT_` (less than)
60/// - `!` -> `_BANG_`
61/// - `*` -> `_STAR_`
62/// - `/` -> `_SLASH_`
63/// - `+` -> `_PLUS_`
64/// - `=` -> `_EQ_`
65/// - `.` -> `_DOT_`
66pub(super) fn mangle_name(name: &str) -> String {
67    let mut result = String::new();
68    for c in name.chars() {
69        match c {
70            '?' => result.push_str("_Q_"),
71            '>' => result.push_str("_GT_"),
72            '<' => result.push_str("_LT_"),
73            '!' => result.push_str("_BANG_"),
74            '*' => result.push_str("_STAR_"),
75            '/' => result.push_str("_SLASH_"),
76            '+' => result.push_str("_PLUS_"),
77            '=' => result.push_str("_EQ_"),
78            // Hyphens converted to underscores (hyphens not valid in LLVM IR)
79            '-' => result.push('_'),
80            // Keep these as-is (valid in LLVM IR)
81            '_' | '.' | '$' => result.push(c),
82            // Alphanumeric kept as-is
83            c if c.is_alphanumeric() => result.push(c),
84            // Any other character gets hex-encoded
85            _ => result.push_str(&format!("_x{:02X}_", c as u32)),
86        }
87    }
88    result
89}
90
/// Result of generating a quotation: the wrapper and impl function names.
///
/// For closures, both names are the same (no TCO support yet).
pub(super) struct QuotationFunctions {
    /// C-convention wrapper function (for runtime calls)
    pub wrapper: String,
    /// tailcc implementation function (for TCO via `musttail`)
    pub impl_: String,
}
99
100/// A value held in an LLVM virtual register instead of memory (Issue #189).
101///
102/// This optimization keeps recently-pushed values in SSA variables,
103/// avoiding memory stores/loads for common patterns like `2 3 i.+`.
104/// Values are spilled to memory at control flow points and function calls.
105#[derive(Clone, Debug)]
106#[allow(dead_code)] // Float and Bool variants for Phase 2
107pub(super) enum VirtualValue {
108    /// Integer value in an SSA variable (i64)
109    Int {
110        ssa_var: String,
111        #[allow(dead_code)] // Used for constant folding in Phase 2
112        value: i64,
113    },
114    /// Float value in an SSA variable (double)
115    Float { ssa_var: String },
116    /// Boolean value in an SSA variable (i64: 0 or 1)
117    Bool { ssa_var: String },
118}
119
120#[allow(dead_code)] // ssa_var method used in spill_virtual_stack
121impl VirtualValue {
122    /// Get the SSA variable name
123    pub fn ssa_var(&self) -> &str {
124        match self {
125            VirtualValue::Int { ssa_var, .. } => ssa_var,
126            VirtualValue::Float { ssa_var } => ssa_var,
127            VirtualValue::Bool { ssa_var } => ssa_var,
128        }
129    }
130
131    /// Get the discriminant for this value type
132    pub fn discriminant(&self) -> i64 {
133        match self {
134            VirtualValue::Int { .. } => 0,
135            VirtualValue::Float { .. } => 1,
136            VirtualValue::Bool { .. } => 2,
137        }
138    }
139}
140
/// Code generator state used across the code generation modules.
///
/// Every field is `pub(super)`, so the modules under the parent module read
/// and mutate this state directly. Construct it with `CodeGen::new()`.
pub struct CodeGen {
    // NOTE(review): presumably the main buffer of generated output — confirm against the emitters.
    pub(super) output: String,
    // NOTE(review): presumably LLVM IR for string constant globals (cf. `symbol_globals`) — confirm.
    pub(super) string_globals: String,
    // NOTE(review): presumably a counter for unique temporary names — confirm.
    pub(super) temp_counter: usize,
    // NOTE(review): presumably a counter for unique string global names — confirm.
    pub(super) string_counter: usize,
    /// Counter for generating unique block labels
    pub(super) block_counter: usize,
    /// Counter for generating unique quotation function names
    pub(super) quot_counter: usize,
    /// Maps string content -> global name
    pub(super) string_constants: HashMap<String, String>,
    /// Accumulates generated quotation functions
    pub(super) quotation_functions: String,
    /// Maps quotation ID to inferred type (from typechecker)
    pub(super) type_map: HashMap<usize, Type>,
    /// Maps seq_name -> symbol (for external builtins)
    pub(super) external_builtins: HashMap<String, String>,
    /// Track if we're generating code inside a closure (disables TCO)
    pub(super) inside_closure: bool,
    /// Track if we're generating code for main (uses C convention, no musttail)
    pub(super) inside_main: bool,
    /// Track if we're generating code for a quotation (uses C convention, no musttail)
    pub(super) inside_quotation: bool,
    /// Union type definitions for pattern matching
    pub(super) unions: Vec<UnionDef>,
    /// FFI function bindings
    pub(super) ffi_bindings: FfiBindings,
    /// Generated FFI wrapper functions
    pub(super) ffi_wrapper_code: String,
    /// Pure inline test mode: bypasses scheduler, returns top of stack as exit code.
    /// Used for testing pure integer programs without FFI dependencies.
    pub(super) pure_inline_test: bool,
    // Symbol interning for O(1) equality (Issue #166)
    /// LLVM IR for static symbol globals
    pub(super) symbol_globals: String,
    /// Counter for unique symbol names
    pub(super) symbol_counter: usize,
    /// Maps symbol name -> global name (deduplication)
    pub(super) symbol_constants: HashMap<String, String>,
    /// Per-statement type info for optimization (Issue #186)
    /// Maps (word_name, statement_index) -> top-of-stack type before statement
    pub(super) statement_types: HashMap<(String, usize), Type>,
    /// Current word being compiled (for statement type lookup)
    pub(super) current_word_name: Option<String>,
    /// Current statement index within the word (for statement type lookup)
    pub(super) current_stmt_index: usize,
    /// Nesting depth for type lookup - only depth 0 can use type info
    /// Nested contexts (if/else, loops) increment this to disable lookups
    pub(super) codegen_depth: usize,
    /// True if the previous statement was a trivially-copyable literal (Issue #195)
    /// Used to optimize `dup` after literal push (e.g., `42 dup`)
    pub(super) prev_stmt_is_trivial_literal: bool,
    /// If previous statement was IntLiteral, stores its value (Issue #192)
    /// Used to optimize `roll`/`pick` with constant N (e.g., `2 roll` -> rot)
    pub(super) prev_stmt_int_value: Option<i64>,
    /// Virtual register stack for top N values (Issue #189)
    /// Values here are in SSA variables, not yet written to memory.
    /// The memory stack pointer tracks where memory ends; virtual values are "above" it.
    pub(super) virtual_stack: Vec<VirtualValue>,
}
186
/// `Default` delegates to [`CodeGen::new`], so both construct the same state.
impl Default for CodeGen {
    /// Returns the same value as `CodeGen::new()`.
    fn default() -> Self {
        Self::new()
    }
}
192
193impl CodeGen {
194    pub fn new() -> Self {
195        CodeGen {
196            output: String::new(),
197            string_globals: String::new(),
198            temp_counter: 0,
199            string_counter: 0,
200            block_counter: 0,
201            inside_closure: false,
202            inside_main: false,
203            inside_quotation: false,
204            quot_counter: 0,
205            string_constants: HashMap::new(),
206            quotation_functions: String::new(),
207            type_map: HashMap::new(),
208            external_builtins: HashMap::new(),
209            unions: Vec::new(),
210            ffi_bindings: FfiBindings::new(),
211            ffi_wrapper_code: String::new(),
212            pure_inline_test: false,
213            symbol_globals: String::new(),
214            symbol_counter: 0,
215            symbol_constants: HashMap::new(),
216            statement_types: HashMap::new(),
217            current_word_name: None,
218            current_stmt_index: 0,
219            codegen_depth: 0,
220            prev_stmt_is_trivial_literal: false,
221            prev_stmt_int_value: None,
222            virtual_stack: Vec::new(),
223        }
224    }
225
226    /// Create a CodeGen for pure inline testing.
227    /// Bypasses the scheduler, returning top of stack as exit code.
228    /// Only supports operations that are fully inlined (integers, arithmetic, stack ops).
229    #[allow(dead_code)]
230    pub fn new_pure_inline_test() -> Self {
231        let mut cg = Self::new();
232        cg.pure_inline_test = true;
233        cg
234    }
235}