// seqc/codegen/state.rs

//! CodeGen State and Core Types
//!
//! This module contains the `CodeGen` struct definition and the core types
//! shared across the code generation modules.
5
6use crate::ast::UnionDef;
7use crate::ffi::FfiBindings;
8use crate::types::Type;
9use std::collections::HashMap;
10use std::path::PathBuf;
11
12use super::specialization::SpecSignature;
13
/// Sentinel value for unreachable predecessors in phi nodes.
///
/// Used when a branch ends with a tail call (which emits `ret` directly),
/// so control never flows from that branch into the merge block. It is the
/// alternative value documented for `BranchResult::predecessor`.
pub(super) const UNREACHABLE_PREDECESSOR: &str = "unreachable";
17
/// Maximum number of values to keep in virtual registers (Issue #189).
///
/// Values beyond this limit are spilled to memory.
///
/// Tuned for common patterns:
/// - Binary ops need 2 values (`a b i.+`)
/// - Dup patterns need 3 values (`a dup i.* b i.+`)
/// - Complex expressions may use 4 (`a b i.+ c d i.* i.-`)
///
/// Larger values increase register pressure with diminishing returns,
/// as most operations trigger spills (control flow, function calls, etc.).
pub(super) const MAX_VIRTUAL_STACK: usize = 4;
29
/// Tracks whether a statement is in tail position.
///
/// A statement is in tail position when its result is directly returned
/// from the function without further processing. For tail calls, we can
/// use LLVM's `musttail` to guarantee tail call optimization.
///
/// The type is a two-state flag (`Copy`, comparable with `==`) so it can be
/// passed by value through the code generation routines.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(super) enum TailPosition {
    /// This is the last operation before return - can use `musttail`.
    Tail,
    /// More operations follow - use a regular call.
    NonTail,
}
42
/// Result of generating code for an if-statement branch.
///
/// Carries what the caller needs in order to build the phi node that merges
/// the branches back together.
pub(super) struct BranchResult {
    /// The stack variable after executing the branch.
    // NOTE(review): presumably the name of an LLVM SSA value — confirm at the
    // construction sites.
    pub stack_var: String,
    /// Whether the branch emitted a tail call (and thus a `ret`).
    pub emitted_tail_call: bool,
    /// The predecessor block label for the phi node, or
    /// `UNREACHABLE_PREDECESSOR` when the branch never reaches the merge.
    pub predecessor: String,
}
52
53/// Mangle a Seq word name into a valid LLVM IR identifier.
54///
55/// LLVM IR identifiers can contain: letters, digits, underscores, dollars, periods.
56/// Seq words can contain: letters, digits, hyphens, question marks, arrows, etc.
57///
58/// We escape special characters using underscore-based encoding:
59/// - `-` (hyphen) -> `_` (hyphens not valid in LLVM IR identifiers)
60/// - `?` -> `_Q_` (question)
61/// - `>` -> `_GT_` (greater than, for ->)
62/// - `<` -> `_LT_` (less than)
63/// - `!` -> `_BANG_`
64/// - `*` -> `_STAR_`
65/// - `/` -> `_SLASH_`
66/// - `+` -> `_PLUS_`
67/// - `=` -> `_EQ_`
68/// - `.` -> `_DOT_`
69pub(super) fn mangle_name(name: &str) -> String {
70    let mut result = String::new();
71    for c in name.chars() {
72        match c {
73            '?' => result.push_str("_Q_"),
74            '>' => result.push_str("_GT_"),
75            '<' => result.push_str("_LT_"),
76            '!' => result.push_str("_BANG_"),
77            '*' => result.push_str("_STAR_"),
78            '/' => result.push_str("_SLASH_"),
79            '+' => result.push_str("_PLUS_"),
80            '=' => result.push_str("_EQ_"),
81            // Hyphens converted to underscores (hyphens not valid in LLVM IR)
82            '-' => result.push('_'),
83            // Keep these as-is (valid in LLVM IR)
84            '_' | '.' | '$' => result.push(c),
85            // Alphanumeric kept as-is
86            c if c.is_alphanumeric() => result.push(c),
87            // Any other character gets hex-encoded
88            _ => result.push_str(&format!("_x{:02X}_", c as u32)),
89        }
90    }
91    result
92}
93
/// Result of generating a quotation: the names of its wrapper and impl
/// functions.
///
/// For closures, both names are the same (no TCO support yet).
pub(super) struct QuotationFunctions {
    /// C-convention wrapper function (for runtime calls).
    pub wrapper: String,
    /// `tailcc` implementation function (for TCO via `musttail`).
    /// The trailing underscore is needed because `impl` is a Rust keyword.
    pub impl_: String,
}
102
/// Snapshot of the enclosing function's mutable codegen state while a nested
/// quotation or closure is being generated.
///
/// Returned by `enter_quotation_scope` and consumed by
/// `exit_quotation_scope`, which commits the nested IR to
/// `quotation_functions` and restores these fields.
// NOTE(review): each field presumably mirrors the like-named `CodeGen` field
// (`word_name` -> `current_word_name`, `aux_slots` -> `current_aux_slots`,
// `aux_sp` -> `current_aux_sp`); confirm against `enter_quotation_scope`.
pub(super) struct QuotationScope {
    /// Saved IR output buffer of the enclosing function.
    pub output: String,
    /// Saved virtual register stack of the enclosing function.
    pub virtual_stack: Vec<VirtualValue>,
    /// Saved name of the word being compiled, if any.
    pub word_name: Option<String>,
    /// Saved aux stack slot names of the enclosing function.
    pub aux_slots: Vec<String>,
    /// Saved compile-time index into the aux slots.
    pub aux_sp: usize,
    /// Snapshot of the enclosing word's DISubprogram. Cleared while a
    /// quotation body is being emitted (the quotation lives in its own
    /// LLVM function with no subprogram, so any `!dbg` attached inside
    /// would be unverifiable), and restored when the scope exits.
    pub dbg_subprogram_id: Option<usize>,
}
119
/// A value held in an LLVM virtual register instead of memory (Issue #189).
///
/// This optimization keeps recently-pushed values in SSA variables,
/// avoiding memory stores/loads for common patterns like `2 3 i.+`.
/// Values are spilled to memory at control flow points and function calls.
///
/// Every variant carries `ssa_var`, the name of the SSA variable holding the
/// value; the variant itself records the value's type.
#[derive(Clone, Debug)]
pub(super) enum VirtualValue {
    /// Integer value in an SSA variable (i64).
    Int {
        ssa_var: String,
        #[allow(dead_code)] // Used for constant folding in Phase 2
        value: i64,
    },
    /// Float value in an SSA variable (double).
    Float { ssa_var: String },
    /// Boolean value in an SSA variable (i64: 0 or 1).
    Bool { ssa_var: String },
}
138
/// Mutable state threaded through code generation.
///
/// Holds the text buffers that accumulate generated IR, the counters used to
/// mint unique names, the tables handed over by the typechecker, and the
/// per-word / per-statement context consulted by the optimizations noted on
/// the individual fields. Construct it with [`CodeGen::new`].
pub struct CodeGen {
    // NOTE(review): the first four fields carry no comment in the original
    // source; the descriptions below are inferred from their names and types
    // and should be confirmed against their uses.
    /// Presumably the IR text of the function currently being emitted.
    pub(super) output: String,
    /// Presumably the IR text of the string constant globals.
    pub(super) string_globals: String,
    /// Presumably a counter for unique temporary names.
    pub(super) temp_counter: usize,
    /// Presumably a counter for unique string global names.
    pub(super) string_counter: usize,
    /// For generating unique block labels.
    pub(super) block_counter: usize,
    /// For generating unique quotation function names.
    pub(super) quot_counter: usize,
    /// String content -> global name.
    pub(super) string_constants: HashMap<String, String>,
    /// Accumulates generated quotation functions.
    pub(super) quotation_functions: String,
    /// Maps quotation ID to inferred type (from typechecker).
    pub(super) type_map: HashMap<usize, Type>,
    /// seq_name -> symbol (for external builtins).
    pub(super) external_builtins: HashMap<String, String>,
    /// Track if we're generating code inside a closure (disables TCO).
    pub(super) inside_closure: bool,
    /// Track if we're generating code for main (uses C convention, no
    /// `musttail`).
    pub(super) inside_main: bool,
    /// Track if we're generating code for a quotation (uses C convention, no
    /// `musttail`).
    pub(super) inside_quotation: bool,
    /// Union type definitions for pattern matching.
    pub(super) unions: Vec<UnionDef>,
    /// FFI function bindings.
    pub(super) ffi_bindings: FfiBindings,
    /// Generated FFI wrapper functions.
    pub(super) ffi_wrapper_code: String,
    /// Pure inline test mode: bypasses scheduler, returns top of stack as exit code.
    /// Used for testing pure integer programs without FFI dependencies.
    pub(super) pure_inline_test: bool,
    // Symbol interning for O(1) equality (Issue #166)
    /// LLVM IR for static symbol globals.
    pub(super) symbol_globals: String,
    /// Counter for unique symbol names.
    pub(super) symbol_counter: usize,
    /// Symbol name -> global name (deduplication).
    pub(super) symbol_constants: HashMap<String, String>,
    /// Per-statement type info for optimization (Issue #186)
    /// Maps (word_name, statement_index) -> top-of-stack type before statement
    pub(super) statement_types: HashMap<(String, usize), Type>,
    /// Resolved arithmetic sugar: maps (line, column) -> concrete op name
    /// E.g., `+` at line 5, column 3 -> `"i.+"` if typechecker resolved it for Int operands
    pub(super) resolved_sugar: HashMap<(usize, usize), String>,
    /// Current word being compiled (for statement type lookup)
    pub(super) current_word_name: Option<String>,
    /// Current statement index within the word (for statement type lookup)
    pub(super) current_stmt_index: usize,
    /// Nesting depth for type lookup - only depth 0 can use type info
    /// Nested contexts (if/else, loops) increment this to disable lookups
    pub(super) codegen_depth: usize,
    /// True if the previous statement was a trivially-copyable literal (Issue #195)
    /// Used to optimize `dup` after literal push (e.g., `42 dup`)
    pub(super) prev_stmt_is_trivial_literal: bool,
    /// If previous statement was IntLiteral, stores its value (Issue #192)
    /// Used to optimize `roll`/`pick` with constant N (e.g., `2 roll` -> rot)
    pub(super) prev_stmt_int_value: Option<i64>,
    /// Virtual register stack for top N values (Issue #189)
    /// Values here are in SSA variables, not yet written to memory.
    /// The memory stack pointer tracks where memory ends; virtual values are "above" it.
    pub(super) virtual_stack: Vec<VirtualValue>,
    /// Specialized word signatures for register-based codegen
    /// Maps word name -> specialized signature
    pub(super) specialized_words: HashMap<String, SpecSignature>,
    /// Per-word aux stack slot counts from typechecker (Issue #350)
    /// Maps word_name -> number of %Value allocas needed
    pub(super) aux_slot_counts: HashMap<String, usize>,
    /// Per-quotation aux stack slot counts from typechecker (Issue #393)
    /// Maps quotation_id -> number of %Value allocas needed for that quotation
    pub(super) quotation_aux_slot_counts: HashMap<usize, usize>,
    /// LLVM alloca names for current word's aux slots (Issue #350)
    pub(super) current_aux_slots: Vec<String>,
    /// Compile-time index into aux slots (Issue #350)
    pub(super) current_aux_sp: usize,
    /// Whether to emit per-word atomic call counters (--instrument)
    pub(super) instrument: bool,
    /// True if the user's `main` word has effect `( -- Int )`.
    /// Determines whether `seq_main` writes the top-of-stack int to the
    /// global exit code before freeing the stack. (Issue #355)
    pub(super) main_returns_int: bool,
    /// Maps word name -> sequential ID for instrumentation counters
    pub(super) word_instrument_ids: HashMap<String, usize>,
    // -------------------------------------------------------------------
    // Debug info (DWARF) — see codegen/debug_info.rs.
    //
    // When enabled, emits LLVM `!DICompileUnit`, `!DIFile`, `!DISubprogram`,
    // and per-call `!DILocation` metadata so panics in the runtime resolve
    // back to .seq source lines via the standard Rust backtrace path.
    // Zero runtime cost — pure metadata. The clang invocation must pass
    // `-g` to preserve these into the final binary's DWARF section.
    // -------------------------------------------------------------------
    /// Source file the program was compiled from (for DIFile). When
    /// `None`, debug info is disabled.
    pub(super) dbg_source: Option<PathBuf>,
    /// Accumulated DWARF metadata definitions (`!N = !DI...`). Appended to
    /// the end of the IR file alongside the module flags.
    pub(super) dbg_metadata: String,
    /// Counter for unique metadata IDs. Started at 1000 to leave headroom
    /// for any future module-level metadata that may want lower ids.
    pub(super) dbg_md_counter: usize,
    /// ID of the per-program `!DICompileUnit`. Set during program prologue
    /// when debug info is enabled.
    pub(super) dbg_cu_id: Option<usize>,
    /// ID of the shared `!DIFile` for the source file.
    pub(super) dbg_file_id: Option<usize>,
    /// ID of the shared `!DISubroutineType` reused by every subprogram —
    /// our generated functions all have the same opaque ptr-in/ptr-out
    /// signature from a debugger's point of view.
    pub(super) dbg_subroutine_type_id: Option<usize>,
    /// `!DISubprogram` ID for the function currently being emitted, if any.
    /// Call sites use this as the scope for their `!DILocation` records.
    pub(super) current_dbg_subprogram_id: Option<usize>,
    /// IDs of the two `!llvm.module.flags` records ("Dwarf Version",
    /// "Debug Info Version"). Allocated through `dbg_alloc_id` at program
    /// init so they never collide with later subprogram/location records.
    pub(super) dbg_module_flag_ids: Option<(usize, usize)>,
}
242
243impl Default for CodeGen {
244    fn default() -> Self {
245        Self::new()
246    }
247}
248
249impl CodeGen {
250    pub fn new() -> Self {
251        CodeGen {
252            output: String::new(),
253            string_globals: String::new(),
254            temp_counter: 0,
255            string_counter: 0,
256            block_counter: 0,
257            inside_closure: false,
258            inside_main: false,
259            inside_quotation: false,
260            quot_counter: 0,
261            string_constants: HashMap::new(),
262            quotation_functions: String::new(),
263            type_map: HashMap::new(),
264            external_builtins: HashMap::new(),
265            unions: Vec::new(),
266            ffi_bindings: FfiBindings::new(),
267            ffi_wrapper_code: String::new(),
268            pure_inline_test: false,
269            symbol_globals: String::new(),
270            symbol_counter: 0,
271            symbol_constants: HashMap::new(),
272            statement_types: HashMap::new(),
273            resolved_sugar: HashMap::new(),
274            current_word_name: None,
275            current_stmt_index: 0,
276            codegen_depth: 0,
277            prev_stmt_is_trivial_literal: false,
278            prev_stmt_int_value: None,
279            virtual_stack: Vec::new(),
280            specialized_words: HashMap::new(),
281            aux_slot_counts: HashMap::new(),
282            quotation_aux_slot_counts: HashMap::new(),
283            current_aux_slots: Vec::new(),
284            current_aux_sp: 0,
285            instrument: false,
286            word_instrument_ids: HashMap::new(),
287            main_returns_int: false,
288            dbg_source: None,
289            dbg_metadata: String::new(),
290            dbg_md_counter: 1000,
291            dbg_cu_id: None,
292            dbg_file_id: None,
293            dbg_subroutine_type_id: None,
294            current_dbg_subprogram_id: None,
295            dbg_module_flag_ids: None,
296        }
297    }
298
299    /// Enable DWARF debug info generation, anchored at the given source file.
300    ///
301    /// Must be called before `codegen_program*`. With debug info enabled,
302    /// every user-defined word gets a `!DISubprogram` and every call site
303    /// with a span gets a `!DILocation` — so a runtime panic backtrace
304    /// resolves the Seq frame to `.seq:line:col`. Zero runtime overhead.
305    pub fn set_source_file(&mut self, path: PathBuf) {
306        self.dbg_source = Some(path);
307    }
308
309    /// Create a CodeGen for pure inline testing.
310    /// Bypasses the scheduler, returning top of stack as exit code.
311    /// Only supports operations that are fully inlined (integers, arithmetic, stack ops).
312    pub fn new_pure_inline_test() -> Self {
313        let mut cg = Self::new();
314        cg.pure_inline_test = true;
315        cg
316    }
317
318    /// Set per-word aux stack slot counts from typechecker (Issue #350)
319    pub fn set_aux_slot_counts(&mut self, counts: HashMap<String, usize>) {
320        self.aux_slot_counts = counts;
321    }
322
323    /// Set per-quotation aux stack slot counts from typechecker (Issue #393)
324    pub fn set_quotation_aux_slot_counts(&mut self, counts: HashMap<usize, usize>) {
325        self.quotation_aux_slot_counts = counts;
326    }
327
328    /// Set resolved arithmetic sugar mappings from the typechecker
329    pub fn set_resolved_sugar(&mut self, sugar: HashMap<(usize, usize), String>) {
330        self.resolved_sugar = sugar;
331    }
332
333    /// Look up the resolved name for an arithmetic sugar op by source location
334    pub(super) fn resolve_sugar_at(&self, line: usize, column: usize) -> Option<&str> {
335        self.resolved_sugar.get(&(line, column)).map(|s| s.as_str())
336    }
337}