seqc/codegen/state.rs
1//! CodeGen State and Core Types
2//!
3//! This module contains the CodeGen struct definition and core types
4//! used across the code generation modules.
5
6use crate::ast::UnionDef;
7use crate::ffi::FfiBindings;
8use crate::types::Type;
9use std::collections::HashMap;
10use std::path::PathBuf;
11
12use super::specialization::SpecSignature;
13
14/// Sentinel value for unreachable predecessors in phi nodes.
15/// Used when a branch ends with a tail call (which emits ret directly).
16pub(super) const UNREACHABLE_PREDECESSOR: &str = "unreachable";
17
18/// Maximum number of values to keep in virtual registers (Issue #189).
19/// Values beyond this are spilled to memory.
20///
21/// Tuned for common patterns:
22/// - Binary ops need 2 values (`a b i.+`)
23/// - Dup patterns need 3 values (`a dup i.* b i.+`)
24/// - Complex expressions may use 4 (`a b i.+ c d i.* i.-`)
25///
26/// Larger values increase register pressure with diminishing returns,
27/// as most operations trigger spills (control flow, function calls, etc.).
28pub(super) const MAX_VIRTUAL_STACK: usize = 4;
29
30/// Tracks whether a statement is in tail position.
31///
32/// A statement is in tail position when its result is directly returned
33/// from the function without further processing. For tail calls, we can
34/// use LLVM's `musttail` to guarantee tail call optimization.
35#[derive(Debug, Clone, Copy, PartialEq, Eq)]
36pub(super) enum TailPosition {
37 /// This is the last operation before return - can use musttail
38 Tail,
39 /// More operations follow - use regular call
40 NonTail,
41}
42
43/// Result of generating code for an if-statement branch.
44pub(super) struct BranchResult {
45 /// The stack variable after executing the branch
46 pub stack_var: String,
47 /// Whether the branch emitted a tail call (and thus a ret)
48 pub emitted_tail_call: bool,
49 /// The predecessor block label for the phi node (or UNREACHABLE_PREDECESSOR)
50 pub predecessor: String,
51}
52
53/// Mangle a Seq word name into a valid LLVM IR identifier.
54///
55/// LLVM IR identifiers can contain: letters, digits, underscores, dollars, periods.
56/// Seq words can contain: letters, digits, hyphens, question marks, arrows, etc.
57///
58/// We escape special characters using underscore-based encoding:
59/// - `-` (hyphen) -> `_` (hyphens not valid in LLVM IR identifiers)
60/// - `?` -> `_Q_` (question)
61/// - `>` -> `_GT_` (greater than, for ->)
62/// - `<` -> `_LT_` (less than)
63/// - `!` -> `_BANG_`
64/// - `*` -> `_STAR_`
65/// - `/` -> `_SLASH_`
66/// - `+` -> `_PLUS_`
67/// - `=` -> `_EQ_`
68/// - `.` -> `_DOT_`
69pub(super) fn mangle_name(name: &str) -> String {
70 let mut result = String::new();
71 for c in name.chars() {
72 match c {
73 '?' => result.push_str("_Q_"),
74 '>' => result.push_str("_GT_"),
75 '<' => result.push_str("_LT_"),
76 '!' => result.push_str("_BANG_"),
77 '*' => result.push_str("_STAR_"),
78 '/' => result.push_str("_SLASH_"),
79 '+' => result.push_str("_PLUS_"),
80 '=' => result.push_str("_EQ_"),
81 // Hyphens converted to underscores (hyphens not valid in LLVM IR)
82 '-' => result.push('_'),
83 // Keep these as-is (valid in LLVM IR)
84 '_' | '.' | '$' => result.push(c),
85 // Alphanumeric kept as-is
86 c if c.is_alphanumeric() => result.push(c),
87 // Any other character gets hex-encoded
88 _ => result.push_str(&format!("_x{:02X}_", c as u32)),
89 }
90 }
91 result
92}
93
94/// Result of generating a quotation: wrapper and impl function names
95/// For closures, both names are the same (no TCO support yet)
96pub(super) struct QuotationFunctions {
97 /// C-convention wrapper function (for runtime calls)
98 pub wrapper: String,
99 /// tailcc implementation function (for TCO via musttail)
100 pub impl_: String,
101}
102
103/// Snapshot of the enclosing function's mutable codegen state while a nested
104/// quotation or closure is being generated. Returned by
105/// `enter_quotation_scope` and consumed by `exit_quotation_scope`, which
106/// commits the nested IR to `quotation_functions` and restores these fields.
107pub(super) struct QuotationScope {
108 pub output: String,
109 pub virtual_stack: Vec<VirtualValue>,
110 pub word_name: Option<String>,
111 pub aux_slots: Vec<String>,
112 pub aux_sp: usize,
113 /// Snapshot of the enclosing word's DISubprogram. Cleared while a
114 /// quotation body is being emitted (the quotation lives in its own
115 /// LLVM function with no subprogram, so any `!dbg` attached inside
116 /// would be unverifiable), and restored when the scope exits.
117 pub dbg_subprogram_id: Option<usize>,
118}
119
120/// A value held in an LLVM virtual register instead of memory (Issue #189).
121///
122/// This optimization keeps recently-pushed values in SSA variables,
123/// avoiding memory stores/loads for common patterns like `2 3 i.+`.
124/// Values are spilled to memory at control flow points and function calls.
125#[derive(Clone, Debug)]
126pub(super) enum VirtualValue {
127 /// Integer value in an SSA variable (i64)
128 Int {
129 ssa_var: String,
130 #[allow(dead_code)] // Used for constant folding in Phase 2
131 value: i64,
132 },
133 /// Float value in an SSA variable (double)
134 Float { ssa_var: String },
135 /// Boolean value in an SSA variable (i64: 0 or 1)
136 Bool { ssa_var: String },
137}
138
139pub struct CodeGen {
140 pub(super) output: String,
141 pub(super) string_globals: String,
142 pub(super) temp_counter: usize,
143 pub(super) string_counter: usize,
144 pub(super) block_counter: usize, // For generating unique block labels
145 pub(super) quot_counter: usize, // For generating unique quotation function names
146 pub(super) string_constants: HashMap<String, String>, // string content -> global name
147 pub(super) quotation_functions: String, // Accumulates generated quotation functions
148 pub(super) type_map: HashMap<usize, Type>, // Maps quotation ID to inferred type (from typechecker)
149 pub(super) external_builtins: HashMap<String, String>, // seq_name -> symbol (for external builtins)
150 pub(super) inside_closure: bool, // Track if we're generating code inside a closure (disables TCO)
151 pub(super) inside_main: bool, // Track if we're generating code for main (uses C convention, no musttail)
152 pub(super) inside_quotation: bool, // Track if we're generating code for a quotation (uses C convention, no musttail)
153 pub(super) unions: Vec<UnionDef>, // Union type definitions for pattern matching
154 pub(super) ffi_bindings: FfiBindings, // FFI function bindings
155 pub(super) ffi_wrapper_code: String, // Generated FFI wrapper functions
156 /// Pure inline test mode: bypasses scheduler, returns top of stack as exit code.
157 /// Used for testing pure integer programs without FFI dependencies.
158 pub(super) pure_inline_test: bool,
159 // Symbol interning for O(1) equality (Issue #166)
160 pub(super) symbol_globals: String, // LLVM IR for static symbol globals
161 pub(super) symbol_counter: usize, // Counter for unique symbol names
162 pub(super) symbol_constants: HashMap<String, String>, // symbol name -> global name (deduplication)
163 /// Per-statement type info for optimization (Issue #186)
164 /// Maps (word_name, statement_index) -> top-of-stack type before statement
165 pub(super) statement_types: HashMap<(String, usize), Type>,
166 /// Resolved arithmetic sugar: maps (line, column) -> concrete op name
167 /// E.g., `+` at line 5, column 3 -> `"i.+"` if typechecker resolved it for Int operands
168 pub(super) resolved_sugar: HashMap<(usize, usize), String>,
169 /// Current word being compiled (for statement type lookup)
170 pub(super) current_word_name: Option<String>,
171 /// Current statement index within the word (for statement type lookup)
172 pub(super) current_stmt_index: usize,
173 /// Nesting depth for type lookup - only depth 0 can use type info
174 /// Nested contexts (if/else, loops) increment this to disable lookups
175 pub(super) codegen_depth: usize,
176 /// True if the previous statement was a trivially-copyable literal (Issue #195)
177 /// Used to optimize `dup` after literal push (e.g., `42 dup`)
178 pub(super) prev_stmt_is_trivial_literal: bool,
179 /// If previous statement was IntLiteral, stores its value (Issue #192)
180 /// Used to optimize `roll`/`pick` with constant N (e.g., `2 roll` -> rot)
181 pub(super) prev_stmt_int_value: Option<i64>,
182 /// Virtual register stack for top N values (Issue #189)
183 /// Values here are in SSA variables, not yet written to memory.
184 /// The memory stack pointer tracks where memory ends; virtual values are "above" it.
185 pub(super) virtual_stack: Vec<VirtualValue>,
186 /// Specialized word signatures for register-based codegen
187 /// Maps word name -> specialized signature
188 pub(super) specialized_words: HashMap<String, SpecSignature>,
189 /// Per-word aux stack slot counts from typechecker (Issue #350)
190 /// Maps word_name -> number of %Value allocas needed
191 pub(super) aux_slot_counts: HashMap<String, usize>,
192 /// Per-quotation aux stack slot counts from typechecker (Issue #393)
193 /// Maps quotation_id -> number of %Value allocas needed for that quotation
194 pub(super) quotation_aux_slot_counts: HashMap<usize, usize>,
195 /// LLVM alloca names for current word's aux slots (Issue #350)
196 pub(super) current_aux_slots: Vec<String>,
197 /// Compile-time index into aux slots (Issue #350)
198 pub(super) current_aux_sp: usize,
199 /// Whether to emit per-word atomic call counters (--instrument)
200 pub(super) instrument: bool,
201 /// True if the user's `main` word has effect `( -- Int )`.
202 /// Determines whether `seq_main` writes the top-of-stack int to the
203 /// global exit code before freeing the stack. (Issue #355)
204 pub(super) main_returns_int: bool,
205 /// Maps word name -> sequential ID for instrumentation counters
206 pub(super) word_instrument_ids: HashMap<String, usize>,
207 // -------------------------------------------------------------------
208 // Debug info (DWARF) — see codegen/debug_info.rs.
209 //
210 // When enabled, emits LLVM `!DICompileUnit`, `!DIFile`, `!DISubprogram`,
211 // and per-call `!DILocation` metadata so panics in the runtime resolve
212 // back to .seq source lines via the standard Rust backtrace path.
213 // Zero runtime cost — pure metadata. The clang invocation must pass
214 // `-g` to preserve these into the final binary's DWARF section.
215 // -------------------------------------------------------------------
216 /// Source file the program was compiled from (for DIFile). When
217 /// `None`, debug info is disabled.
218 pub(super) dbg_source: Option<PathBuf>,
219 /// Accumulated DWARF metadata definitions (`!N = !DI...`). Appended to
220 /// the end of the IR file alongside the module flags.
221 pub(super) dbg_metadata: String,
222 /// Counter for unique metadata IDs. Started at 1000 to leave headroom
223 /// for any future module-level metadata that may want lower ids.
224 pub(super) dbg_md_counter: usize,
225 /// ID of the per-program `!DICompileUnit`. Set during program prologue
226 /// when debug info is enabled.
227 pub(super) dbg_cu_id: Option<usize>,
228 /// ID of the shared `!DIFile` for the source file.
229 pub(super) dbg_file_id: Option<usize>,
230 /// ID of the shared `!DISubroutineType` reused by every subprogram —
231 /// our generated functions all have the same opaque ptr-in/ptr-out
232 /// signature from a debugger's point of view.
233 pub(super) dbg_subroutine_type_id: Option<usize>,
234 /// `!DISubprogram` ID for the function currently being emitted, if any.
235 /// Call sites use this as the scope for their `!DILocation` records.
236 pub(super) current_dbg_subprogram_id: Option<usize>,
237 /// IDs of the two `!llvm.module.flags` records ("Dwarf Version",
238 /// "Debug Info Version"). Allocated through `dbg_alloc_id` at program
239 /// init so they never collide with later subprogram/location records.
240 pub(super) dbg_module_flag_ids: Option<(usize, usize)>,
241}
242
243impl Default for CodeGen {
244 fn default() -> Self {
245 Self::new()
246 }
247}
248
249impl CodeGen {
250 pub fn new() -> Self {
251 CodeGen {
252 output: String::new(),
253 string_globals: String::new(),
254 temp_counter: 0,
255 string_counter: 0,
256 block_counter: 0,
257 inside_closure: false,
258 inside_main: false,
259 inside_quotation: false,
260 quot_counter: 0,
261 string_constants: HashMap::new(),
262 quotation_functions: String::new(),
263 type_map: HashMap::new(),
264 external_builtins: HashMap::new(),
265 unions: Vec::new(),
266 ffi_bindings: FfiBindings::new(),
267 ffi_wrapper_code: String::new(),
268 pure_inline_test: false,
269 symbol_globals: String::new(),
270 symbol_counter: 0,
271 symbol_constants: HashMap::new(),
272 statement_types: HashMap::new(),
273 resolved_sugar: HashMap::new(),
274 current_word_name: None,
275 current_stmt_index: 0,
276 codegen_depth: 0,
277 prev_stmt_is_trivial_literal: false,
278 prev_stmt_int_value: None,
279 virtual_stack: Vec::new(),
280 specialized_words: HashMap::new(),
281 aux_slot_counts: HashMap::new(),
282 quotation_aux_slot_counts: HashMap::new(),
283 current_aux_slots: Vec::new(),
284 current_aux_sp: 0,
285 instrument: false,
286 word_instrument_ids: HashMap::new(),
287 main_returns_int: false,
288 dbg_source: None,
289 dbg_metadata: String::new(),
290 dbg_md_counter: 1000,
291 dbg_cu_id: None,
292 dbg_file_id: None,
293 dbg_subroutine_type_id: None,
294 current_dbg_subprogram_id: None,
295 dbg_module_flag_ids: None,
296 }
297 }
298
299 /// Enable DWARF debug info generation, anchored at the given source file.
300 ///
301 /// Must be called before `codegen_program*`. With debug info enabled,
302 /// every user-defined word gets a `!DISubprogram` and every call site
303 /// with a span gets a `!DILocation` — so a runtime panic backtrace
304 /// resolves the Seq frame to `.seq:line:col`. Zero runtime overhead.
305 pub fn set_source_file(&mut self, path: PathBuf) {
306 self.dbg_source = Some(path);
307 }
308
309 /// Create a CodeGen for pure inline testing.
310 /// Bypasses the scheduler, returning top of stack as exit code.
311 /// Only supports operations that are fully inlined (integers, arithmetic, stack ops).
312 pub fn new_pure_inline_test() -> Self {
313 let mut cg = Self::new();
314 cg.pure_inline_test = true;
315 cg
316 }
317
318 /// Set per-word aux stack slot counts from typechecker (Issue #350)
319 pub fn set_aux_slot_counts(&mut self, counts: HashMap<String, usize>) {
320 self.aux_slot_counts = counts;
321 }
322
323 /// Set per-quotation aux stack slot counts from typechecker (Issue #393)
324 pub fn set_quotation_aux_slot_counts(&mut self, counts: HashMap<usize, usize>) {
325 self.quotation_aux_slot_counts = counts;
326 }
327
328 /// Set resolved arithmetic sugar mappings from the typechecker
329 pub fn set_resolved_sugar(&mut self, sugar: HashMap<(usize, usize), String>) {
330 self.resolved_sugar = sugar;
331 }
332
333 /// Look up the resolved name for an arithmetic sugar op by source location
334 pub(super) fn resolve_sugar_at(&self, line: usize, column: usize) -> Option<&str> {
335 self.resolved_sugar.get(&(line, column)).map(|s| s.as_str())
336 }
337}