seqc/codegen/state.rs
1//! CodeGen State and Core Types
2//!
3//! This module contains the CodeGen struct definition and core types
4//! used across the code generation modules.
5
6use crate::ast::UnionDef;
7use crate::ffi::FfiBindings;
8use crate::types::Type;
9use std::collections::HashMap;
10
11use super::specialization::SpecSignature;
12
13/// Sentinel value for unreachable predecessors in phi nodes.
14/// Used when a branch ends with a tail call (which emits ret directly).
15pub(super) const UNREACHABLE_PREDECESSOR: &str = "unreachable";
16
17/// Maximum number of values to keep in virtual registers (Issue #189).
18/// Values beyond this are spilled to memory.
19///
20/// Tuned for common patterns:
21/// - Binary ops need 2 values (`a b i.+`)
22/// - Dup patterns need 3 values (`a dup i.* b i.+`)
23/// - Complex expressions may use 4 (`a b i.+ c d i.* i.-`)
24///
25/// Larger values increase register pressure with diminishing returns,
26/// as most operations trigger spills (control flow, function calls, etc.).
27pub(super) const MAX_VIRTUAL_STACK: usize = 4;
28
29/// Tracks whether a statement is in tail position.
30///
31/// A statement is in tail position when its result is directly returned
32/// from the function without further processing. For tail calls, we can
33/// use LLVM's `musttail` to guarantee tail call optimization.
34#[derive(Debug, Clone, Copy, PartialEq, Eq)]
35pub(super) enum TailPosition {
36 /// This is the last operation before return - can use musttail
37 Tail,
38 /// More operations follow - use regular call
39 NonTail,
40}
41
42/// Result of generating code for an if-statement branch.
43pub(super) struct BranchResult {
44 /// The stack variable after executing the branch
45 pub stack_var: String,
46 /// Whether the branch emitted a tail call (and thus a ret)
47 pub emitted_tail_call: bool,
48 /// The predecessor block label for the phi node (or UNREACHABLE_PREDECESSOR)
49 pub predecessor: String,
50}
51
52/// Mangle a Seq word name into a valid LLVM IR identifier.
53///
54/// LLVM IR identifiers can contain: letters, digits, underscores, dollars, periods.
55/// Seq words can contain: letters, digits, hyphens, question marks, arrows, etc.
56///
57/// We escape special characters using underscore-based encoding:
58/// - `-` (hyphen) -> `_` (hyphens not valid in LLVM IR identifiers)
59/// - `?` -> `_Q_` (question)
60/// - `>` -> `_GT_` (greater than, for ->)
61/// - `<` -> `_LT_` (less than)
62/// - `!` -> `_BANG_`
63/// - `*` -> `_STAR_`
64/// - `/` -> `_SLASH_`
65/// - `+` -> `_PLUS_`
66/// - `=` -> `_EQ_`
67/// - `.` -> `_DOT_`
68pub(super) fn mangle_name(name: &str) -> String {
69 let mut result = String::new();
70 for c in name.chars() {
71 match c {
72 '?' => result.push_str("_Q_"),
73 '>' => result.push_str("_GT_"),
74 '<' => result.push_str("_LT_"),
75 '!' => result.push_str("_BANG_"),
76 '*' => result.push_str("_STAR_"),
77 '/' => result.push_str("_SLASH_"),
78 '+' => result.push_str("_PLUS_"),
79 '=' => result.push_str("_EQ_"),
80 // Hyphens converted to underscores (hyphens not valid in LLVM IR)
81 '-' => result.push('_'),
82 // Keep these as-is (valid in LLVM IR)
83 '_' | '.' | '$' => result.push(c),
84 // Alphanumeric kept as-is
85 c if c.is_alphanumeric() => result.push(c),
86 // Any other character gets hex-encoded
87 _ => result.push_str(&format!("_x{:02X}_", c as u32)),
88 }
89 }
90 result
91}
92
93/// Result of generating a quotation: wrapper and impl function names
94/// For closures, both names are the same (no TCO support yet)
95pub(super) struct QuotationFunctions {
96 /// C-convention wrapper function (for runtime calls)
97 pub wrapper: String,
98 /// tailcc implementation function (for TCO via musttail)
99 pub impl_: String,
100}
101
102/// A value held in an LLVM virtual register instead of memory (Issue #189).
103///
104/// This optimization keeps recently-pushed values in SSA variables,
105/// avoiding memory stores/loads for common patterns like `2 3 i.+`.
106/// Values are spilled to memory at control flow points and function calls.
107#[derive(Clone, Debug)]
108#[allow(dead_code)] // Float and Bool variants for Phase 2
109pub(super) enum VirtualValue {
110 /// Integer value in an SSA variable (i64)
111 Int {
112 ssa_var: String,
113 #[allow(dead_code)] // Used for constant folding in Phase 2
114 value: i64,
115 },
116 /// Float value in an SSA variable (double)
117 Float { ssa_var: String },
118 /// Boolean value in an SSA variable (i64: 0 or 1)
119 Bool { ssa_var: String },
120}
121
122#[allow(dead_code)] // ssa_var method used in spill_virtual_stack
123impl VirtualValue {
124 /// Get the SSA variable name
125 pub fn ssa_var(&self) -> &str {
126 match self {
127 VirtualValue::Int { ssa_var, .. } => ssa_var,
128 VirtualValue::Float { ssa_var } => ssa_var,
129 VirtualValue::Bool { ssa_var } => ssa_var,
130 }
131 }
132
133 /// Get the discriminant for this value type
134 pub fn discriminant(&self) -> i64 {
135 match self {
136 VirtualValue::Int { .. } => 0,
137 VirtualValue::Float { .. } => 1,
138 VirtualValue::Bool { .. } => 2,
139 }
140 }
141}
142
143pub struct CodeGen {
144 pub(super) output: String,
145 pub(super) string_globals: String,
146 pub(super) temp_counter: usize,
147 pub(super) string_counter: usize,
148 pub(super) block_counter: usize, // For generating unique block labels
149 pub(super) quot_counter: usize, // For generating unique quotation function names
150 pub(super) string_constants: HashMap<String, String>, // string content -> global name
151 pub(super) quotation_functions: String, // Accumulates generated quotation functions
152 pub(super) type_map: HashMap<usize, Type>, // Maps quotation ID to inferred type (from typechecker)
153 pub(super) external_builtins: HashMap<String, String>, // seq_name -> symbol (for external builtins)
154 pub(super) inside_closure: bool, // Track if we're generating code inside a closure (disables TCO)
155 pub(super) inside_main: bool, // Track if we're generating code for main (uses C convention, no musttail)
156 pub(super) inside_quotation: bool, // Track if we're generating code for a quotation (uses C convention, no musttail)
157 pub(super) unions: Vec<UnionDef>, // Union type definitions for pattern matching
158 pub(super) ffi_bindings: FfiBindings, // FFI function bindings
159 pub(super) ffi_wrapper_code: String, // Generated FFI wrapper functions
160 /// Pure inline test mode: bypasses scheduler, returns top of stack as exit code.
161 /// Used for testing pure integer programs without FFI dependencies.
162 pub(super) pure_inline_test: bool,
163 // Symbol interning for O(1) equality (Issue #166)
164 pub(super) symbol_globals: String, // LLVM IR for static symbol globals
165 pub(super) symbol_counter: usize, // Counter for unique symbol names
166 pub(super) symbol_constants: HashMap<String, String>, // symbol name -> global name (deduplication)
167 /// Per-statement type info for optimization (Issue #186)
168 /// Maps (word_name, statement_index) -> top-of-stack type before statement
169 pub(super) statement_types: HashMap<(String, usize), Type>,
170 /// Current word being compiled (for statement type lookup)
171 pub(super) current_word_name: Option<String>,
172 /// Current statement index within the word (for statement type lookup)
173 pub(super) current_stmt_index: usize,
174 /// Nesting depth for type lookup - only depth 0 can use type info
175 /// Nested contexts (if/else, loops) increment this to disable lookups
176 pub(super) codegen_depth: usize,
177 /// True if the previous statement was a trivially-copyable literal (Issue #195)
178 /// Used to optimize `dup` after literal push (e.g., `42 dup`)
179 pub(super) prev_stmt_is_trivial_literal: bool,
180 /// If previous statement was IntLiteral, stores its value (Issue #192)
181 /// Used to optimize `roll`/`pick` with constant N (e.g., `2 roll` -> rot)
182 pub(super) prev_stmt_int_value: Option<i64>,
183 /// Virtual register stack for top N values (Issue #189)
184 /// Values here are in SSA variables, not yet written to memory.
185 /// The memory stack pointer tracks where memory ends; virtual values are "above" it.
186 pub(super) virtual_stack: Vec<VirtualValue>,
187 /// Specialized word signatures for register-based codegen
188 /// Maps word name -> specialized signature
189 pub(super) specialized_words: HashMap<String, SpecSignature>,
190}
191
192impl Default for CodeGen {
193 fn default() -> Self {
194 Self::new()
195 }
196}
197
198impl CodeGen {
199 pub fn new() -> Self {
200 CodeGen {
201 output: String::new(),
202 string_globals: String::new(),
203 temp_counter: 0,
204 string_counter: 0,
205 block_counter: 0,
206 inside_closure: false,
207 inside_main: false,
208 inside_quotation: false,
209 quot_counter: 0,
210 string_constants: HashMap::new(),
211 quotation_functions: String::new(),
212 type_map: HashMap::new(),
213 external_builtins: HashMap::new(),
214 unions: Vec::new(),
215 ffi_bindings: FfiBindings::new(),
216 ffi_wrapper_code: String::new(),
217 pure_inline_test: false,
218 symbol_globals: String::new(),
219 symbol_counter: 0,
220 symbol_constants: HashMap::new(),
221 statement_types: HashMap::new(),
222 current_word_name: None,
223 current_stmt_index: 0,
224 codegen_depth: 0,
225 prev_stmt_is_trivial_literal: false,
226 prev_stmt_int_value: None,
227 virtual_stack: Vec::new(),
228 specialized_words: HashMap::new(),
229 }
230 }
231
232 /// Create a CodeGen for pure inline testing.
233 /// Bypasses the scheduler, returning top of stack as exit code.
234 /// Only supports operations that are fully inlined (integers, arithmetic, stack ops).
235 #[allow(dead_code)]
236 pub fn new_pure_inline_test() -> Self {
237 let mut cg = Self::new();
238 cg.pure_inline_test = true;
239 cg
240 }
241}