seqc/codegen/state.rs
1//! CodeGen State and Core Types
2//!
3//! This module contains the CodeGen struct definition and core types
4//! used across the code generation modules.
5
6use crate::ast::UnionDef;
7use crate::ffi::FfiBindings;
8use crate::types::Type;
9use std::collections::HashMap;
10
/// Sentinel predecessor label for phi nodes whose incoming edge is unreachable.
///
/// Used when a branch ends with a tail call: the tail call emits `ret`
/// directly, so that branch has no real predecessor block to name in the phi.
/// `BranchResult::predecessor` holds either a real block label or this value.
pub(super) const UNREACHABLE_PREDECESSOR: &str = "unreachable";
14
/// Maximum number of values to keep in virtual registers (Issue #189).
///
/// Values beyond this limit are spilled to memory.
///
/// The limit is tuned for common patterns:
/// - Binary ops need 2 values (`a b i.+`)
/// - Dup patterns need 3 values (`a dup i.* b i.+`)
/// - Complex expressions may use 4 (`a b i.+ c d i.* i.-`)
///
/// Larger values increase register pressure with diminishing returns,
/// because most operations trigger spills anyway (control flow, function
/// calls, etc.).
pub(super) const MAX_VIRTUAL_STACK: usize = 4;
26
/// Tracks whether a statement is in tail position.
///
/// A statement is in tail position when its result is directly returned
/// from the function without further processing. For tail calls in that
/// position, LLVM's `musttail` can be used to guarantee tail call
/// optimization.
///
/// The type is a plain two-state marker: it is `Copy` and comparable with `==`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(super) enum TailPosition {
    /// The statement is the last operation before return - can use `musttail`.
    Tail,
    /// More operations follow the statement - use a regular call.
    NonTail,
}
39
/// Result of generating code for one branch of an if-statement.
///
/// Bundles what is needed to join the branch back together with its sibling
/// in a phi node.
pub(super) struct BranchResult {
    /// The stack variable after executing the branch.
    pub stack_var: String,
    /// Whether the branch emitted a tail call (and thus a `ret`).
    pub emitted_tail_call: bool,
    /// The predecessor block label for the phi node, or
    /// `UNREACHABLE_PREDECESSOR`.
    // NOTE(review): presumably the sentinel is used exactly when
    // `emitted_tail_call` is true - confirm against the branch codegen.
    pub predecessor: String,
}
49
50/// Mangle a Seq word name into a valid LLVM IR identifier.
51///
52/// LLVM IR identifiers can contain: letters, digits, underscores, dollars, periods.
53/// Seq words can contain: letters, digits, hyphens, question marks, arrows, etc.
54///
55/// We escape special characters using underscore-based encoding:
56/// - `-` (hyphen) -> `_` (hyphens not valid in LLVM IR identifiers)
57/// - `?` -> `_Q_` (question)
58/// - `>` -> `_GT_` (greater than, for ->)
59/// - `<` -> `_LT_` (less than)
60/// - `!` -> `_BANG_`
61/// - `*` -> `_STAR_`
62/// - `/` -> `_SLASH_`
63/// - `+` -> `_PLUS_`
64/// - `=` -> `_EQ_`
65/// - `.` -> `_DOT_`
66pub(super) fn mangle_name(name: &str) -> String {
67 let mut result = String::new();
68 for c in name.chars() {
69 match c {
70 '?' => result.push_str("_Q_"),
71 '>' => result.push_str("_GT_"),
72 '<' => result.push_str("_LT_"),
73 '!' => result.push_str("_BANG_"),
74 '*' => result.push_str("_STAR_"),
75 '/' => result.push_str("_SLASH_"),
76 '+' => result.push_str("_PLUS_"),
77 '=' => result.push_str("_EQ_"),
78 // Hyphens converted to underscores (hyphens not valid in LLVM IR)
79 '-' => result.push('_'),
80 // Keep these as-is (valid in LLVM IR)
81 '_' | '.' | '$' => result.push(c),
82 // Alphanumeric kept as-is
83 c if c.is_alphanumeric() => result.push(c),
84 // Any other character gets hex-encoded
85 _ => result.push_str(&format!("_x{:02X}_", c as u32)),
86 }
87 }
88 result
89}
90
/// Result of generating a quotation: the names of its wrapper and
/// implementation functions.
///
/// For closures, both names are the same (no TCO support yet).
pub(super) struct QuotationFunctions {
    /// Name of the C-convention wrapper function (for runtime calls).
    pub wrapper: String,
    /// Name of the `tailcc` implementation function (for TCO via `musttail`).
    pub impl_: String,
}
99
100/// A value held in an LLVM virtual register instead of memory (Issue #189).
101///
102/// This optimization keeps recently-pushed values in SSA variables,
103/// avoiding memory stores/loads for common patterns like `2 3 i.+`.
104/// Values are spilled to memory at control flow points and function calls.
105#[derive(Clone, Debug)]
106#[allow(dead_code)] // Float and Bool variants for Phase 2
107pub(super) enum VirtualValue {
108 /// Integer value in an SSA variable (i64)
109 Int {
110 ssa_var: String,
111 #[allow(dead_code)] // Used for constant folding in Phase 2
112 value: i64,
113 },
114 /// Float value in an SSA variable (double)
115 Float { ssa_var: String },
116 /// Boolean value in an SSA variable (i64: 0 or 1)
117 Bool { ssa_var: String },
118}
119
120#[allow(dead_code)] // ssa_var method used in spill_virtual_stack
121impl VirtualValue {
122 /// Get the SSA variable name
123 pub fn ssa_var(&self) -> &str {
124 match self {
125 VirtualValue::Int { ssa_var, .. } => ssa_var,
126 VirtualValue::Float { ssa_var } => ssa_var,
127 VirtualValue::Bool { ssa_var } => ssa_var,
128 }
129 }
130
131 /// Get the discriminant for this value type
132 pub fn discriminant(&self) -> i64 {
133 match self {
134 VirtualValue::Int { .. } => 0,
135 VirtualValue::Float { .. } => 1,
136 VirtualValue::Bool { .. } => 2,
137 }
138 }
139}
140
/// Code generator state shared by the code generation modules.
///
/// Holds the text buffers being accumulated, the counters used to mint unique
/// names, lookup tables supplied by earlier compiler phases, and the flags
/// that describe the context currently being compiled.
pub struct CodeGen {
    // NOTE(review): presumably the main body of generated LLVM IR - confirm
    // against the emitters that append to it.
    pub(super) output: String,
    // NOTE(review): presumably LLVM IR for string constant globals (by
    // analogy with `symbol_globals`) - confirm.
    pub(super) string_globals: String,
    // NOTE(review): presumably the counter for fresh temporary names - confirm.
    pub(super) temp_counter: usize,
    // NOTE(review): presumably the counter for unique string global names
    // (by analogy with `symbol_counter`) - confirm.
    pub(super) string_counter: usize,
    /// For generating unique block labels
    pub(super) block_counter: usize, // For generating unique block labels
    /// For generating unique quotation function names
    pub(super) quot_counter: usize, // For generating unique quotation function names
    /// String content -> global name
    pub(super) string_constants: HashMap<String, String>, // string content -> global name
    /// Accumulates generated quotation functions
    pub(super) quotation_functions: String, // Accumulates generated quotation functions
    /// Maps quotation ID to inferred type (from typechecker)
    pub(super) type_map: HashMap<usize, Type>, // Maps quotation ID to inferred type (from typechecker)
    /// Seq name -> symbol (for external builtins)
    pub(super) external_builtins: HashMap<String, String>, // seq_name -> symbol (for external builtins)
    /// True while generating code inside a closure (disables TCO)
    pub(super) inside_closure: bool, // Track if we're generating code inside a closure (disables TCO)
    /// True while generating code for main (uses C convention, no musttail)
    pub(super) inside_main: bool, // Track if we're generating code for main (uses C convention, no musttail)
    /// True while generating code for a quotation (uses C convention, no musttail)
    pub(super) inside_quotation: bool, // Track if we're generating code for a quotation (uses C convention, no musttail)
    /// Union type definitions for pattern matching
    pub(super) unions: Vec<UnionDef>, // Union type definitions for pattern matching
    /// FFI function bindings
    pub(super) ffi_bindings: FfiBindings, // FFI function bindings
    /// Generated FFI wrapper functions
    pub(super) ffi_wrapper_code: String, // Generated FFI wrapper functions
    /// Pure inline test mode: bypasses scheduler, returns top of stack as exit code.
    /// Used for testing pure integer programs without FFI dependencies.
    pub(super) pure_inline_test: bool,
    // Symbol interning for O(1) equality (Issue #166)
    /// LLVM IR for static symbol globals
    pub(super) symbol_globals: String, // LLVM IR for static symbol globals
    /// Counter for unique symbol names
    pub(super) symbol_counter: usize, // Counter for unique symbol names
    /// Symbol name -> global name (deduplication)
    pub(super) symbol_constants: HashMap<String, String>, // symbol name -> global name (deduplication)
    /// Per-statement type info for optimization (Issue #186)
    /// Maps (word_name, statement_index) -> top-of-stack type before statement
    pub(super) statement_types: HashMap<(String, usize), Type>,
    /// Current word being compiled (for statement type lookup)
    pub(super) current_word_name: Option<String>,
    /// Current statement index within the word (for statement type lookup)
    pub(super) current_stmt_index: usize,
    /// Nesting depth for type lookup - only depth 0 can use type info
    /// Nested contexts (if/else, loops) increment this to disable lookups
    pub(super) codegen_depth: usize,
    /// True if the previous statement was a trivially-copyable literal (Issue #195)
    /// Used to optimize `dup` after literal push (e.g., `42 dup`)
    pub(super) prev_stmt_is_trivial_literal: bool,
    /// If previous statement was IntLiteral, stores its value (Issue #192)
    /// Used to optimize `roll`/`pick` with constant N (e.g., `2 roll` -> rot)
    pub(super) prev_stmt_int_value: Option<i64>,
    /// Virtual register stack for top N values (Issue #189)
    /// Values here are in SSA variables, not yet written to memory.
    /// The memory stack pointer tracks where memory ends; virtual values are "above" it.
    pub(super) virtual_stack: Vec<VirtualValue>,
}
186
187impl Default for CodeGen {
188 fn default() -> Self {
189 Self::new()
190 }
191}
192
193impl CodeGen {
194 pub fn new() -> Self {
195 CodeGen {
196 output: String::new(),
197 string_globals: String::new(),
198 temp_counter: 0,
199 string_counter: 0,
200 block_counter: 0,
201 inside_closure: false,
202 inside_main: false,
203 inside_quotation: false,
204 quot_counter: 0,
205 string_constants: HashMap::new(),
206 quotation_functions: String::new(),
207 type_map: HashMap::new(),
208 external_builtins: HashMap::new(),
209 unions: Vec::new(),
210 ffi_bindings: FfiBindings::new(),
211 ffi_wrapper_code: String::new(),
212 pure_inline_test: false,
213 symbol_globals: String::new(),
214 symbol_counter: 0,
215 symbol_constants: HashMap::new(),
216 statement_types: HashMap::new(),
217 current_word_name: None,
218 current_stmt_index: 0,
219 codegen_depth: 0,
220 prev_stmt_is_trivial_literal: false,
221 prev_stmt_int_value: None,
222 virtual_stack: Vec::new(),
223 }
224 }
225
226 /// Create a CodeGen for pure inline testing.
227 /// Bypasses the scheduler, returning top of stack as exit code.
228 /// Only supports operations that are fully inlined (integers, arithmetic, stack ops).
229 #[allow(dead_code)]
230 pub fn new_pure_inline_test() -> Self {
231 let mut cg = Self::new();
232 cg.pure_inline_test = true;
233 cg
234 }
235}