Skip to main content

seqc/codegen/
state.rs

//! CodeGen State and Core Types
//!
//! This module contains the CodeGen struct definition and core types
//! used across the code generation modules.
5
6use crate::ast::UnionDef;
7use crate::ffi::FfiBindings;
8use crate::types::Type;
9use std::collections::HashMap;
10
11use super::specialization::SpecSignature;
12
/// Sentinel value for unreachable predecessors in phi nodes.
///
/// Used when a branch ends with a tail call (which emits `ret` directly).
/// `BranchResult::predecessor` holds this string in place of a real block
/// label in that case.
pub(super) const UNREACHABLE_PREDECESSOR: &str = "unreachable";
16
/// Maximum number of values to keep in virtual registers (Issue #189).
///
/// Values beyond this limit are spilled to memory.
///
/// Tuned for common patterns:
/// - Binary ops need 2 values (`a b i.+`)
/// - Dup patterns need 3 values (`a dup i.* b i.+`)
/// - Complex expressions may use 4 (`a b i.+ c d i.* i.-`)
///
/// Larger values increase register pressure with diminishing returns,
/// as most operations trigger spills (control flow, function calls, etc.).
pub(super) const MAX_VIRTUAL_STACK: usize = 4;
28
/// Tracks whether a statement is in tail position.
///
/// A statement is in tail position when its result is directly returned
/// from the function without further processing. For tail calls, we can
/// use LLVM's `musttail` to guarantee tail call optimization.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(super) enum TailPosition {
    /// This is the last operation before return, so the call can use `musttail`.
    Tail,
    /// More operations follow, so a regular call must be used.
    NonTail,
}
41
/// Result of generating code for an if-statement branch.
///
/// Carries the information needed to build the phi node that follows the
/// branch.
pub(super) struct BranchResult {
    /// The stack variable after executing the branch.
    pub stack_var: String,
    /// Whether the branch emitted a tail call (and thus a `ret`).
    pub emitted_tail_call: bool,
    /// The predecessor block label for the phi node, or
    /// `UNREACHABLE_PREDECESSOR` when the branch has no such predecessor.
    pub predecessor: String,
}
51
52/// Mangle a Seq word name into a valid LLVM IR identifier.
53///
54/// LLVM IR identifiers can contain: letters, digits, underscores, dollars, periods.
55/// Seq words can contain: letters, digits, hyphens, question marks, arrows, etc.
56///
57/// We escape special characters using underscore-based encoding:
58/// - `-` (hyphen) -> `_` (hyphens not valid in LLVM IR identifiers)
59/// - `?` -> `_Q_` (question)
60/// - `>` -> `_GT_` (greater than, for ->)
61/// - `<` -> `_LT_` (less than)
62/// - `!` -> `_BANG_`
63/// - `*` -> `_STAR_`
64/// - `/` -> `_SLASH_`
65/// - `+` -> `_PLUS_`
66/// - `=` -> `_EQ_`
67/// - `.` -> `_DOT_`
68pub(super) fn mangle_name(name: &str) -> String {
69    let mut result = String::new();
70    for c in name.chars() {
71        match c {
72            '?' => result.push_str("_Q_"),
73            '>' => result.push_str("_GT_"),
74            '<' => result.push_str("_LT_"),
75            '!' => result.push_str("_BANG_"),
76            '*' => result.push_str("_STAR_"),
77            '/' => result.push_str("_SLASH_"),
78            '+' => result.push_str("_PLUS_"),
79            '=' => result.push_str("_EQ_"),
80            // Hyphens converted to underscores (hyphens not valid in LLVM IR)
81            '-' => result.push('_'),
82            // Keep these as-is (valid in LLVM IR)
83            '_' | '.' | '$' => result.push(c),
84            // Alphanumeric kept as-is
85            c if c.is_alphanumeric() => result.push(c),
86            // Any other character gets hex-encoded
87            _ => result.push_str(&format!("_x{:02X}_", c as u32)),
88        }
89    }
90    result
91}
92
/// Result of generating a quotation: wrapper and impl function names.
///
/// For closures, both names are the same (no TCO support yet).
pub(super) struct QuotationFunctions {
    /// Name of the C-convention wrapper function (for runtime calls).
    pub wrapper: String,
    /// Name of the tailcc implementation function (for TCO via `musttail`).
    pub impl_: String,
}
101
/// Snapshot of the enclosing function's mutable codegen state while a nested
/// quotation or closure is being generated. Returned by
/// `enter_quotation_scope` and consumed by `exit_quotation_scope`, which
/// commits the nested IR to `quotation_functions` and restores these fields.
///
/// NOTE(review): the field-to-`CodeGen` mapping below is inferred from the
/// matching names and types; confirm against `enter_quotation_scope`.
pub(super) struct QuotationScope {
    /// Saved output buffer (presumably `CodeGen::output`).
    pub output: String,
    /// Saved virtual register stack (presumably `CodeGen::virtual_stack`).
    pub virtual_stack: Vec<VirtualValue>,
    /// Saved word name (presumably `CodeGen::current_word_name`).
    pub word_name: Option<String>,
    /// Saved aux slot names (presumably `CodeGen::current_aux_slots`).
    pub aux_slots: Vec<String>,
    /// Saved aux slot index (presumably `CodeGen::current_aux_sp`).
    pub aux_sp: usize,
}
113
/// A value held in an LLVM virtual register instead of memory (Issue #189).
///
/// This optimization keeps recently-pushed values in SSA variables,
/// avoiding memory stores/loads for common patterns like `2 3 i.+`.
/// Values are spilled to memory at control flow points and function calls.
#[derive(Clone, Debug)]
pub(super) enum VirtualValue {
    /// Integer value in an SSA variable (i64).
    Int {
        /// Name of the SSA variable holding the integer.
        ssa_var: String,
        /// The integer value carried alongside the SSA variable.
        #[allow(dead_code)] // Used for constant folding in Phase 2
        value: i64,
    },
    /// Float value in an SSA variable (double).
    Float { ssa_var: String },
    /// Boolean value in an SSA variable (i64: 0 or 1).
    Bool { ssa_var: String },
}
132
/// State shared by the code generation modules.
///
/// Holds the output buffers, the counters used to mint unique names, the
/// analysis results handed over by the typechecker, and the per-word /
/// per-statement context consulted while emitting LLVM IR.
pub struct CodeGen {
    /// IR text buffer for the code currently being generated.
    /// NOTE(review): presumably the current function body, since it is saved
    /// and restored around nested quotations via `QuotationScope`; confirm.
    pub(super) output: String,
    /// Presumably LLVM IR for string constant globals (by analogy with
    /// `symbol_globals`) - TODO confirm.
    pub(super) string_globals: String,
    /// Presumably a counter for unique temporary names - TODO confirm.
    pub(super) temp_counter: usize,
    /// Presumably a counter for unique string global names - TODO confirm.
    pub(super) string_counter: usize,
    /// For generating unique block labels.
    pub(super) block_counter: usize,
    /// For generating unique quotation function names.
    pub(super) quot_counter: usize,
    /// string content -> global name.
    pub(super) string_constants: HashMap<String, String>,
    /// Accumulates generated quotation functions.
    pub(super) quotation_functions: String,
    /// Maps quotation ID to inferred type (from typechecker).
    pub(super) type_map: HashMap<usize, Type>,
    /// seq_name -> symbol (for external builtins).
    pub(super) external_builtins: HashMap<String, String>,
    /// Track if we're generating code inside a closure (disables TCO).
    pub(super) inside_closure: bool,
    /// Track if we're generating code for main (uses C convention, no musttail).
    pub(super) inside_main: bool,
    /// Track if we're generating code for a quotation (uses C convention, no musttail).
    pub(super) inside_quotation: bool,
    /// Union type definitions for pattern matching.
    pub(super) unions: Vec<UnionDef>,
    /// FFI function bindings.
    pub(super) ffi_bindings: FfiBindings,
    /// Generated FFI wrapper functions.
    pub(super) ffi_wrapper_code: String,
    /// Pure inline test mode: bypasses scheduler, returns top of stack as exit code.
    /// Used for testing pure integer programs without FFI dependencies.
    pub(super) pure_inline_test: bool,
    // Symbol interning for O(1) equality (Issue #166)
    /// LLVM IR for static symbol globals.
    pub(super) symbol_globals: String,
    /// Counter for unique symbol names.
    pub(super) symbol_counter: usize,
    /// symbol name -> global name (deduplication).
    pub(super) symbol_constants: HashMap<String, String>,
    /// Per-statement type info for optimization (Issue #186)
    /// Maps (word_name, statement_index) -> top-of-stack type before statement
    pub(super) statement_types: HashMap<(String, usize), Type>,
    /// Resolved arithmetic sugar: maps (line, column) -> concrete op name
    /// E.g., `+` at line 5, column 3 -> `"i.+"` if typechecker resolved it for Int operands
    pub(super) resolved_sugar: HashMap<(usize, usize), String>,
    /// Current word being compiled (for statement type lookup)
    pub(super) current_word_name: Option<String>,
    /// Current statement index within the word (for statement type lookup)
    pub(super) current_stmt_index: usize,
    /// Nesting depth for type lookup - only depth 0 can use type info
    /// Nested contexts (if/else, loops) increment this to disable lookups
    pub(super) codegen_depth: usize,
    /// True if the previous statement was a trivially-copyable literal (Issue #195)
    /// Used to optimize `dup` after literal push (e.g., `42 dup`)
    pub(super) prev_stmt_is_trivial_literal: bool,
    /// If previous statement was IntLiteral, stores its value (Issue #192)
    /// Used to optimize `roll`/`pick` with constant N (e.g., `2 roll` -> rot)
    pub(super) prev_stmt_int_value: Option<i64>,
    /// Virtual register stack for top N values (Issue #189)
    /// Values here are in SSA variables, not yet written to memory.
    /// The memory stack pointer tracks where memory ends; virtual values are "above" it.
    pub(super) virtual_stack: Vec<VirtualValue>,
    /// Specialized word signatures for register-based codegen
    /// Maps word name -> specialized signature
    pub(super) specialized_words: HashMap<String, SpecSignature>,
    /// Per-word aux stack slot counts from typechecker (Issue #350)
    /// Maps word_name -> number of %Value allocas needed
    pub(super) aux_slot_counts: HashMap<String, usize>,
    /// Per-quotation aux stack slot counts from typechecker (Issue #393)
    /// Maps quotation_id -> number of %Value allocas needed for that quotation
    pub(super) quotation_aux_slot_counts: HashMap<usize, usize>,
    /// LLVM alloca names for current word's aux slots (Issue #350)
    pub(super) current_aux_slots: Vec<String>,
    /// Compile-time index into aux slots (Issue #350)
    pub(super) current_aux_sp: usize,
    /// Whether to emit per-word atomic call counters (--instrument)
    pub(super) instrument: bool,
    /// True if the user's `main` word has effect `( -- Int )`.
    /// Determines whether `seq_main` writes the top-of-stack int to the
    /// global exit code before freeing the stack. (Issue #355)
    pub(super) main_returns_int: bool,
    /// Maps word name -> sequential ID for instrumentation counters
    pub(super) word_instrument_ids: HashMap<String, usize>,
}
202
203impl Default for CodeGen {
204    fn default() -> Self {
205        Self::new()
206    }
207}
208
209impl CodeGen {
210    pub fn new() -> Self {
211        CodeGen {
212            output: String::new(),
213            string_globals: String::new(),
214            temp_counter: 0,
215            string_counter: 0,
216            block_counter: 0,
217            inside_closure: false,
218            inside_main: false,
219            inside_quotation: false,
220            quot_counter: 0,
221            string_constants: HashMap::new(),
222            quotation_functions: String::new(),
223            type_map: HashMap::new(),
224            external_builtins: HashMap::new(),
225            unions: Vec::new(),
226            ffi_bindings: FfiBindings::new(),
227            ffi_wrapper_code: String::new(),
228            pure_inline_test: false,
229            symbol_globals: String::new(),
230            symbol_counter: 0,
231            symbol_constants: HashMap::new(),
232            statement_types: HashMap::new(),
233            resolved_sugar: HashMap::new(),
234            current_word_name: None,
235            current_stmt_index: 0,
236            codegen_depth: 0,
237            prev_stmt_is_trivial_literal: false,
238            prev_stmt_int_value: None,
239            virtual_stack: Vec::new(),
240            specialized_words: HashMap::new(),
241            aux_slot_counts: HashMap::new(),
242            quotation_aux_slot_counts: HashMap::new(),
243            current_aux_slots: Vec::new(),
244            current_aux_sp: 0,
245            instrument: false,
246            word_instrument_ids: HashMap::new(),
247            main_returns_int: false,
248        }
249    }
250
251    /// Create a CodeGen for pure inline testing.
252    /// Bypasses the scheduler, returning top of stack as exit code.
253    /// Only supports operations that are fully inlined (integers, arithmetic, stack ops).
254    pub fn new_pure_inline_test() -> Self {
255        let mut cg = Self::new();
256        cg.pure_inline_test = true;
257        cg
258    }
259
260    /// Set per-word aux stack slot counts from typechecker (Issue #350)
261    pub fn set_aux_slot_counts(&mut self, counts: HashMap<String, usize>) {
262        self.aux_slot_counts = counts;
263    }
264
265    /// Set per-quotation aux stack slot counts from typechecker (Issue #393)
266    pub fn set_quotation_aux_slot_counts(&mut self, counts: HashMap<usize, usize>) {
267        self.quotation_aux_slot_counts = counts;
268    }
269
270    /// Set resolved arithmetic sugar mappings from the typechecker
271    pub fn set_resolved_sugar(&mut self, sugar: HashMap<(usize, usize), String>) {
272        self.resolved_sugar = sugar;
273    }
274
275    /// Look up the resolved name for an arithmetic sugar op by source location
276    pub(super) fn resolve_sugar_at(&self, line: usize, column: usize) -> Option<&str> {
277        self.resolved_sugar.get(&(line, column)).map(|s| s.as_str())
278    }
279}