seqc/
codegen.rs

1//! LLVM IR Code Generation via Text
2//!
3//! Generates LLVM IR as text (.ll files) and invokes clang to produce executables.
4//! This approach is simpler and more portable than using FFI bindings (inkwell).
5//!
6//! # Code Generation Strategy
7//!
8//! Stack is threaded through all operations as a pointer:
9//! 1. Start with null stack pointer
10//! 2. Each operation takes stack, returns new stack
11//! 3. Final stack is ignored (should be null for well-typed programs)
12//!
13//! # Runtime Function Declarations
14//!
//! All runtime functions are declared (not defined) in the generated IR:
//! - `declare ptr @patch_seq_<name>(ptr)` for stack operations
//! - `declare ptr @patch_seq_push_int(ptr, i64)` for literals
18//! - Stack type is represented as `ptr` (opaque pointer in modern LLVM)
19
20use crate::ast::{Program, Statement, WordDef};
21use crate::types::Type;
22use std::collections::HashMap;
23use std::fmt::Write as _;
24
/// Mangle a Seq word name into a valid LLVM IR identifier.
///
/// Seq words can contain letters, digits, hyphens, question marks, arrows, etc.
/// The result only ever contains ASCII letters, ASCII digits, `_`, `.` and `$`.
///
/// Special characters are escaped using an underscore-based encoding:
/// - `-` (hyphen) -> `_`
/// - `?` -> `_Q_` (question)
/// - `>` -> `_GT_` (greater than, for ->)
/// - `<` -> `_LT_` (less than)
/// - `!` -> `_BANG_`
/// - `*` -> `_STAR_`
/// - `/` -> `_SLASH_`
/// - `+` -> `_PLUS_`
/// - `=` -> `_EQ_`
/// - `_`, `.`, `$` and ASCII alphanumerics are kept as-is
/// - anything else (including non-ASCII letters and digits) -> `_x<HEX>_`,
///   where `<HEX>` is the Unicode scalar value in uppercase hex, at least
///   two digits wide
///
/// NOTE(review): the encoding is not injective — e.g. `foo-bar` and `foo_bar`
/// both mangle to `foo_bar`. Confirm that such clashes are rejected elsewhere.
fn mangle_name(name: &str) -> String {
    let mut result = String::with_capacity(name.len());
    for c in name.chars() {
        match c {
            '?' => result.push_str("_Q_"),
            '>' => result.push_str("_GT_"),
            '<' => result.push_str("_LT_"),
            '!' => result.push_str("_BANG_"),
            '*' => result.push_str("_STAR_"),
            '/' => result.push_str("_SLASH_"),
            '+' => result.push_str("_PLUS_"),
            '=' => result.push_str("_EQ_"),
            // Hyphens are folded into underscores
            '-' => result.push('_'),
            // Already valid in an unquoted LLVM IR identifier
            '_' | '.' | '$' => result.push(c),
            // Only ASCII letters/digits may pass through untouched: Unicode
            // alphanumerics (e.g. `é`, `λ`) are not part of the unquoted
            // identifier alphabet and must be hex-encoded below.
            c if c.is_ascii_alphanumeric() => result.push(c),
            // Any other character gets hex-encoded; writing to a String cannot fail
            _ => write!(result, "_x{:02X}_", c as u32).unwrap(),
        }
    }
    result
}
65
/// Text-based LLVM IR generator for a Seq program.
///
/// IR fragments are accumulated in separate string buffers, which
/// `codegen_program` stitches together into the final module text.
pub struct CodeGen {
    /// IR text for user-defined words and `main` (also used as the scratch
    /// buffer while a quotation function is being generated).
    output: String,
    /// Definitions of the global string constants (`@.str.N = ...`).
    string_globals: String,
    /// Next index handed out by `fresh_temp`.
    temp_counter: usize,
    /// Next index used when naming a `@.str.N` global.
    string_counter: usize,
    /// For generating unique block labels.
    block_counter: usize,
    /// For generating unique quotation function names.
    quot_counter: usize,
    /// String content -> global name.
    string_constants: HashMap<String, String>,
    /// Accumulates generated quotation functions.
    quotation_functions: String,
    /// Maps quotation ID to inferred type (from typechecker).
    type_map: HashMap<usize, Type>,
}
77
78impl CodeGen {
79    pub fn new() -> Self {
80        CodeGen {
81            output: String::new(),
82            string_globals: String::new(),
83            temp_counter: 0,
84            string_counter: 0,
85            block_counter: 0,
86            quot_counter: 0,
87            string_constants: HashMap::new(),
88            quotation_functions: String::new(),
89            type_map: HashMap::new(),
90        }
91    }
92
93    /// Generate a fresh temporary variable name
94    fn fresh_temp(&mut self) -> String {
95        let name = format!("{}", self.temp_counter);
96        self.temp_counter += 1;
97        name
98    }
99
100    /// Generate a fresh block label
101    fn fresh_block(&mut self, prefix: &str) -> String {
102        let name = format!("{}{}", prefix, self.block_counter);
103        self.block_counter += 1;
104        name
105    }
106
107    /// Get the next quotation type (consumes it in DFS traversal order)
108    /// Get the inferred type for a quotation by its ID
109    fn get_quotation_type(&self, id: usize) -> Result<&Type, String> {
110        self.type_map.get(&id).ok_or_else(|| {
111            format!(
112                "CodeGen: no type information for quotation ID {}. This is a compiler bug.",
113                id
114            )
115        })
116    }
117
118    /// Escape a string for LLVM IR string literals
119    fn escape_llvm_string(s: &str) -> String {
120        let mut result = String::new();
121        for ch in s.chars() {
122            match ch {
123                ' '..='!' | '#'..='[' | ']'..='~' => result.push(ch),
124                '\\' => result.push_str(r"\\"),
125                '"' => result.push_str(r#"\22"#),
126                '\n' => result.push_str(r"\0A"),
127                '\r' => result.push_str(r"\0D"),
128                '\t' => result.push_str(r"\09"),
129                _ => {
130                    // Non-printable: use hex escape
131                    for byte in ch.to_string().as_bytes() {
132                        write!(&mut result, r"\{:02X}", byte).unwrap();
133                    }
134                }
135            }
136        }
137        result
138    }
139
140    /// Get or create a global string constant
141    fn get_string_global(&mut self, s: &str) -> String {
142        if let Some(global_name) = self.string_constants.get(s) {
143            return global_name.clone();
144        }
145
146        let global_name = format!("@.str.{}", self.string_counter);
147        self.string_counter += 1;
148
149        let escaped = Self::escape_llvm_string(s);
150        let len = s.len() + 1; // +1 for null terminator
151
152        writeln!(
153            &mut self.string_globals,
154            "{} = private unnamed_addr constant [{} x i8] c\"{}\\00\"",
155            global_name, len, escaped
156        )
157        .unwrap();
158
159        self.string_constants
160            .insert(s.to_string(), global_name.clone());
161        global_name
162    }
163
164    /// Generate LLVM IR for entire program
165    pub fn codegen_program(
166        &mut self,
167        program: &Program,
168        type_map: HashMap<usize, Type>,
169    ) -> Result<String, String> {
170        // Store type map for use during code generation
171        self.type_map = type_map;
172
173        // Verify we have a main word
174        if program.find_word("main").is_none() {
175            return Err("No main word defined".to_string());
176        }
177
178        // Generate all user-defined words
179        for word in &program.words {
180            self.codegen_word(word)?;
181        }
182
183        // Generate main function
184        self.codegen_main()?;
185
186        // Assemble final IR
187        let mut ir = String::new();
188
189        // Target and type declarations
190        writeln!(&mut ir, "; ModuleID = 'main'").unwrap();
191        writeln!(&mut ir, "target triple = \"{}\"", get_target_triple()).unwrap();
192        writeln!(&mut ir).unwrap();
193
194        // Opaque Value type (Rust enum)
195        writeln!(&mut ir, "; Opaque Value type (Rust enum)").unwrap();
196        writeln!(&mut ir, "%Value = type opaque").unwrap();
197        writeln!(&mut ir).unwrap();
198
199        // String constants
200        if !self.string_globals.is_empty() {
201            ir.push_str(&self.string_globals);
202            writeln!(&mut ir).unwrap();
203        }
204
205        // Runtime function declarations
206        writeln!(&mut ir, "; Runtime function declarations").unwrap();
207        writeln!(&mut ir, "declare ptr @patch_seq_push_int(ptr, i64)").unwrap();
208        writeln!(&mut ir, "declare ptr @patch_seq_push_string(ptr, ptr)").unwrap();
209        writeln!(&mut ir, "declare ptr @patch_seq_write_line(ptr)").unwrap();
210        writeln!(&mut ir, "declare ptr @patch_seq_read_line(ptr)").unwrap();
211        writeln!(&mut ir, "declare ptr @patch_seq_int_to_string(ptr)").unwrap();
212        writeln!(&mut ir, "declare ptr @patch_seq_add(ptr)").unwrap();
213        writeln!(&mut ir, "declare ptr @patch_seq_subtract(ptr)").unwrap();
214        writeln!(&mut ir, "declare ptr @patch_seq_multiply(ptr)").unwrap();
215        writeln!(&mut ir, "declare ptr @patch_seq_divide(ptr)").unwrap();
216        writeln!(&mut ir, "declare ptr @patch_seq_eq(ptr)").unwrap();
217        writeln!(&mut ir, "declare ptr @patch_seq_lt(ptr)").unwrap();
218        writeln!(&mut ir, "declare ptr @patch_seq_gt(ptr)").unwrap();
219        writeln!(&mut ir, "declare ptr @patch_seq_lte(ptr)").unwrap();
220        writeln!(&mut ir, "declare ptr @patch_seq_gte(ptr)").unwrap();
221        writeln!(&mut ir, "declare ptr @patch_seq_neq(ptr)").unwrap();
222        writeln!(&mut ir, "; Boolean operations").unwrap();
223        writeln!(&mut ir, "declare ptr @patch_seq_and(ptr)").unwrap();
224        writeln!(&mut ir, "declare ptr @patch_seq_or(ptr)").unwrap();
225        writeln!(&mut ir, "declare ptr @patch_seq_not(ptr)").unwrap();
226        writeln!(&mut ir, "; Stack operations").unwrap();
227        writeln!(&mut ir, "declare ptr @patch_seq_dup(ptr)").unwrap();
228        writeln!(&mut ir, "declare ptr @patch_seq_drop_op(ptr)").unwrap();
229        writeln!(&mut ir, "declare ptr @patch_seq_swap(ptr)").unwrap();
230        writeln!(&mut ir, "declare ptr @patch_seq_over(ptr)").unwrap();
231        writeln!(&mut ir, "declare ptr @patch_seq_rot(ptr)").unwrap();
232        writeln!(&mut ir, "declare ptr @patch_seq_nip(ptr)").unwrap();
233        writeln!(&mut ir, "declare ptr @patch_seq_tuck(ptr)").unwrap();
234        writeln!(&mut ir, "declare ptr @patch_seq_pick_op(ptr)").unwrap();
235        writeln!(&mut ir, "declare ptr @patch_seq_roll(ptr)").unwrap();
236        writeln!(&mut ir, "declare ptr @patch_seq_push_value(ptr, %Value)").unwrap();
237        writeln!(&mut ir, "; Quotation operations").unwrap();
238        writeln!(&mut ir, "declare ptr @patch_seq_push_quotation(ptr, i64)").unwrap();
239        writeln!(&mut ir, "declare ptr @patch_seq_call(ptr)").unwrap();
240        writeln!(&mut ir, "declare ptr @patch_seq_times(ptr)").unwrap();
241        writeln!(&mut ir, "declare ptr @patch_seq_while_loop(ptr)").unwrap();
242        writeln!(&mut ir, "declare ptr @patch_seq_until_loop(ptr)").unwrap();
243        writeln!(&mut ir, "declare ptr @patch_seq_forever(ptr)").unwrap();
244        writeln!(&mut ir, "declare ptr @patch_seq_spawn(ptr)").unwrap();
245        writeln!(&mut ir, "declare ptr @patch_seq_cond(ptr)").unwrap();
246        writeln!(&mut ir, "; Closure operations").unwrap();
247        writeln!(&mut ir, "declare ptr @patch_seq_create_env(i32)").unwrap();
248        writeln!(&mut ir, "declare void @patch_seq_env_set(ptr, i32, %Value)").unwrap();
249        writeln!(&mut ir, "declare %Value @patch_seq_env_get(ptr, i64, i32)").unwrap();
250        writeln!(&mut ir, "declare i64 @patch_seq_env_get_int(ptr, i64, i32)").unwrap();
251        writeln!(
252            &mut ir,
253            "declare ptr @patch_seq_env_get_string(ptr, i64, i32)"
254        )
255        .unwrap();
256        writeln!(&mut ir, "declare %Value @patch_seq_make_closure(i64, ptr)").unwrap();
257        writeln!(
258            &mut ir,
259            "declare ptr @patch_seq_push_closure(ptr, i64, i32)"
260        )
261        .unwrap();
262        writeln!(&mut ir, "declare ptr @patch_seq_push_seqstring(ptr, ptr)").unwrap();
263        writeln!(&mut ir, "; Concurrency operations").unwrap();
264        writeln!(&mut ir, "declare ptr @patch_seq_make_channel(ptr)").unwrap();
265        writeln!(&mut ir, "declare ptr @patch_seq_chan_send(ptr)").unwrap();
266        writeln!(&mut ir, "declare ptr @patch_seq_chan_receive(ptr)").unwrap();
267        writeln!(&mut ir, "declare ptr @patch_seq_close_channel(ptr)").unwrap();
268        writeln!(&mut ir, "declare ptr @patch_seq_yield_strand(ptr)").unwrap();
269        writeln!(&mut ir, "; Scheduler operations").unwrap();
270        writeln!(&mut ir, "declare void @patch_seq_scheduler_init()").unwrap();
271        writeln!(&mut ir, "declare ptr @patch_seq_scheduler_run()").unwrap();
272        writeln!(&mut ir, "declare i64 @patch_seq_strand_spawn(ptr, ptr)").unwrap();
273        writeln!(&mut ir, "; Command-line argument operations").unwrap();
274        writeln!(&mut ir, "declare void @patch_seq_args_init(i32, ptr)").unwrap();
275        writeln!(&mut ir, "declare ptr @patch_seq_arg_count(ptr)").unwrap();
276        writeln!(&mut ir, "declare ptr @patch_seq_arg_at(ptr)").unwrap();
277        writeln!(&mut ir, "; File operations").unwrap();
278        writeln!(&mut ir, "declare ptr @patch_seq_file_slurp(ptr)").unwrap();
279        writeln!(&mut ir, "declare ptr @patch_seq_file_exists(ptr)").unwrap();
280        writeln!(&mut ir, "; TCP operations").unwrap();
281        writeln!(&mut ir, "declare ptr @patch_seq_tcp_listen(ptr)").unwrap();
282        writeln!(&mut ir, "declare ptr @patch_seq_tcp_accept(ptr)").unwrap();
283        writeln!(&mut ir, "declare ptr @patch_seq_tcp_read(ptr)").unwrap();
284        writeln!(&mut ir, "declare ptr @patch_seq_tcp_write(ptr)").unwrap();
285        writeln!(&mut ir, "declare ptr @patch_seq_tcp_close(ptr)").unwrap();
286        writeln!(&mut ir, "; String operations").unwrap();
287        writeln!(&mut ir, "declare ptr @patch_seq_string_concat(ptr)").unwrap();
288        writeln!(&mut ir, "declare ptr @patch_seq_string_length(ptr)").unwrap();
289        writeln!(&mut ir, "declare ptr @patch_seq_string_byte_length(ptr)").unwrap();
290        writeln!(&mut ir, "declare ptr @patch_seq_string_char_at(ptr)").unwrap();
291        writeln!(&mut ir, "declare ptr @patch_seq_string_substring(ptr)").unwrap();
292        writeln!(&mut ir, "declare ptr @patch_seq_char_to_string(ptr)").unwrap();
293        writeln!(&mut ir, "declare ptr @patch_seq_string_find(ptr)").unwrap();
294        writeln!(&mut ir, "declare ptr @patch_seq_string_split(ptr)").unwrap();
295        writeln!(&mut ir, "declare ptr @patch_seq_string_contains(ptr)").unwrap();
296        writeln!(&mut ir, "declare ptr @patch_seq_string_starts_with(ptr)").unwrap();
297        writeln!(&mut ir, "declare ptr @patch_seq_string_empty(ptr)").unwrap();
298        writeln!(&mut ir, "declare ptr @patch_seq_string_trim(ptr)").unwrap();
299        writeln!(&mut ir, "declare ptr @patch_seq_string_to_upper(ptr)").unwrap();
300        writeln!(&mut ir, "declare ptr @patch_seq_string_to_lower(ptr)").unwrap();
301        writeln!(&mut ir, "declare ptr @patch_seq_string_equal(ptr)").unwrap();
302        writeln!(&mut ir, "; Variant operations").unwrap();
303        writeln!(&mut ir, "declare ptr @patch_seq_variant_field_count(ptr)").unwrap();
304        writeln!(&mut ir, "declare ptr @patch_seq_variant_tag(ptr)").unwrap();
305        writeln!(&mut ir, "declare ptr @patch_seq_variant_field_at(ptr)").unwrap();
306        writeln!(&mut ir, "declare ptr @patch_seq_variant_append(ptr)").unwrap();
307        writeln!(&mut ir, "declare ptr @patch_seq_variant_last(ptr)").unwrap();
308        writeln!(&mut ir, "declare ptr @patch_seq_variant_init(ptr)").unwrap();
309        writeln!(&mut ir, "declare ptr @patch_seq_make_variant(ptr)").unwrap();
310        writeln!(&mut ir, "; Float operations").unwrap();
311        writeln!(&mut ir, "declare ptr @patch_seq_push_float(ptr, double)").unwrap();
312        writeln!(&mut ir, "declare ptr @patch_seq_f_add(ptr)").unwrap();
313        writeln!(&mut ir, "declare ptr @patch_seq_f_subtract(ptr)").unwrap();
314        writeln!(&mut ir, "declare ptr @patch_seq_f_multiply(ptr)").unwrap();
315        writeln!(&mut ir, "declare ptr @patch_seq_f_divide(ptr)").unwrap();
316        writeln!(&mut ir, "declare ptr @patch_seq_f_eq(ptr)").unwrap();
317        writeln!(&mut ir, "declare ptr @patch_seq_f_lt(ptr)").unwrap();
318        writeln!(&mut ir, "declare ptr @patch_seq_f_gt(ptr)").unwrap();
319        writeln!(&mut ir, "declare ptr @patch_seq_f_lte(ptr)").unwrap();
320        writeln!(&mut ir, "declare ptr @patch_seq_f_gte(ptr)").unwrap();
321        writeln!(&mut ir, "declare ptr @patch_seq_f_neq(ptr)").unwrap();
322        writeln!(&mut ir, "declare ptr @patch_seq_int_to_float(ptr)").unwrap();
323        writeln!(&mut ir, "declare ptr @patch_seq_float_to_int(ptr)").unwrap();
324        writeln!(&mut ir, "declare ptr @patch_seq_float_to_string(ptr)").unwrap();
325        writeln!(&mut ir, "declare ptr @patch_seq_string_to_float(ptr)").unwrap();
326        writeln!(&mut ir, "; Helpers for conditionals").unwrap();
327        writeln!(&mut ir, "declare i64 @patch_seq_peek_int_value(ptr)").unwrap();
328        writeln!(&mut ir, "declare ptr @patch_seq_pop_stack(ptr)").unwrap();
329        writeln!(&mut ir).unwrap();
330
331        // Quotation functions (generated from quotation literals)
332        if !self.quotation_functions.is_empty() {
333            writeln!(&mut ir, "; Quotation functions").unwrap();
334            ir.push_str(&self.quotation_functions);
335            writeln!(&mut ir).unwrap();
336        }
337
338        // User-defined words and main
339        ir.push_str(&self.output);
340
341        Ok(ir)
342    }
343
344    /// Generate code for a word definition
345    fn codegen_word(&mut self, word: &WordDef) -> Result<(), String> {
346        // Prefix word names with "seq_" to avoid conflicts with C symbols
347        // Also mangle special characters that aren't valid in LLVM IR identifiers
348        let function_name = format!("seq_{}", mangle_name(&word.name));
349        writeln!(
350            &mut self.output,
351            "define ptr @{}(ptr %stack) {{",
352            function_name
353        )
354        .unwrap();
355        writeln!(&mut self.output, "entry:").unwrap();
356
357        let mut stack_var = "stack".to_string();
358
359        // Generate code for each statement
360        for statement in &word.body {
361            stack_var = self.codegen_statement(&stack_var, statement)?;
362        }
363
364        writeln!(&mut self.output, "  ret ptr %{}", stack_var).unwrap();
365        writeln!(&mut self.output, "}}").unwrap();
366        writeln!(&mut self.output).unwrap();
367
368        Ok(())
369    }
370
371    /// Generate a quotation function
372    /// Returns the function name
373    fn codegen_quotation(
374        &mut self,
375        body: &[Statement],
376        quot_type: &Type,
377    ) -> Result<String, String> {
378        // Generate unique function name
379        let function_name = format!("seq_quot_{}", self.quot_counter);
380        self.quot_counter += 1;
381
382        // Save current output and switch to quotation_functions
383        let saved_output = std::mem::take(&mut self.output);
384
385        // Generate function signature based on type
386        match quot_type {
387            Type::Quotation(_) => {
388                // Stateless quotation: fn(Stack) -> Stack
389                writeln!(
390                    &mut self.output,
391                    "define ptr @{}(ptr %stack) {{",
392                    function_name
393                )
394                .unwrap();
395            }
396            Type::Closure { captures, .. } => {
397                // Closure: fn(Stack, *const Value, usize) -> Stack
398                writeln!(
399                    &mut self.output,
400                    "define ptr @{}(ptr %stack, ptr %env_data, i64 %env_len) {{",
401                    function_name
402                )
403                .unwrap();
404                writeln!(&mut self.output, "entry:").unwrap();
405
406                // Push captured values onto the stack before executing body
407                // Captures are stored bottom-to-top, so push them in order
408                let mut stack_var = "stack".to_string();
409                for (index, capture_type) in captures.iter().enumerate() {
410                    // Use type-specific getters to avoid passing large Value enum through FFI
411                    match capture_type {
412                        Type::Int => {
413                            let int_var = self.fresh_temp();
414                            writeln!(
415                                &mut self.output,
416                                "  %{} = call i64 @patch_seq_env_get_int(ptr %env_data, i64 %env_len, i32 {})",
417                                int_var, index
418                            )
419                            .unwrap();
420                            let new_stack_var = self.fresh_temp();
421                            writeln!(
422                                &mut self.output,
423                                "  %{} = call ptr @patch_seq_push_int(ptr %{}, i64 %{})",
424                                new_stack_var, stack_var, int_var
425                            )
426                            .unwrap();
427                            stack_var = new_stack_var;
428                        }
429                        Type::String => {
430                            let string_var = self.fresh_temp();
431                            writeln!(
432                                &mut self.output,
433                                "  %{} = call ptr @patch_seq_env_get_string(ptr %env_data, i64 %env_len, i32 {})",
434                                string_var, index
435                            )
436                            .unwrap();
437                            let new_stack_var = self.fresh_temp();
438                            writeln!(
439                                &mut self.output,
440                                "  %{} = call ptr @patch_seq_push_seqstring(ptr %{}, ptr %{})",
441                                new_stack_var, stack_var, string_var
442                            )
443                            .unwrap();
444                            stack_var = new_stack_var;
445                        }
446                        _ => {
447                            // TODO: Implement type-specific getters for Bool and other types
448                            // Each type needs:
449                            //   - Runtime: env_get_<type> in closures.rs
450                            //   - CodeGen: Match arm here to call the right getter
451                            return Err(format!(
452                                "CodeGen: Only Int and String captures are currently supported, got {:?}. \
453                                 Other types require implementing env_get_<type> functions.",
454                                capture_type
455                            ));
456                        }
457                    }
458                }
459
460                // Generate code for each statement in the quotation body
461                for statement in body {
462                    stack_var = self.codegen_statement(&stack_var, statement)?;
463                }
464
465                writeln!(&mut self.output, "  ret ptr %{}", stack_var).unwrap();
466                writeln!(&mut self.output, "}}").unwrap();
467                writeln!(&mut self.output).unwrap();
468
469                // Move generated function to quotation_functions
470                self.quotation_functions.push_str(&self.output);
471
472                // Restore original output
473                self.output = saved_output;
474
475                return Ok(function_name);
476            }
477            _ => {
478                return Err(format!(
479                    "CodeGen: expected Quotation or Closure type, got {:?}",
480                    quot_type
481                ));
482            }
483        }
484
485        writeln!(&mut self.output, "entry:").unwrap();
486
487        let mut stack_var = "stack".to_string();
488
489        // Generate code for each statement in the quotation body
490        for statement in body {
491            stack_var = self.codegen_statement(&stack_var, statement)?;
492        }
493
494        writeln!(&mut self.output, "  ret ptr %{}", stack_var).unwrap();
495        writeln!(&mut self.output, "}}").unwrap();
496        writeln!(&mut self.output).unwrap();
497
498        // Move generated function to quotation_functions
499        self.quotation_functions.push_str(&self.output);
500
501        // Restore original output
502        self.output = saved_output;
503
504        Ok(function_name)
505    }
506
507    /// Generate code for a single statement
508    fn codegen_statement(
509        &mut self,
510        stack_var: &str,
511        statement: &Statement,
512    ) -> Result<String, String> {
513        match statement {
514            Statement::IntLiteral(n) => {
515                let result_var = self.fresh_temp();
516                writeln!(
517                    &mut self.output,
518                    "  %{} = call ptr @patch_seq_push_int(ptr %{}, i64 {})",
519                    result_var, stack_var, n
520                )
521                .unwrap();
522                Ok(result_var)
523            }
524
525            Statement::FloatLiteral(f) => {
526                let result_var = self.fresh_temp();
527                // Format float to ensure LLVM recognizes it as a double literal
528                // Use hex representation for precise and always-valid format
529                let float_str = if f.is_nan() {
530                    "0x7FF8000000000000".to_string() // NaN
531                } else if f.is_infinite() {
532                    if f.is_sign_positive() {
533                        "0x7FF0000000000000".to_string() // +Infinity
534                    } else {
535                        "0xFFF0000000000000".to_string() // -Infinity
536                    }
537                } else {
538                    // Use LLVM's hexadecimal floating point format for exact representation
539                    let bits = f.to_bits();
540                    format!("0x{:016X}", bits)
541                };
542                writeln!(
543                    &mut self.output,
544                    "  %{} = call ptr @patch_seq_push_float(ptr %{}, double {})",
545                    result_var, stack_var, float_str
546                )
547                .unwrap();
548                Ok(result_var)
549            }
550
551            Statement::BoolLiteral(b) => {
552                let result_var = self.fresh_temp();
553                let val = if *b { 1 } else { 0 };
554                writeln!(
555                    &mut self.output,
556                    "  %{} = call ptr @patch_seq_push_int(ptr %{}, i64 {})",
557                    result_var, stack_var, val
558                )
559                .unwrap();
560                Ok(result_var)
561            }
562
563            Statement::StringLiteral(s) => {
564                let global = self.get_string_global(s);
565                let ptr_temp = self.fresh_temp();
566                writeln!(
567                    &mut self.output,
568                    "  %{} = getelementptr inbounds [{} x i8], ptr {}, i32 0, i32 0",
569                    ptr_temp,
570                    s.len() + 1,
571                    global
572                )
573                .unwrap();
574                let result_var = self.fresh_temp();
575                writeln!(
576                    &mut self.output,
577                    "  %{} = call ptr @patch_seq_push_string(ptr %{}, ptr %{})",
578                    result_var, stack_var, ptr_temp
579                )
580                .unwrap();
581                Ok(result_var)
582            }
583
584            Statement::WordCall(name) => {
585                let result_var = self.fresh_temp();
586                // Map source-level word names to runtime function names
587                // Most built-ins use their source name directly, but some need mapping:
588                // - Symbolic operators (=, <, >) map to names (eq, lt, gt)
589                // - 'drop' maps to 'drop_op' (drop is LLVM reserved)
590                // - User words get 'seq_' prefix to avoid C symbol conflicts
591                let function_name = match name.as_str() {
592                    // I/O operations
593                    "write_line" | "read_line" => format!("patch_seq_{}", name),
594                    "int->string" => "patch_seq_int_to_string".to_string(),
595                    // Command-line argument operations
596                    "arg-count" => "patch_seq_arg_count".to_string(),
597                    "arg" => "patch_seq_arg_at".to_string(),
598                    // Arithmetic operations
599                    "add" | "subtract" | "multiply" | "divide" => format!("patch_seq_{}", name),
600                    // Comparison operations (symbolic → named)
601                    // These return Int (0 or 1) for Forth-style boolean semantics
602                    "=" => "patch_seq_eq".to_string(),
603                    "<" => "patch_seq_lt".to_string(),
604                    ">" => "patch_seq_gt".to_string(),
605                    "<=" => "patch_seq_lte".to_string(),
606                    ">=" => "patch_seq_gte".to_string(),
607                    "<>" => "patch_seq_neq".to_string(),
608                    // Boolean operations
609                    "and" | "or" | "not" => format!("patch_seq_{}", name),
610                    // Stack operations (simple - no parameters)
611                    "dup" | "swap" | "over" | "rot" | "nip" | "tuck" => {
612                        format!("patch_seq_{}", name)
613                    }
614                    "drop" => "patch_seq_drop_op".to_string(), // 'drop' is reserved in LLVM IR
615                    "pick" => "patch_seq_pick_op".to_string(), // pick takes Int parameter from stack
616                    "roll" => "patch_seq_roll".to_string(),    // roll takes Int depth from stack
617                    // Concurrency operations (hyphen → underscore for C compatibility)
618                    "make-channel" => "patch_seq_make_channel".to_string(),
619                    "send" => "patch_seq_chan_send".to_string(),
620                    "receive" => "patch_seq_chan_receive".to_string(),
621                    "close-channel" => "patch_seq_close_channel".to_string(),
622                    "yield" => "patch_seq_yield_strand".to_string(),
623                    // Quotation operations
624                    "call" => "patch_seq_call".to_string(),
625                    "times" => "patch_seq_times".to_string(),
626                    "while" => "patch_seq_while_loop".to_string(),
627                    "until" => "patch_seq_until_loop".to_string(),
628                    "forever" => "patch_seq_forever".to_string(),
629                    "spawn" => "patch_seq_spawn".to_string(),
630                    "cond" => "patch_seq_cond".to_string(),
631                    // TCP operations (hyphen → underscore for C compatibility)
632                    "tcp-listen" => "patch_seq_tcp_listen".to_string(),
633                    "tcp-accept" => "patch_seq_tcp_accept".to_string(),
634                    "tcp-read" => "patch_seq_tcp_read".to_string(),
635                    "tcp-write" => "patch_seq_tcp_write".to_string(),
636                    "tcp-close" => "patch_seq_tcp_close".to_string(),
637                    // String operations (hyphen → underscore for C compatibility)
638                    "string-concat" => "patch_seq_string_concat".to_string(),
639                    "string-length" => "patch_seq_string_length".to_string(),
640                    "string-byte-length" => "patch_seq_string_byte_length".to_string(),
641                    "string-char-at" => "patch_seq_string_char_at".to_string(),
642                    "string-substring" => "patch_seq_string_substring".to_string(),
643                    "char->string" => "patch_seq_char_to_string".to_string(),
644                    "string-find" => "patch_seq_string_find".to_string(),
645                    "string-split" => "patch_seq_string_split".to_string(),
646                    "string-contains" => "patch_seq_string_contains".to_string(),
647                    "string-starts-with" => "patch_seq_string_starts_with".to_string(),
648                    "string-empty" => "patch_seq_string_empty".to_string(),
649                    "string-trim" => "patch_seq_string_trim".to_string(),
650                    "string-to-upper" => "patch_seq_string_to_upper".to_string(),
651                    "string-to-lower" => "patch_seq_string_to_lower".to_string(),
652                    "string-equal" => "patch_seq_string_equal".to_string(),
653                    // File operations (hyphen → underscore for C compatibility)
654                    "file-slurp" => "patch_seq_file_slurp".to_string(),
655                    "file-exists?" => "patch_seq_file_exists".to_string(),
656                    // Variant operations (hyphen → underscore for C compatibility)
657                    "variant-field-count" => "patch_seq_variant_field_count".to_string(),
658                    "variant-tag" => "patch_seq_variant_tag".to_string(),
659                    "variant-field-at" => "patch_seq_variant_field_at".to_string(),
660                    "variant-append" => "patch_seq_variant_append".to_string(),
661                    "variant-last" => "patch_seq_variant_last".to_string(),
662                    "variant-init" => "patch_seq_variant_init".to_string(),
663                    "make-variant" => "patch_seq_make_variant".to_string(),
664                    // Float arithmetic operations (dot notation → underscore)
665                    "f.add" => "patch_seq_f_add".to_string(),
666                    "f.subtract" => "patch_seq_f_subtract".to_string(),
667                    "f.multiply" => "patch_seq_f_multiply".to_string(),
668                    "f.divide" => "patch_seq_f_divide".to_string(),
669                    // Float comparison operations (symbolic → named)
670                    "f.=" => "patch_seq_f_eq".to_string(),
671                    "f.<" => "patch_seq_f_lt".to_string(),
672                    "f.>" => "patch_seq_f_gt".to_string(),
673                    "f.<=" => "patch_seq_f_lte".to_string(),
674                    "f.>=" => "patch_seq_f_gte".to_string(),
675                    "f.<>" => "patch_seq_f_neq".to_string(),
676                    // Float type conversions
677                    "int->float" => "patch_seq_int_to_float".to_string(),
678                    "float->int" => "patch_seq_float_to_int".to_string(),
679                    "float->string" => "patch_seq_float_to_string".to_string(),
680                    "string->float" => "patch_seq_string_to_float".to_string(),
681                    // User-defined word (prefix to avoid C symbol conflicts)
682                    // Also mangle special characters for LLVM IR compatibility
683                    _ => format!("seq_{}", mangle_name(name)),
684                };
685                writeln!(
686                    &mut self.output,
687                    "  %{} = call ptr @{}(ptr %{})",
688                    result_var, function_name, stack_var
689                )
690                .unwrap();
691                Ok(result_var)
692            }
693
694            Statement::If {
695                then_branch,
696                else_branch,
697            } => {
698                // NOTE: Stack effect validation is performed by the type checker (see typechecker.rs).
699                // Both branches must produce the same stack depth, which is validated before
700                // we reach codegen. This ensures the phi node merges compatible stack pointers.
701
702                // Peek the condition value first (doesn't modify stack)
703                // Then pop separately to properly free the stack node
704                // (prevents memory leak while allowing us to use the value for branching)
705                let cond_temp = self.fresh_temp();
706                writeln!(
707                    &mut self.output,
708                    "  %{} = call i64 @patch_seq_peek_int_value(ptr %{})",
709                    cond_temp, stack_var
710                )
711                .unwrap();
712
713                // Pop the condition from the stack (frees the node)
714                let popped_stack = self.fresh_temp();
715                writeln!(
716                    &mut self.output,
717                    "  %{} = call ptr @patch_seq_pop_stack(ptr %{})",
718                    popped_stack, stack_var
719                )
720                .unwrap();
721
                // Compare with 0: any non-zero value takes the then-branch, zero takes the else-branch
723                let cmp_temp = self.fresh_temp();
724                writeln!(
725                    &mut self.output,
726                    "  %{} = icmp ne i64 %{}, 0",
727                    cmp_temp, cond_temp
728                )
729                .unwrap();
730
731                // Generate unique block labels
732                let then_block = self.fresh_block("if_then");
733                let else_block = self.fresh_block("if_else");
734                let merge_block = self.fresh_block("if_merge");
735
736                // Conditional branch
737                writeln!(
738                    &mut self.output,
739                    "  br i1 %{}, label %{}, label %{}",
740                    cmp_temp, then_block, else_block
741                )
742                .unwrap();
743
744                // Then branch (executed when condition is non-zero)
745                writeln!(&mut self.output, "{}:", then_block).unwrap();
746                let mut then_stack = popped_stack.clone();
747                for stmt in then_branch {
748                    then_stack = self.codegen_statement(&then_stack, stmt)?;
749                }
750                // Create landing block for phi node predecessor tracking.
751                // This is CRITICAL for nested conditionals: if then_branch contains
752                // another if statement, the actual control flow predecessor is the
753                // inner if's merge block, not then_block. The landing block ensures
754                // the phi node always references the correct immediate predecessor.
755                let then_predecessor = self.fresh_block("if_then_end");
756                writeln!(&mut self.output, "  br label %{}", then_predecessor).unwrap();
757                writeln!(&mut self.output, "{}:", then_predecessor).unwrap();
758                writeln!(&mut self.output, "  br label %{}", merge_block).unwrap();
759
760                // Else branch (executed when condition is zero)
761                writeln!(&mut self.output, "{}:", else_block).unwrap();
762                let else_stack = if let Some(eb) = else_branch {
763                    let mut es = popped_stack.clone();
764                    for stmt in eb {
765                        es = self.codegen_statement(&es, stmt)?;
766                    }
767                    es
768                } else {
769                    // No else clause - stack unchanged
770                    popped_stack.clone()
771                };
772                // Landing block for else branch (same reasoning as then_branch)
773                let else_predecessor = self.fresh_block("if_else_end");
774                writeln!(&mut self.output, "  br label %{}", else_predecessor).unwrap();
775                writeln!(&mut self.output, "{}:", else_predecessor).unwrap();
776                writeln!(&mut self.output, "  br label %{}", merge_block).unwrap();
777
778                // Merge block - phi node to merge stack pointers from both paths
779                writeln!(&mut self.output, "{}:", merge_block).unwrap();
780                let result_var = self.fresh_temp();
781                writeln!(
782                    &mut self.output,
783                    "  %{} = phi ptr [ %{}, %{} ], [ %{}, %{} ]",
784                    result_var, then_stack, then_predecessor, else_stack, else_predecessor
785                )
786                .unwrap();
787
788                Ok(result_var)
789            }
790
791            Statement::Quotation { id, body } => {
792                // Get the inferred type for this quotation using its ID
793                let quot_type = self.get_quotation_type(*id)?.clone();
794
795                // Generate a function for the quotation body
796                let fn_name = self.codegen_quotation(body, &quot_type)?;
797
798                // Get function pointer as usize
799                let fn_ptr_var = self.fresh_temp();
800                writeln!(
801                    &mut self.output,
802                    "  %{} = ptrtoint ptr @{} to i64",
803                    fn_ptr_var, fn_name
804                )
805                .unwrap();
806
807                // Generate code based on quotation type
808                match quot_type {
809                    Type::Quotation(_effect) => {
810                        // Stateless quotation - use push_quotation
811                        let result_var = self.fresh_temp();
812                        writeln!(
813                            &mut self.output,
814                            "  %{} = call ptr @patch_seq_push_quotation(ptr %{}, i64 %{})",
815                            result_var, stack_var, fn_ptr_var
816                        )
817                        .unwrap();
818                        Ok(result_var)
819                    }
820                    Type::Closure {
821                        effect: _effect,
822                        captures,
823                    } => {
824                        // Closure with captures - use push_closure
825                        let capture_count = captures.len() as i32;
826                        let result_var = self.fresh_temp();
827                        writeln!(
828                            &mut self.output,
829                            "  %{} = call ptr @patch_seq_push_closure(ptr %{}, i64 %{}, i32 {})",
830                            result_var, stack_var, fn_ptr_var, capture_count
831                        )
832                        .unwrap();
833                        Ok(result_var)
834                    }
835                    _ => Err(format!(
836                        "CodeGen: expected Quotation or Closure type, got {:?}",
837                        quot_type
838                    )),
839                }
840            }
841        }
842    }
843
844    /// Generate main function that calls user's main word
845    fn codegen_main(&mut self) -> Result<(), String> {
846        writeln!(
847            &mut self.output,
848            "define i32 @main(i32 %argc, ptr %argv) {{"
849        )
850        .unwrap();
851        writeln!(&mut self.output, "entry:").unwrap();
852
853        // Initialize command-line arguments (before scheduler so args are available)
854        writeln!(
855            &mut self.output,
856            "  call void @patch_seq_args_init(i32 %argc, ptr %argv)"
857        )
858        .unwrap();
859
860        // Initialize scheduler
861        writeln!(&mut self.output, "  call void @patch_seq_scheduler_init()").unwrap();
862
863        // Spawn user's main function as the first strand
864        // This ensures all code runs in coroutine context for non-blocking I/O
865        writeln!(
866            &mut self.output,
867            "  %0 = call i64 @patch_seq_strand_spawn(ptr @seq_main, ptr null)"
868        )
869        .unwrap();
870
871        // Wait for all spawned strands to complete (including main)
872        writeln!(
873            &mut self.output,
874            "  %1 = call ptr @patch_seq_scheduler_run()"
875        )
876        .unwrap();
877
878        writeln!(&mut self.output, "  ret i32 0").unwrap();
879        writeln!(&mut self.output, "}}").unwrap();
880
881        Ok(())
882    }
883}
884
885impl Default for CodeGen {
886    fn default() -> Self {
887        Self::new()
888    }
889}
890
/// Return the LLVM target triple for the platform this crate is compiled for.
///
/// The selection is made from the compile-time `target_os` / `target_arch`
/// configuration:
///
/// - macOS / aarch64 -> `arm64-apple-macosx14.0.0`
/// - macOS / x86_64  -> `x86_64-apple-darwin`
/// - Linux / x86_64  -> `x86_64-unknown-linux-gnu`
/// - Linux / aarch64 -> `aarch64-unknown-linux-gnu`
///
/// Every other combination yields `"unknown"`.
fn get_target_triple() -> &'static str {
    // `cfg!` expands to a compile-time boolean, so exactly one arm is live
    // for any given build target.
    if cfg!(all(target_os = "macos", target_arch = "aarch64")) {
        "arm64-apple-macosx14.0.0"
    } else if cfg!(all(target_os = "macos", target_arch = "x86_64")) {
        "x86_64-apple-darwin"
    } else if cfg!(all(target_os = "linux", target_arch = "x86_64")) {
        "x86_64-unknown-linux-gnu"
    } else if cfg!(all(target_os = "linux", target_arch = "aarch64")) {
        "aarch64-unknown-linux-gnu"
    } else {
        "unknown"
    }
}
923
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ast::{Program, Statement, WordDef};

    /// Build a program that contains only a `main` word with the given body
    /// (no includes, no declared effect, no source information).
    fn main_only_program(body: Vec<Statement>) -> Program {
        let main_word = WordDef {
            name: "main".to_string(),
            effect: None,
            body,
            source: None,
        };

        Program {
            includes: vec![],
            words: vec![main_word],
        }
    }

    /// A string literal followed by `write_line` produces the entry point,
    /// the user's `seq_main`, and calls to the string-push and write-line
    /// runtime functions.
    #[test]
    fn test_codegen_hello_world() {
        let program = main_only_program(vec![
            Statement::StringLiteral("Hello, World!".to_string()),
            Statement::WordCall("write_line".to_string()),
        ]);

        let mut codegen = CodeGen::new();
        let ir = codegen.codegen_program(&program, HashMap::new()).unwrap();

        // Entry point and user main.
        assert!(ir.contains("define i32 @main(i32 %argc, ptr %argv)"));
        assert!(ir.contains("define ptr @seq_main(ptr %stack)"));
        // Runtime calls for the two statements.
        assert!(ir.contains("call ptr @patch_seq_push_string"));
        assert!(ir.contains("call ptr @patch_seq_write_line"));
        // The literal is emitted NUL-terminated.
        assert!(ir.contains("\"Hello, World!\\00\""));
    }

    /// Two integer literals followed by `add` produce integer pushes and a
    /// call to the add runtime function.
    #[test]
    fn test_codegen_arithmetic() {
        let program = main_only_program(vec![
            Statement::IntLiteral(2),
            Statement::IntLiteral(3),
            Statement::WordCall("add".to_string()),
        ]);

        let mut codegen = CodeGen::new();
        let ir = codegen.codegen_program(&program, HashMap::new()).unwrap();

        assert!(ir.contains("call ptr @patch_seq_push_int(ptr %stack, i64 2)"));
        // NOTE(review): this check is already implied by the one above;
        // presumably it was meant to cover the second literal (3) - confirm.
        assert!(ir.contains("call ptr @patch_seq_push_int"));
        assert!(ir.contains("call ptr @patch_seq_add"));
    }

    /// Plain text passes through unchanged; newline, tab and double quote are
    /// rewritten as backslash + two hex digits.
    #[test]
    fn test_escape_llvm_string() {
        let cases = [
            ("hello", "hello"),
            ("a\nb", r"a\0Ab"),
            ("a\tb", r"a\09b"),
            ("a\"b", r"a\22b"),
        ];

        for &(input, expected) in cases.iter() {
            assert_eq!(CodeGen::escape_llvm_string(input), expected);
        }
    }
}