Skip to main content

gaia_assembler/backends/x86/
mod.rs

1//! Native x86_64 backend compiler
2
3use crate::{
4    config::GaiaConfig,
5    instruction::{CoreInstruction, DomainInstruction, GaiaInstruction, ManagedInstruction},
6    program::{GaiaConstant, GaiaFunction, GaiaModule},
7    types::GaiaType,
8    Backend, GeneratedFiles,
9};
10use gaia_types::{
11    helpers::{AbiCompatible, ApiCompatible, Architecture, CompilationTarget},
12    GaiaError, Result,
13};
14use std::collections::HashMap;
15
/// Native x86_64 Backend implementation.
///
/// The type carries no state; all code generation happens in its
/// [`Backend`] implementation. It is constructed via [`Default`].
#[derive(Debug, Clone, Default)]
pub struct X86Backend {}
19
20impl Backend for X86Backend {
21    fn name(&self) -> &'static str {
22        "Native x86_64"
23    }
24
25    fn primary_target(&self) -> CompilationTarget {
26        CompilationTarget { build: Architecture::X86_64, host: AbiCompatible::PE, target: ApiCompatible::MicrosoftVisualC }
27    }
28
29    fn match_score(&self, target: &CompilationTarget) -> f32 {
30        if target.build == Architecture::X86_64 && target.host == AbiCompatible::PE {
31            if target.target == ApiCompatible::MicrosoftVisualC {
32                return 100.0; // Perfect match for native x86_64 on Windows
33            }
34            return 80.0;
35        }
36        0.0
37    }
38
39    fn generate(&self, program: &GaiaModule, _config: &GaiaConfig) -> Result<GeneratedFiles> {
40        let mut code = Vec::new();
41        let mut external_call_positions: HashMap<String, Vec<usize>> = HashMap::new();
42        let mut string_patches = Vec::new(); // (position of imm32, string offset in rdata)
43
44        // 0. Pre-pass: Collect strings
45        let mut string_table = HashMap::new();
46        let mut rdata_content = Vec::new();
47        let mut next_string_offset = 0;
48
49        for function in &program.functions {
50            for block in &function.blocks {
51                for inst in &block.instructions {
52                    match inst {
53                        GaiaInstruction::Core(CoreInstruction::PushConstant(GaiaConstant::String(s)))
54                        | GaiaInstruction::Core(CoreInstruction::New(s))
55                        | GaiaInstruction::Core(CoreInstruction::StoreField(_, s))
56                        | GaiaInstruction::Core(CoreInstruction::LoadField(_, s))
57                        | GaiaInstruction::Managed(ManagedInstruction::CallMethod { method: s, .. }) => {
58                            if !string_table.contains_key(s) {
59                                string_table.insert(s.clone(), next_string_offset);
60                                rdata_content.extend_from_slice(s.as_bytes());
61                                rdata_content.push(0); // null terminator
62                                next_string_offset += s.len() + 1;
63                            }
64                        }
65                        _ => {}
66                    }
67                }
68            }
69        }
70
71        // 1. Generate entry point (stub)
72        // sub rsp, 32 (shadow space for Win64 calls)
73        code.extend_from_slice(&[0x48, 0x83, 0xEC, 0x20]);
74
75        // Initialize runtime
76        let symbol = "nyar_init_runtime".to_string();
77        let pos = code.len();
78        code.extend_from_slice(&[0xFF, 0x15, 0x00, 0x00, 0x00, 0x00]); // call [rip+offset]
79        external_call_positions.entry(symbol).or_default().push(pos);
80
81        // call <main>
82        let call_main_pos = code.len();
83        code.extend_from_slice(&[0xE8, 0x00, 0x00, 0x00, 0x00]);
84
85        // mov rcx, rax (return value as exit code)
86        code.extend_from_slice(&[0x48, 0x89, 0xC1]);
87
88        // call [rip + <iat_offset>] (ExitProcess)
89        // Note: ExitProcess is usually required for EXEs, but we'll try to find it in imports
90        let call_exit_pos = code.len();
91        code.extend_from_slice(&[0xFF, 0x15, 0x00, 0x00, 0x00, 0x00]);
92
93        // 2. Generate functions
94        let mut function_offsets = HashMap::new();
95        let mut internal_functions = std::collections::HashSet::new();
96        for function in &program.functions {
97            internal_functions.insert(function.name.clone());
98        }
99
100        let mut internal_call_positions = HashMap::new();
101
102        for function in &program.functions {
103            function_offsets.insert(function.name.clone(), code.len());
104            self.generate_function(
105                function,
106                &internal_functions,
107                &mut code,
108                &mut external_call_positions,
109                &mut internal_call_positions,
110                &string_table,
111                &mut string_patches,
112            )?;
113        }
114
115        // 3. Patch main call
116        let main_name = if function_offsets.contains_key("main") {
117            "main"
118        }
119        else {
120            program.functions.first().map(|f| f.name.as_str()).unwrap_or("")
121        };
122
123        if !main_name.is_empty() {
124            let main_offset = function_offsets[main_name];
125            let relative_offset = (main_offset as i32) - (call_main_pos as i32 + 5);
126            code[call_main_pos + 1..call_main_pos + 5].copy_from_slice(&relative_offset.to_le_bytes());
127        }
128
129        // 4. Patch internal calls
130        for (name, positions) in internal_call_positions {
131            if let Some(&func_offset) = function_offsets.get(&name) {
132                for pos in positions {
133                    let relative_offset = (func_offset as i32) - (pos as i32 + 5);
134                    code[pos + 1..pos + 5].copy_from_slice(&relative_offset.to_le_bytes());
135                }
136            }
137        }
138
139        let mut files = HashMap::new();
140        files.insert("main.bin".to_string(), code.clone());
141
142        let pe_bytes =
143            self.create_pe_exe(&code, program, call_exit_pos, &external_call_positions, &rdata_content, &string_patches)?;
144        files.insert("main.exe".to_string(), pe_bytes);
145
146        Ok(GeneratedFiles { files, diagnostics: vec![] })
147    }
148}
149
150impl X86Backend {
151    fn generate_function(
152        &self,
153        function: &GaiaFunction,
154        internal_functions: &std::collections::HashSet<String>,
155        code: &mut Vec<u8>,
156        external_call_positions: &mut HashMap<String, Vec<usize>>,
157        internal_call_positions: &mut HashMap<String, Vec<usize>>,
158        string_table: &HashMap<String, usize>,
159        string_patches: &mut Vec<(usize, usize)>,
160    ) -> Result<()> {
161        let mut labels = HashMap::new();
162        let mut jump_patches = Vec::new();
163        let _function_start = code.len();
164
165        // Function prologue
166        code.push(0x55); // push rbp
167        code.extend_from_slice(&[0x48, 0x89, 0xE5]); // mov rbp, rsp
168
169        // Calculate stack space for locals and shadow space (32 bytes for Win64)
170        // Ensure 16-byte alignment
171        let locals_count = function
172            .blocks
173            .iter()
174            .flat_map(|b| &b.instructions)
175            .filter(|i| matches!(i, GaiaInstruction::Core(CoreInstruction::Alloca(_, _))))
176            .count();
177        let has_managed_calls = function
178            .blocks
179            .iter()
180            .flat_map(|b| &b.instructions)
181            .any(|i| matches!(i, GaiaInstruction::Managed(ManagedInstruction::CallMethod { .. })));
182
183        let locals_size = locals_count * 8;
184        let shadow_space = if has_managed_calls { 64 } else { 32 };
185        let total_stack_size = (locals_size + shadow_space + 15) & !15;
186
187        if total_stack_size > 0 {
188            if total_stack_size <= 127 {
189                code.extend_from_slice(&[0x48, 0x83, 0xEC]); // sub rsp, imm8
190                code.push(total_stack_size as u8);
191            }
192            else {
193                code.extend_from_slice(&[0x48, 0x81, 0xEC]); // sub rsp, imm32
194                code.extend_from_slice(&(total_stack_size as i32).to_le_bytes());
195            }
196        }
197
198        // Save arguments to shadow space for later use (e.g. by ManagedInstructions)
199        // [rbp + 16] = rcx, [rbp + 24] = rdx, [rbp + 32] = r8, [rbp + 40] = r9
200        code.extend_from_slice(&[0x48, 0x89, 0x4D, 0x10]);
201        code.extend_from_slice(&[0x48, 0x89, 0x55, 0x18]);
202        code.extend_from_slice(&[0x4C, 0x89, 0x45, 0x20]);
203        code.extend_from_slice(&[0x4C, 0x89, 0x4D, 0x28]);
204
205        for block in &function.blocks {
206            labels.insert(block.label.clone(), code.len());
207
208            for inst in &block.instructions {
209                match inst {
210                    GaiaInstruction::Core(core_inst) => match core_inst {
211                        CoreInstruction::PushConstant(constant) => {
212                            match constant {
213                                GaiaConstant::I8(v) => {
214                                    code.push(0x6A); // push (imm8)
215                                    code.push(*v as u8);
216                                }
217                                GaiaConstant::I16(v) => {
218                                    code.push(0x68); // push (imm32, sign-extended)
219                                    code.extend_from_slice(&(*v as i32).to_le_bytes());
220                                }
221                                GaiaConstant::I32(v) => {
222                                    code.push(0x68); // push (imm32)
223                                    code.extend_from_slice(&v.to_le_bytes());
224                                }
225                                GaiaConstant::I64(v) => {
226                                    // mov rax, imm64; push rax
227                                    code.extend_from_slice(&[0x48, 0xB8]);
228                                    code.extend_from_slice(&v.to_le_bytes());
229                                    code.push(0x50); // push rax
230                                }
231                                GaiaConstant::String(s) => {
232                                    // lea rax, [rip + offset]
233                                    code.extend_from_slice(&[0x48, 0x8D, 0x05]);
234                                    let str_offset = *string_table.get(s).unwrap();
235                                    string_patches.push((code.len(), str_offset));
236                                    code.extend_from_slice(&[0, 0, 0, 0]);
237                                    code.push(0x50); // push rax
238                                }
239                                _ => return Err(GaiaError::custom_error("Unsupported constant type for x86 backend")),
240                            }
241                        }
242                        CoreInstruction::Pop => {
243                            code.push(0x58); // pop rax
244                        }
245                        CoreInstruction::Dup => {
246                            // mov rax, [rsp]; push rax
247                            code.extend_from_slice(&[0x48, 0x8B, 0x04, 0x24]);
248                            code.push(0x50);
249                        }
250                        CoreInstruction::Add(_) => {
251                            // pop rbx; pop rax; add rax, rbx; push rax
252                            code.push(0x5B); // pop rbx
253                            code.push(0x58); // pop rax
254                            code.extend_from_slice(&[0x48, 0x01, 0xD8]); // add rax, rbx
255                            code.push(0x50); // push rax
256                        }
257                        CoreInstruction::Sub(_) => {
258                            // pop rbx; pop rax; sub rax, rbx; push rax
259                            code.push(0x5B); // pop rbx
260                            code.push(0x58); // pop rax
261                            code.extend_from_slice(&[0x48, 0x29, 0xD8]); // sub rax, rbx
262                            code.push(0x50); // push rax
263                        }
264                        CoreInstruction::Mul(_) => {
265                            // pop rbx; pop rax; imul rax, rbx; push rax
266                            code.push(0x5B); // pop rbx
267                            code.push(0x58); // pop rax
268                            code.extend_from_slice(&[0x48, 0x0F, 0xAF, 0xC3]); // imul rax, rbx
269                            code.push(0x50); // push rax
270                        }
271                        CoreInstruction::Div(_) => {
272                            // pop rbx; pop rax; cqo; idiv rbx; push rax
273                            code.push(0x5B); // pop rbx
274                            code.push(0x58); // pop rax
275                            code.extend_from_slice(&[0x48, 0x99]); // cqo
276                            code.extend_from_slice(&[0x48, 0xF7, 0xFB]); // idiv rbx
277                            code.push(0x50); // push rax
278                        }
279                        CoreInstruction::Rem(_) => {
280                            // pop rbx; pop rax; cqo; idiv rbx; push rdx
281                            code.push(0x5B); // pop rbx
282                            code.push(0x58); // pop rax
283                            code.extend_from_slice(&[0x48, 0x99]); // cqo
284                            code.extend_from_slice(&[0x48, 0xF7, 0xFB]); // idiv rbx
285                            code.push(0x52); // push rdx
286                        }
287                        CoreInstruction::Neg(_) => {
288                            // pop rax; neg rax; push rax
289                            code.push(0x58); // pop rax
290                            code.extend_from_slice(&[0x48, 0xF7, 0xD8]); // neg rax
291                            code.push(0x50); // push rax
292                        }
293                        CoreInstruction::Not(ty) => {
294                            // pop rax; not rax; push rax
295                            // If boolean, xor rax, 1
296                            code.push(0x58); // pop rax
297                            match ty {
298                                GaiaType::Bool => {
299                                    code.extend_from_slice(&[0x48, 0x83, 0xF0, 0x01]);
300                                    // xor rax, 1
301                                }
302                                _ => {
303                                    code.extend_from_slice(&[0x48, 0xF7, 0xD0]);
304                                    // not rax
305                                }
306                            }
307                            code.push(0x50); // push rax
308                        }
309                        CoreInstruction::Shl(_) => {
310                            // pop rcx; pop rax; shl rax, cl; push rax
311                            code.push(0x59); // pop rcx
312                            code.push(0x58); // pop rax
313                            code.extend_from_slice(&[0x48, 0xD3, 0xE0]); // shl rax, cl
314                            code.push(0x50); // push rax
315                        }
316                        CoreInstruction::Shr(_) => {
317                            // pop rcx; pop rax; shr rax, cl; push rax
318                            code.push(0x59); // pop rcx
319                            code.push(0x58); // pop rax
320                            code.extend_from_slice(&[0x48, 0xD3, 0xE8]); // shr rax, cl
321                            code.push(0x50); // push rax
322                        }
323                        CoreInstruction::And(_) => {
324                            // pop rbx; pop rax; and rax, rbx; push rax
325                            code.push(0x5B); // pop rbx
326                            code.push(0x58); // pop rax
327                            code.extend_from_slice(&[0x48, 0x21, 0xD8]); // and rax, rbx
328                            code.push(0x50); // push rax
329                        }
330                        CoreInstruction::Or(_) => {
331                            // pop rbx; pop rax; or rax, rbx; push rax
332                            code.push(0x5B); // pop rbx
333                            code.push(0x58); // pop rax
334                            code.extend_from_slice(&[0x48, 0x09, 0xD8]); // or rax, rbx
335                            code.push(0x50); // push rax
336                        }
337                        CoreInstruction::Xor(_) => {
338                            // pop rbx; pop rax; xor rax, rbx; push rax
339                            code.push(0x5B); // pop rbx
340                            code.push(0x58); // pop rax
341                            code.extend_from_slice(&[0x48, 0x31, 0xD8]); // xor rax, rbx
342                            code.push(0x50); // push rax
343                        }
344                        CoreInstruction::LoadArg(idx, _ty) => {
345                            match idx {
346                                0 => code.push(0x51),                       // push rcx
347                                1 => code.push(0x52),                       // push rdx
348                                2 => code.extend_from_slice(&[0x41, 0x50]), // push r8
349                                3 => code.extend_from_slice(&[0x41, 0x51]), // push r9
350                                _ => {
351                                    // Load from stack [rbp + 16 + 32 + (idx-4)*8]
352                                    let offset = 48 + (idx - 4) * 8;
353                                    code.extend_from_slice(&[0x48, 0x8B, 0x45]);
354                                    code.push(offset as u8);
355                                    code.push(0x50); // push rax
356                                }
357                            }
358                        }
359                        CoreInstruction::StoreLocal(idx, _ty) => {
360                            // pop rax
361                            code.push(0x58);
362                            // mov [rbp - (idx+1)*8], rax
363                            let offset = (idx + 1) * 8;
364                            code.extend_from_slice(&[0x48, 0x89, 0x45]);
365                            code.push((-(offset as i32)) as u8);
366                        }
367                        CoreInstruction::LoadLocal(idx, _ty) => {
368                            // mov rax, [rbp - (idx+1)*8]
369                            let offset = (idx + 1) * 8;
370                            code.extend_from_slice(&[0x48, 0x8B, 0x45]);
371                            code.push((-(offset as i32)) as u8);
372                            // push rax
373                            code.push(0x50);
374                        }
375                        CoreInstruction::Alloca(_, _) => {
376                            // Handled in prologue, no-op here
377                        }
378                        CoreInstruction::Label(name) => {
379                            labels.insert(name.clone(), code.len());
380                        }
381                        CoreInstruction::Br(target) => {
382                            code.push(0xE9); // jmp rel32
383                            jump_patches.push((code.len(), target.clone()));
384                            code.extend_from_slice(&[0, 0, 0, 0]);
385                        }
386                        CoreInstruction::BrTrue(target) => {
387                            // pop rax; test rax, rax; jnz rel32
388                            code.push(0x58); // pop rax
389                            code.extend_from_slice(&[0x48, 0x85, 0xC0]); // test rax, rax
390                            code.extend_from_slice(&[0x0F, 0x85]); // jnz rel32
391                            jump_patches.push((code.len(), target.clone()));
392                            code.extend_from_slice(&[0, 0, 0, 0]);
393                        }
394                        CoreInstruction::BrFalse(target) => {
395                            // pop rax; test rax, rax; jz rel32
396                            code.push(0x58); // pop rax
397                            code.extend_from_slice(&[0x48, 0x85, 0xC0]); // test rax, rax
398                            code.extend_from_slice(&[0x0F, 0x84]); // jz rel32
399                            jump_patches.push((code.len(), target.clone()));
400                            code.extend_from_slice(&[0, 0, 0, 0]);
401                        }
402                        CoreInstruction::Cmp(cond, _) => {
403                            use crate::instruction::CmpCondition;
404                            // pop rbx; pop rax; cmp rax, rbx; set<cond> al; movzx rax, al; push rax
405                            code.push(0x5B); // pop rbx
406                            code.push(0x58); // pop rax
407                            code.extend_from_slice(&[0x48, 0x39, 0xD8]); // cmp rax, rbx
408                            match cond {
409                                CmpCondition::Eq => code.extend_from_slice(&[0x0F, 0x94, 0xC0]), // sete al
410                                CmpCondition::Ne => code.extend_from_slice(&[0x0F, 0x95, 0xC0]), // setne al
411                                CmpCondition::Lt => code.extend_from_slice(&[0x0F, 0x9C, 0xC0]), // setl al
412                                CmpCondition::Le => code.extend_from_slice(&[0x0F, 0x9E, 0xC0]), // setle al
413                                CmpCondition::Gt => code.extend_from_slice(&[0x0F, 0x9F, 0xC0]), // setg al
414                                CmpCondition::Ge => code.extend_from_slice(&[0x0F, 0x9D, 0xC0]), // setge al
415                            }
416                            code.extend_from_slice(&[0x48, 0x0F, 0xB6, 0xC0]); // movzx rax, al
417                            code.push(0x50); // push rax
418                        }
419                        CoreInstruction::LoadArg(idx, _ty) => {
420                            match idx {
421                                0 => code.push(0x51),                       // push rcx
422                                1 => code.push(0x52),                       // push rdx
423                                2 => code.extend_from_slice(&[0x41, 0x50]), // push r8
424                                3 => code.extend_from_slice(&[0x41, 0x51]), // push r9
425                                _ => {
426                                    // Load from stack [rbp + 16 + 32 + (idx-4)*8]
427                                    let offset = 48 + (idx - 4) * 8;
428                                    code.extend_from_slice(&[0x48, 0x8B, 0x45]);
429                                    code.push(offset as u8);
430                                    code.push(0x50); // push rax
431                                }
432                            }
433                        }
434                        CoreInstruction::StoreLocal(idx, _ty) => {
435                            // pop rax
436                            code.push(0x58);
437                            // mov [rbp - (idx+1)*8], rax
438                            let offset = (idx + 1) * 8;
439                            code.extend_from_slice(&[0x48, 0x89, 0x45]);
440                            code.push((-(offset as i32)) as u8);
441                        }
442                        CoreInstruction::LoadLocal(idx, _ty) => {
443                            // mov rax, [rbp - (idx+1)*8]
444                            let offset = (idx + 1) * 8;
445                            code.extend_from_slice(&[0x48, 0x8B, 0x45]);
446                            code.push((-(offset as i32)) as u8);
447                            // push rax
448                            code.push(0x50);
449                        }
450                        CoreInstruction::Alloca(_, _) => {
451                            // Handled in prologue, no-op here
452                        }
453                        CoreInstruction::Call(name, argc) => {
454                            // Win64 ABI: RCX, RDX, R8, R9
455                            // Pop arguments in reverse order (last arg first)
456                            if *argc >= 4 {
457                                code.extend_from_slice(&[0x41, 0x59]); // pop r9
458                            }
459                            if *argc >= 3 {
460                                code.extend_from_slice(&[0x41, 0x58]); // pop r8
461                            }
462                            if *argc >= 2 {
463                                code.push(0x5A); // pop rdx
464                            }
465                            if *argc >= 1 {
466                                code.push(0x59); // pop rcx
467                            }
468
469                            if internal_functions.contains(name) {
470                                // call rel32 (internal)
471                                let pos = code.len();
472                                code.extend_from_slice(&[0xE8, 0x00, 0x00, 0x00, 0x00]);
473                                internal_call_positions.entry(name.clone()).or_default().push(pos);
474                            }
475                            else {
476                                // call [rip + offset] (external)
477                                let pos = code.len();
478                                code.extend_from_slice(&[0xFF, 0x15, 0x00, 0x00, 0x00, 0x00]);
479                                external_call_positions.entry(name.clone()).or_default().push(pos);
480                            }
481
482                            code.push(0x50); // push rax
483                        }
484                        CoreInstruction::CallIndirect(argc) => {
485                            // Stack: [..., func_ptr, arg1, arg2, ...]
486                            // Pops: args in reverse, then func_ptr
487
488                            if *argc >= 4 {
489                                code.extend_from_slice(&[0x41, 0x59]); // pop r9
490                            }
491                            if *argc >= 3 {
492                                code.extend_from_slice(&[0x41, 0x58]); // pop r8
493                            }
494                            if *argc >= 2 {
495                                code.push(0x5A); // pop rdx
496                            }
497                            if *argc >= 1 {
498                                code.push(0x59); // pop rcx
499                            }
500
501                            // Pop func_ptr into R10
502                            code.extend_from_slice(&[0x41, 0x5A]); // pop r10
503
504                            // Shadow space
505                            code.extend_from_slice(&[0x48, 0x83, 0xEC, 0x20]); // sub rsp, 32
506
507                            // Call R10
508                            code.extend_from_slice(&[0x41, 0xFF, 0xD2]); // call r10
509
510                            // Restore stack
511                            code.extend_from_slice(&[0x48, 0x83, 0xC4, 0x20]); // add rsp, 32
512
513                            code.push(0x50); // push rax
514                        }
515                        CoreInstruction::New(ty_name) => {
516                            // call nyar_new_object(type_name)
517                            // 1. Load string address into RCX (1st arg)
518                            code.extend_from_slice(&[0x48, 0x8D, 0x0D]); // lea rcx, [rip + offset]
519                            let str_offset = *string_table.get(ty_name).unwrap();
520                            let pos = code.len();
521                            string_patches.push((pos, str_offset));
522                            code.extend_from_slice(&[0, 0, 0, 0]);
523
524                            // 2. Call runtime
525                            let symbol = "nyar_new_object".to_string();
526                            code.extend_from_slice(&[0x48, 0x83, 0xEC, 0x20]); // sub rsp, 32
527                            let pos = code.len();
528                            code.extend_from_slice(&[0xFF, 0x15, 0x00, 0x00, 0x00, 0x00]); // call [rip+offset]
529                            external_call_positions.entry(symbol).or_default().push(pos);
530                            code.extend_from_slice(&[0x48, 0x83, 0xC4, 0x20]); // add rsp, 32
531
532                            code.push(0x50); // push rax (obj ptr)
533                        }
534                        CoreInstruction::StoreField(_ty, field) => {
535                            // Stack: [..., obj, value] (value is top)
536                            // Pops: value(r8), obj(rcx)
537                            // Arg2: field_name (rdx)
538
539                            code.extend_from_slice(&[0x41, 0x58]); // pop r8 (value)
540                            code.push(0x59); // pop rcx (obj)
541
542                            // Load field name string into RDX
543                            code.extend_from_slice(&[0x48, 0x8D, 0x15]); // lea rdx, [rip + offset]
544                            let str_offset = *string_table.get(field).unwrap();
545                            let pos = code.len();
546                            string_patches.push((pos, str_offset));
547                            code.extend_from_slice(&[0, 0, 0, 0]);
548
549                            let symbol = "nyar_object_set".to_string();
550                            code.extend_from_slice(&[0x48, 0x83, 0xEC, 0x20]);
551                            let pos = code.len();
552                            code.extend_from_slice(&[0xFF, 0x15, 0x00, 0x00, 0x00, 0x00]);
553                            external_call_positions.entry(symbol).or_default().push(pos);
554                            code.extend_from_slice(&[0x48, 0x83, 0xC4, 0x20]);
555
556                            // StoreField returns void, no push
557                        }
558                        CoreInstruction::LoadField(_ty, field) => {
559                            // Stack: [..., obj]
560                            // Pops: obj(rcx)
561                            // Arg2: key(rdx)
562
563                            code.push(0x59); // pop rcx
564
565                            // Load field name string into RDX
566                            code.extend_from_slice(&[0x48, 0x8D, 0x15]); // lea rdx, [rip + offset]
567                            let str_offset = *string_table.get(field).unwrap();
568                            let pos = code.len();
569                            string_patches.push((pos, str_offset));
570                            code.extend_from_slice(&[0, 0, 0, 0]);
571
572                            let symbol = "nyar_object_get".to_string();
573                            code.extend_from_slice(&[0x48, 0x83, 0xEC, 0x20]);
574                            let pos = code.len();
575                            code.extend_from_slice(&[0xFF, 0x15, 0x00, 0x00, 0x00, 0x00]);
576                            external_call_positions.entry(symbol).or_default().push(pos);
577                            code.extend_from_slice(&[0x48, 0x83, 0xC4, 0x20]);
578
579                            code.push(0x50); // push rax
580                        }
581                        CoreInstruction::NewArray(_, len_on_stack) => {
582                            if *len_on_stack {
583                                code.push(0x59); // pop rcx (length)
584                            }
585                            else {
586                                // Assume 0 length if not on stack? Or error?
587                                // For now, just zero out rcx
588                                code.extend_from_slice(&[0x48, 0x31, 0xC9]); // xor rcx, rcx
589                            }
590
591                            // call nyar_new_array(length)
592                            // We need to implement this symbol in runtime or link it
593                            let symbol = "nyar_new_array".to_string();
594
595                            // Prepare call
596                            code.extend_from_slice(&[0x48, 0x83, 0xEC, 0x20]); // sub rsp, 32
597
598                            let pos = code.len();
599                            code.extend_from_slice(&[0xFF, 0x15, 0x00, 0x00, 0x00, 0x00]); // call [rip+offset]
600                            external_call_positions.entry(symbol).or_default().push(pos);
601
602                            code.extend_from_slice(&[0x48, 0x83, 0xC4, 0x20]); // add rsp, 32
603                            code.push(0x50); // push rax (array ptr)
604                        }
605                        CoreInstruction::StoreElement(_) => {
606                            // Stack: [..., array, index, value]
607                            // Pops: value(r8), index(rdx), array(rcx)
608
609                            code.extend_from_slice(&[0x41, 0x58]); // pop r8 (value)
610                            code.push(0x5A); // pop rdx (index)
611                            code.push(0x59); // pop rcx (array)
612
613                            let symbol = "nyar_array_set".to_string();
614
615                            code.extend_from_slice(&[0x48, 0x83, 0xEC, 0x20]); // sub rsp, 32
616
617                            let pos = code.len();
618                            code.extend_from_slice(&[0xFF, 0x15, 0x00, 0x00, 0x00, 0x00]); // call [rip+offset]
619                            external_call_positions.entry(symbol).or_default().push(pos);
620
621                            code.extend_from_slice(&[0x48, 0x83, 0xC4, 0x20]); // add rsp, 32
622                        }
623                        CoreInstruction::LoadElement(_) => {
624                            // Stack: [..., array, index]
625                            // Pops: index(rdx), array(rcx)
626
627                            code.push(0x5A); // pop rdx (index)
628                            code.push(0x59); // pop rcx (array)
629
630                            let symbol = "nyar_array_get".to_string();
631
632                            code.extend_from_slice(&[0x48, 0x83, 0xEC, 0x20]); // sub rsp, 32
633
634                            let pos = code.len();
635                            code.extend_from_slice(&[0xFF, 0x15, 0x00, 0x00, 0x00, 0x00]); // call [rip+offset]
636                            external_call_positions.entry(symbol).or_default().push(pos);
637
638                            code.extend_from_slice(&[0x48, 0x83, 0xC4, 0x20]); // add rsp, 32
639                            code.push(0x50); // push rax
640                        }
641                        CoreInstruction::ArrayLength => {
642                            // Stack: [..., array]
643                            // Pops: array(rcx)
644
645                            code.push(0x59); // pop rcx
646
647                            let symbol = "nyar_array_len".to_string();
648
649                            code.extend_from_slice(&[0x48, 0x83, 0xEC, 0x20]); // sub rsp, 32
650
651                            let pos = code.len();
652                            code.extend_from_slice(&[0xFF, 0x15, 0x00, 0x00, 0x00, 0x00]); // call [rip+offset]
653                            external_call_positions.entry(symbol).or_default().push(pos);
654
655                            code.extend_from_slice(&[0x48, 0x83, 0xC4, 0x20]); // add rsp, 32
656                            code.push(0x50); // push rax
657                        }
658                        CoreInstruction::ArrayPush => {
659                            // Stack: [..., array, value]
660                            // Pops: value(rdx), array(rcx)
661
662                            code.push(0x5A); // pop rdx (value)
663                            code.push(0x59); // pop rcx (array)
664
665                            let symbol = "nyar_array_push".to_string();
666
667                            code.extend_from_slice(&[0x48, 0x83, 0xEC, 0x20]); // sub rsp, 32
668
669                            let pos = code.len();
670                            code.extend_from_slice(&[0xFF, 0x15, 0x00, 0x00, 0x00, 0x00]); // call [rip+offset]
671                            external_call_positions.entry(symbol).or_default().push(pos);
672
673                            code.extend_from_slice(&[0x48, 0x83, 0xC4, 0x20]); // add rsp, 32
674
675                            // Push result (void -> 0 or new length?)
676                            // JS push returns new length.
677                            // nyar_array_push returns void.
678                            // For now, push 0 (undefined/void)
679                            code.push(0x31);
680                            code.push(0xC0); // xor eax, eax
681                            code.push(0x50); // push rax
682                        }
683                        _ => return Err(GaiaError::custom_error(format!("Unsupported core instruction: {:?}", core_inst))),
684                    },
685                    GaiaInstruction::Managed(managed_inst) => match managed_inst {
686                        ManagedInstruction::CallMethod { method, signature, call_site_id, .. } => {
687                            let argc = signature.params.len() as u32;
688
689                            // 0. Allocate space for the call (shadow space + 5th/6th args)
690                            // 32 (shadow) + 8 (method_name) + 8 (argc) = 48.
691                            // 48 is 16-byte aligned.
692                            code.extend_from_slice(&[0x48, 0x83, 0xEC, 0x30]);
693
694                            // 1. Load vm (rcx) from [rbp + 16]
695                            code.extend_from_slice(&[0x48, 0x8B, 0x4D, 0x10]);
696
697                            // 2. Load ic (rdx) from [rbp + 24]
698                            code.extend_from_slice(&[0x48, 0x8B, 0x55, 0x18]);
699
700                            // 3. Load call_site_id (r8)
701                            code.extend_from_slice(&[0x49, 0xC7, 0xC0]);
702                            if let Some(id) = call_site_id {
703                                code.extend_from_slice(&id.to_le_bytes());
704                            }
705                            else {
706                                code.extend_from_slice(&[0xFF, 0xFF, 0xFF, 0xFF]);
707                            }
708
709                            // 4. Load receiver (r9) from [rsp + 48 + argc * 8]
710                            // Adjusting offset because stack was already sub 48.
711                            // Arguments were pushed onto stack BEFORE this call.
712                            let receiver_offset = 48 + argc * 8;
713                            code.extend_from_slice(&[0x4C, 0x8B, 0x8C, 0x24]);
714                            code.extend_from_slice(&receiver_offset.to_le_bytes());
715
716                            // 5. Load method_name pointer into [rsp + 32]
717                            code.extend_from_slice(&[0x48, 0x8D, 0x05]);
718                            let str_offset = *string_table.get(method).unwrap();
719                            let pos = code.len();
720                            string_patches.push((pos, str_offset));
721                            code.extend_from_slice(&[0, 0, 0, 0]);
722                            code.extend_from_slice(&[0x48, 0x89, 0x44, 0x24, 0x20]);
723
724                            // 6. Load argc into [rsp + 40]
725                            code.extend_from_slice(&[0x48, 0xC7, 0x44, 0x24, 0x28]);
726                            code.extend_from_slice(&argc.to_le_bytes());
727
728                            // 7. Call nyar_managed_call_method
729                            let symbol = "nyar_managed_call_method".to_string();
730                            let pos = code.len();
731                            code.extend_from_slice(&[0xFF, 0x15, 0x00, 0x00, 0x00, 0x00]);
732                            external_call_positions.entry(symbol).or_default().push(pos);
733
734                            // 8. Clean up call space
735                            code.extend_from_slice(&[0x48, 0x83, 0xC4, 0x30]);
736
737                            // 9. Clean up stack (pop args and receiver)
738                            let total_to_pop = (argc + 1) * 8;
739                            code.extend_from_slice(&[0x48, 0x81, 0xC4]);
740                            code.extend_from_slice(&total_to_pop.to_le_bytes());
741
742                            // 10. Push result
743                            code.push(0x50);
744                        }
745                        _ => {
746                            return Err(GaiaError::custom_error(format!("Unsupported managed instruction: {:?}", managed_inst)))
747                        }
748                    },
749                    GaiaInstruction::Domain(domain_inst) => match domain_inst {
750                        DomainInstruction::Neural(node) => {
751                            // 调用 matmul (专用加速路径)
752                            if let gaia_types::neural::NeuralNode::MatMul(_) = node {
753                                let symbol = "gaia_matmul".to_string();
754                                external_call_positions.entry(symbol).or_default().push(code.len());
755                                // 这里插入一个占位符,后续在 PE 生成时打补丁到导入表或静态库
756                                code.extend_from_slice(&[0xFF, 0x15, 0x00, 0x00, 0x00, 0x00]);
757                            }
758                        }
759                        _ => return Err(GaiaError::custom_error(format!("Unsupported domain instruction: {:?}", domain_inst))),
760                    },
761                    _ => return Err(GaiaError::custom_error(format!("Unsupported instruction tier for x86: {:?}", inst))),
762                }
763            }
764
765            // Handle terminator
766            match &block.terminator {
767                crate::program::GaiaTerminator::Jump(target) => {
768                    code.push(0xE9); // jmp rel32
769                    jump_patches.push((code.len(), target.clone()));
770                    code.extend_from_slice(&[0, 0, 0, 0]);
771                }
772                crate::program::GaiaTerminator::Branch { true_label, false_label } => {
773                    // pop rax; test rax, rax; jnz true; jmp false
774                    code.push(0x58); // pop rax
775                    code.extend_from_slice(&[0x48, 0x85, 0xC0]); // test rax, rax
776                    code.extend_from_slice(&[0x0F, 0x85]); // jnz rel32
777                    jump_patches.push((code.len(), true_label.clone()));
778                    code.extend_from_slice(&[0, 0, 0, 0]);
779
780                    code.push(0xE9); // jmp rel32
781                    jump_patches.push((code.len(), false_label.clone()));
782                    code.extend_from_slice(&[0, 0, 0, 0]);
783                }
784                crate::program::GaiaTerminator::Return => {
785                    // Function epilogue
786                    code.extend_from_slice(&[0x48, 0x89, 0xEC]); // mov rsp, rbp
787                    code.push(0x5D); // pop rbp
788                    code.push(0xC3); // ret
789                }
790                _ => {}
791            }
792        }
793
794        for (pos, name) in jump_patches {
795            if let Some(&label_pos) = labels.get(&name) {
796                let relative_offset = (label_pos as i32) - (pos as i32 + 4);
797                code[pos..pos + 4].copy_from_slice(&relative_offset.to_le_bytes());
798            }
799        }
800
801        Ok(())
802    }
803
804    fn create_pe_exe(
805        &self,
806        code: &[u8],
807        program: &GaiaModule,
808        call_exit_pos: usize,
809        external_call_positions: &HashMap<String, Vec<usize>>,
810        rdata_content: &Vec<u8>,
811        string_patches: &Vec<(usize, usize)>,
812    ) -> Result<Vec<u8>> {
813        let mut imports = pe_assembler::types::ImportTable::new();
814
815        // Group imports by library
816        let mut lib_imports: HashMap<String, Vec<String>> = HashMap::new();
817        for imp in &program.imports {
818            lib_imports.entry(imp.library.clone()).or_default().push(imp.symbol.clone());
819        }
820
821        // Add implicit imports from external calls
822        for symbol in external_call_positions.keys() {
823            if symbol.starts_with("nyar_") {
824                let entry = lib_imports.entry("nyar_runtime.dll".to_string()).or_default();
825                if !entry.contains(symbol) {
826                    entry.push(symbol.clone());
827                }
828            }
829        }
830
831        // Ensure kernel32.dll!ExitProcess is present if we are an EXE and it's not provided
832        if !lib_imports.values().any(|funcs| funcs.contains(&"ExitProcess".to_string())) {
833            lib_imports.entry("kernel32.dll".to_string()).or_default().push("ExitProcess".to_string());
834        }
835
836        for (lib, funcs) in lib_imports {
837            imports.entries.push(pe_assembler::types::ImportEntry { dll_name: lib, functions: funcs });
838        }
839
840        let mut pe_program = pe_assembler::types::PeProgram::create_executable(code.to_vec()).with_imports(imports);
841
842        // Ensure some critical fields are set correctly for native x64
843        pe_program.header.optional_header.image_base = 0x400000;
844        pe_program.header.optional_header.section_alignment = 0x1000;
845        pe_program.header.optional_header.file_alignment = 0x200;
846        pe_program.header.optional_header.major_operating_system_version = 6;
847        pe_program.header.optional_header.minor_operating_system_version = 0;
848        pe_program.header.optional_header.major_subsystem_version = 6;
849        pe_program.header.optional_header.minor_subsystem_version = 0;
850
851        // DYNAMIC_BASE | NX_COMPAT | NO_SEH | TERMINAL_SERVER_AWARE
852        pe_program.header.optional_header.dll_characteristics = 0x8160;
853
854        // Recalculate size of image and headers
855        let text_size_aligned = (pe_program.sections[0].data.len() as u32 + 0xFFF) & !0xFFF;
856        let idata_size_aligned =
857            if pe_program.sections.len() > 1 { (pe_program.sections[1].virtual_size + 0xFFF) & !0xFFF } else { 0 };
858
859        let rdata_rva = 0x1000 + text_size_aligned + idata_size_aligned;
860
861        if !rdata_content.is_empty() {
862            pe_program.sections.push(pe_assembler::types::PeSection {
863                name: ".rdata".to_string(),
864                characteristics: 0x40000040, // IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ
865                virtual_size: rdata_content.len() as u32,
866                data: rdata_content.clone(),
867                number_of_line_numbers: 0,
868                number_of_relocations: 0,
869                pointer_to_line_numbers: 0,
870                pointer_to_relocations: 0,
871                pointer_to_raw_data: 0,
872                size_of_raw_data: 0,
873                virtual_address: 0,
874            });
875
876            let code_data = &mut pe_program.sections[0].data;
877            for &(pos, str_offset) in string_patches {
878                let next_rip_rva = 0x1000 + (pos as u32) + 4; // pos points to start of imm32
879                let target_rva = rdata_rva + str_offset as u32;
880                let rel_offset = target_rva as i32 - next_rip_rva as i32;
881                code_data[pos..pos + 4].copy_from_slice(&rel_offset.to_le_bytes());
882            }
883        }
884
885        let rdata_size_aligned = if pe_program.sections.len() > (if idata_size_aligned > 0 { 2 } else { 1 }) {
886            (pe_program.sections.last().unwrap().virtual_size + 0xFFF) & !0xFFF
887        }
888        else {
889            0
890        };
891
892        pe_program.header.optional_header.size_of_image = 0x1000 + text_size_aligned + idata_size_aligned + rdata_size_aligned;
893        pe_program.header.optional_header.size_of_headers = 0x200;
894
895        // Patch calls to imported functions
896        if pe_program.sections.len() > 1 {
897            let iat_rva = pe_program.header.optional_header.data_directories[12].virtual_address;
898            let code_data = &mut pe_program.sections[0].data;
899
900            // Find ExitProcess in IAT
901            let mut exit_process_iat_rva = 0;
902            let mut current_iat_offset = 0;
903
904            for entry in &pe_program.imports.entries {
905                for (_i, func) in entry.functions.iter().enumerate() {
906                    let rva = iat_rva + current_iat_offset;
907
908                    if func == "ExitProcess" {
909                        exit_process_iat_rva = rva;
910                    }
911
912                    if let Some(positions) = external_call_positions.get(func) {
913                        for &pos in positions {
914                            let next_rip_rva = 0x1000 + pos as u32 + 6;
915                            let relative_offset = (rva as i32) - (next_rip_rva as i32);
916                            code_data[pos + 2..pos + 6].copy_from_slice(&relative_offset.to_le_bytes());
917                        }
918                    }
919
920                    current_iat_offset += 8; // x64 IAT entry size
921                }
922                current_iat_offset += 8; // Null terminator for DLL
923            }
924
925            // Patch the hardcoded ExitProcess call in entry point
926            if exit_process_iat_rva != 0 {
927                let next_rip_rva_exit = 0x1000 + call_exit_pos as u32 + 6;
928                let relative_offset_exit = (exit_process_iat_rva as i32) - (next_rip_rva_exit as i32);
929                code_data[call_exit_pos + 2..call_exit_pos + 6].copy_from_slice(&relative_offset_exit.to_le_bytes());
930            }
931        }
932
933        let mut buffer = Vec::new();
934        let mut cursor = std::io::Cursor::new(&mut buffer);
935        let mut writer = pe_assembler::formats::exe::writer::ExeWriter::new(&mut cursor);
936        use pe_assembler::helpers::PeWriter;
937        writer.write_program(&pe_program).map_err(|e| GaiaError::custom_error(format!("PE write error: {}", e)))?;
938        Ok(buffer)
939    }
940}