Skip to main content

wave_emu/
lib.rs

1// Copyright 2026 Ojima Abraham
2// SPDX-License-Identifier: Apache-2.0
3
4//! Public API for the WAVE emulator. Provides Emulator struct for running WAVE
5//!
6//! binaries, configuration options for grid/workgroup dimensions, and execution
7//! results with statistics. Entry point for programmatic use of the emulator.
8
9#![allow(
10    clippy::cast_lossless,
11    clippy::cast_possible_truncation,
12    clippy::cast_possible_wrap,
13    clippy::cast_precision_loss,
14    clippy::cast_sign_loss,
15    clippy::collapsible_if,
16    clippy::collapsible_str_replace,
17    clippy::comparison_to_empty,
18    clippy::float_cmp,
19    clippy::manual_let_else,
20    clippy::match_same_arms,
21    clippy::missing_errors_doc,
22    clippy::missing_panics_doc,
23    clippy::module_name_repetitions,
24    clippy::must_use_candidate,
25    clippy::needless_pass_by_value,
26    clippy::needless_return,
27    clippy::redundant_closure_for_method_calls,
28    clippy::similar_names,
29    clippy::single_match_else,
30    clippy::struct_excessive_bools,
31    clippy::too_many_arguments,
32    clippy::too_many_lines,
33    clippy::unnecessary_wraps,
34    clippy::unused_self
35)]
36
37pub mod barrier;
38pub mod control_flow;
39pub mod core;
40pub mod decoder;
41pub mod executor;
42pub mod memory;
43pub mod scheduler;
44pub mod shuffle;
45pub mod stats;
46pub mod thread;
47pub mod wave;
48
49use crate::core::Core;
50use crate::memory::DeviceMemory;
51use crate::stats::ExecutionStats;
52use std::io::Read;
53use thiserror::Error;
54
55#[derive(Debug, Error)]
56pub enum EmulatorError {
57    #[error("invalid WBIN file: {message}")]
58    InvalidBinary { message: String },
59
60    #[error("memory access out of bounds: address 0x{address:016x}")]
61    MemoryOutOfBounds { address: u64 },
62
63    #[error("invalid instruction at PC 0x{pc:08x}: {message}")]
64    InvalidInstruction { pc: u32, message: String },
65
66    #[error("control flow error: {message}")]
67    ControlFlowError { message: String },
68
69    #[error("deadlock detected: {message}")]
70    Deadlock { message: String },
71
72    #[error("division by zero")]
73    DivisionByZero,
74
75    #[error("stack overflow: {kind}")]
76    StackOverflow { kind: String },
77
78    #[error("IO error: {message}")]
79    IoError { message: String },
80
81    #[error(
82        "instruction limit exceeded: {executed} instructions (limit: {limit}) at PC 0x{pc:08x}"
83    )]
84    InstructionLimitExceeded { limit: u64, executed: u64, pc: u32 },
85}
86
87#[derive(Debug, Clone)]
88pub struct EmulatorConfig {
89    pub grid_dim: [u32; 3],
90    pub workgroup_dim: [u32; 3],
91    pub register_count: u32,
92    pub local_memory_size: usize,
93    pub device_memory_size: usize,
94    pub wave_width: u32,
95    pub trace_enabled: bool,
96    pub f64_enabled: bool,
97    /// Maximum instructions to execute (0 = unlimited). Default: 10,000,000
98    pub max_instructions: u64,
99    /// Initial register values set for all threads before execution.
100    /// Vec of (`register_index`, value) pairs.
101    pub initial_registers: Vec<(u8, u32)>,
102}
103
104impl Default for EmulatorConfig {
105    fn default() -> Self {
106        Self {
107            grid_dim: [1, 1, 1],
108            workgroup_dim: [32, 1, 1],
109            register_count: 32,
110            local_memory_size: 16384,
111            device_memory_size: 1024 * 1024,
112            wave_width: 32,
113            trace_enabled: false,
114            f64_enabled: false,
115            max_instructions: 10_000_000,
116            initial_registers: Vec::new(),
117        }
118    }
119}
120
121#[derive(Debug)]
122pub struct EmulatorResult {
123    pub stats: ExecutionStats,
124}
125
126pub struct Emulator {
127    config: EmulatorConfig,
128    device_memory: DeviceMemory,
129    code: Vec<u8>,
130    kernel_metadata: Vec<KernelMetadata>,
131}
132
133#[derive(Debug, Clone)]
134pub struct KernelMetadata {
135    pub name: String,
136    pub register_count: u32,
137    pub local_memory_size: u32,
138    pub workgroup_size: [u32; 3],
139    pub code_offset: u32,
140    pub code_size: u32,
141}
142
143impl Emulator {
144    pub fn new(config: EmulatorConfig) -> Self {
145        let device_memory = DeviceMemory::new(config.device_memory_size);
146        Self {
147            config,
148            device_memory,
149            code: Vec::new(),
150            kernel_metadata: Vec::new(),
151        }
152    }
153
154    pub fn load_binary(&mut self, binary: &[u8]) -> Result<(), EmulatorError> {
155        if binary.len() < 0x20 {
156            return Err(EmulatorError::InvalidBinary {
157                message: "file too small for WBIN header".into(),
158            });
159        }
160
161        if &binary[0..4] != b"WAVE" {
162            return Err(EmulatorError::InvalidBinary {
163                message: "invalid magic number".into(),
164            });
165        }
166
167        let code_offset =
168            u32::from_le_bytes([binary[0x08], binary[0x09], binary[0x0A], binary[0x0B]]) as usize;
169        let code_size =
170            u32::from_le_bytes([binary[0x0C], binary[0x0D], binary[0x0E], binary[0x0F]]) as usize;
171        let symbol_offset =
172            u32::from_le_bytes([binary[0x10], binary[0x11], binary[0x12], binary[0x13]]) as usize;
173        let metadata_offset =
174            u32::from_le_bytes([binary[0x18], binary[0x19], binary[0x1A], binary[0x1B]]) as usize;
175
176        if code_offset + code_size > binary.len() {
177            return Err(EmulatorError::InvalidBinary {
178                message: "code section extends beyond file".into(),
179            });
180        }
181
182        self.code = binary[code_offset..code_offset + code_size].to_vec();
183
184        if metadata_offset < binary.len() {
185            let kernel_count = u32::from_le_bytes([
186                binary[metadata_offset],
187                binary[metadata_offset + 1],
188                binary[metadata_offset + 2],
189                binary[metadata_offset + 3],
190            ]) as usize;
191
192            for i in 0..kernel_count {
193                let base = metadata_offset + 4 + i * 32;
194                if base + 32 > binary.len() {
195                    break;
196                }
197
198                let name_offset = u32::from_le_bytes([
199                    binary[base],
200                    binary[base + 1],
201                    binary[base + 2],
202                    binary[base + 3],
203                ]) as usize;
204
205                let name = if symbol_offset > 0
206                    && name_offset >= symbol_offset
207                    && name_offset < binary.len()
208                {
209                    let mut end = name_offset;
210                    while end < binary.len() && binary[end] != 0 {
211                        end += 1;
212                    }
213                    String::from_utf8_lossy(&binary[name_offset..end]).to_string()
214                } else {
215                    format!("kernel_{i}")
216                };
217
218                let register_count = u32::from_le_bytes([
219                    binary[base + 4],
220                    binary[base + 5],
221                    binary[base + 6],
222                    binary[base + 7],
223                ]);
224
225                let local_memory_size = u32::from_le_bytes([
226                    binary[base + 8],
227                    binary[base + 9],
228                    binary[base + 10],
229                    binary[base + 11],
230                ]);
231
232                let workgroup_size = [
233                    u32::from_le_bytes([
234                        binary[base + 12],
235                        binary[base + 13],
236                        binary[base + 14],
237                        binary[base + 15],
238                    ]),
239                    u32::from_le_bytes([
240                        binary[base + 16],
241                        binary[base + 17],
242                        binary[base + 18],
243                        binary[base + 19],
244                    ]),
245                    u32::from_le_bytes([
246                        binary[base + 20],
247                        binary[base + 21],
248                        binary[base + 22],
249                        binary[base + 23],
250                    ]),
251                ];
252
253                let kernel_code_offset = u32::from_le_bytes([
254                    binary[base + 24],
255                    binary[base + 25],
256                    binary[base + 26],
257                    binary[base + 27],
258                ]);
259
260                let kernel_code_size = u32::from_le_bytes([
261                    binary[base + 28],
262                    binary[base + 29],
263                    binary[base + 30],
264                    binary[base + 31],
265                ]);
266
267                self.kernel_metadata.push(KernelMetadata {
268                    name,
269                    register_count,
270                    local_memory_size,
271                    workgroup_size,
272                    code_offset: kernel_code_offset,
273                    code_size: kernel_code_size,
274                });
275            }
276        }
277
278        Ok(())
279    }
280
281    pub fn load_device_memory(&mut self, offset: u64, data: &[u8]) -> Result<(), EmulatorError> {
282        self.device_memory.write_slice(offset, data)
283    }
284
285    pub fn read_device_memory(&self, offset: u64, len: usize) -> Result<Vec<u8>, EmulatorError> {
286        let mut result = Vec::with_capacity(len);
287        for i in 0..len {
288            result.push(self.device_memory.read_u8(offset + i as u64)?);
289        }
290        Ok(result)
291    }
292
293    pub fn run(&mut self) -> Result<EmulatorResult, EmulatorError> {
294        self.run_kernel(0)
295    }
296
297    pub fn run_kernel(&mut self, kernel_index: usize) -> Result<EmulatorResult, EmulatorError> {
298        let mut effective_config = self.config.clone();
299
300        if kernel_index < self.kernel_metadata.len() {
301            let meta = &self.kernel_metadata[kernel_index];
302            if meta.register_count > 0 {
303                effective_config.register_count = meta.register_count;
304            }
305            if meta.local_memory_size > 0 {
306                effective_config.local_memory_size = meta.local_memory_size as usize;
307            }
308            if meta.workgroup_size[0] > 0 {
309                effective_config.workgroup_dim = meta.workgroup_size;
310            }
311        }
312
313        let code_start = if kernel_index < self.kernel_metadata.len() {
314            self.kernel_metadata[kernel_index].code_offset as usize
315        } else {
316            0
317        };
318
319        let mut total_stats = ExecutionStats::default();
320
321        for wg_z in 0..effective_config.grid_dim[2] {
322            for wg_y in 0..effective_config.grid_dim[1] {
323                for wg_x in 0..effective_config.grid_dim[0] {
324                    let mut core = Core::new(
325                        &effective_config,
326                        &self.code[code_start..],
327                        &mut self.device_memory,
328                        [wg_x, wg_y, wg_z],
329                    );
330
331                    let stats = core.run()?;
332                    total_stats.merge(&stats);
333                    total_stats.workgroups_executed += 1;
334                }
335            }
336        }
337
338        Ok(EmulatorResult { stats: total_stats })
339    }
340
341    pub fn device_memory(&self) -> &DeviceMemory {
342        &self.device_memory
343    }
344
345    pub fn device_memory_mut(&mut self) -> &mut DeviceMemory {
346        &mut self.device_memory
347    }
348
349    pub fn kernels(&self) -> &[KernelMetadata] {
350        &self.kernel_metadata
351    }
352}
353
354pub fn load_binary_file(path: &std::path::Path) -> Result<Vec<u8>, EmulatorError> {
355    let mut file = std::fs::File::open(path).map_err(|e| EmulatorError::IoError {
356        message: e.to_string(),
357    })?;
358    let mut buffer = Vec::new();
359    file.read_to_end(&mut buffer)
360        .map_err(|e| EmulatorError::IoError {
361            message: e.to_string(),
362        })?;
363    Ok(buffer)
364}