// Source file: veryl_simulator/ir.rs

1pub(crate) mod context;
2pub(crate) mod declaration;
3mod event;
4mod expression;
5pub(crate) mod inst_layout;
6mod module;
7pub(crate) mod opt;
8pub(crate) mod site_table;
9mod statement;
10pub(crate) mod variable;
11pub(crate) mod write_log;
12
13pub use context::{Context, Conv};
14pub use declaration::ProtoDeclaration;
15pub use event::Event;
16pub use expression::{Expression, ExpressionContext, ProtoDynamicBitSelect, ProtoExpression};
17pub use module::{Module, ProtoModule};
18pub use statement::{
19    CompiledBatchStmt, CompiledBlockStatement, CompiledStmt, ProtoAssignDynamicStatement,
20    ProtoAssignStatement, ProtoForBound, ProtoForRange, ProtoForStatement, ProtoIfStatement,
21    ProtoStatement, ProtoStatementBlock, ProtoStatements, ProtoSystemFunctionCall, RuntimeForBound,
22    RuntimeForRange, Statement, SystemFunctionCall, TbMethodKind, format_assert_message,
23    parse_hex_content, patch_stmt_log_buf,
24};
25pub use variable::{
26    ModuleVariableMeta, ModuleVariables, VarOffset, Variable, VariableElement, VariableMeta,
27    create_variable_meta, native_bytes, read_native_value, read_payload, value_size,
28    write_native_value, write_payload,
29};
30pub use veryl_analyzer::ir::{Op, Type, VarId, VarPath};
31pub use veryl_analyzer::value::Value;
32
33use crate::HashMap;
34use crate::backend::{self, BackendRegistry, CompiledWhole, DispatchOutcome};
35use crate::simulator::SimProfile;
36use crate::simulator_error::SimulatorError;
37use std::sync::Arc;
38use std::sync::OnceLock;
39use veryl_analyzer::ir as air;
40use veryl_analyzer::value::MaskCache;
41use veryl_parser::resource_table::StrId;
42use veryl_parser::token_range::TokenRange;
43
/// Executable simulator IR for one instantiated top-level module.
///
/// Constructed by `Ir::from_module`.  Owns the FF / comb value buffers,
/// the event and comb statement lists, and the FF write log.
pub struct Ir {
    /// Module name, copied from `Module::name`.
    pub name: StrId,
    /// Token range supplied by the caller of `from_module` (`build_ir`
    /// passes the matching top module's `token`).
    pub token: TokenRange,
    /// Ports of the module keyed by `VarPath`, copied from `Module::ports`.
    pub ports: HashMap<VarPath, VarId>,
    /// Byte buffer holding FF values; `settle_comb` hands it to the
    /// whole-comb backend as a `*const u8`.
    pub ff_values: Box<[u8]>,
    /// Byte buffer holding comb values; `settle_comb` hands it to the
    /// whole-comb backend as a `*mut u8`.
    pub comb_values: Box<[u8]>,
    /// Snapshotted from `Config::use_4state`.
    pub use_4state: bool,
    /// Variable table copied from `Module::module_variables`;
    /// `dump_variables` renders it through its `Display` impl.
    pub module_variables: ModuleVariables,
    /// Statement list for each `Event`.
    pub event_statements: HashMap<Event, Vec<Statement>>,
    /// Unified comb statements: all port connections, child comb, and internal
    /// comb combined into a single dependency-sorted list.
    pub comb_statements: Vec<Statement>,
    /// Number of eval_comb passes needed for full convergence.
    /// Pre-computed from backward edges in the sorted comb statement list.
    pub required_comb_passes: usize,
    /// FF write site table: compile-time metadata for each FF write site,
    /// built at ProtoModule conv time.  Consumed by phases that need to
    /// reason about FF writes statically (write-log buffer sizing, NBA
    /// invariant checks, per-Inst metadata for MT-ready commit).
    pub site_table: site_table::SiteTable,
    /// Per-top-level-Inst FF byte range metadata.  Foundation for
    /// cache-line aligned padding and per-Inst independent commit.
    pub inst_layout: inst_layout::InstLayout,
    /// FF write log buffer.  Sized at Ir construction time from the
    /// sites in `site_table`; FF writes (JIT + interpret) push entries
    /// during event evaluation and `ff_commit_from_log` applies them
    /// at cycle end.
    ///
    /// Heap-allocated (`Box`) so the buffer's address is stable across
    /// moves of the surrounding `Ir`/`Simulator` — JIT code holds a raw
    /// pointer baked into each `Statement::Compiled` at construction.
    pub write_log_buffer: Box<write_log::WriteLogBuffer>,
    /// Whether FF classification optimization is disabled
    /// (snapshotted from `Config::disable_ff_opt`).
    pub disable_ff_opt: bool,
    /// Diagnostic: number of nontrivial SCCs found in the pre-JIT comb
    /// graph.  Real combinational loops are rejected by `analyze_dependency`,
    /// so any non-zero value here indicates duplicate ProtoStatements in
    /// the simulator IR assembly.  See `Module::nontrivial_comb_scc`.
    pub nontrivial_comb_scc: usize,
    /// Whole-comb dispatch handle.  `Some` when a backend (today:
    /// AOT-C) committed to a one-function compile via
    /// `Backend::compile_whole_comb`; `settle_comb` invokes its
    /// `try_dispatch` in place of per-chunk Cranelift.  `None` keeps
    /// the per-chunk loop.
    pub whole_comb: Option<Arc<dyn CompiledWhole>>,
    /// Snapshotted from `Config::aot_c_validate`: when set, `settle_comb` /
    /// `step` dual-run the AOT-C and Cranelift paths and panic on divergence.
    pub aot_c_validate: bool,
    /// Per-event whole-event dispatch handles.  When the current
    /// event's `try_dispatch` succeeds, `step()` invokes it instead of
    /// the per-stmt Cranelift dispatch.  Built in `ProtoModule::conv`
    /// when `Config::aot_c_event` is set and the emitter covered every
    /// event stmt.
    pub whole_events: HashMap<Event, Arc<dyn CompiledWhole>>,
}
99
100impl Ir {
101    pub fn from_module(module: Module, config: &Config, token: TokenRange) -> Ir {
102        let mut ir = Ir {
103            name: module.name,
104            token,
105            ports: module.ports,
106            ff_values: module.ff_values,
107            comb_values: module.comb_values,
108            use_4state: config.use_4state,
109            module_variables: module.module_variables,
110            event_statements: module.event_statements,
111            comb_statements: module.comb_statements,
112            required_comb_passes: module.required_comb_passes,
113            write_log_buffer: {
114                let (narrow_cap, wide_cap) = write_log_capacity(&module.site_table);
115                Box::new(write_log::WriteLogBuffer::with_capacity(
116                    narrow_cap, wide_cap,
117                ))
118            },
119            site_table: module.site_table,
120            inst_layout: module.inst_layout,
121            disable_ff_opt: config.disable_ff_opt,
122            nontrivial_comb_scc: module.nontrivial_comb_scc,
123            whole_comb: module.whole_comb,
124            aot_c_validate: config.aot_c_validate,
125            whole_events: module.whole_events,
126        };
127        // Bake the WriteLogBuffer's heap-stable address into every
128        // JIT-dispatched Compiled/CompiledBatch so emitted code can perform
129        // inline log pushes without a TLS lookup.
130        ir.install_write_log_ptr();
131        ir
132    }
133
134    /// Walk every event/comb statement tree and overwrite the placeholder
135    /// `log_buf` field in `Statement::Compiled` / `Statement::CompiledBatch`
136    /// with the actual heap address of `self.write_log_buffer`.
137    ///
138    /// Called once at the end of `from_module`.  The address is stable
139    /// for `self`'s lifetime because the buffer lives on the heap
140    /// inside a `Box`.
141    fn install_write_log_ptr(&mut self) {
142        let log_buf =
143            (&*self.write_log_buffer) as *const _ as *mut write_log::WriteLogBuffer as *mut u8;
144        for stmts in self.event_statements.values_mut() {
145            for s in stmts {
146                patch_stmt_log_buf(s, log_buf);
147            }
148        }
149        for s in &mut self.comb_statements {
150            patch_stmt_log_buf(s, log_buf);
151        }
152    }
153
154    /// Evaluate comb for `required_comb_passes` passes.
155    ///
156    /// Real combinational loops are rejected by `analyze_dependency`
157    /// (error: `combinational_loop`), so once control reaches this function
158    /// the stmt-level graph is an acyclic DAG whose depth determines how
159    /// many passes are needed to settle.  No iteration-to-convergence is
160    /// required, and no runtime "did anything change?" check is performed.
161    pub fn settle_comb(&self, mask_cache: &mut MaskCache, profile: &mut SimProfile) {
162        #[cfg(feature = "profile")]
163        {
164            profile.settle_comb_count += 1;
165        }
166        let _ = profile; // suppress unused warning when profile feature is off
167
168        // Dispatch: when a whole-comb backend (today: AOT-C) is ready,
169        // invoke it in place of per-chunk Cranelift dispatch.  When
170        // VERYL_AOT_C_VALIDATE=1 (`self.aot_c_validate`) we additionally
171        // dual-run the whole-comb and the per-chunk path and panic on
172        // first divergence.  Both paths fall through to Cranelift if
173        // the whole-comb backend declines (`whole_comb == None`) or
174        // returns `NotReady` (async compile pending).
175        if let Some(whole) = self.whole_comb.as_ref() {
176            // Cache env var lookups in a process-static OnceLock: settle_comb
177            // runs once per cycle, so a per-cycle `std::env::var`/getenv would
178            // be a hot-path cost.
179            static AOT_C_PASSES_OVERRIDE: OnceLock<Option<usize>> = OnceLock::new();
180            let validate = self.aot_c_validate;
181            let env_passes = *AOT_C_PASSES_OVERRIDE.get_or_init(|| {
182                std::env::var("VERYL_AOT_C_PASSES")
183                    .ok()
184                    .and_then(|s| s.parse::<usize>().ok())
185            });
186            let ff_ptr = self.ff_values.as_ptr();
187            let comb_ptr = self.comb_values.as_ptr() as *mut u8;
188            // AOT-C comb eval never writes the log (the emitted C does
189            // `(void)write_log`), so the pointer is unused.  Pass the real
190            // heap-stable buffer address anyway to satisfy the FuncPtr
191            // contract (3rd arg is `*mut u8`).
192            let log_ptr = (&*self.write_log_buffer as *const _ as *const u8) as *mut u8;
193            let passes = env_passes.unwrap_or(self.required_comb_passes).max(1);
194
195            if !validate {
196                // Common case: passes == 1 (no SCC backward edges).
197                for _ in 0..passes {
198                    match whole.try_dispatch(ff_ptr, comb_ptr, log_ptr) {
199                        DispatchOutcome::Done => {}
200                        DispatchOutcome::NotReady => {
201                            // Async compile not finished yet — drop to
202                            // Cranelift for this cycle.
203                            self.run_chunked_settle(mask_cache, profile);
204                            return;
205                        }
206                    }
207                }
208                return;
209            }
210
211            // Validate path: delegate to backend::validate, which
212            // snapshots inputs, runs whole-comb, restores, runs
213            // Cranelift, and diffs.  Panics on divergence.
214            backend::validate::settle_comb(self, whole.as_ref(), passes, mask_cache, profile);
215            return;
216        }
217
218        self.run_chunked_settle(mask_cache, profile);
219    }
220
221    /// Cranelift-only settle path, factored out so the validate mode can
222    /// invoke it after AOT-C eval has run and the buffers have been restored.
223    pub(crate) fn run_chunked_settle(&self, mask_cache: &mut MaskCache, profile: &mut SimProfile) {
224        let _ = profile;
225
226        // `VERYL_MIN_PASSES_OVERRIDE` is still honoured as a debug knob.
227        static MIN_PASSES_OVERRIDE: OnceLock<Option<usize>> = OnceLock::new();
228        let min_override = *MIN_PASSES_OVERRIDE.get_or_init(|| {
229            std::env::var("VERYL_MIN_PASSES_OVERRIDE")
230                .ok()
231                .and_then(|s| s.parse().ok())
232        });
233        let passes = min_override.unwrap_or(self.required_comb_passes);
234        for _ in 0..passes {
235            self.eval_comb_full(mask_cache, profile);
236            #[cfg(feature = "profile")]
237            {
238                profile.comb_eval_count += 1;
239            }
240        }
241    }
242
243    /// Evaluate unified comb once.
244    /// Called by settle_comb() for each required pass.
245    pub fn eval_comb_full(&self, mask_cache: &mut MaskCache, profile: &mut SimProfile) {
246        let _ = profile;
247        #[cfg(feature = "profile")]
248        let start = std::time::Instant::now();
249
250        for x in &self.comb_statements {
251            dispatch_stmt_fast(x, mask_cache);
252        }
253
254        #[cfg(feature = "profile")]
255        {
256            profile.eval_comb_full_ns += start.elapsed().as_nanos() as u64;
257        }
258    }
259
260    /// Number of statements in comb_statements (for profiling).
261    pub fn comb_stmt_count(&self) -> (usize, usize, usize) {
262        let mut binary = 0;
263        let mut interp = 0;
264        let mut total = 0;
265        for s in &self.comb_statements {
266            total += 1;
267            if s.is_compiled() {
268                binary += 1;
269            } else {
270                interp += 1;
271            }
272        }
273        (total, binary, interp)
274    }
275
276    pub fn dump_variables(&self) -> String {
277        format!("{}", self.module_variables)
278    }
279
280    /// Returns (jit_count, total_count) of top-level statements across all events and comb.
281    pub fn jit_stats(&self) -> (usize, usize) {
282        let mut jit = 0;
283        let mut total = 0;
284        for stmts in self.event_statements.values() {
285            for s in stmts {
286                total += 1;
287                if s.is_compiled() {
288                    jit += 1;
289                }
290            }
291        }
292        for s in &self.comb_statements {
293            total += 1;
294            if s.is_compiled() {
295                jit += 1;
296            }
297        }
298        (jit, total)
299    }
300
301    /// Returns detailed stats: (comb_jit, comb_interp, event_jit, event_interp)
302    pub fn detailed_stats(&self) -> (usize, usize, usize, usize) {
303        let mut comb_jit = 0;
304        let mut comb_interp = 0;
305        let mut event_jit = 0;
306        let mut event_interp = 0;
307        for s in &self.comb_statements {
308            if s.is_compiled() {
309                comb_jit += 1;
310            } else {
311                comb_interp += 1;
312            }
313        }
314        for stmts in self.event_statements.values() {
315            for s in stmts {
316                if s.is_compiled() {
317                    event_jit += 1;
318                } else {
319                    event_interp += 1;
320                }
321            }
322        }
323        (comb_jit, comb_interp, event_jit, event_interp)
324    }
325}
326
327/// Inline-friendly dispatch for the per-cycle hot loop.  Handles the
328/// common JIT cases (Compiled / CompiledBatch) with a direct indirect call
329/// and falls back to `Statement::eval_step` for the interpreter path.
330///
331/// Inlining at the call site removes the (otherwise non-inlined)
332/// `Statement::eval_step` function-call frame plus the 10-arm match
333/// jump it performs.
334#[inline(always)]
335pub fn dispatch_stmt_fast(s: &Statement, mask_cache: &mut MaskCache) {
336    match s {
337        Statement::Compiled(c) => unsafe {
338            (c.artifact.func)(c.ff, c.comb, c.log_buf);
339        },
340        Statement::CompiledBatch(c) => unsafe {
341            let f = c.artifact.func;
342            for &(ff, comb) in &c.args {
343                f(ff, comb, c.log_buf);
344            }
345        },
346        _ => {
347            s.eval_step(mask_cache);
348        }
349    }
350}
351
// SAFETY: Each Ir exclusively owns its ff_values/comb_values buffers.
// Raw pointers in Statements point into these buffers — no cross-Ir aliasing,
// so moving a whole Ir to another thread moves the pointers together with
// the memory they refer to.
// `Arc<ChunkArtifact>` handles inside `Statement::Compiled` / `CompiledBlockStatement`
// keep JIT code pages alive (via the artifact's keepalive field).
// NOTE: Ir is intentionally NOT Sync. Sharing &Ir across threads would allow
// concurrent mutation of ff_values/comb_values via interior raw pointers.
unsafe impl Send for Ir {}
359
360/// Initial WriteLogBuffer capacities derived from the FF write site table.
361/// Returns `(narrow_cap, wide_cap)`.  Narrow FFs (`native_bytes ≤ 8`) emit
362/// at most 2 entries per cycle (payload + 4-state mask); wide FFs emit at
363/// most 2 wide entries per cycle (one per payload/mask).  Each contributes
364/// to its respective pool, with a ×2 over-provisioning headroom for
365/// initial dual-writes and multi-RMW chains.
366fn write_log_capacity(site_table: &site_table::SiteTable) -> (usize, usize) {
367    let mut narrow: usize = 0;
368    let mut wide: usize = 0;
369    let mut any_wide = false;
370    for s in &site_table.sites {
371        let nb = s.native_bytes as usize;
372        if nb <= 8 {
373            narrow += 2 * 2;
374        } else {
375            any_wide = true;
376            // Number of wide entries needed (≤56 byte payload per entry).
377            let chunks = nb.div_ceil(write_log::WRITE_LOG_WIDE_ENTRY_PAYLOAD_BYTES);
378            wide += 2 * chunks * 2;
379        }
380    }
381    // Narrow floor avoids tiny designs ending up with zero capacity; the
382    // wide pool stays empty when no wide sites exist so designs that only
383    // use narrow FFs skip the 64-byte-aligned wide allocation altogether.
384    let narrow_cap = narrow.max(4096);
385    let wide_cap = if any_wide { wide.max(64) } else { 0 };
386    (narrow_cap, wide_cap)
387}
388
389pub fn build_ir(ir: &air::Ir, top: StrId, config: &Config) -> Result<Ir, SimulatorError> {
390    for x in &ir.components {
391        if let air::Component::Module(x) = x
392            && top == x.name
393        {
394            let token = x.token;
395            let mut context = context::Context {
396                config: config.clone(),
397                backends: BackendRegistry::for_config(config),
398                ..Default::default()
399            };
400            let proto: ProtoModule = Conv::conv(&mut context, x)?;
401            let module = proto.instantiate();
402            return Ok(Ir::from_module(module, config, token));
403        }
404    }
405    Err(SimulatorError::TopModuleNotFound {
406        module_name: top.to_string(),
407    })
408}
409
/// One cached conversion result, as stored by `build_ir_cached`.
struct CacheEntry {
    /// Converted module; each cache hit calls `instantiate()` on it.
    proto: ProtoModule,
    /// Token range of the source module, replayed into `Ir::from_module`.
    token: TokenRange,
}
414
/// Cache for `ProtoModule` keyed by top module name.  JIT binaries are
/// kept alive via shared `Arc<ChunkArtifact>` handles embedded in the
/// cached `ProtoModule`'s `CompiledBlock` statements, so the cache no
/// longer needs a separate keepalive vector.
///
/// NOTE(review): the key is the module name only — the `Config` passed to
/// `build_ir_cached` is not part of it.  Confirm callers keep one cache
/// per configuration.
#[derive(Default)]
pub struct ProtoModuleCache {
    /// Cached conversions, filled by `build_ir_cached` on a cache miss.
    entries: HashMap<StrId, CacheEntry>,
}
423
424pub fn build_ir_cached(
425    ir: &air::Ir,
426    top: StrId,
427    config: &Config,
428    cache: &mut ProtoModuleCache,
429) -> Result<Ir, SimulatorError> {
430    // Cache hit: reuse ProtoModule, just instantiate with fresh buffers
431    if let Some(entry) = cache.entries.get(&top) {
432        let module = entry.proto.instantiate();
433        return Ok(Ir::from_module(module, config, entry.token));
434    }
435
436    // Cache miss: run Conv::conv
437    for x in &ir.components {
438        if let air::Component::Module(x) = x
439            && top == x.name
440        {
441            let token = x.token;
442            let mut context = context::Context {
443                config: config.clone(),
444                backends: BackendRegistry::for_config(config),
445                ..Default::default()
446            };
447
448            let proto: ProtoModule = Conv::conv(&mut context, x)?;
449            let module = proto.instantiate();
450
451            let result = Ir::from_module(module, config, token);
452
453            cache.entries.insert(top, CacheEntry { proto, token });
454
455            return Ok(result);
456        }
457    }
458    Err(SimulatorError::TopModuleNotFound {
459        module_name: top.to_string(),
460    })
461}
462
/// Simulator build configuration.  `Default` leaves every flag `false`
/// and `aot_c_min_stmts` at 0.
#[derive(Clone, Debug, Default)]
pub struct Config {
    /// Select 4-state simulation (`Config::all` refers to `false` as
    /// "2-state").  Snapshotted into `Ir::use_4state`.
    pub use_4state: bool,
    /// Enable the Cranelift JIT path.  `Config::all` only varies it when
    /// the target is not wasm.
    pub use_jit: bool,
    /// Set by `VERYL_DUMP_CRANELIFT=1` in `apply_env`.
    /// NOTE(review): presumably dumps the generated Cranelift IR — confirm.
    pub dump_cranelift: bool,
    /// Set by `VERYL_DUMP_ASM=1` in `apply_env`.
    /// NOTE(review): presumably dumps the generated machine code — confirm.
    pub dump_asm: bool,
    /// Force all always_ff variables to FF (disable is_ff refinement).
    pub disable_ff_opt: bool,
    /// `cc` backend: emit comb as C, compile externally, and dispatch the
    /// `.so` instead of the Cranelift loop (which still covers stmts it can't
    /// emit, so keep `use_jit` true).  Default false; `--backend cc` enables it.
    pub aot_c: bool,
    /// `cc` backend event path: also emit the per-event FF-next + write-log.
    /// Requires `aot_c`.
    pub aot_c_event: bool,
    /// Compile the `.so` on a background thread and hot-swap from Cranelift
    /// once ready, hiding the cold compile latency.  Requires `aot_c`; forced
    /// off under `aot_c_validate` (validation must dual-run from cycle 0).
    pub aot_c_async: bool,
    /// Dual-run `cc` and Cranelift every cycle, panicking on the first
    /// divergence (correctness check).  Implies a synchronous compile.
    pub aot_c_validate: bool,
    /// Minimum module statement count (comb + event) before `cc` is attempted.
    /// The external compile is a fixed per-module cost; on tiny modules it is
    /// pure overhead and floods the host across the fast suite.  Default 0 (no
    /// floor, so tests exercise the path); `--backend cc` raises it to 256.
    pub aot_c_min_stmts: usize,
}
491
492impl Config {
493    /// Apply environment-variable overrides on top of an existing config.
494    pub fn apply_env(&mut self) {
495        if std::env::var("VERYL_DUMP_ASM").ok().as_deref() == Some("1") {
496            self.dump_asm = true;
497        }
498        if std::env::var("VERYL_DUMP_CRANELIFT").ok().as_deref() == Some("1") {
499            self.dump_cranelift = true;
500        }
501        // AOT-C ("cc" backend) env overrides.  The CLI `--backend` is the
502        // primary control; these let callers force a sub-feature on/off (e.g.
503        // bisect a divergence, or disable async for a deterministic profile)
504        // without a flag.  `=1` enables, `=0` disables; anything else leaves
505        // the value untouched.
506        let env_bool = |k: &str| match std::env::var(k).ok().as_deref() {
507            Some("1") => Some(true),
508            Some("0") => Some(false),
509            _ => None,
510        };
511        if let Some(v) = env_bool("VERYL_AOT_C") {
512            self.aot_c = v;
513        }
514        if let Some(v) = env_bool("VERYL_AOT_C_EVENT") {
515            self.aot_c_event = v;
516        }
517        if let Some(v) = env_bool("VERYL_AOT_C_ASYNC") {
518            self.aot_c_async = v;
519        }
520        if let Some(v) = env_bool("VERYL_AOT_C_VALIDATE") {
521            self.aot_c_validate = v;
522        }
523        if let Ok(n) = std::env::var("VERYL_AOT_C_MIN_STMTS")
524            && let Ok(n) = n.parse::<usize>()
525        {
526            self.aot_c_min_stmts = n;
527        }
528    }
529}
530
531// `cc_available()` has moved to `crate::backend::aot_c`.
532
533impl Config {
534    pub fn all() -> Vec<Config> {
535        let mut ret = vec![];
536
537        // `use_jit = true` is meaningful only when the Cranelift backend
538        // is built in; wasm has no chunk backend, so dropping the `true`
539        // arm is purely an optimization (Config::default() already sets
540        // use_jit = false).
541        let jit_options: &[bool] = if cfg!(target_family = "wasm") {
542            &[false]
543        } else {
544            &[false, true]
545        };
546
547        for use_4state in [false, true] {
548            for &use_jit in jit_options {
549                for disable_ff_opt in [false, true] {
550                    ret.push(Config {
551                        use_4state,
552                        use_jit,
553                        disable_ff_opt,
554                        ..Default::default()
555                    });
556                }
557            }
558        }
559
560        // `cc` backend variants: 2-state only, Cranelift fallback for uncovered
561        // stmts (use_jit stays true).  Sync compile — async's swap point varies
562        // with timing, but tests must dual-check cc deterministically vs the
563        // golden output.  Gated on cc_available so cc-less hosts still run.
564        #[cfg(not(target_family = "wasm"))]
565        if backend::aot_c::cc_available() {
566            for disable_ff_opt in [false, true] {
567                ret.push(Config {
568                    use_4state: false,
569                    use_jit: true,
570                    disable_ff_opt,
571                    aot_c: true,
572                    aot_c_event: true,
573                    aot_c_async: false,
574                    ..Default::default()
575                });
576            }
577        }
578
579        ret
580    }
581}
582
583// `lookup_comb_offset` has moved to `backend::validate`.