veryl-simulator 0.20.0

A modern hardware description language
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
use crate::HashMap;
use std::fmt;
use veryl_analyzer::ir as air;
use veryl_analyzer::ir::{Type, VarId, VarPath};
use veryl_analyzer::value::Value;
use veryl_parser::resource_table::StrId;

/// Byte offset of a variable tagged with the buffer it belongs to (FF or Comb).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum VarOffset {
    /// Offset into the FF buffer.
    Ff(isize),
    /// Offset into the comb buffer.
    Comb(isize),
}

impl VarOffset {
    /// Returns `true` for the `Ff` variant and `false` for `Comb`.
    #[inline]
    pub fn is_ff(&self) -> bool {
        match *self {
            VarOffset::Ff(_) => true,
            VarOffset::Comb(_) => false,
        }
    }

    /// Returns the stored offset without its buffer tag.
    #[inline]
    pub fn raw(&self) -> isize {
        let (VarOffset::Ff(offset) | VarOffset::Comb(offset)) = *self;
        offset
    }

    /// Returns a copy shifted by `ff_delta` when this is an FF offset, or by
    /// `comb_delta` when it is a comb offset. The variant is preserved.
    #[inline]
    pub fn adjust(&self, ff_delta: isize, comb_delta: isize) -> Self {
        let delta = if self.is_ff() { ff_delta } else { comb_delta };
        Self::new(self.is_ff(), self.raw() + delta)
    }

    /// Builds an offset from an `is_ff` flag and a raw byte offset.
    #[inline]
    pub fn new(is_ff: bool, offset: isize) -> Self {
        match is_ff {
            true => VarOffset::Ff(offset),
            false => VarOffset::Comb(offset),
        }
    }

    /// Splits the offset into `(is_ff, raw_offset)`; the inverse of [`VarOffset::new`].
    #[inline]
    pub fn to_pair(&self) -> (bool, isize) {
        match *self {
            VarOffset::Ff(offset) => (true, offset),
            VarOffset::Comb(offset) => (false, offset),
        }
    }
}

/// Maps a bit width to its unpacked native storage size in bytes.
///
/// Widths up to 32 bits take 4 bytes, 33-64 take 8, 65-128 take 16, and
/// anything wider is rounded up to a whole number of 64-bit words
/// (a multiple of 8 bytes).
pub fn native_bytes(width: usize) -> usize {
    match width {
        0..=32 => 4,
        33..=64 => 8,
        65..=128 => 16,
        _ => width.div_ceil(64) * 8,
    }
}

/// Packed native storage size in bytes, intended for comb storage.
///
/// Widths 1-8 take 1 byte, 9-16 take 2, 17-32 take 4, 33-64 take 8,
/// 65-128 take 16, and anything wider is rounded up to a multiple of
/// 8 bytes. The aim is a smaller working set for narrow comb signals; FF
/// storage stays at the unpacked sizing so ff_swap / ff_commit SIMD layouts
/// are unaffected.
#[inline]
pub fn native_bytes_packed(width: usize) -> usize {
    match width {
        0..=8 => 1,
        9..=16 => 2,
        17..=32 => 4,
        33..=64 => 8,
        65..=128 => 16,
        _ => width.div_ceil(64) * 8,
    }
}

/// Picks the native storage width for a variable based on where it is stored.
///
/// FF storage (`is_ff = true`) always uses `native_bytes(width)`. Comb storage
/// uses `native_bytes_packed(width)` only when the environment variable
/// `VERYL_NATIVE_U8_COMB` is exactly `1`; otherwise it also uses
/// `native_bytes(width)`. The environment variable is read once, on the first
/// call, and the answer is cached for the rest of the process.
///
/// All callers that compute a storage byte width for a variable load/store
/// must use this; op-level intermediate widths keep using `native_bytes`.
#[inline]
pub fn native_bytes_for(width: usize, is_ff: bool) -> usize {
    use std::sync::OnceLock;
    static COMB_PACKED: OnceLock<bool> = OnceLock::new();

    // Resolved before branching so the variable is sampled on the very first
    // call regardless of `is_ff`.
    let comb_packed = *COMB_PACKED.get_or_init(|| {
        matches!(
            std::env::var("VERYL_NATIVE_U8_COMB").as_deref(),
            Ok("1")
        )
    });

    match (is_ff, comb_packed) {
        (false, true) => native_bytes_packed(width),
        _ => native_bytes(width),
    }
}

/// Size in bytes of one value slot: the payload alone in 2-state mode, or the
/// payload followed by an equally sized mask_xz in 4-state mode.
pub fn value_size(native_bytes: usize, use_4state: bool) -> usize {
    let parts = if use_4state { 2 } else { 1 };
    native_bytes * parts
}

/// Loads an `nb`-byte native-endian payload from `ptr`, zero-extended to `u64`.
///
/// `ptr` does not need to be aligned. `nb` must be 1, 2, 4 or 8.
///
/// # Panics
/// Panics when `nb` is any other value.
#[inline(always)]
#[allow(clippy::not_unsafe_ptr_arg_deref)]
pub fn read_payload(ptr: *const u8, nb: usize) -> u64 {
    // SAFETY (all arms): nothing here checks `ptr`; soundness relies on the
    // caller passing a pointer that is valid for reading `nb` bytes.
    match nb {
        1 => u64::from(unsafe { ptr.read_unaligned() }),
        2 => u64::from(unsafe { ptr.cast::<u16>().read_unaligned() }),
        4 => u64::from(unsafe { ptr.cast::<u32>().read_unaligned() }),
        8 => unsafe { ptr.cast::<u64>().read_unaligned() },
        _ => unreachable!("read_payload called with nb={}, expected 1, 2, 4 or 8", nb),
    }
}

/// Loads a 16-byte native-endian payload from `ptr` as a `u128`.
///
/// `ptr` does not need to be aligned.
#[inline(always)]
pub fn read_payload_128(ptr: *const u8) -> u128 {
    let wide = ptr.cast::<u128>();
    // SAFETY: nothing here checks `ptr`; soundness relies on the caller
    // passing a pointer that is valid for reading 16 bytes.
    unsafe { wide.read_unaligned() }
}

/// Stores the low `nb` bytes of `val` at `ptr` in native byte order.
///
/// `ptr` does not need to be aligned. `nb` must be 1, 2, 4 or 8; bits of
/// `val` above `nb * 8` are discarded.
///
/// # Panics
/// Panics when `nb` is any other value.
#[inline(always)]
#[allow(clippy::not_unsafe_ptr_arg_deref)]
pub fn write_payload(ptr: *mut u8, nb: usize, val: u64) {
    // SAFETY (all arms): nothing here checks `ptr`; soundness relies on the
    // caller passing a pointer that is valid for writing `nb` bytes.
    match nb {
        1 => unsafe { ptr.write_unaligned(val as u8) },
        2 => unsafe { ptr.cast::<u16>().write_unaligned(val as u16) },
        4 => unsafe { ptr.cast::<u32>().write_unaligned(val as u32) },
        8 => unsafe { ptr.cast::<u64>().write_unaligned(val) },
        _ => unreachable!("write_payload called with nb={}, expected 1, 2, 4 or 8", nb),
    }
}

/// Stores `val` as 16 native-endian bytes at `ptr`.
///
/// `ptr` does not need to be aligned.
#[inline(always)]
pub fn write_payload_128(ptr: *mut u8, val: u128) {
    let wide = ptr.cast::<u128>();
    // SAFETY: nothing here checks `ptr`; soundness relies on the caller
    // passing a pointer that is valid for writing 16 bytes.
    unsafe { wide.write_unaligned(val) }
}

/// Decodes a `Value` from native byte storage.
///
/// The payload occupies the first `nb` bytes at `ptr`. In 4-state mode the
/// mask_xz follows immediately in the next `nb` bytes; in 2-state mode the
/// mask is taken to be all zeros. The representation depends on `nb`:
/// below 16 bytes a `Value::U64` is built directly, exactly 16 bytes goes
/// through `Value::from_u128`, and anything larger through
/// `Value::from_le_bytes`.
///
/// # Safety
/// `ptr` must point to a valid buffer of at least `nb * (1 + use_4state as usize)` bytes.
pub unsafe fn read_native_value(
    ptr: *const u8,
    nb: usize,
    use_4state: bool,
    width: u32,
    signed: bool,
) -> Value {
    unsafe {
        match nb {
            // Narrow storage: payload and mask each fit in a u64.
            0..=15 => {
                let payload = read_payload(ptr, nb);
                let mask_xz = match use_4state {
                    true => read_payload(ptr.add(nb), nb),
                    false => 0,
                };
                Value::U64(veryl_analyzer::value::ValueU64 {
                    payload,
                    mask_xz,
                    width,
                    signed,
                })
            }
            // Exactly one u128 per part.
            16 => {
                let payload = read_payload_128(ptr);
                let mask_xz = match use_4state {
                    true => read_payload_128(ptr.add(nb)),
                    false => 0u128,
                };
                Value::from_u128(payload, mask_xz, width as usize, signed)
            }
            // Wide storage: hand the raw byte slices to `Value`.
            _ => {
                let payload = std::slice::from_raw_parts(ptr, nb);
                if use_4state {
                    let mask_xz = std::slice::from_raw_parts(ptr.add(nb), nb);
                    Value::from_le_bytes(payload, mask_xz, width as usize, signed)
                } else {
                    // No mask is stored in 2-state mode; supply zeros.
                    let zeros = vec![0u8; nb];
                    Value::from_le_bytes(payload, zeros.as_slice(), width as usize, signed)
                }
            }
        }
    }
}

/// Encodes `val` into native byte storage.
///
/// The payload is written to the first `nb` bytes at `ptr`. In 4-state mode
/// the mask_xz is written to the following `nb` bytes; in 2-state mode
/// nothing is written past the payload.
///
/// # Panics
/// Panics when `nb` is below 16 and `val` is a `Value::BigUint`, or when `nb`
/// is below 16 and not one of the widths `write_payload` accepts.
///
/// # Safety
/// `ptr` must point to a valid buffer of at least `nb * (1 + use_4state as usize)` bytes.
pub unsafe fn write_native_value(ptr: *mut u8, nb: usize, use_4state: bool, val: &Value) {
    unsafe {
        match nb {
            // Narrow storage: only the u64-backed representation is expected.
            0..=15 => match val {
                Value::U64(narrow) => {
                    write_payload(ptr, nb, narrow.payload);
                    if use_4state {
                        write_payload(ptr.add(nb), nb, narrow.mask_xz);
                    }
                }
                Value::BigUint(_) => {
                    unreachable!("BigUint with nb < 16");
                }
            },
            // Exactly one u128 per part.
            16 => {
                write_payload_128(ptr, val.payload_u128());
                if use_4state {
                    write_payload_128(ptr.add(nb), val.mask_xz_u128());
                }
            }
            // Wide storage: let `Value` fill the raw byte slices.
            _ => {
                val.write_payload_to_bytes(std::slice::from_raw_parts_mut(ptr, nb));
                if use_4state {
                    val.write_mask_xz_to_bytes(std::slice::from_raw_parts_mut(ptr.add(nb), nb));
                }
            }
        }
    }
}

/// A variable whose elements are reached through raw pointers.
///
/// The `Display` impl decodes every entry of `current_values` with
/// `read_native_value`, using `native_bytes` as the storage width and `width`
/// as the bit width.
#[derive(Clone, Debug)]
pub struct Variable {
    /// Path of the variable; `Display` prints it as the name of each element.
    pub path: VarPath,
    /// Analyzer type of the variable.
    pub r#type: Type,
    /// Bit width passed to `read_native_value` when an element is decoded.
    pub width: usize,
    /// Storage width in bytes of one payload, as passed to `read_native_value`.
    pub native_bytes: usize,
    /// One pointer per element to the storage of its current value.
    pub current_values: Vec<*mut u8>,
    /// One pointer per element to the storage of its next value.
    /// NOTE(review): presumably only meaningful for FF-backed elements —
    /// confirm against the code that builds `Variable`.
    pub next_values: Vec<*mut u8>,
}

impl fmt::Display for Variable {
    /// Renders one `path[index] = value;` line per element, with the value in
    /// lower-case hex. Elements are always decoded as unsigned 2-state values,
    /// so any mask_xz data is not shown. Trailing whitespace (including the
    /// final newline) is trimmed.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let rendered: String = self
            .current_values
            .iter()
            .enumerate()
            .map(|(index, &slot)| {
                // SAFETY: relies on every pointer in `current_values` being
                // valid for reading `native_bytes` bytes; not checked here.
                let value = unsafe {
                    read_native_value(slot, self.native_bytes, false, self.width as u32, false)
                };
                format!("{}[{}] = {:x};\n", self.path, index, value)
            })
            .collect();

        rendered.trim_end().fmt(f)
    }
}

/// Storage placement of a single element of a variable.
#[derive(Clone, Debug)]
pub struct VariableElement {
    /// Native storage width in bytes of one payload. `create_variable_meta`
    /// fills this from `native_bytes_for`, so it is 4, 8, 16 or a larger
    /// multiple of 8, and may also be 1 or 2 for comb elements when packed
    /// comb storage is enabled.
    pub native_bytes: usize,
    /// Typed byte offset of current value from ff_buf[0] (Ff) or comb_buf[0] (Comb)
    pub current: VarOffset,
    /// Byte offset of next value from ff_buf[0]; meaningful only when is_ff == true.
    /// `create_variable_meta` stores 0 here for comb elements.
    pub next_offset: isize,
}

impl VariableElement {
    #[inline]
    pub fn is_ff(&self) -> bool {
        self.current.is_ff()
    }
    #[inline]
    pub fn current_offset(&self) -> isize {
        self.current.raw()
    }
}

/// Offset-based description of a variable, as produced by `create_variable_meta`.
#[derive(Clone, Debug)]
pub struct VariableMeta {
    /// Path of the variable, copied from the analyzer variable.
    pub path: VarPath,
    /// Analyzer type of the variable.
    pub r#type: Type,
    /// Total bit width of the variable's type.
    pub width: usize,
    /// Native storage width in bytes of one payload; the same value is stored
    /// in every entry of `elements`.
    pub native_bytes: usize,
    /// Placement of each element, in element-index order.
    pub elements: Vec<VariableElement>,
    /// Initial value for each element; used when instantiating.
    /// `create_variable_meta` stores a single entry here when the analyzer
    /// supplied fewer values than there are elements; that entry is the
    /// template to replicate across all elements.
    pub initial_values: Vec<Value>,
}

impl VariableMeta {
    /// Returns (base_current_offset, base_next_offset, stride, is_ff) for dynamic indexing.
    ///
    /// The base offsets and `is_ff` come from element 0. Returns `None` when
    /// the variable has no elements. In debug builds every element is checked
    /// to share element 0's FF/comb placement.
    pub fn dynamic_index_info(&self) -> Option<(isize, isize, isize, bool)> {
        let head = self.elements.first()?;
        let is_ff = head.is_ff();
        let base_current = head.current_offset();

        #[cfg(debug_assertions)]
        for (i, elem) in self.elements.iter().enumerate() {
            debug_assert_eq!(
                elem.is_ff(),
                is_ff,
                "dynamic_index_info: mixed FF/comb in array, elem[{}] is_ff={} but elem[0] is_ff={} (path={:?})",
                i,
                elem.is_ff(),
                is_ff,
                self.path,
            );
        }

        let stride = match self.elements.get(1) {
            // Two or more elements: the stride is the gap between the first two.
            Some(second) => second.current_offset() - base_current,
            // Single FF element: the gap between current and next is half a
            // slot, so the stride is twice that gap.
            None if is_ff => (head.next_offset - base_current) * 2,
            // Single comb element: the layout gives no second offset to
            // measure, so fall back to the payload width. This is only used
            // for dynamic index bounds, so any positive value works.
            None => head.native_bytes as isize,
        };

        Some((base_current, head.next_offset, stride, is_ff))
    }
}

/// Compute offset-based VariableMeta for each variable using native-width byte storage.
///
/// Returns `(variable_meta, ff_bytes, comb_bytes)`.
/// `ff_bytes` / `comb_bytes` are the number of bytes allocated by this module only
/// (not including the start offset).
/// Iterates variables sorted by VarId so the iteration order is deterministic
/// and matches the buffer allocation order in `fill_buffers`.
///
/// Arguments:
/// - `src`: analyzer variables to lay out, keyed by `VarId`.
/// - `ff_table`: queried per `(variable id, element index)` to decide whether
///   an element is placed in the FF buffer.
/// - `use_4state`: when `true`, every value slot is twice the native width
///   (see `value_size`); when `false`, `clear_xz` is called on each initial
///   value.
/// - `ff_start_bytes` / `comb_start_bytes`: byte offsets at which this
///   module's FF and comb allocations begin.
///
/// Layout: each FF element takes two consecutive value slots (current, then
/// next); each comb element takes one value slot.
///
/// Returns `None` as soon as a variable's type reports no total width.
/// `string`-typed params/consts are skipped and get no entry in the map.
pub fn create_variable_meta(
    src: &HashMap<VarId, air::Variable>,
    ff_table: &air::FfTable,
    use_4state: bool,
    ff_start_bytes: isize,
    comb_start_bytes: isize,
) -> Option<(HashMap<VarId, VariableMeta>, usize, usize)> {
    // Running allocation cursors into the FF and comb buffers.
    let mut ff_pos: isize = ff_start_bytes;
    let mut comb_pos: isize = comb_start_bytes;

    // HashMap iteration order is unspecified; sort by key for a stable layout.
    let mut src_sorted: Vec<_> = src.iter().collect();
    src_sorted.sort_by_key(|(k, _)| **k);

    let mut variables = HashMap::default();

    for (k, v) in src_sorted {
        // `string`-typed params/consts have no well-defined native byte
        // width; they are always comptime-inlined so no runtime storage
        // is needed.
        if matches!(v.kind, air::VarKind::Param | air::VarKind::Const)
            && v.r#type.kind == air::TypeKind::String
        {
            continue;
        }
        // Propagates `None` to the caller when the width is unknown.
        let width = v.r#type.total_width()?;

        // Analyzer collapses all-same initial arrays down to a single
        // template value (eval_variable), so derive the real array length
        // from the type.
        let total_array = v.r#type.total_array().unwrap_or(v.value.len());
        // Never fewer elements than supplied values, and always at least one.
        let total_array = total_array.max(v.value.len()).max(1);

        // For multi-element variables (arrays), all elements must have the
        // same FF/comb classification. DynamicVariable expressions assume
        // uniform stride in a single buffer; mixed placement is invalid.
        let any_ff = (0..total_array).any(|i| ff_table.is_ff(v.id, i));
        let force_ff = any_ff && total_array > 1;

        // Per-Variable storage destination is uniform: arrays via force_ff,
        // scalars via the single is_ff query.  This drives the storage-side
        // native width selection (comb-only packed under VERYL_NATIVE_U8_COMB).
        let is_ff_var = if total_array > 1 {
            force_ff
        } else {
            ff_table.is_ff(v.id, 0)
        };
        // nb: bytes of one payload; vs: bytes of one value slot
        // (payload plus mask_xz in 4-state mode).
        let nb = native_bytes_for(width, is_ff_var);
        let vs = value_size(nb, use_4state);

        // `v.value.len() < total_array` means the analyzer supplied only
        // the template entry — replicate it across all elements.
        let template_mode = v.value.len() < total_array;

        let mut elements = Vec::with_capacity(total_array);
        // In template mode only the single template value is stored.
        let mut initial_values = if template_mode {
            Vec::with_capacity(1)
        } else {
            Vec::with_capacity(total_array)
        };

        for i in 0..total_array {
            if force_ff || ff_table.is_ff(v.id, i) {
                // FF element: [current slot][next slot], back to back.
                let current_offset = ff_pos;
                let next_offset = ff_pos + vs as isize;
                elements.push(VariableElement {
                    native_bytes: nb,
                    current: VarOffset::Ff(current_offset),
                    next_offset,
                });
                ff_pos += (vs * 2) as isize; // current + next
            } else {
                // Comb element: a single slot; `next_offset` is unused and left at 0.
                let current_offset = comb_pos;
                elements.push(VariableElement {
                    native_bytes: nb,
                    current: VarOffset::Comb(current_offset),
                    next_offset: 0,
                });
                comb_pos += vs as isize;
            }

            // Template mode records an initial value for element 0 only.
            if template_mode && i > 0 {
                continue;
            }
            // Prefer the element's own value, then the template (first) value,
            // and fall back to an all-X value when the analyzer supplied none.
            let raw = if let Some(val) = v.value.get(i) {
                val.clone()
            } else if let Some(template) = v.value.first() {
                template.clone()
            } else {
                Value::new_x(width, v.r#type.signed)
            };
            let mut val = raw;
            if !use_4state {
                val.clear_xz();
            }
            initial_values.push(val);
        }

        let meta = VariableMeta {
            path: v.path.clone(),
            r#type: v.r#type.clone(),
            width,
            native_bytes: nb,
            elements,
            initial_values,
        };
        variables.insert(*k, meta);
    }

    // Debug-only sanity check: every assigned offset must fall inside the
    // range this call allocated in its buffer.
    #[cfg(debug_assertions)]
    {
        let ff_end = ff_pos;
        let comb_end = comb_pos;
        for meta in variables.values() {
            for elem in &meta.elements {
                let off = elem.current_offset();
                match elem.current {
                    VarOffset::Ff(_) => debug_assert!(
                        off >= ff_start_bytes && off < ff_end,
                        "FF offset {} out of range [{}, {})",
                        off,
                        ff_start_bytes,
                        ff_end
                    ),
                    VarOffset::Comb(_) => debug_assert!(
                        off >= comb_start_bytes && off < comb_end,
                        "Comb offset {} out of range [{}, {})",
                        off,
                        comb_start_bytes,
                        comb_end
                    ),
                }
            }
        }
    }

    // Report sizes relative to the start offsets, i.e. bytes added by this call.
    Some((
        variables,
        (ff_pos - ff_start_bytes) as usize,
        (comb_pos - comb_start_bytes) as usize,
    ))
}

/// Hierarchical variable metadata tree: each module has its own variable_meta
/// and a list of child modules.
#[derive(Clone, Debug)]
pub struct ModuleVariableMeta {
    /// Name of this module.
    pub name: StrId,
    /// Offset-based metadata of this module's own variables, keyed by `VarId`.
    pub variable_meta: HashMap<VarId, VariableMeta>,
    /// Metadata trees of the child modules.
    pub children: Vec<ModuleVariableMeta>,
}

/// Hierarchical variable tree with resolved pointers into the flat buffers.
#[derive(Clone, Debug)]
pub struct ModuleVariables {
    /// Name of this module; printed in the `module <name>:` header by `Display`.
    pub name: StrId,
    /// This module's own variables, keyed by `VarId`.
    pub variables: HashMap<VarId, Variable>,
    /// Variable trees of the child modules.
    pub children: Vec<ModuleVariables>,
}

impl fmt::Display for ModuleVariables {
    /// Prints this module and all of its descendants, starting with no indentation.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        const ROOT_INDENT: usize = 0;
        Self::fmt_with_indent(self, f, ROOT_INDENT)
    }
}

impl ModuleVariables {
    /// Writes this module at nesting depth `indent` (two spaces per level),
    /// then each child one level deeper.
    ///
    /// The output is a `module <name>:` header followed by the module's
    /// variables in ascending `VarId` order, each rendered line indented one
    /// step further than the header.
    fn fmt_with_indent(&self, f: &mut fmt::Formatter<'_>, indent: usize) -> fmt::Result {
        let pad = "  ".repeat(indent);
        writeln!(f, "{}module {}:", pad, self.name)?;

        // Sort by id so the listing does not depend on map iteration order.
        let mut ordered: Vec<_> = self.variables.iter().collect();
        ordered.sort_by_key(|(id, _)| **id);

        for (_, variable) in ordered {
            let rendered = variable.to_string();
            for line in rendered.lines() {
                writeln!(f, "{}  {}", pad, line)?;
            }
        }

        self.children
            .iter()
            .try_for_each(|child| child.fmt_with_indent(f, indent + 1))
    }
}