Skip to main content

dsfb_gpu_debug_core/
event.rs

//! The canonical trace-event record.
//!
//! `TraceEvent` is the input atom of the DSFB-GPU-Debug pipeline. Each
//! record represents one observable point in the debug catalog — a span
//! emitted by a tracing system, a latency sample, an error stamp. The shape
//! is intentionally narrow: we keep only the fields the downstream pipeline
//! actually reads, so the canonical byte form is small and the hash chain
//! stays cheap to compute.
//!
//! Layout decisions:
//!
//! * `#[repr(C)]` so the struct can cross the FFI boundary into CUDA
//!   without per-field marshaling. The CUDA-side mirror in
//!   `cuda/layout.cuh` (introduced in Section H) lays the fields out
//!   identically.
//! * Field order is part of the in-crate canonical trace contract. Changing
//!   this order would change the catalog hash and therefore the case-file
//!   hash, so the order is not to be reshuffled.
//! * Widths chosen for the bounded v0 demo: `latency_us: u32` covers up to
//!   ~71 minutes of latency (well beyond the contract clamp of 32 767 ms),
//!   `entity_id` and `route_id` are `u32` to leave room without paying for
//!   `u64`, and the 16-bit status/error/kind/flags fields keep the total
//!   record size small.
//!
//! Stability: this type is deserialized from the canonical JSON fixture and
//! re-serialized into the case file's `input_catalog_hash`. The
//! serialization order is fixed by `serialize::write_event` (canonical key
//! ordering); modifying field names or order requires bumping the contract
//! version.
30
/// One observable point in a trace: a span, a latency sample, or an error
/// stamp.
///
/// Every field is an unsigned integer, and the deserializer rejects negative
/// literals. Prefer building events through the fixture synthesizer or by
/// parsing canonical JSON rather than assigning fields one at a time, so the
/// bit-stability rules around field widths are honored automatically.
///
/// The struct is `#[repr(C)]` and its field order is part of the canonical
/// trace contract: reordering, widening, or narrowing a field changes the
/// catalog hash and requires a contract-version bump. On targets where `u64`
/// is 8-byte aligned the declared fields occupy 44 bytes and the record is
/// padded to 48.
#[repr(C)]
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
pub struct TraceEvent {
    /// Timestamp in nanoseconds, counted from the synthetic epoch (event 0).
    /// Dividing it by the contract's `window_size_ms * 1_000_000` yields the
    /// window index used by the pipeline.
    pub ts_ns: u64,
    /// Owner of the event — a service, process, or sensor. Bounded by the
    /// contract's `n_entities`.
    pub entity_id: u32,
    /// Route inside the entity, such as an HTTP endpoint or a sensor channel.
    /// The v0 pipeline does not window on it; it is kept so the canonical
    /// bytes (and therefore the hash chain) cover the full fixture shape.
    pub route_id: u32,
    /// Identifier of the tracing span. Kept for catalog-hash fidelity only;
    /// the residual/sign stages never read it.
    pub span_id: u64,
    /// Identifier of the parent span; `0` marks a root span.
    pub parent_span_id: u64,
    /// Measured latency in microseconds. The residual boundary clamps it to
    /// `contract.numeric.latency_clamp_ms * 1000` so Q16.16 quantization
    /// stays in range.
    pub latency_us: u32,
    /// HTTP-style (or domain-equivalent) status code. It flows into the
    /// canonical bytes, but the v0 pipeline classifies errors through
    /// `error_code`, not through this field.
    pub status_code: u16,
    /// Domain error code; `0` means "no error". Feeds the error-rate
    /// derivative of the residual.
    pub error_code: u16,
    /// Free-form discriminator for the kind of event (request, response,
    /// log, sample). Unused by v0 but retained in the catalog hash.
    pub event_kind: u16,
    /// Miscellaneous flag bits (sampled, replayed, simulated), carried
    /// verbatim into the catalog hash.
    pub flags: u16,
}
76
77impl TraceEvent {
78    /// Construct a fully-specified `TraceEvent`. Used by the fixture
79    /// synthesizer and tests. There is no builder pattern on purpose —
80    /// the type is small enough that positional construction is more
81    /// auditable than a fluent builder.
82    #[must_use]
83    #[allow(clippy::too_many_arguments)]
84    pub const fn new(
85        ts_ns: u64,
86        entity_id: u32,
87        route_id: u32,
88        span_id: u64,
89        parent_span_id: u64,
90        latency_us: u32,
91        status_code: u16,
92        error_code: u16,
93        event_kind: u16,
94        flags: u16,
95    ) -> Self {
96        Self {
97            ts_ns,
98            entity_id,
99            route_id,
100            span_id,
101            parent_span_id,
102            latency_us,
103            status_code,
104            error_code,
105            event_kind,
106            flags,
107        }
108    }
109
110    /// The window index this event falls into, given a window size in
111    /// nanoseconds. Integer division; ties go to the lower window.
112    #[must_use]
113    pub const fn window_index(&self, window_size_ns: u64) -> u32 {
114        // Saturate at u32::MAX rather than wrapping if the synthetic clock
115        // ever exceeds 2^32 windows; the contract's bounded fixtures stay
116        // well under that.
117        let raw = self.ts_ns / window_size_ns;
118        if raw > u32::MAX as u64 {
119            u32::MAX
120        } else {
121            raw as u32
122        }
123    }
124}
125
/// R.11c — 16-byte GPU-ingest projection of [`TraceEvent`].
///
/// Holds only what the `window_feature_kernel_structured` kernel reads:
/// `ts_ns`, `entity_id`, `latency_us`, and a single "`error_code` was
/// non-zero" bit. The throughput dispatch copies this projection
/// host-to-device in place of the 48-byte audit-grade `TraceEvent`,
/// shrinking the PCIe payload roughly 3× at full scale
/// (192 MB → 64 MB at K=128 256×4096).
///
/// # Audit invariance
///
/// The 48-byte `TraceEvent` byte form, the Audit-mode FFI, and every D16
/// audit-chain golden hash are left untouched. Only the D64 throughput
/// path opts in to this projection.
///
/// # Provenance
///
/// The projection is a deterministic function of `TraceEvent[]`. An
/// auditor can re-pack the events with
/// `GpuTraceEventCompact::from_trace_event` and check that the recorded
/// `compact_event_projection_hash` (SHA-256 over the packed byte stream)
/// matches. The dispatch diagnostic surfaces that hash so a verifier can
/// confirm the compact bytes were not silently substituted between
/// catalog ingest and the kernel.
///
/// # Byte layout (16 bytes, 8-byte aligned, `repr(C)`)
///
/// ```text
/// offset  field               type
///      0  ts_ns               u64
///      8  entity_and_error    u32   low 31 bits = entity_id,
///                                   high bit    = error_code != 0
///     12  latency_us          u32
/// ```
///
/// `entity_id` is limited to 31 bits (at most 2^31 − 1). Every fixture
/// the v0 plan supports caps `n_entities` below 2^15.
#[repr(C)]
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
pub struct GpuTraceEventCompact {
    /// Timestamp in nanoseconds, counted from the synthetic epoch.
    pub ts_ns: u64,
    /// Packed `(entity_id, error_flag)` word: bits 0–30 hold `entity_id`,
    /// and bit 31 is set exactly when the source event's `error_code` was
    /// non-zero.
    pub entity_and_error: u32,
    /// Measured latency in microseconds, copied as the full `u32` from
    /// `TraceEvent` so any upstream clamp is preserved byte-for-byte.
    pub latency_us: u32,
}
169
170impl GpuTraceEventCompact {
171    /// Fixed byte width of one compact event. Mirrored on the GPU
172    /// side as `cuda/layout.cuh::GpuTraceEventCompact`.
173    pub const SIZE: usize = 16;
174    /// Bit-31 flag in `entity_and_error` indicating
175    /// `TraceEvent::error_code != 0`.
176    pub const ERROR_BIT: u32 = 1u32 << 31;
177    /// Bit-mask for the `entity_id` field within
178    /// `entity_and_error`.
179    pub const ENTITY_MASK: u32 = 0x7FFF_FFFF;
180
181    /// Pack a single `TraceEvent` into the compact projection. The
182    /// resulting bytes are a deterministic function of the input;
183    /// two events that compare equal under
184    /// `PartialEq<TraceEvent>` produce identical compact bytes.
185    #[must_use]
186    pub const fn from_trace_event(ev: &TraceEvent) -> Self {
187        let entity_bits = ev.entity_id & Self::ENTITY_MASK;
188        let error_bit = if ev.error_code != 0 {
189            Self::ERROR_BIT
190        } else {
191            0
192        };
193        Self {
194            ts_ns: ev.ts_ns,
195            entity_and_error: entity_bits | error_bit,
196            latency_us: ev.latency_us,
197        }
198    }
199
200    /// Recover the entity id stored in the low 31 bits of
201    /// `entity_and_error`.
202    #[must_use]
203    pub const fn entity_id(&self) -> u32 {
204        self.entity_and_error & Self::ENTITY_MASK
205    }
206
207    /// `true` iff the original `TraceEvent::error_code` was
208    /// non-zero. Stored as bit 31 of `entity_and_error` so the
209    /// compact projection holds in 16 bytes.
210    #[must_use]
211    pub const fn error_nonzero(&self) -> bool {
212        (self.entity_and_error & Self::ERROR_BIT) != 0
213    }
214}
215
/// R.11c — project every event in `events` onto its compact GPU form,
/// preserving order.
///
/// Helper for the throughput dispatch. The audit path keeps using the full
/// 48-byte `TraceEvent` slice and is not affected. The result has exactly
/// one entry per input event, each produced by
/// [`GpuTraceEventCompact::from_trace_event`].
#[cfg(feature = "std")]
#[must_use]
pub fn pack_compact_event_projection(events: &[TraceEvent]) -> std::vec::Vec<GpuTraceEventCompact> {
    let mut projected = std::vec::Vec::with_capacity(events.len());
    projected.extend(events.iter().map(GpuTraceEventCompact::from_trace_event));
    projected
}
227
/// R.11c — SHA-256 digest of the canonical byte form of `compact`.
///
/// This is the provenance anchor of the throughput path: a verifier
/// re-packs `TraceEvent[]` with [`pack_compact_event_projection`] and
/// checks that the digest recorded at dispatch time matches.
///
/// Each event contributes 16 bytes, in slice order:
///
/// * `ts_ns` as 8 little-endian bytes,
/// * `entity_and_error` as 4 little-endian bytes,
/// * `latency_us` as 4 little-endian bytes.
///
/// The fields are serialised explicitly with `to_le_bytes`, so the digest
/// does not depend on the host's in-memory layout and the function stays
/// compatible with `forbid(unsafe_code)`.
#[cfg(feature = "std")]
#[must_use]
pub fn compact_event_projection_hash(compact: &[GpuTraceEventCompact]) -> [u8; 32] {
    let total_len = compact.len() * GpuTraceEventCompact::SIZE;
    let mut stream: std::vec::Vec<u8> = std::vec::Vec::with_capacity(total_len);

    for cell in compact {
        // Assemble one fixed-width record, then append it in a single copy.
        let mut record = [0u8; GpuTraceEventCompact::SIZE];
        record[..8].copy_from_slice(&cell.ts_ns.to_le_bytes());
        record[8..12].copy_from_slice(&cell.entity_and_error.to_le_bytes());
        record[12..].copy_from_slice(&cell.latency_us.to_le_bytes());
        stream.extend_from_slice(&record);
    }

    crate::hash::sha256(&stream)
}
251
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand: an otherwise-default event stamped at `ts_ns`.
    fn event_at(ts_ns: u64) -> TraceEvent {
        TraceEvent {
            ts_ns,
            ..TraceEvent::default()
        }
    }

    #[test]
    fn window_index_partitions_at_boundaries() {
        // 1.5 seconds at a 1-second window → index 1.
        assert_eq!(event_at(1_500_000_000).window_index(1_000_000_000), 1);

        // One nanosecond before a boundary still belongs to the earlier window.
        assert_eq!(event_at(1_999_999_999).window_index(1_000_000_000), 1);

        // Exact boundary lands in the next window per integer-division semantics.
        assert_eq!(event_at(2_000_000_000).window_index(1_000_000_000), 2);
    }

    #[test]
    fn window_index_floors_at_zero() {
        let ev = TraceEvent::default();
        assert_eq!(ev.window_index(1_000_000_000), 0);
    }

    #[test]
    fn window_index_saturates_instead_of_wrapping() {
        // Largest quotient that still fits in u32 is returned unchanged.
        assert_eq!(event_at(u64::from(u32::MAX) - 1).window_index(1), u32::MAX - 1);
        assert_eq!(event_at(u64::from(u32::MAX)).window_index(1), u32::MAX);
        // Anything larger clamps to u32::MAX rather than truncating.
        assert_eq!(event_at(u64::from(u32::MAX) + 1).window_index(1), u32::MAX);
        assert_eq!(event_at(u64::MAX).window_index(1), u32::MAX);
    }

    #[test]
    fn struct_size_is_stable() {
        // The size of the canonical record is part of the prior-art posture
        // because the CUDA layout in `cuda/layout.cuh` mirrors it. If this
        // assertion changes, the CUDA mirror needs to change with it. The
        // expected size is the sum of declared field widths plus trailing
        // alignment padding to a multiple of 8 (the largest alignment).
        //
        // Field sizes (bytes): 8+4+4+8+8+4+2+2+2+2 = 44, padded to 48.
        assert_eq!(core::mem::size_of::<TraceEvent>(), 48);
    }

    #[test]
    fn compact_struct_size_matches_declared_size() {
        // `SIZE` is a hand-written constant that the CUDA mirror and the
        // projection hash both rely on; pin it to the real in-memory size.
        // Field sizes (bytes): 8+4+4 = 16, no padding.
        assert_eq!(
            core::mem::size_of::<GpuTraceEventCompact>(),
            GpuTraceEventCompact::SIZE
        );
    }

    #[test]
    fn compact_masks_partition_the_word() {
        assert_eq!(
            GpuTraceEventCompact::ERROR_BIT | GpuTraceEventCompact::ENTITY_MASK,
            u32::MAX
        );
        assert_eq!(
            GpuTraceEventCompact::ERROR_BIT & GpuTraceEventCompact::ENTITY_MASK,
            0
        );
    }

    #[test]
    fn compact_projection_packs_entity_and_error_flag() {
        let failing = TraceEvent {
            ts_ns: 42,
            entity_id: 7,
            latency_us: 1_234,
            error_code: 3,
            ..TraceEvent::default()
        };
        let packed = GpuTraceEventCompact::from_trace_event(&failing);
        assert_eq!(packed.ts_ns, 42);
        assert_eq!(packed.latency_us, 1_234);
        assert_eq!(packed.entity_id(), 7);
        assert!(packed.error_nonzero());

        let clean = TraceEvent {
            error_code: 0,
            ..failing
        };
        let packed = GpuTraceEventCompact::from_trace_event(&clean);
        assert_eq!(packed.entity_id(), 7);
        assert!(!packed.error_nonzero());
    }

    #[test]
    fn compact_projection_keeps_only_low_31_entity_bits() {
        // An out-of-contract entity id must never leak into the error bit.
        let wide = TraceEvent {
            entity_id: u32::MAX,
            ..TraceEvent::default()
        };
        let packed = GpuTraceEventCompact::from_trace_event(&wide);
        assert_eq!(packed.entity_id(), GpuTraceEventCompact::ENTITY_MASK);
        assert!(!packed.error_nonzero());
    }
}
290}