dsfb_gpu_debug_core/event.rs
1//! The canonical trace-event record.
2//!
3//! `TraceEvent` is the input atom of the DSFB-GPU-Debug pipeline. Each
4//! record represents one observable point in the debug catalog — a span
5//! emitted by a tracing system, a latency sample, an error stamp. The shape
6//! is intentionally narrow: we keep only the fields the downstream pipeline
7//! actually reads, so the canonical byte form is small and the hash chain
8//! stays cheap to compute.
9//!
10//! Layout decisions:
11//!
12//! * `#[repr(C)]` so the struct can cross the FFI boundary into CUDA
13//! without per-field marshaling. The CUDA-side mirror in
14//! `cuda/layout.cuh` (introduced in Section H) lays the fields out
15//! identically.
16//! * Field order is part of the in-crate canonical trace contract. Changing
17//! this order would change the catalog hash and therefore the case-file
18//! hash, so the order is not to be reshuffled.
19//! * Widths chosen for the bounded v0 demo: `latency_us: u32` covers up to
20//! ~71 minutes of latency (way beyond the contract clamp of 32 767 ms),
21//! `entity_id` and `route_id` are `u32` to leave room without paying for
22//! `u64`, and the 16-bit status/error/kind/flags fields keep the total
23//! record size small.
24//!
25//! Stability: this type is deserialized from the canonical JSON fixture and
26//! re-serialized into the case file's `input_catalog_hash`. The
27//! serialization order is fixed by `serialize::write_event` (canonical key
28//! ordering); modifying field names or order requires bumping the contract
29//! version.
30
/// One observable point in a trace: the input record of the pipeline.
///
/// Every field is an unsigned integer, and the deserializer rejects
/// negative literals. Construction is deliberately narrow: events are
/// expected to come from the fixture synthesizer or from parsing canonical
/// JSON rather than from ad-hoc field assignment, so the bit-stability
/// rules around field widths are honored automatically.
///
/// The struct is `#[repr(C)]` and its field order is part of the canonical
/// trace contract. Do not reorder, rename, or resize fields without bumping
/// the contract version.
#[repr(C)]
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug, Default)]
pub struct TraceEvent {
    /// Wall-clock timestamp in nanoseconds since the synthetic epoch
    /// (event 0). The pipeline derives the window index from this field by
    /// integer division by the contract's `window_size_ms * 1_000_000`.
    pub ts_ns: u64,
    /// The entity (service, process, sensor) that produced the event.
    /// Bounded by `n_entities` in the contract.
    pub entity_id: u32,
    /// The route within the entity (e.g. an HTTP endpoint or a sensor
    /// channel). The v0 pipeline does not window on it, but it is carried
    /// into the canonical bytes so the hash chain reflects the full
    /// fixture shape.
    pub route_id: u32,
    /// Tracing span identifier. Kept for fidelity of the catalog hash; the
    /// residual/sign stages do not consume it.
    pub span_id: u64,
    /// Parent span identifier, or `0` for a root span.
    pub parent_span_id: u64,
    /// Observed latency in microseconds. Clamped to
    /// `contract.numeric.latency_clamp_ms * 1000` at the residual boundary
    /// to keep Q16.16 quantization in range.
    pub latency_us: u32,
    /// HTTP-style status code (or a domain-specific equivalent). Carried
    /// through to the canonical bytes; the v0 pipeline detects the error
    /// class via `error_code`, not `status_code`.
    pub status_code: u16,
    /// Domain error code; `0` means "no error". Drives the error-rate
    /// derivative of the residual.
    pub error_code: u16,
    /// Free-form event-type discriminator (request, response, log,
    /// sample). Not consumed in v0 but preserved in the catalog hash.
    pub event_kind: u16,
    /// Bit-field for miscellaneous flags (sampled, replayed, simulated).
    /// Preserved verbatim for the catalog hash.
    pub flags: u16,
}

impl TraceEvent {
    /// Builds a fully-specified `TraceEvent` from positional arguments, in
    /// declaration order of the fields.
    ///
    /// Used by the fixture synthesizer and tests. There is intentionally no
    /// builder: the type is small enough that positional construction is
    /// easier to audit than a fluent API.
    #[must_use]
    // One argument per field is the point of this constructor (see above).
    #[allow(clippy::too_many_arguments)]
    pub const fn new(
        ts_ns: u64,
        entity_id: u32,
        route_id: u32,
        span_id: u64,
        parent_span_id: u64,
        latency_us: u32,
        status_code: u16,
        error_code: u16,
        event_kind: u16,
        flags: u16,
    ) -> Self {
        Self {
            ts_ns,
            entity_id,
            route_id,
            span_id,
            parent_span_id,
            latency_us,
            status_code,
            error_code,
            event_kind,
            flags,
        }
    }

    /// Index of the window containing this event, for windows that are
    /// `window_size_ns` nanoseconds wide.
    ///
    /// Computed as `ts_ns / window_size_ns` (floor division), so window `k`
    /// covers the half-open range
    /// `[k * window_size_ns, (k + 1) * window_size_ns)`: an event that sits
    /// exactly on a boundary opens the next window.
    ///
    /// The result saturates at `u32::MAX` instead of wrapping when the
    /// quotient does not fit in a `u32`. A `window_size_ns` of `0`, which
    /// would otherwise be a division by zero, is handled the same way and
    /// also yields `u32::MAX`, so this function never panics.
    #[must_use]
    pub const fn window_index(&self, window_size_ns: u64) -> u32 {
        // A zero-width window has no finite index; saturate rather than
        // hit the divide-by-zero panic.
        if window_size_ns == 0 {
            return u32::MAX;
        }
        let quotient = self.ts_ns / window_size_ns;
        // `u32::MAX as u64` is a lossless widening; the narrowing cast
        // below is only reached once the value is known to fit.
        if quotient > u32::MAX as u64 {
            u32::MAX
        } else {
            quotient as u32
        }
    }
}
125
126/// R.11c — compact GPU-ingest projection of `TraceEvent`. Carries
127/// only the four fields the `window_feature_kernel_structured`
128/// kernel actually reads (`ts_ns`, `entity_id`, `latency_us`, and
129/// the `error_code != 0` flag), packed into 16 bytes. Throughput
130/// dispatches H2D this projection instead of the 48-byte audit-
131/// grade `TraceEvent`, cutting PCIe payload ~3× at full scale
132/// (192 MB → 64 MB at K=128 256×4096).
133///
134/// **Audit invariance**: the 48-byte `TraceEvent` byte form, the
135/// Audit-mode FFI, and every D16 audit-chain golden hash are
136/// untouched. The projection is opt-in to the D64 throughput path.
137///
138/// **Provenance**: the projection is a deterministic function of
139/// `TraceEvent[]`. An auditor can re-pack the events via
140/// `GpuTraceEventCompact::from_trace_event` and verify the recorded
141/// `compact_event_projection_hash` (the SHA-256 over the packed
142/// byte stream) matches. The hash is surfaced through the dispatch
143/// diagnostic so a verifier can confirm the compact bytes weren't
144/// silently substituted between catalog ingest and the kernel.
145///
146/// **Byte layout (16 bytes, 8-byte aligned, repr(C))**:
147/// offset field type
148/// 0 `ts_ns` u64
149/// 8 `entity_and_error` u32 (low 31 bits = entity_id,
150/// high bit = error_code != 0)
151/// 12 `latency_us` u32
152///
153/// `entity_id` is bounded to 31 bits (max 2^31 − 1). All fixtures
154/// the v0 plan supports cap `n_entities` at < 2^15.
155#[repr(C)]
156#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug, Default)]
157pub struct GpuTraceEventCompact {
158 /// Wall-clock timestamp, nanoseconds since the synthetic epoch.
159 pub ts_ns: u64,
160 /// Bit-packed `(entity_id, error_flag)`. Low 31 bits carry
161 /// `entity_id`; bit 31 is `1` iff the original event's
162 /// `error_code` was non-zero.
163 pub entity_and_error: u32,
164 /// Observed latency in microseconds. Carried as the full
165 /// `u32` from `TraceEvent` so any clamp behaviour upstream
166 /// is preserved byte-for-byte.
167 pub latency_us: u32,
168}
169
170impl GpuTraceEventCompact {
171 /// Fixed byte width of one compact event. Mirrored on the GPU
172 /// side as `cuda/layout.cuh::GpuTraceEventCompact`.
173 pub const SIZE: usize = 16;
174 /// Bit-31 flag in `entity_and_error` indicating
175 /// `TraceEvent::error_code != 0`.
176 pub const ERROR_BIT: u32 = 1u32 << 31;
177 /// Bit-mask for the `entity_id` field within
178 /// `entity_and_error`.
179 pub const ENTITY_MASK: u32 = 0x7FFF_FFFF;
180
181 /// Pack a single `TraceEvent` into the compact projection. The
182 /// resulting bytes are a deterministic function of the input;
183 /// two events that compare equal under
184 /// `PartialEq<TraceEvent>` produce identical compact bytes.
185 #[must_use]
186 pub const fn from_trace_event(ev: &TraceEvent) -> Self {
187 let entity_bits = ev.entity_id & Self::ENTITY_MASK;
188 let error_bit = if ev.error_code != 0 {
189 Self::ERROR_BIT
190 } else {
191 0
192 };
193 Self {
194 ts_ns: ev.ts_ns,
195 entity_and_error: entity_bits | error_bit,
196 latency_us: ev.latency_us,
197 }
198 }
199
200 /// Recover the entity id stored in the low 31 bits of
201 /// `entity_and_error`.
202 #[must_use]
203 pub const fn entity_id(&self) -> u32 {
204 self.entity_and_error & Self::ENTITY_MASK
205 }
206
207 /// `true` iff the original `TraceEvent::error_code` was
208 /// non-zero. Stored as bit 31 of `entity_and_error` so the
209 /// compact projection holds in 16 bytes.
210 #[must_use]
211 pub const fn error_nonzero(&self) -> bool {
212 (self.entity_and_error & Self::ERROR_BIT) != 0
213 }
214}
215
/// R.11c — packs `events` into the compact GPU projection, one output
/// cell per input event and in the same order.
///
/// Each cell is produced by [`GpuTraceEventCompact::from_trace_event`], so
/// the result is a deterministic function of the input slice. This is a
/// helper for the throughput dispatch; the audit path keeps using the full
/// 48-byte `TraceEvent` slice and is unchanged.
#[cfg(feature = "std")]
#[must_use]
pub fn pack_compact_event_projection(events: &[TraceEvent]) -> std::vec::Vec<GpuTraceEventCompact> {
    // The output length is known up front, so allocate exactly once.
    let mut packed = std::vec::Vec::with_capacity(events.len());
    for event in events {
        packed.push(GpuTraceEventCompact::from_trace_event(event));
    }
    packed
}
227
/// R.11c — SHA-256 of the canonical byte form of a compact-event slice.
///
/// This is the provenance anchor of the throughput path: a verifier
/// re-packs `TraceEvent[]` with [`pack_compact_event_projection`] and
/// checks that the recorded hash matches.
///
/// The canonical byte form of each cell is its fields in little-endian
/// order: `ts_ns` (u64, 8 bytes), then `entity_and_error` (u32, 4 bytes),
/// then `latency_us` (u32, 4 bytes). Cells are concatenated in slice
/// order. The bytes are produced explicitly with `to_le_bytes`, so the
/// hash does not depend on the host's in-memory layout and the function
/// stays compatible with `forbid(unsafe_code)`.
#[cfg(feature = "std")]
#[must_use]
pub fn compact_event_projection_hash(compact: &[GpuTraceEventCompact]) -> [u8; 32] {
    let byte_len = compact.len() * GpuTraceEventCompact::SIZE;
    let mut canonical: std::vec::Vec<u8> = std::vec::Vec::with_capacity(byte_len);
    compact.iter().for_each(|cell| {
        let ts_bytes = cell.ts_ns.to_le_bytes();
        let packed_bytes = cell.entity_and_error.to_le_bytes();
        let latency_bytes = cell.latency_us.to_le_bytes();
        // Field order here defines the canonical form; keep it fixed.
        canonical.extend_from_slice(&ts_bytes);
        canonical.extend_from_slice(&packed_bytes);
        canonical.extend_from_slice(&latency_bytes);
    });
    crate::hash::sha256(&canonical)
}
251
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn window_index_partitions_at_boundaries() {
        let ev = TraceEvent {
            ts_ns: 1_500_000_000,
            ..TraceEvent::default()
        };
        // 1.5 seconds at a 1-second window → index 1.
        assert_eq!(ev.window_index(1_000_000_000), 1);

        let edge = TraceEvent {
            ts_ns: 2_000_000_000,
            ..TraceEvent::default()
        };
        // Exact boundary lands in the next window per integer-division semantics.
        assert_eq!(edge.window_index(1_000_000_000), 2);
    }

    #[test]
    fn window_index_floors_at_zero() {
        let ev = TraceEvent::default();
        assert_eq!(ev.window_index(1_000_000_000), 0);
    }

    #[test]
    fn window_index_saturates_at_u32_max() {
        // Largest quotient that still fits is returned unchanged.
        let fits = TraceEvent {
            ts_ns: u32::MAX as u64 - 1,
            ..TraceEvent::default()
        };
        assert_eq!(fits.window_index(1), u32::MAX - 1);

        // One past the u32 range must clamp, not wrap to 0.
        let just_over = TraceEvent {
            ts_ns: u32::MAX as u64 + 1,
            ..TraceEvent::default()
        };
        assert_eq!(just_over.window_index(1), u32::MAX);

        let far = TraceEvent {
            ts_ns: u64::MAX,
            ..TraceEvent::default()
        };
        assert_eq!(far.window_index(1), u32::MAX);
    }

    #[test]
    fn struct_size_is_stable() {
        // The size of the canonical record is part of the prior-art posture
        // because the CUDA layout in `cuda/layout.cuh` mirrors it. If this
        // assertion changes, the CUDA mirror needs to change with it. The
        // expected size is the sum of declared field widths plus trailing
        // alignment padding to a multiple of 8 (the largest alignment).
        //
        // Field sizes (bytes): 8+4+4+8+8+4+2+2+2+2 = 44, padded to 48.
        assert_eq!(core::mem::size_of::<TraceEvent>(), 48);
    }

    #[test]
    fn compact_struct_size_matches_declared_size() {
        // `GpuTraceEventCompact::SIZE` is a hand-written constant that the
        // hash buffer sizing relies on; pin it to the real `repr(C)` size
        // (8 + 4 + 4 = 16, no padding) so the two cannot drift apart.
        assert_eq!(
            core::mem::size_of::<GpuTraceEventCompact>(),
            GpuTraceEventCompact::SIZE
        );
        assert_eq!(GpuTraceEventCompact::SIZE, 16);
    }

    #[test]
    fn compact_projection_round_trips_entity_and_error_flag() {
        let failing = TraceEvent {
            ts_ns: 123,
            entity_id: 42,
            latency_us: 900,
            error_code: 7,
            ..TraceEvent::default()
        };
        let packed = GpuTraceEventCompact::from_trace_event(&failing);
        assert_eq!(packed.ts_ns, 123);
        assert_eq!(packed.latency_us, 900);
        assert_eq!(packed.entity_id(), 42);
        assert!(packed.error_nonzero());
        assert_eq!(
            packed.entity_and_error,
            42 | GpuTraceEventCompact::ERROR_BIT
        );

        let healthy = TraceEvent {
            error_code: 0,
            ..failing
        };
        let packed = GpuTraceEventCompact::from_trace_event(&healthy);
        assert_eq!(packed.entity_id(), 42);
        assert!(!packed.error_nonzero());
        assert_eq!(packed.entity_and_error, 42);
    }

    #[test]
    fn compact_projection_masks_entity_to_31_bits() {
        // An out-of-contract entity id must never leak into the error bit.
        let ev = TraceEvent {
            entity_id: u32::MAX,
            ..TraceEvent::default()
        };
        let packed = GpuTraceEventCompact::from_trace_event(&ev);
        assert_eq!(packed.entity_id(), GpuTraceEventCompact::ENTITY_MASK);
        assert!(!packed.error_nonzero());
    }
}
290}