Skip to main content

vortex_trace/
lib.rs

1//! `vortex-trace` — Structured event tracing for deterministic simulation replay.
2//!
3//! Every significant event in the simulation is captured as a [`TraceEvent`].
4//! Given the same seed, the trace is bit-for-bit identical — enabling:
5//! - Post-mortem debugging of failed seeds
6//! - Differential replay (compare two runs)
7//! - Human-readable dump for manual inspection
8
9pub mod determinism;
10pub mod diagnosis;
11pub mod minimize;
12pub mod replay;
13mod stats;
14
15pub use determinism::{DeterminismResult, compare_traces, verify_determinism};
16pub use diagnosis::{CausalEvent, DiagnosisReport, FaultCause, ViolationInfo, diagnose};
17pub use minimize::{MinimizedTrace, minimize_faults, minimize_ticks};
18pub use stats::SimStats;
19
20use serde::{Deserialize, Serialize};
21use vortex_core::NodeId;
22
/// A unique, monotonically increasing event identifier within a trace.
///
/// [`SimTrace::record`] hands these out starting at 0 and increments by one per
/// recorded event; [`SimTrace::clear`] restarts the sequence at 0.
pub type EventId = u64;
25
/// A structured event captured during simulation.
///
/// When an event is created through [`SimTrace::record`], `event_id` is assigned by
/// the trace and the remaining fields are taken verbatim from the caller.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TraceEvent {
    /// Global event sequence number.
    pub event_id: EventId,
    /// Simulation tick when this event occurred.
    pub tick: u64,
    /// Node that originated this event (0 = cluster-level).
    pub node_id: NodeId,
    /// The event payload.
    pub kind: TraceEventKind,
}
38
/// The specific kind of trace event.
///
/// All descriptive fields (`msg_type`, `reason`, `timer_type`, ...) are free-form
/// strings; this type does not validate or interpret them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum TraceEventKind {
    /// A message was sent from one node to another.
    ///
    /// `to` is the destination node. The sender is presumably the enclosing
    /// [`TraceEvent::node_id`] (NOTE(review): confirm against the recording call sites).
    MessageSent {
        to: NodeId,
        msg_type: String,
        size_bytes: usize,
    },
    /// A message was delivered to a node.
    ///
    /// `from` is the sending node; [`SimTrace::causal_chain`] uses it to pull the
    /// sender's earlier events into the chain.
    MessageDelivered {
        from: NodeId,
        msg_type: String,
        size_bytes: usize,
    },
    /// A message was dropped.
    ///
    /// `from` and `to` identify the two endpoints of the dropped message and
    /// `reason` describes why it was dropped.
    MessageDropped {
        from: NodeId,
        to: NodeId,
        reason: String,
    },
    /// A timer fired.
    TimerFired { timer_type: String },
    /// A state transition occurred.
    ///
    /// `metadata` carries extra free-form context about the transition.
    StateTransition {
        from_state: String,
        to_state: String,
        metadata: String,
    },
    /// A fault was injected.
    FaultInjected { fault_type: String, details: String },
    /// A fault was healed.
    FaultHealed { fault_type: String, details: String },
    /// A storage operation was applied.
    StorageOp { op_type: String, key_count: usize },
    /// A custom application event.
    Custom { tag: String, data: String },
}
77
78/// Collects trace events during a simulation run.
79///
80/// Single-threaded by design — the simulation loop is single-threaded.
81pub struct SimTrace {
82    events: Vec<TraceEvent>,
83    next_event_id: EventId,
84}
85
86impl SimTrace {
87    /// Create an empty trace.
88    pub fn new() -> Self {
89        Self {
90            events: Vec::new(),
91            next_event_id: 0,
92        }
93    }
94
95    /// Record a new event.
96    pub fn record(&mut self, tick: u64, node_id: NodeId, kind: TraceEventKind) {
97        self.events.push(TraceEvent {
98            event_id: self.next_event_id,
99            tick,
100            node_id,
101            kind,
102        });
103        self.next_event_id += 1;
104    }
105
106    /// Number of events recorded.
107    pub fn len(&self) -> usize {
108        self.events.len()
109    }
110
111    /// Whether the trace is empty.
112    pub fn is_empty(&self) -> bool {
113        self.events.is_empty()
114    }
115
116    /// Get all events.
117    pub fn events(&self) -> &[TraceEvent] {
118        &self.events
119    }
120
121    /// Get events filtered by node ID.
122    pub fn events_for_node(&self, node_id: NodeId) -> Vec<&TraceEvent> {
123        self.events
124            .iter()
125            .filter(|e| e.node_id == node_id)
126            .collect()
127    }
128
129    /// Get events matching a predicate.
130    pub fn events_matching<F: Fn(&TraceEventKind) -> bool>(&self, f: F) -> Vec<&TraceEvent> {
131        self.events.iter().filter(|e| f(&e.kind)).collect()
132    }
133
134    /// Filter events by tick range [start, end] (inclusive).
135    pub fn events_between(&self, start_tick: u64, end_tick: u64) -> Vec<&TraceEvent> {
136        self.events
137            .iter()
138            .filter(|e| e.tick >= start_tick && e.tick <= end_tick)
139            .collect()
140    }
141
142    /// Get the last N events.
143    pub fn last_n(&self, n: usize) -> &[TraceEvent] {
144        let start = self.events.len().saturating_sub(n);
145        &self.events[start..]
146    }
147
148    /// Follow the causal chain backward from an event.
149    pub fn causal_chain(&self, event_id: EventId) -> Vec<&TraceEvent> {
150        let mut chain = Vec::new();
151        let mut current_id = event_id;
152        let mut visited_nodes: std::collections::HashSet<NodeId> = std::collections::HashSet::new();
153
154        let start = match self.events.iter().find(|e| e.event_id == current_id) {
155            Some(e) => e,
156            None => return chain,
157        };
158
159        chain.push(start);
160        visited_nodes.insert(start.node_id);
161        let mut current_tick = start.tick;
162
163        for event in self.events.iter().rev() {
164            if event.tick > current_tick || event.event_id >= current_id {
165                continue;
166            }
167            if visited_nodes.contains(&event.node_id) {
168                chain.push(event);
169                current_id = event.event_id;
170                current_tick = event.tick;
171                if let TraceEventKind::MessageDelivered { from, .. } = &event.kind {
172                    visited_nodes.insert(*from);
173                }
174                if chain.len() >= 100 {
175                    break;
176                }
177            }
178        }
179        chain
180    }
181
182    /// Dump as human-readable text.
183    pub fn dump_text(&self) -> String {
184        let mut out = String::new();
185        for event in &self.events {
186            let kind_str = match &event.kind {
187                TraceEventKind::MessageSent {
188                    to,
189                    msg_type,
190                    size_bytes,
191                } => format!("MSG_SENT to={to} type={msg_type} size={size_bytes}"),
192                TraceEventKind::MessageDelivered {
193                    from,
194                    msg_type,
195                    size_bytes,
196                } => format!("MSG_RECV from={from} type={msg_type} size={size_bytes}"),
197                TraceEventKind::MessageDropped { from, to, reason } => {
198                    format!("MSG_DROP {from}->{to} reason={reason}")
199                }
200                TraceEventKind::TimerFired { timer_type } => format!("TIMER {timer_type}"),
201                TraceEventKind::StateTransition {
202                    from_state,
203                    to_state,
204                    metadata,
205                } => format!("STATE {from_state}->{to_state} {metadata}"),
206                TraceEventKind::FaultInjected {
207                    fault_type,
208                    details,
209                } => format!("FAULT+ {fault_type}: {details}"),
210                TraceEventKind::FaultHealed {
211                    fault_type,
212                    details,
213                } => format!("FAULT- {fault_type}: {details}"),
214                TraceEventKind::StorageOp { op_type, key_count } => {
215                    format!("STORAGE {op_type} keys={key_count}")
216                }
217                TraceEventKind::Custom { tag, data } => format!("CUSTOM {tag}: {data}"),
218            };
219            out.push_str(&format!(
220                "[t={:06} e={:06} n={}] {}\n",
221                event.tick, event.event_id, event.node_id, kind_str
222            ));
223        }
224        out
225    }
226
227    /// Dump as JSON.
228    pub fn dump_json(&self) -> String {
229        serde_json::to_string_pretty(&self.events).unwrap_or_else(|_| "[]".to_string())
230    }
231
232    /// Dump as JSON Lines (one JSON object per line).
233    pub fn dump_jsonl(&self) -> String {
234        let mut out = String::new();
235        for event in &self.events {
236            if let Ok(json) = serde_json::to_string(event) {
237                out.push_str(&json);
238                out.push('\n');
239            }
240        }
241        out
242    }
243
244    /// Clear the trace.
245    pub fn clear(&mut self) {
246        self.events.clear();
247        self.next_event_id = 0;
248    }
249}
250
251impl Default for SimTrace {
252    fn default() -> Self {
253        Self::new()
254    }
255}
256
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for a `TimerFired` payload with the given label.
    fn timer(label: &str) -> TraceEventKind {
        TraceEventKind::TimerFired {
            timer_type: label.into(),
        }
    }

    /// Shorthand for a `MessageDelivered` payload from `from`.
    fn delivered(from: NodeId) -> TraceEventKind {
        TraceEventKind::MessageDelivered {
            from,
            msg_type: "Ping".into(),
            size_bytes: 8,
        }
    }

    #[test]
    fn test_record_and_len() {
        let mut trace = SimTrace::new();
        assert!(trace.is_empty());
        trace.record(1, 1, timer("election"));
        trace.record(
            2,
            2,
            TraceEventKind::StorageOp {
                op_type: "put".into(),
                key_count: 3,
            },
        );
        assert_eq!(trace.len(), 2);
        assert!(!trace.is_empty());
        assert_eq!(trace.events()[0].event_id, 0);
        assert_eq!(trace.events()[1].event_id, 1);
    }

    #[test]
    fn test_events_for_node() {
        let mut trace = SimTrace::new();
        trace.record(1, 1, timer("a"));
        trace.record(2, 2, timer("b"));
        trace.record(3, 1, timer("c"));
        let node1 = trace.events_for_node(1);
        assert_eq!(node1.len(), 2);
        assert!(node1.iter().all(|e| e.node_id == 1));
    }

    #[test]
    fn test_events_matching() {
        let mut trace = SimTrace::new();
        trace.record(1, 1, timer("x"));
        trace.record(
            2,
            1,
            TraceEventKind::StorageOp {
                op_type: "put".into(),
                key_count: 1,
            },
        );
        trace.record(3, 2, timer("y"));
        let timers = trace.events_matching(|k| matches!(k, TraceEventKind::TimerFired { .. }));
        assert_eq!(timers.len(), 2);
    }

    #[test]
    fn test_dump_text() {
        let mut trace = SimTrace::new();
        trace.record(
            10,
            1,
            TraceEventKind::FaultInjected {
                fault_type: "partition".into(),
                details: "1<->2".into(),
            },
        );
        let text = trace.dump_text();
        assert!(text.contains("FAULT+ partition"));
        assert!(text.contains("[t=000010"));
        // Exactly one newline-terminated line per event.
        assert_eq!(text.lines().count(), 1);
        assert!(text.ends_with('\n'));
    }

    #[test]
    fn test_json_roundtrip() {
        let mut trace = SimTrace::new();
        trace.record(
            1,
            1,
            TraceEventKind::MessageSent {
                to: 2,
                msg_type: "AppendEntries".into(),
                size_bytes: 128,
            },
        );
        let json = trace.dump_json();
        let parsed: Vec<TraceEvent> = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].event_id, 0);
        assert_eq!(parsed[0].tick, 1);
        assert_eq!(parsed[0].node_id, 1);
        assert!(matches!(
            &parsed[0].kind,
            TraceEventKind::MessageSent { to: 2, msg_type, size_bytes: 128 }
                if msg_type == "AppendEntries"
        ));
    }

    #[test]
    fn test_dump_jsonl() {
        let mut trace = SimTrace::new();
        trace.record(1, 1, timer("a"));
        trace.record(2, 2, timer("b"));
        let jsonl = trace.dump_jsonl();
        let parsed: Vec<TraceEvent> = jsonl
            .lines()
            .map(|line| serde_json::from_str(line).unwrap())
            .collect();
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed[0].event_id, 0);
        assert_eq!(parsed[1].tick, 2);
    }

    #[test]
    fn test_deterministic_traces() {
        fn build() -> String {
            let mut trace = SimTrace::new();
            trace.record(
                1,
                1,
                TraceEventKind::StateTransition {
                    from_state: "Follower".into(),
                    to_state: "Leader".into(),
                    metadata: "term=1".into(),
                },
            );
            trace.dump_json()
        }
        assert_eq!(build(), build());
    }

    #[test]
    fn test_events_between() {
        let mut trace = SimTrace::new();
        for i in 0..10 {
            trace.record(i * 10, 1, timer(&format!("t{i}")));
        }
        let between = trace.events_between(20, 50);
        assert_eq!(between.len(), 4); // ticks 20, 30, 40, 50
        assert!(trace.events_between(50, 20).is_empty());
    }

    #[test]
    fn test_last_n() {
        let mut trace = SimTrace::new();
        for i in 0..10 {
            trace.record(i, 1, timer(&format!("t{i}")));
        }
        let last3 = trace.last_n(3);
        assert_eq!(last3.len(), 3);
        assert_eq!(last3[0].tick, 7);
        // Asking for more than was recorded yields everything.
        assert_eq!(trace.last_n(100).len(), 10);
        assert!(trace.last_n(0).is_empty());
    }

    #[test]
    fn test_causal_chain_unknown_event() {
        let mut trace = SimTrace::new();
        assert!(trace.causal_chain(0).is_empty());
        trace.record(1, 1, timer("x"));
        assert!(trace.causal_chain(42).is_empty());
    }

    #[test]
    fn test_causal_chain_follows_delivery() {
        let mut trace = SimTrace::new();
        trace.record(
            1,
            2,
            TraceEventKind::MessageSent {
                to: 1,
                msg_type: "Ping".into(),
                size_bytes: 8,
            },
        );
        trace.record(1, 3, timer("unrelated"));
        trace.record(2, 1, delivered(2));
        trace.record(3, 1, timer("election"));
        let ids: Vec<EventId> = trace.causal_chain(3).iter().map(|e| e.event_id).collect();
        // Node 3's event is not causally linked and must be left out.
        assert_eq!(ids, vec![3, 2, 0]);
    }

    #[test]
    fn test_causal_chain_is_capped() {
        let mut trace = SimTrace::new();
        for i in 0..150 {
            trace.record(i, 1, timer("tick"));
        }
        let chain = trace.causal_chain(149);
        assert_eq!(chain.len(), 100);
        assert_eq!(chain[0].event_id, 149);
    }

    #[test]
    fn test_clear() {
        let mut trace = SimTrace::new();
        trace.record(1, 1, timer("x"));
        trace.clear();
        assert!(trace.is_empty());
        assert_eq!(trace.len(), 0);
        // Event ids restart from zero after a clear.
        trace.record(5, 1, timer("y"));
        assert_eq!(trace.events()[0].event_id, 0);
    }
}