1pub mod determinism;
10pub mod diagnosis;
11pub mod minimize;
12pub mod replay;
13mod stats;
14
15pub use determinism::{DeterminismResult, compare_traces, verify_determinism};
16pub use diagnosis::{CausalEvent, DiagnosisReport, FaultCause, ViolationInfo, diagnose};
17pub use minimize::{MinimizedTrace, minimize_faults, minimize_ticks};
18pub use stats::SimStats;
19
20use serde::{Deserialize, Serialize};
21use vortex_core::NodeId;
22
/// Identifier of a recorded trace event.
///
/// [`SimTrace::record`] hands these out sequentially, starting at 0.
pub type EventId = u64;
25
/// One entry of a [`SimTrace`]: what happened, on which node, and when.
///
/// The type is serde-serializable so a trace can be exported with the JSON
/// dump helpers on [`SimTrace`] and parsed back.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TraceEvent {
    /// Identifier assigned by [`SimTrace::record`] from a counter that starts
    /// at 0 and grows by one per recorded event.
    pub event_id: EventId,
    /// Tick value supplied to [`SimTrace::record`] for this event.
    pub tick: u64,
    /// Node this event was recorded against.
    pub node_id: NodeId,
    /// Payload describing what happened.
    pub kind: TraceEventKind,
}
38
/// Payload of a [`TraceEvent`].
///
/// Most fields are free-form strings chosen by whoever records the event;
/// this module only stores and prints them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum TraceEventKind {
    /// A message was sent (rendered as `MSG_SENT` by `SimTrace::dump_text`).
    MessageSent {
        /// Destination node.
        to: NodeId,
        /// Caller-defined message type label.
        msg_type: String,
        /// Message size in bytes, as reported by the recorder.
        size_bytes: usize,
    },
    /// A message was delivered (rendered as `MSG_RECV`).
    MessageDelivered {
        /// Sending node. `SimTrace::causal_chain` starts following this node
        /// once it walks past a delivery.
        from: NodeId,
        /// Caller-defined message type label.
        msg_type: String,
        /// Message size in bytes, as reported by the recorder.
        size_bytes: usize,
    },
    /// A message was dropped (rendered as `MSG_DROP`).
    MessageDropped {
        /// Sending node.
        from: NodeId,
        /// Intended destination node.
        to: NodeId,
        /// Free-form explanation of why the message was dropped.
        reason: String,
    },
    /// A timer fired (rendered as `TIMER`); `timer_type` is a caller-defined label.
    TimerFired { timer_type: String },
    /// A state change (rendered as `STATE from->to metadata`).
    StateTransition {
        /// Label of the state being left.
        from_state: String,
        /// Label of the state being entered.
        to_state: String,
        /// Free-form extra information printed after the transition.
        metadata: String,
    },
    /// A fault was injected (rendered as `FAULT+`).
    FaultInjected { fault_type: String, details: String },
    /// A fault was healed (rendered as `FAULT-`).
    FaultHealed { fault_type: String, details: String },
    /// A storage operation (rendered as `STORAGE <op_type> keys=<key_count>`).
    StorageOp { op_type: String, key_count: usize },
    /// Any other event, identified by a caller-defined tag (rendered as `CUSTOM`).
    Custom { tag: String, data: String },
}
77
/// In-memory log of [`TraceEvent`]s recorded during a simulation run.
pub struct SimTrace {
    /// Recorded events, in the order they were passed to `record`.
    events: Vec<TraceEvent>,
    /// Identifier that the next recorded event will receive.
    next_event_id: EventId,
}
85
86impl SimTrace {
87 pub fn new() -> Self {
89 Self {
90 events: Vec::new(),
91 next_event_id: 0,
92 }
93 }
94
95 pub fn record(&mut self, tick: u64, node_id: NodeId, kind: TraceEventKind) {
97 self.events.push(TraceEvent {
98 event_id: self.next_event_id,
99 tick,
100 node_id,
101 kind,
102 });
103 self.next_event_id += 1;
104 }
105
106 pub fn len(&self) -> usize {
108 self.events.len()
109 }
110
111 pub fn is_empty(&self) -> bool {
113 self.events.is_empty()
114 }
115
116 pub fn events(&self) -> &[TraceEvent] {
118 &self.events
119 }
120
121 pub fn events_for_node(&self, node_id: NodeId) -> Vec<&TraceEvent> {
123 self.events
124 .iter()
125 .filter(|e| e.node_id == node_id)
126 .collect()
127 }
128
129 pub fn events_matching<F: Fn(&TraceEventKind) -> bool>(&self, f: F) -> Vec<&TraceEvent> {
131 self.events.iter().filter(|e| f(&e.kind)).collect()
132 }
133
134 pub fn events_between(&self, start_tick: u64, end_tick: u64) -> Vec<&TraceEvent> {
136 self.events
137 .iter()
138 .filter(|e| e.tick >= start_tick && e.tick <= end_tick)
139 .collect()
140 }
141
142 pub fn last_n(&self, n: usize) -> &[TraceEvent] {
144 let start = self.events.len().saturating_sub(n);
145 &self.events[start..]
146 }
147
148 pub fn causal_chain(&self, event_id: EventId) -> Vec<&TraceEvent> {
150 let mut chain = Vec::new();
151 let mut current_id = event_id;
152 let mut visited_nodes: std::collections::HashSet<NodeId> = std::collections::HashSet::new();
153
154 let start = match self.events.iter().find(|e| e.event_id == current_id) {
155 Some(e) => e,
156 None => return chain,
157 };
158
159 chain.push(start);
160 visited_nodes.insert(start.node_id);
161 let mut current_tick = start.tick;
162
163 for event in self.events.iter().rev() {
164 if event.tick > current_tick || event.event_id >= current_id {
165 continue;
166 }
167 if visited_nodes.contains(&event.node_id) {
168 chain.push(event);
169 current_id = event.event_id;
170 current_tick = event.tick;
171 if let TraceEventKind::MessageDelivered { from, .. } = &event.kind {
172 visited_nodes.insert(*from);
173 }
174 if chain.len() >= 100 {
175 break;
176 }
177 }
178 }
179 chain
180 }
181
182 pub fn dump_text(&self) -> String {
184 let mut out = String::new();
185 for event in &self.events {
186 let kind_str = match &event.kind {
187 TraceEventKind::MessageSent {
188 to,
189 msg_type,
190 size_bytes,
191 } => format!("MSG_SENT to={to} type={msg_type} size={size_bytes}"),
192 TraceEventKind::MessageDelivered {
193 from,
194 msg_type,
195 size_bytes,
196 } => format!("MSG_RECV from={from} type={msg_type} size={size_bytes}"),
197 TraceEventKind::MessageDropped { from, to, reason } => {
198 format!("MSG_DROP {from}->{to} reason={reason}")
199 }
200 TraceEventKind::TimerFired { timer_type } => format!("TIMER {timer_type}"),
201 TraceEventKind::StateTransition {
202 from_state,
203 to_state,
204 metadata,
205 } => format!("STATE {from_state}->{to_state} {metadata}"),
206 TraceEventKind::FaultInjected {
207 fault_type,
208 details,
209 } => format!("FAULT+ {fault_type}: {details}"),
210 TraceEventKind::FaultHealed {
211 fault_type,
212 details,
213 } => format!("FAULT- {fault_type}: {details}"),
214 TraceEventKind::StorageOp { op_type, key_count } => {
215 format!("STORAGE {op_type} keys={key_count}")
216 }
217 TraceEventKind::Custom { tag, data } => format!("CUSTOM {tag}: {data}"),
218 };
219 out.push_str(&format!(
220 "[t={:06} e={:06} n={}] {}\n",
221 event.tick, event.event_id, event.node_id, kind_str
222 ));
223 }
224 out
225 }
226
227 pub fn dump_json(&self) -> String {
229 serde_json::to_string_pretty(&self.events).unwrap_or_else(|_| "[]".to_string())
230 }
231
232 pub fn dump_jsonl(&self) -> String {
234 let mut out = String::new();
235 for event in &self.events {
236 if let Ok(json) = serde_json::to_string(event) {
237 out.push_str(&json);
238 out.push('\n');
239 }
240 }
241 out
242 }
243
244 pub fn clear(&mut self) {
246 self.events.clear();
247 self.next_event_id = 0;
248 }
249}
250
251impl Default for SimTrace {
252 fn default() -> Self {
253 Self::new()
254 }
255}
256
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for a `TimerFired` payload with the given timer name.
    fn timer(name: &str) -> TraceEventKind {
        TraceEventKind::TimerFired {
            timer_type: name.to_string(),
        }
    }

    /// Shorthand for a `StorageOp` payload.
    fn storage(op: &str, key_count: usize) -> TraceEventKind {
        TraceEventKind::StorageOp {
            op_type: op.to_string(),
            key_count,
        }
    }

    // Recording assigns sequential ids and updates the length.
    #[test]
    fn test_record_and_len() {
        let mut trace = SimTrace::new();
        assert!(trace.is_empty());

        trace.record(1, 1, timer("election"));
        trace.record(2, 2, storage("put", 3));

        assert_eq!(trace.len(), 2);
        let ids: Vec<EventId> = trace.events().iter().map(|e| e.event_id).collect();
        assert_eq!(ids, vec![0, 1]);
    }

    // Filtering by node returns only that node's events.
    #[test]
    fn test_events_for_node() {
        let mut trace = SimTrace::new();
        trace.record(1, 1, timer("a"));
        trace.record(2, 2, timer("b"));
        trace.record(3, 1, timer("c"));

        let node1 = trace.events_for_node(1);
        assert_eq!(node1.len(), 2);
    }

    // Filtering by payload predicate.
    #[test]
    fn test_events_matching() {
        let mut trace = SimTrace::new();
        trace.record(1, 1, timer("x"));
        trace.record(2, 1, storage("put", 1));
        trace.record(3, 2, timer("y"));

        let timers = trace.events_matching(|k| matches!(k, TraceEventKind::TimerFired { .. }));
        assert_eq!(timers.len(), 2);
    }

    // Text dump contains the kind tag and the zero-padded tick.
    #[test]
    fn test_dump_text() {
        let mut trace = SimTrace::new();
        let fault = TraceEventKind::FaultInjected {
            fault_type: "partition".to_string(),
            details: "1<->2".to_string(),
        };
        trace.record(10, 1, fault);

        let text = trace.dump_text();
        assert!(text.contains("FAULT+ partition"));
        assert!(text.contains("[t=000010"));
    }

    // The JSON dump parses back into events.
    #[test]
    fn test_json_roundtrip() {
        let mut trace = SimTrace::new();
        let sent = TraceEventKind::MessageSent {
            to: 2,
            msg_type: "AppendEntries".to_string(),
            size_bytes: 128,
        };
        trace.record(1, 1, sent);

        let json = trace.dump_json();
        let parsed: Vec<TraceEvent> = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].tick, 1);
    }

    // Building the same trace twice yields identical JSON.
    #[test]
    fn test_deterministic_traces() {
        fn build() -> String {
            let mut trace = SimTrace::new();
            let transition = TraceEventKind::StateTransition {
                from_state: "Follower".to_string(),
                to_state: "Leader".to_string(),
                metadata: "term=1".to_string(),
            };
            trace.record(1, 1, transition);
            trace.dump_json()
        }

        let first = build();
        let second = build();
        assert_eq!(first, second);
    }

    // The tick window is inclusive on both ends.
    #[test]
    fn test_events_between() {
        let mut trace = SimTrace::new();
        for i in 0..10u64 {
            trace.record(i * 10, 1, timer(&format!("t{i}")));
        }

        // Ticks 20, 30, 40 and 50 fall inside the window.
        let between = trace.events_between(20, 50);
        assert_eq!(between.len(), 4);
    }

    // `last_n` returns the tail of the trace.
    #[test]
    fn test_last_n() {
        let mut trace = SimTrace::new();
        for i in 0..10u64 {
            trace.record(i, 1, timer(&format!("t{i}")));
        }

        let last3 = trace.last_n(3);
        assert_eq!(last3.len(), 3);
        assert_eq!(last3[0].tick, 7);
    }

    // Clearing empties the trace.
    #[test]
    fn test_clear() {
        let mut trace = SimTrace::new();
        trace.record(1, 1, timer("x"));
        trace.clear();
        assert!(trace.is_empty());
    }
}