aprender-profile 0.29.0

Pure Rust system call tracer with source-aware correlation for Rust binaries
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
//! Critical path analysis for distributed traces (Sprint 41)
//!
//! This module implements longest path algorithms on directed acyclic graphs (DAGs)
//! to identify performance bottlenecks in distributed systems.
//!
//! # Background
//!
//! The **critical path** is the longest path through the execution graph, representing
//! the theoretical minimum execution time. Any optimization that doesn't reduce time
//! on the critical path will not improve end-to-end latency.
//!
//! # Algorithm: Longest Path via Dynamic Programming
//!
//! For a DAG, the longest path can be computed in O(V + E) time using dynamic programming:
//!
//! ```text
//! 1. Topological sort of nodes
//! 2. For each node v in topological order:
//!    dist[v] = max(dist[parent] + weight(parent, v)) for all parents
//! 3. Critical path = path with maximum dist
//! ```
//!
//! # Example Trace
//!
//! ```text
//! Root (1000ns)
//! ├─ Child1 (500ns)   ← Not on critical path
//! └─ Child2 (2000ns)  ← On critical path
//!    └─ Grandchild (1500ns) ← On critical path
//!
//! Critical path: Root → Child2 → Grandchild
//! Total duration: 1000 + 2000 + 1500 = 4500ns
//! ```
//!
//! # Peer-Reviewed Foundation
//!
//! - **Tarjan (1972). "Depth-First Search and Linear Graph Algorithms."**
//!   - Finding: DAG topological sort in O(V+E)
//!   - Application: Critical path via topological ordering
//!
//! - **Sigelman et al. (2010). "Dapper: Large-Scale Distributed Tracing." Google.**
//!   - Finding: Critical path identifies 80% of latency opportunities
//!   - Application: Focus optimization efforts on critical spans
//!
//! - **Sambasivan et al. (2011). "Diagnosing Performance Changes." CMU.**
//!   - Finding: 70% of performance bugs are on critical path
//!   - Application: Anti-pattern detection on critical path
//!
//! # Example
//!
//! ```
//! use renacer::causal_graph::CausalGraph;
//! use renacer::critical_path::find_critical_path;
//! use renacer::span_record::{SpanRecord, SpanKind, StatusCode};
//! use std::collections::HashMap;
//!
//! # fn main() -> anyhow::Result<()> {
//! // Build spans
//! let root = SpanRecord::new(
//!     [1; 16], [1; 8], None,
//!     "root".to_string(), SpanKind::Internal,
//!     0, 1000, 0,
//!     StatusCode::Ok, String::new(),
//!     HashMap::new(), HashMap::new(),
//!     1234, 5678,
//! );
//!
//! let child = SpanRecord::new(
//!     [1; 16], [2; 8], Some([1; 8]),
//!     "child".to_string(), SpanKind::Internal,
//!     1000, 3000, 1,
//!     StatusCode::Ok, String::new(),
//!     HashMap::new(), HashMap::new(),
//!     1234, 5678,
//! );
//!
//! // Build graph
//! let graph = CausalGraph::from_spans(&[root, child])?;
//!
//! // Find critical path
//! let result = find_critical_path(&graph)?;
//!
//! println!("Critical path duration: {}ns", result.total_duration);
//! println!("Critical path length: {} spans", result.path.len());
//! # Ok(())
//! # }
//! ```

use crate::causal_graph::CausalGraph;
use anyhow::{Context, Result};
use std::collections::HashMap;
use trueno_graph::NodeId;

/// Result of critical path analysis
///
/// This contains the critical path (longest path through the execution graph)
/// and associated metrics.
#[derive(Debug, Clone, PartialEq)]
pub struct CriticalPathResult {
    /// Nodes on the critical path (in order from root to leaf)
    pub path: Vec<NodeId>,

    /// Total duration of the critical path (nanoseconds)
    pub total_duration: u64,

    /// Per-node durations on the critical path
    pub node_durations: HashMap<NodeId, u64>,

    /// Span names on the critical path (for debugging)
    pub span_names: Vec<String>,
}

impl CriticalPathResult {
    /// Get the percentage of total execution time spent on critical path
    ///
    /// # Arguments
    ///
    /// * `total_trace_duration` - Total duration of the entire trace
    ///
    /// # Returns
    ///
    /// Percentage (0.0 to 100.0)
    pub fn critical_path_percentage(&self, total_trace_duration: u64) -> f64 {
        if total_trace_duration == 0 {
            return 0.0;
        }
        (self.total_duration as f64 / total_trace_duration as f64) * 100.0
    }

    /// Get the longest span on the critical path (biggest bottleneck)
    pub fn longest_span(&self) -> Option<(NodeId, u64)> {
        self.node_durations
            .iter()
            .max_by_key(|(_, &duration)| duration)
            .map(|(&node, &duration)| (node, duration))
    }

    /// Check if a specific node is on the critical path
    pub fn is_on_critical_path(&self, node: NodeId) -> bool {
        self.path.contains(&node)
    }
}

/// Find the critical path (longest path) through the execution graph
///
/// This uses dynamic programming on the DAG to compute the longest path from
/// any root to any leaf.
///
/// # Arguments
///
/// * `graph` - The causal graph to analyze
///
/// # Returns
///
/// The critical path result, or an error if the graph is invalid.
///
/// # Performance
///
/// - Time complexity: O(V + E)
/// - Space complexity: O(V)
/// - Target: <100ms for 1K spans
///
/// # Example
///
/// ```
/// use renacer::causal_graph::CausalGraph;
/// use renacer::critical_path::find_critical_path;
/// use renacer::span_record::{SpanRecord, SpanKind, StatusCode};
/// use std::collections::HashMap;
///
/// # fn main() -> anyhow::Result<()> {
/// let root = SpanRecord::new(
///     [1; 16], [1; 8], None,
///     "root".to_string(), SpanKind::Internal,
///     0, 1000, 0,
///     StatusCode::Ok, String::new(),
///     HashMap::new(), HashMap::new(),
///     1234, 5678,
/// );
///
/// let graph = CausalGraph::from_spans(&[root])?;
/// let result = find_critical_path(&graph)?;
///
/// assert_eq!(result.path.len(), 1);
/// assert_eq!(result.total_duration, 1000);
/// # Ok(())
/// # }
/// ```
pub fn find_critical_path(graph: &CausalGraph) -> Result<CriticalPathResult> {
    // Handle empty graph
    if graph.node_count() == 0 {
        return Ok(CriticalPathResult {
            path: Vec::new(),
            total_duration: 0,
            node_durations: HashMap::new(),
            span_names: Vec::new(),
        });
    }

    // Step 1: Topological sort (we'll use DFS-based approach)
    let topo_order = topological_sort(graph)?;

    // Step 2: Dynamic programming - compute longest path to each node
    let mut dist: HashMap<NodeId, u64> = HashMap::new();
    let mut parent: HashMap<NodeId, Option<NodeId>> = HashMap::new();

    // Initialize roots with their own durations
    for &root in graph.roots() {
        let span = graph.get_span(root).context("Root span not found in metadata")?;
        dist.insert(root, span.duration_nanos);
        parent.insert(root, None);
    }

    // Process nodes in topological order
    for &node in &topo_order {
        // Get children of this node
        let children = graph.children(node)?;

        for (child, _edge_weight) in children {
            let child_span = graph.get_span(child).context("Child span not found in metadata")?;

            // dist[child] = max(dist[parent] + child.duration)
            let new_dist = dist.get(&node).unwrap_or(&0) + child_span.duration_nanos;

            if new_dist > *dist.get(&child).unwrap_or(&0) {
                dist.insert(child, new_dist);
                parent.insert(child, Some(node));
            }
        }
    }

    // Step 3: Find the node with maximum distance (end of critical path)
    let (&critical_end, &total_duration) =
        dist.iter().max_by_key(|(_, &d)| d).context("No paths found in graph")?;

    // Step 4: Reconstruct the critical path by following parent pointers
    let (path, node_durations, span_names) = reconstruct_path(graph, critical_end, &parent);

    Ok(CriticalPathResult { path, total_duration, node_durations, span_names })
}

/// Reconstruct the critical path by walking parent pointers from end to root.
///
/// Returns `(path, node_durations, span_names)` in root-to-leaf order.
fn reconstruct_path(
    graph: &CausalGraph,
    start: NodeId,
    parent: &HashMap<NodeId, Option<NodeId>>,
) -> (Vec<NodeId>, HashMap<NodeId, u64>, Vec<String>) {
    let mut path = Vec::new();
    let mut node_durations = HashMap::new();
    let mut span_names = Vec::new();
    let mut current = start;

    loop {
        path.push(current);

        if let Some(span) = graph.get_span(current) {
            node_durations.insert(current, span.duration_nanos);
            span_names.push(span.span_name.clone());
        }

        match parent.get(&current) {
            Some(Some(p)) => current = *p,
            _ => break,
        }
    }

    path.reverse();
    span_names.reverse();

    (path, node_durations, span_names)
}

/// Perform topological sort on the graph using DFS
///
/// # Returns
///
/// Nodes in topological order, or error if graph has cycles.
fn topological_sort(graph: &CausalGraph) -> Result<Vec<NodeId>> {
    // Validate DAG
    if !graph.is_dag()? {
        anyhow::bail!("Graph contains cycles - cannot compute critical path");
    }

    let mut visited = std::collections::HashSet::new();
    let mut stack = Vec::new();

    // DFS from each root
    for &root in graph.roots() {
        if !visited.contains(&root) {
            dfs_topo(graph, root, &mut visited, &mut stack)?;
        }
    }

    // Stack has nodes in reverse topological order
    stack.reverse();
    Ok(stack)
}

/// DFS helper for topological sort
fn dfs_topo(
    graph: &CausalGraph,
    node: NodeId,
    visited: &mut std::collections::HashSet<NodeId>,
    stack: &mut Vec<NodeId>,
) -> Result<()> {
    visited.insert(node);

    let children = graph.children(node)?;
    for (child, _) in children {
        if !visited.contains(&child) {
            dfs_topo(graph, child, visited, stack)?;
        }
    }

    stack.push(node);
    Ok(())
}

/// Find all critical paths if there are multiple paths with the same maximum duration
///
/// This is useful when there are multiple equally-long paths through the graph.
///
/// # Arguments
///
/// * `graph` - The causal graph to analyze
/// * `tolerance_ns` - Tolerance for considering paths "equal" (in nanoseconds)
///
/// # Returns
///
/// All critical paths within tolerance of the longest path.
pub fn find_all_critical_paths(
    graph: &CausalGraph,
    _tolerance_ns: u64,
) -> Result<Vec<CriticalPathResult>> {
    // For now, just return the single longest path
    // Future: Implement multi-path finding if needed
    Ok(vec![find_critical_path(graph)?])
}

// Compile-time thread-safety verification (Sprint 59)
static_assertions::assert_impl_all!(CriticalPathResult: Send, Sync);

#[cfg(test)]
mod tests {
    use super::*;
    use crate::span_record::{SpanKind, SpanRecord, StatusCode};
    use std::collections::HashMap;

    fn create_span(
        span_id: u8,
        parent_id: Option<u8>,
        logical_clock: u64,
        duration_nanos: u64,
    ) -> SpanRecord {
        SpanRecord::new(
            [1; 16],
            [span_id; 8],
            parent_id.map(|p| [p; 8]),
            format!("span_{}", span_id),
            SpanKind::Internal,
            logical_clock * 1000,
            logical_clock * 1000 + duration_nanos,
            logical_clock,
            StatusCode::Ok,
            String::new(),
            HashMap::new(),
            HashMap::new(),
            1234,
            5678,
        )
    }

    #[test]
    fn test_empty_graph() {
        let graph = CausalGraph::from_spans(&[]).expect("test");
        let result = find_critical_path(&graph).expect("test");

        assert_eq!(result.path.len(), 0);
        assert_eq!(result.total_duration, 0);
    }

    #[test]
    fn test_single_span() {
        let root = create_span(1, None, 0, 1000);
        let graph = CausalGraph::from_spans(&[root]).expect("test");

        let result = find_critical_path(&graph).expect("test");

        assert_eq!(result.path.len(), 1);
        assert_eq!(result.total_duration, 1000);
        assert_eq!(result.span_names, vec!["span_1"]);
    }

    #[test]
    fn test_linear_path() {
        let root = create_span(1, None, 0, 1000);
        let child = create_span(2, Some(1), 1, 500);
        let grandchild = create_span(3, Some(2), 2, 700);

        let graph = CausalGraph::from_spans(&[root, child, grandchild]).expect("test");
        let result = find_critical_path(&graph).expect("test");

        assert_eq!(result.path.len(), 3);
        assert_eq!(result.total_duration, 1000 + 500 + 700);
        assert_eq!(result.span_names, vec!["span_1", "span_2", "span_3"]);
    }

    #[test]
    fn test_branching_path() {
        // Root with two children - one longer than the other
        let root = create_span(1, None, 0, 1000);
        let child_short = create_span(2, Some(1), 1, 500);
        let child_long = create_span(3, Some(1), 2, 2000);

        let graph = CausalGraph::from_spans(&[root, child_short, child_long]).expect("test");
        let result = find_critical_path(&graph).expect("test");

        // Critical path should go through the longer child
        assert_eq!(result.path.len(), 2);
        assert_eq!(result.total_duration, 1000 + 2000);
        assert!(result.span_names.contains(&"span_3".to_string()));
    }

    #[test]
    fn test_complex_tree() {
        // Root → (Child1, Child2)
        // Child1 → Grandchild1
        // Child2 (longer) → Grandchild2
        let root = create_span(1, None, 0, 1000);
        let child1 = create_span(2, Some(1), 1, 500);
        let child2 = create_span(3, Some(1), 2, 800);
        let grandchild1 = create_span(4, Some(2), 3, 300);
        let grandchild2 = create_span(5, Some(3), 4, 1200);

        let graph = CausalGraph::from_spans(&[root, child1, child2, grandchild1, grandchild2])
            .expect("test");
        let result = find_critical_path(&graph).expect("test");

        // Critical path: root → child2 → grandchild2
        assert_eq!(result.path.len(), 3);
        assert_eq!(result.total_duration, 1000 + 800 + 1200);
        assert_eq!(result.span_names, vec!["span_1", "span_3", "span_5"]);
    }

    #[test]
    fn test_longest_span() {
        let root = create_span(1, None, 0, 500);
        let child = create_span(2, Some(1), 1, 2000); // Longest
        let grandchild = create_span(3, Some(2), 2, 300);

        let graph = CausalGraph::from_spans(&[root, child, grandchild]).expect("test");
        let result = find_critical_path(&graph).expect("test");

        let (longest_node, duration) = result.longest_span().expect("test");
        assert_eq!(duration, 2000);
        assert_eq!(result.node_durations.get(&longest_node), Some(&2000));
    }

    #[test]
    fn test_is_on_critical_path() {
        let root = create_span(1, None, 0, 1000);
        let child1 = create_span(2, Some(1), 1, 500);
        let child2 = create_span(3, Some(1), 2, 2000);

        let graph = CausalGraph::from_spans(&[root, child1, child2]).expect("test");
        let result = find_critical_path(&graph).expect("test");

        assert!(result.is_on_critical_path(NodeId(0))); // root
        assert!(result.is_on_critical_path(NodeId(2))); // child2
        assert!(!result.is_on_critical_path(NodeId(1))); // child1 (shorter)
    }

    #[test]
    fn test_critical_path_percentage() {
        let root = create_span(1, None, 0, 1000);
        let child = create_span(2, Some(1), 1, 2000);

        let graph = CausalGraph::from_spans(&[root, child]).expect("test");
        let result = find_critical_path(&graph).expect("test");

        // Critical path is 3000ns, if total trace is 5000ns
        let percentage = result.critical_path_percentage(5000);
        assert_eq!(percentage, 60.0);
    }
}