Skip to main content

oximedia_graph/
profiling.rs

//! Graph execution profiling.
//!
//! Provides per-node timing statistics and port throughput metrics.
4
5#![allow(dead_code)]
6
7use std::collections::HashMap;
8
9// ─────────────────────────────────────────────────────────────────────────────
10// NodeProfile
11// ─────────────────────────────────────────────────────────────────────────────
12
/// Per-node profiling statistics.
///
/// All durations are expressed in microseconds. A freshly created profile has
/// every counter at zero; the aggregates are updated one sample at a time as
/// executions are recorded.
#[derive(Debug, Clone)]
pub struct NodeProfile {
    /// Unique node identifier.
    pub node_id: String,
    /// Average execution duration in microseconds.
    ///
    /// Computed as the integer quotient `total_us / call_count`, so any
    /// fractional part is discarded.
    pub avg_duration_us: u64,
    /// Maximum observed execution duration in microseconds.
    pub max_duration_us: u64,
    /// Total number of times the node was executed.
    pub call_count: u64,
    /// Cumulative execution time in microseconds (saturates at `u64::MAX`).
    pub total_us: u64,
}

impl NodeProfile {
    /// Create a new empty profile for a node.
    ///
    /// `node_id` is stored as an owned `String`; all counters start at zero.
    #[must_use]
    pub fn new(node_id: impl Into<String>) -> Self {
        Self {
            node_id: node_id.into(),
            avg_duration_us: 0,
            max_duration_us: 0,
            call_count: 0,
            total_us: 0,
        }
    }

    /// Simplified spread estimate: `(max - avg) / 2`.
    ///
    /// This is a cheap heuristic derived from the two aggregates that are
    /// already tracked; it is not a statistical standard deviation. The
    /// subtraction saturates, so the result is `0` whenever `max <= avg`.
    #[must_use]
    pub fn std_dev_us(&self) -> u64 {
        self.max_duration_us.saturating_sub(self.avg_duration_us) / 2
    }

    /// Record a new sample of `duration_us` microseconds.
    ///
    /// The running total and the call counter saturate at `u64::MAX` instead
    /// of overflowing, so recording can never panic (debug builds) or wrap
    /// around to a small value (release builds).
    fn record(&mut self, duration_us: u64) {
        self.total_us = self.total_us.saturating_add(duration_us);
        self.call_count = self.call_count.saturating_add(1);
        // `call_count` is at least 1 at this point, so the division is safe.
        self.avg_duration_us = self.total_us / self.call_count;
        self.max_duration_us = self.max_duration_us.max(duration_us);
    }
}
57
58// ─────────────────────────────────────────────────────────────────────────────
59// GraphProfiler
60// ─────────────────────────────────────────────────────────────────────────────
61
62/// Collects per-node timing samples during graph execution.
63#[derive(Debug, Default)]
64pub struct GraphProfiler {
65    profiles: HashMap<String, NodeProfile>,
66}
67
68impl GraphProfiler {
69    /// Create a new, empty profiler.
70    #[must_use]
71    pub fn new() -> Self {
72        Self {
73            profiles: HashMap::new(),
74        }
75    }
76
77    /// Record an execution duration for `node_id`.
78    pub fn record(&mut self, node_id: &str, duration_us: u64) {
79        let profile = self
80            .profiles
81            .entry(node_id.to_string())
82            .or_insert_with(|| NodeProfile::new(node_id));
83        profile.record(duration_us);
84    }
85
86    /// Retrieve the profile for a specific node.
87    #[must_use]
88    pub fn profile(&self, node_id: &str) -> Option<&NodeProfile> {
89        self.profiles.get(node_id)
90    }
91
92    /// Return the `n` hottest (highest total execution time) node profiles,
93    /// sorted descending by `total_us`.
94    #[must_use]
95    pub fn hottest_nodes(&self, n: usize) -> Vec<&NodeProfile> {
96        let mut profiles: Vec<&NodeProfile> = self.profiles.values().collect();
97        profiles.sort_by(|a, b| b.total_us.cmp(&a.total_us));
98        profiles.truncate(n);
99        profiles
100    }
101
102    /// Return all profiles as an unsorted slice.
103    #[must_use]
104    pub fn all_profiles(&self) -> Vec<&NodeProfile> {
105        self.profiles.values().collect()
106    }
107}
108
109// ─────────────────────────────────────────────────────────────────────────────
110// PortThroughput
111// ─────────────────────────────────────────────────────────────────────────────
112
/// Traffic counters for a single port, together with the derived mean frame
/// size.
#[derive(Debug, Clone)]
pub struct PortThroughput {
    /// Port identifier (e.g. `"node_a:output_0"`).
    pub port_id: String,
    /// Total bytes transferred through this port.
    pub bytes_transferred: u64,
    /// Total number of frames transferred.
    pub frames_transferred: u64,
    /// Average frame size in bytes (integer quotient; `0` when no frames
    /// were transferred).
    pub avg_frame_size: u64,
}

impl PortThroughput {
    /// Build a throughput record from raw byte and frame counters.
    ///
    /// `avg_frame_size` is derived as `bytes_transferred / frames_transferred`
    /// using integer division, and is `0` when `frames_transferred` is zero.
    #[must_use]
    pub fn new(
        port_id: impl Into<String>,
        bytes_transferred: u64,
        frames_transferred: u64,
    ) -> Self {
        // Guard the division explicitly: a port that moved no frames has no
        // meaningful average, which is reported as zero.
        let mean_frame_bytes = match frames_transferred {
            0 => 0,
            frames => bytes_transferred / frames,
        };

        Self {
            port_id: port_id.into(),
            bytes_transferred,
            frames_transferred,
            avg_frame_size: mean_frame_bytes,
        }
    }
}
145
146// ─────────────────────────────────────────────────────────────────────────────
147// GraphProfilingReport
148// ─────────────────────────────────────────────────────────────────────────────
149
150/// A complete profiling report for a graph execution run.
151#[derive(Debug, Clone)]
152pub struct GraphProfilingReport {
153    /// Per-node profiles sorted by total execution time (descending).
154    pub node_profiles: Vec<NodeProfile>,
155    /// Per-port throughput records.
156    pub port_throughputs: Vec<PortThroughput>,
157    /// Wall-clock duration of the entire execution in microseconds.
158    pub total_duration_us: u64,
159    /// Estimated CPU efficiency: `sum(node_total_us) / total_duration_us * 100`.
160    pub cpu_efficiency_pct: f32,
161}
162
163impl GraphProfilingReport {
164    /// Generate a profiling report from a [`GraphProfiler`].
165    ///
166    /// `port_throughputs` and `total_duration_us` are passed externally as the
167    /// profiler itself does not measure wall time or port traffic.
168    #[must_use]
169    #[allow(clippy::manual_checked_ops)]
170    pub fn generate(profiler: &GraphProfiler) -> Self {
171        let mut node_profiles: Vec<NodeProfile> = profiler.profiles.values().cloned().collect();
172        node_profiles.sort_by(|a, b| b.total_us.cmp(&a.total_us));
173
174        let total_node_us: u64 = node_profiles.iter().map(|p| p.total_us).sum();
175        let total_duration_us = total_node_us; // wall time = sum of node times (sequential baseline)
176
177        let cpu_efficiency_pct = if total_duration_us > 0 {
178            (total_node_us as f64 / total_duration_us as f64 * 100.0) as f32
179        } else {
180            100.0
181        };
182
183        Self {
184            node_profiles,
185            port_throughputs: vec![],
186            total_duration_us,
187            cpu_efficiency_pct,
188        }
189    }
190
191    /// Generate a full report with explicit port throughputs and wall-clock
192    /// duration.
193    #[must_use]
194    #[allow(clippy::manual_checked_ops)]
195    pub fn generate_full(
196        profiler: &GraphProfiler,
197        port_throughputs: Vec<PortThroughput>,
198        total_duration_us: u64,
199    ) -> Self {
200        let mut report = Self::generate(profiler);
201        report.port_throughputs = port_throughputs;
202
203        let total_node_us: u64 = report.node_profiles.iter().map(|p| p.total_us).sum();
204        report.total_duration_us = total_duration_us;
205        report.cpu_efficiency_pct = if total_duration_us > 0 {
206            (total_node_us as f64 / total_duration_us as f64 * 100.0).min(100.0) as f32
207        } else {
208            100.0
209        };
210
211        report
212    }
213}
214
215// ─────────────────────────────────────────────────────────────────────────────
216// Unit tests
217// ─────────────────────────────────────────────────────────────────────────────
218
#[cfg(test)]
mod tests {
    use super::*;

    // ── NodeProfile ───────────────────────────────────────────────────────────

    /// A fresh profile keeps its id and starts with all counters at zero.
    #[test]
    fn test_node_profile_initial_state() {
        let fresh = NodeProfile::new("node_a");
        assert_eq!(fresh.node_id, "node_a");
        let counters = [
            fresh.call_count,
            fresh.total_us,
            fresh.avg_duration_us,
            fresh.max_duration_us,
        ];
        assert_eq!(counters, [0, 0, 0, 0]);
    }

    /// Two samples update count, total, average and maximum.
    #[test]
    fn test_node_profile_record_updates_stats() {
        let mut profile = NodeProfile::new("n");
        for sample_us in [100, 200] {
            profile.record(sample_us);
        }
        assert_eq!((profile.call_count, profile.total_us), (2, 300));
        assert_eq!(
            (profile.avg_duration_us, profile.max_duration_us),
            (150, 200)
        );
    }

    /// The simplified deviation is half the gap between max and average.
    #[test]
    fn test_node_profile_std_dev() {
        let mut profile = NodeProfile::new("n");
        for sample_us in [100, 300] {
            profile.record(sample_us);
        }
        // avg=200, max=300 → std_dev=(300-200)/2=50
        let spread = profile.std_dev_us();
        assert_eq!(spread, 50);
    }

    /// With a single sample avg == max, so the deviation collapses to zero.
    #[test]
    fn test_node_profile_std_dev_zero_when_equal() {
        let mut profile = NodeProfile::new("n");
        profile.record(100);
        let spread = profile.std_dev_us();
        assert_eq!(spread, 0);
    }

    // ── GraphProfiler ─────────────────────────────────────────────────────────

    /// The first sample for a node creates its profile.
    #[test]
    fn test_profiler_record_creates_profile() {
        let mut profiler = GraphProfiler::new();
        profiler.record("node_x", 500);

        let recorded = profiler.profile("node_x");
        assert!(recorded.is_some());
        let calls = recorded.expect("profile should succeed").call_count;
        assert_eq!(calls, 1);
    }

    /// Looking up an unknown node yields `None`.
    #[test]
    fn test_profiler_missing_node_returns_none() {
        let profiler = GraphProfiler::new();
        let lookup = profiler.profile("nonexistent");
        assert!(lookup.is_none());
    }

    /// Repeated samples for one node accumulate in a single profile.
    #[test]
    fn test_profiler_multiple_records() {
        let mut profiler = GraphProfiler::new();
        [100, 200, 300]
            .into_iter()
            .for_each(|us| profiler.record("n", us));

        let profile = profiler.profile("n").expect("profile should succeed");
        assert_eq!((profile.call_count, profile.total_us), (3, 600));
    }

    /// The hottest-node list is ordered by descending total time.
    #[test]
    fn test_profiler_hottest_nodes_sorted() {
        let mut profiler = GraphProfiler::new();
        for (node, us) in [("slow", 1000), ("fast", 10), ("medium", 500)] {
            profiler.record(node, us);
        }

        let hot = profiler.hottest_nodes(2);
        let ids: Vec<&str> = hot.iter().map(|p| p.node_id.as_str()).collect();
        assert_eq!(ids, ["slow", "medium"]);
    }

    /// Asking for more nodes than exist returns every node.
    #[test]
    fn test_profiler_hottest_n_clamped() {
        let mut profiler = GraphProfiler::new();
        profiler.record("a", 100);

        // Requesting more than available should return all.
        let returned = profiler.hottest_nodes(10).len();
        assert_eq!(returned, 1);
    }

    // ── PortThroughput ────────────────────────────────────────────────────────

    /// Average frame size is bytes divided by frames.
    #[test]
    fn test_port_throughput_avg_frame_size() {
        let throughput = PortThroughput::new("port_0", 1024, 4);
        assert_eq!(throughput.avg_frame_size, 256);
    }

    /// Zero frames must not divide by zero; the average is reported as zero.
    #[test]
    fn test_port_throughput_zero_frames() {
        let throughput = PortThroughput::new("port_0", 0, 0);
        assert_eq!(throughput.avg_frame_size, 0);
    }

    // ── GraphProfilingReport ──────────────────────────────────────────────────

    /// An empty profiler yields an empty report at 100 % efficiency.
    #[test]
    fn test_report_generate_empty_profiler() {
        let report = GraphProfilingReport::generate(&GraphProfiler::new());
        assert!(report.node_profiles.is_empty());
        assert_eq!(report.total_duration_us, 0);

        let deviation = (report.cpu_efficiency_pct - 100.0).abs();
        assert!(deviation < 0.01);
    }

    /// Report profiles are ordered with the most expensive node first.
    #[test]
    fn test_report_generate_sorted_profiles() {
        let mut profiler = GraphProfiler::new();
        for (node, us) in [("cheap", 50), ("expensive", 5000)] {
            profiler.record(node, us);
        }

        let report = GraphProfilingReport::generate(&profiler);
        let hottest = &report.node_profiles[0];
        assert_eq!(hottest.node_id, "expensive");
    }

    /// The full report carries the supplied throughputs and wall time.
    #[test]
    fn test_report_generate_full() {
        let mut profiler = GraphProfiler::new();
        profiler.record("n", 1000);

        let throughputs = vec![PortThroughput::new("p0", 4096, 8)];
        let report = GraphProfilingReport::generate_full(&profiler, throughputs, 2000);

        assert_eq!(report.port_throughputs.len(), 1);
        assert_eq!(report.total_duration_us, 2000);
        // cpu efficiency = 1000/2000 * 100 = 50 %
        let deviation = (report.cpu_efficiency_pct - 50.0).abs();
        assert!(deviation < 1.0);
    }
}