Skip to main content

trueno/brick/profiler/
exec_graph_ext.rs

1//! Execution graph methods for BrickProfiler (PAR-201).
2//!
3//! Extracted from mod.rs to keep file sizes manageable.
4//! Contains all `graph_*` methods for execution path tracking.
5
6use super::BrickProfiler;
7use crate::brick::exec_graph::{BrickId, ExecutionNode, ExecutionNodeId};
8
9impl BrickProfiler {
10    // ========================================================================
11    // PAR-201: Execution Path Graph
12    // ========================================================================
13
14    /// Enable execution graph tracking.
15    ///
16    /// When enabled, the profiler records the execution hierarchy:
17    /// - Layer → Brick → Kernel relationships
18    /// - PTX hashes for kernel identity
19    /// - Timing data per node
20    pub fn enable_graph(&mut self) {
21        self.graph_enabled = true;
22    }
23
24    /// Disable execution graph tracking.
25    pub fn disable_graph(&mut self) {
26        self.graph_enabled = false;
27    }
28
29    /// Check if execution graph tracking is enabled.
30    #[must_use]
31    pub fn is_graph_enabled(&self) -> bool {
32        self.graph_enabled
33    }
34
35    /// Get the execution graph (immutable).
36    #[must_use]
37    pub fn execution_graph(&self) -> &crate::brick::exec_graph::ExecutionGraph {
38        &self.execution_graph
39    }
40
41    /// Get the execution graph (mutable).
42    pub fn execution_graph_mut(&mut self) -> &mut crate::brick::exec_graph::ExecutionGraph {
43        &mut self.execution_graph
44    }
45
46    /// Push a scope for hierarchical graph recording.
47    ///
48    /// # Example
49    ///
50    /// ```rust,ignore
51    /// profiler.enable_graph();
52    /// profiler.graph_push_scope(ExecutionNode::Layer { index: 0 });
53    /// // ... record bricks and kernels ...
54    /// profiler.graph_pop_scope();
55    /// ```
56    pub fn graph_push_scope(&mut self, node: ExecutionNode) -> Option<ExecutionNodeId> {
57        if !self.graph_enabled {
58            return None;
59        }
60        debug_assert!(
61            self.execution_graph.num_nodes() < 100_000,
62            "CB-BUDGET: execution graph has {} nodes, exceeds 100k budget",
63            self.execution_graph.num_nodes()
64        );
65        Some(self.execution_graph.push_scope(node))
66    }
67
68    /// Pop the current scope.
69    pub fn graph_pop_scope(&mut self) -> Option<ExecutionNodeId> {
70        if !self.graph_enabled {
71            return None;
72        }
73        self.execution_graph.pop_scope()
74    }
75
76    /// Record a brick in the execution graph.
77    ///
78    /// This should be called after `stop_brick()` with the timing data.
79    pub fn graph_record_brick(
80        &mut self,
81        brick_id: BrickId,
82        timing_ns: u64,
83        elements: u64,
84    ) -> Option<ExecutionNodeId> {
85        if !self.graph_enabled {
86            return None;
87        }
88        let node = ExecutionNode::Brick { id: brick_id, timing_ns, elements };
89        Some(self.execution_graph.add_node_in_scope(node))
90    }
91
92    /// Record a kernel launch in the execution graph.
93    ///
94    /// # Arguments
95    /// - `name`: Kernel name (e.g., "batched_q4k_gemv")
96    /// - `ptx_hash`: FNV-1a hash of PTX source for identity
97    /// - `grid`: Grid dimensions (blocks)
98    /// - `block`: Block dimensions (threads)
99    /// - `shared_mem`: Shared memory bytes
100    pub fn graph_record_kernel(
101        &mut self,
102        name: &str,
103        ptx_hash: u64,
104        grid: (u32, u32, u32),
105        block: (u32, u32, u32),
106        shared_mem: u32,
107    ) -> Option<ExecutionNodeId> {
108        if !self.graph_enabled {
109            return None;
110        }
111        Some(self.execution_graph.record_kernel_launch(name, ptx_hash, grid, block, shared_mem))
112    }
113
114    /// Export execution graph to DOT format for visualization.
115    ///
116    /// Use with Graphviz: `dot -Tsvg output.dot -o graph.svg`
117    #[must_use]
118    pub fn graph_to_dot(&self) -> String {
119        self.execution_graph.to_dot()
120    }
121
122    /// Export execution graph to trueno-graph CsrGraph.
123    #[cfg(feature = "execution-graph")]
124    #[must_use]
125    pub fn graph_to_csr(&self) -> trueno_graph::CsrGraph {
126        self.execution_graph.to_csr()
127    }
128
129    /// Clear the execution graph.
130    pub fn graph_clear(&mut self) {
131        self.execution_graph.clear();
132    }
133
134    /// Check if the execution graph scope stack is balanced.
135    #[must_use]
136    pub fn graph_is_scope_balanced(&self) -> bool {
137        self.execution_graph.is_scope_balanced()
138    }
139}