trueno/brick/profiler/exec_graph_ext.rs
1//! Execution graph methods for BrickProfiler (PAR-201).
2//!
3//! Extracted from mod.rs to keep file sizes manageable.
4//! Contains all `graph_*` methods for execution path tracking.
5
6use super::BrickProfiler;
7use crate::brick::exec_graph::{BrickId, ExecutionNode, ExecutionNodeId};
8
9impl BrickProfiler {
10 // ========================================================================
11 // PAR-201: Execution Path Graph
12 // ========================================================================
13
14 /// Enable execution graph tracking.
15 ///
16 /// When enabled, the profiler records the execution hierarchy:
17 /// - Layer → Brick → Kernel relationships
18 /// - PTX hashes for kernel identity
19 /// - Timing data per node
20 pub fn enable_graph(&mut self) {
21 self.graph_enabled = true;
22 }
23
24 /// Disable execution graph tracking.
25 pub fn disable_graph(&mut self) {
26 self.graph_enabled = false;
27 }
28
29 /// Check if execution graph tracking is enabled.
30 #[must_use]
31 pub fn is_graph_enabled(&self) -> bool {
32 self.graph_enabled
33 }
34
35 /// Get the execution graph (immutable).
36 #[must_use]
37 pub fn execution_graph(&self) -> &crate::brick::exec_graph::ExecutionGraph {
38 &self.execution_graph
39 }
40
41 /// Get the execution graph (mutable).
42 pub fn execution_graph_mut(&mut self) -> &mut crate::brick::exec_graph::ExecutionGraph {
43 &mut self.execution_graph
44 }
45
46 /// Push a scope for hierarchical graph recording.
47 ///
48 /// # Example
49 ///
50 /// ```rust,ignore
51 /// profiler.enable_graph();
52 /// profiler.graph_push_scope(ExecutionNode::Layer { index: 0 });
53 /// // ... record bricks and kernels ...
54 /// profiler.graph_pop_scope();
55 /// ```
56 pub fn graph_push_scope(&mut self, node: ExecutionNode) -> Option<ExecutionNodeId> {
57 if !self.graph_enabled {
58 return None;
59 }
60 debug_assert!(
61 self.execution_graph.num_nodes() < 100_000,
62 "CB-BUDGET: execution graph has {} nodes, exceeds 100k budget",
63 self.execution_graph.num_nodes()
64 );
65 Some(self.execution_graph.push_scope(node))
66 }
67
68 /// Pop the current scope.
69 pub fn graph_pop_scope(&mut self) -> Option<ExecutionNodeId> {
70 if !self.graph_enabled {
71 return None;
72 }
73 self.execution_graph.pop_scope()
74 }
75
76 /// Record a brick in the execution graph.
77 ///
78 /// This should be called after `stop_brick()` with the timing data.
79 pub fn graph_record_brick(
80 &mut self,
81 brick_id: BrickId,
82 timing_ns: u64,
83 elements: u64,
84 ) -> Option<ExecutionNodeId> {
85 if !self.graph_enabled {
86 return None;
87 }
88 let node = ExecutionNode::Brick { id: brick_id, timing_ns, elements };
89 Some(self.execution_graph.add_node_in_scope(node))
90 }
91
92 /// Record a kernel launch in the execution graph.
93 ///
94 /// # Arguments
95 /// - `name`: Kernel name (e.g., "batched_q4k_gemv")
96 /// - `ptx_hash`: FNV-1a hash of PTX source for identity
97 /// - `grid`: Grid dimensions (blocks)
98 /// - `block`: Block dimensions (threads)
99 /// - `shared_mem`: Shared memory bytes
100 pub fn graph_record_kernel(
101 &mut self,
102 name: &str,
103 ptx_hash: u64,
104 grid: (u32, u32, u32),
105 block: (u32, u32, u32),
106 shared_mem: u32,
107 ) -> Option<ExecutionNodeId> {
108 if !self.graph_enabled {
109 return None;
110 }
111 Some(self.execution_graph.record_kernel_launch(name, ptx_hash, grid, block, shared_mem))
112 }
113
114 /// Export execution graph to DOT format for visualization.
115 ///
116 /// Use with Graphviz: `dot -Tsvg output.dot -o graph.svg`
117 #[must_use]
118 pub fn graph_to_dot(&self) -> String {
119 self.execution_graph.to_dot()
120 }
121
122 /// Export execution graph to trueno-graph CsrGraph.
123 #[cfg(feature = "execution-graph")]
124 #[must_use]
125 pub fn graph_to_csr(&self) -> trueno_graph::CsrGraph {
126 self.execution_graph.to_csr()
127 }
128
129 /// Clear the execution graph.
130 pub fn graph_clear(&mut self) {
131 self.execution_graph.clear();
132 }
133
134 /// Check if the execution graph scope stack is balanced.
135 #[must_use]
136 pub fn graph_is_scope_balanced(&self) -> bool {
137 self.execution_graph.is_scope_balanced()
138 }
139}