1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
//! Execution graph methods for BrickProfiler (PAR-201).
//!
//! Extracted from mod.rs to keep file sizes manageable.
//! Contains the graph tracking toggles, the execution graph accessors, and
//! all `graph_*` methods for execution path tracking.
use super::BrickProfiler;
use crate::brick::exec_graph::{BrickId, ExecutionNode, ExecutionNodeId};
impl BrickProfiler {
// ========================================================================
// PAR-201: Execution Path Graph
// ========================================================================
/// Turn on execution graph tracking.
///
/// While tracking is on, the profiler records the execution hierarchy:
/// - Layer → Brick → Kernel relationships
/// - PTX hashes for kernel identity
/// - Timing data per node
///
/// While tracking is off, the recording methods (`graph_push_scope`,
/// `graph_pop_scope`, `graph_record_brick`, `graph_record_kernel`) return
/// `None` without touching the graph.
pub fn enable_graph(&mut self) {
    self.graph_enabled = true;
}
/// Turn off execution graph tracking.
///
/// This only flips the tracking flag; the graph itself is not modified here.
pub fn disable_graph(&mut self) {
    self.graph_enabled = false;
}
/// Report whether execution graph tracking is currently on.
///
/// Returns the flag set by `enable_graph` / `disable_graph`.
#[must_use]
pub fn is_graph_enabled(&self) -> bool {
    self.graph_enabled
}
/// Borrow the execution graph read-only.
///
/// The graph is returned regardless of whether tracking is enabled.
#[must_use]
pub fn execution_graph(&self) -> &crate::brick::exec_graph::ExecutionGraph {
    &self.execution_graph
}
/// Borrow the execution graph for in-place modification.
///
/// Access through this reference is not gated by the tracking flag: the
/// graph is handed out whether or not tracking is enabled.
pub fn execution_graph_mut(&mut self) -> &mut crate::brick::exec_graph::ExecutionGraph {
    &mut self.execution_graph
}
/// Open a new scope for hierarchical graph recording.
///
/// Returns the ID handed back by the graph's `push_scope`, or `None` when
/// tracking is disabled (in which case `node` is dropped and the graph is
/// left untouched).
///
/// # Panics
///
/// In debug builds only, panics if the graph already holds 100,000 or more
/// nodes when a scope is pushed. The check is compiled out of release builds.
///
/// # Example
///
/// ```rust,ignore
/// profiler.enable_graph();
/// profiler.graph_push_scope(ExecutionNode::Layer { index: 0 });
/// // ... record bricks and kernels ...
/// profiler.graph_pop_scope();
/// ```
pub fn graph_push_scope(&mut self, node: ExecutionNode) -> Option<ExecutionNodeId> {
    if self.graph_enabled {
        // Budget guard: catch runaway graph growth during development.
        debug_assert!(
            self.execution_graph.num_nodes() < 100_000,
            "CB-BUDGET: execution graph has {} nodes, exceeds 100k budget",
            self.execution_graph.num_nodes()
        );
        Some(self.execution_graph.push_scope(node))
    } else {
        None
    }
}
/// Close the current scope.
///
/// Returns whatever the graph's `pop_scope` returns, or `None` when tracking
/// is disabled (the graph is not consulted in that case).
pub fn graph_pop_scope(&mut self) -> Option<ExecutionNodeId> {
    if self.graph_enabled {
        self.execution_graph.pop_scope()
    } else {
        None
    }
}
/// Add a brick node to the execution graph within the current scope.
///
/// This should be called after `stop_brick()` with the timing data.
///
/// # Arguments
/// - `brick_id`: Identity of the brick, stored as the node's `id`
/// - `timing_ns`: Timing value stored on the node (presumably nanoseconds,
///   going by the name)
/// - `elements`: Element count stored on the node
///
/// Returns the ID of the new node, or `None` when tracking is disabled.
pub fn graph_record_brick(
    &mut self,
    brick_id: BrickId,
    timing_ns: u64,
    elements: u64,
) -> Option<ExecutionNodeId> {
    if self.graph_enabled {
        Some(self.execution_graph.add_node_in_scope(ExecutionNode::Brick {
            id: brick_id,
            timing_ns,
            elements,
        }))
    } else {
        None
    }
}
/// Add a kernel launch to the execution graph.
///
/// All arguments are forwarded unchanged to the graph's
/// `record_kernel_launch`.
///
/// # Arguments
/// - `name`: Kernel name (e.g., "batched_q4k_gemv")
/// - `ptx_hash`: FNV-1a hash of PTX source for identity
/// - `grid`: Grid dimensions (blocks)
/// - `block`: Block dimensions (threads)
/// - `shared_mem`: Shared memory bytes
///
/// Returns the ID of the recorded node, or `None` when tracking is disabled.
pub fn graph_record_kernel(
    &mut self,
    name: &str,
    ptx_hash: u64,
    grid: (u32, u32, u32),
    block: (u32, u32, u32),
    shared_mem: u32,
) -> Option<ExecutionNodeId> {
    // The closure only runs when tracking is on, so a disabled profiler
    // never touches the graph.
    self.graph_enabled.then(|| {
        self.execution_graph.record_kernel_launch(name, ptx_hash, grid, block, shared_mem)
    })
}
/// Render the execution graph as a DOT document for visualization.
///
/// The returned string can be fed to Graphviz:
/// `dot -Tsvg output.dot -o graph.svg`
///
/// The export is produced whether or not tracking is enabled.
#[must_use]
pub fn graph_to_dot(&self) -> String {
    self.execution_graph.to_dot()
}
/// Convert the execution graph into a trueno-graph `CsrGraph`.
///
/// Only compiled when the `execution-graph` feature is enabled.
#[cfg(feature = "execution-graph")]
#[must_use]
pub fn graph_to_csr(&self) -> trueno_graph::CsrGraph {
    self.execution_graph.to_csr()
}
/// Reset the execution graph by delegating to its `clear` method.
///
/// The tracking flag is left as it was; clearing happens whether or not
/// tracking is enabled.
pub fn graph_clear(&mut self) {
    self.execution_graph.clear();
}
/// Report whether the execution graph's scope stack is balanced.
///
/// Delegates to the graph's `is_scope_balanced`; the tracking flag is not
/// consulted.
#[must_use]
pub fn graph_is_scope_balanced(&self) -> bool {
    self.execution_graph.is_scope_balanced()
}
}