Skip to main content

trueno/brick/exec_graph/node/
execution.rs

1#![allow(missing_docs)]
2//! Execution Path Graph Types (PAR-201)
3//!
4//! Node, edge, and transfer types for the execution hierarchy.
5
6use super::BrickId;
7
/// Identifier of a node inside the execution graph.
///
/// A thin newtype around `u32`; it is `Copy`, comparable, and hashable so it
/// can be passed by value and used as a map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct ExecutionNodeId(pub u32);

impl ExecutionNodeId {
    /// Upper bound (exclusive) on valid node IDs: 100k nodes.
    pub const MAX_BUDGET: u32 = 100_000;

    /// Report whether this ID lies strictly below [`Self::MAX_BUDGET`].
    ///
    /// # Panics
    ///
    /// When debug assertions are enabled, an out-of-budget ID triggers a
    /// `debug_assert!` failure instead of returning `false`. Without debug
    /// assertions the function simply returns `false`.
    #[inline]
    pub fn validate(self) -> bool {
        // Evaluate the budget check once; it feeds both the assertion and the result.
        let within_budget = self.0 < Self::MAX_BUDGET;
        debug_assert!(
            within_budget,
            "CB-BUDGET: node id {} exceeds max budget {}",
            self.0,
            Self::MAX_BUDGET
        );
        within_budget
    }
}
28
/// Execution graph node types.
///
/// PAR-201: Represents different levels of the execution hierarchy.
///
/// Each variant carries the data recorded for one kind of node: bricks, GPU
/// kernel launches, memory transfers, functions, layer groupings, and async
/// tasks.
#[derive(Debug, Clone)]
pub enum ExecutionNode {
    /// High-level brick (BrickId from v2)
    ///
    /// `id` identifies the brick and supplies the node's display name.
    /// `timing_ns` is a timing value in nanoseconds (per the field name);
    /// unlike the kernel and transfer timings it is not optional.
    /// `elements` is an element count.
    /// NOTE(review): presumably the number of elements the brick processed — confirm.
    Brick { id: BrickId, timing_ns: u64, elements: u64 },
    /// GPU kernel launch
    Kernel {
        /// Kernel name; also used as the node's display name
        name: String,
        /// FNV-1a hash of PTX source for identity
        ptx_hash: u64,
        /// Grid dimensions (blocks)
        grid: (u32, u32, u32),
        /// Block dimensions (threads)
        block: (u32, u32, u32),
        /// Shared memory bytes
        shared_mem: u32,
        /// Kernel execution time in nanoseconds (Phase 9: for CPA)
        timing_ns: Option<u64>,
        /// Arithmetic intensity (FLOPs/byte) for roofline analysis (Phase 9)
        arithmetic_intensity: Option<f32>,
        /// Achieved throughput in TFLOP/s (Phase 9)
        achieved_tflops: Option<f32>,
    },
    /// Memory transfer operation (Phase 9: data movement topology)
    Transfer {
        /// Source location description
        src: String,
        /// Destination location description
        dst: String,
        /// Bytes transferred
        bytes: u64,
        /// Transfer direction
        direction: TransferDirection,
        /// Transfer time in nanoseconds
        timing_ns: Option<u64>,
    },
    /// Rust function (from DWARF or manual annotation)
    ///
    /// `name` is the function name and doubles as the node's display name.
    /// `file` and `line` optionally carry a source location.
    /// NOTE(review): presumably the function's definition site — confirm.
    Function { name: String, file: Option<String>, line: Option<u32> },
    /// Transformer layer grouping
    ///
    /// `index` is the layer's index; the display name is rendered as
    /// `Layer<index>`.
    Layer { index: u32 },
    /// Phase 11 (E.9.4): Async task metrics for poll efficiency tracking
    AsyncTask {
        /// Task name for identification
        name: String,
        /// Number of times poll() was called
        poll_count: u64,
        /// Number of times poll() returned Pending
        yield_count: u64,
        /// Total time spent in poll() (nanoseconds)
        total_poll_ns: u64,
    },
}
83
84impl ExecutionNode {
85    /// Get the display name of this node.
86    pub fn name(&self) -> String {
87        match self {
88            Self::Brick { id, .. } => id.name().to_string(),
89            Self::Kernel { name, .. } => name.clone(),
90            Self::Function { name, .. } => name.clone(),
91            Self::Layer { index } => format!("Layer{}", index),
92            Self::Transfer { src, dst, direction, .. } => {
93                let dir = match direction {
94                    TransferDirection::H2D => "H2D",
95                    TransferDirection::D2H => "D2H",
96                    TransferDirection::D2D => "D2D",
97                };
98                format!("{}:{}->{}", dir, src, dst)
99            }
100            Self::AsyncTask { name, .. } => name.clone(),
101        }
102    }
103
104    /// Check if this is a kernel node.
105    pub fn is_kernel(&self) -> bool {
106        matches!(self, Self::Kernel { .. })
107    }
108
109    /// Check if this is a brick node.
110    pub fn is_brick(&self) -> bool {
111        matches!(self, Self::Brick { .. })
112    }
113
114    /// Check if this is a transfer node.
115    pub fn is_transfer(&self) -> bool {
116        matches!(self, Self::Transfer { .. })
117    }
118
119    /// Get timing if available (bricks, kernels, and transfers).
120    pub fn timing_ns(&self) -> Option<u64> {
121        match self {
122            Self::Brick { timing_ns, .. } => Some(*timing_ns),
123            Self::Kernel { timing_ns, .. } => *timing_ns,
124            Self::Transfer { timing_ns, .. } => *timing_ns,
125            _ => None,
126        }
127    }
128
129    /// Get PTX hash if available (kernels only).
130    pub fn ptx_hash(&self) -> Option<u64> {
131        match self {
132            Self::Kernel { ptx_hash, .. } => Some(*ptx_hash),
133            _ => None,
134        }
135    }
136
137    /// Get arithmetic intensity if available (kernels only, Phase 9).
138    pub fn arithmetic_intensity(&self) -> Option<f32> {
139        match self {
140            Self::Kernel { arithmetic_intensity, .. } => *arithmetic_intensity,
141            _ => None,
142        }
143    }
144
145    /// Get achieved TFLOP/s if available (kernels only, Phase 9).
146    pub fn achieved_tflops(&self) -> Option<f32> {
147        match self {
148            Self::Kernel { achieved_tflops, .. } => *achieved_tflops,
149            _ => None,
150        }
151    }
152
153    /// Get transfer bytes if available (transfers only, Phase 9).
154    pub fn transfer_bytes(&self) -> Option<u64> {
155        match self {
156            Self::Transfer { bytes, .. } => Some(*bytes),
157            _ => None,
158        }
159    }
160}
161
/// Edge types in execution graph.
///
/// PAR-201: Describes relationships between execution nodes.
/// Phase 9 (E.7.12): Added DependsOn and Transfer for advanced profiling.
///
/// Every payload field is an integer or a [`TransferDirection`], so equality
/// is total and the type is hashable; edge types can therefore be used as
/// `HashMap`/`HashSet` keys (e.g. to group edges by kind).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum EdgeType {
    /// Function calls function
    Calls,
    /// Brick contains sub-operations
    Contains,
    /// Function launches GPU kernel
    Launches,
    /// Temporal sequence (A happens before B)
    Sequence,
    /// Dependency edge for critical path analysis (CUDA events, stream sync)
    /// PAR-201 Phase 9: CPA requires tracking true dependencies vs containment
    DependsOn,
    /// Data transfer edge with byte count (H2D/D2H/D2D)
    /// PAR-201 Phase 9: For data movement topology and ping-pong detection
    Transfer {
        /// Bytes transferred
        bytes: u64,
        /// Transfer direction
        direction: TransferDirection,
    },
}

/// Direction of memory transfer.
///
/// PAR-201 Phase 9: Used with EdgeType::Transfer for data movement analysis.
///
/// Derives `Hash` in addition to `Eq` so it can be embedded in hashable types
/// such as [`EdgeType`] and used directly as a map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TransferDirection {
    /// Host to Device
    H2D,
    /// Device to Host
    D2H,
    /// Device to Device
    D2D,
}
201
/// An edge in the execution graph.
///
/// Connects two nodes by ID and records the kind of relationship plus a
/// numeric weight.
#[derive(Debug, Clone)]
pub struct ExecutionEdge {
    /// Source node ID
    pub src: ExecutionNodeId,
    /// Destination node ID
    pub dst: ExecutionNodeId,
    /// Edge type
    pub edge_type: EdgeType,
    /// Edge weight (e.g., call count, timing).
    ///
    /// Stored as a plain `f32`, so a value is always present (this is not an
    /// `Option`). NOTE(review): the value used for "unweighted" edges is not
    /// defined here — confirm the convention with callers.
    pub weight: f32,
}