use std::collections::HashMap;
use crate::brick::exec_graph::node::{
EdgeType, ExecutionEdge, ExecutionNode, ExecutionNodeId, TransferDirection,
};
#[derive(Debug, Default)]
pub struct ExecutionGraph {
pub(crate) nodes: Vec<ExecutionNode>,
pub(crate) edges: Vec<ExecutionEdge>,
pub(crate) scope_stack: Vec<ExecutionNodeId>,
pub(crate) name_to_id: HashMap<String, ExecutionNodeId>,
}
impl ExecutionGraph {
pub fn new() -> Self {
Self::default()
}
pub fn add_node(&mut self, node: ExecutionNode) -> ExecutionNodeId {
let id = ExecutionNodeId(self.nodes.len() as u32);
let name = node.name();
self.name_to_id.insert(name, id);
self.nodes.push(node);
id
}
pub fn add_edge(&mut self, src: ExecutionNodeId, dst: ExecutionNodeId, edge_type: EdgeType) {
debug_assert!(
(src.0 as usize) < self.nodes.len(),
"CB-BUDGET: src node {} does not exist (graph has {} nodes)",
src.0,
self.nodes.len()
);
debug_assert!(
(dst.0 as usize) < self.nodes.len(),
"CB-BUDGET: dst node {} does not exist (graph has {} nodes)",
dst.0,
self.nodes.len()
);
self.edges.push(ExecutionEdge { src, dst, edge_type, weight: 1.0 });
}
pub fn add_weighted_edge(
&mut self,
src: ExecutionNodeId,
dst: ExecutionNodeId,
edge_type: EdgeType,
weight: f32,
) {
self.edges.push(ExecutionEdge { src, dst, edge_type, weight });
}
pub fn push_scope(&mut self, node: ExecutionNode) -> ExecutionNodeId {
let id = self.add_node(node);
if let Some(&parent) = self.scope_stack.last() {
self.add_edge(parent, id, EdgeType::Contains);
}
self.scope_stack.push(id);
id
}
pub fn pop_scope(&mut self) -> Option<ExecutionNodeId> {
self.scope_stack.pop()
}
pub fn current_scope(&self) -> Option<ExecutionNodeId> {
self.scope_stack.last().copied()
}
pub fn add_node_in_scope(&mut self, node: ExecutionNode) -> ExecutionNodeId {
let id = self.add_node(node);
if let Some(&parent) = self.scope_stack.last() {
self.add_edge(parent, id, EdgeType::Contains);
}
id
}
pub fn record_kernel_launch(
&mut self,
name: &str,
ptx_hash: u64,
grid: (u32, u32, u32),
block: (u32, u32, u32),
shared_mem: u32,
) -> ExecutionNodeId {
debug_assert!(grid.0 > 0 && grid.1 > 0 && grid.2 > 0, "CB-BUDGET: grid dims must be > 0");
debug_assert!(
block.0 > 0 && block.1 > 0 && block.2 > 0,
"CB-BUDGET: block dims must be > 0"
);
let kernel = ExecutionNode::Kernel {
name: name.to_string(),
ptx_hash,
grid,
block,
shared_mem,
timing_ns: None,
arithmetic_intensity: None,
achieved_tflops: None,
};
let kernel_id = self.add_node(kernel);
if let Some(&parent) = self.scope_stack.last() {
self.add_edge(parent, kernel_id, EdgeType::Launches);
}
kernel_id
}
#[allow(clippy::too_many_arguments)]
pub fn record_kernel_launch_with_metrics(
&mut self,
name: &str,
ptx_hash: u64,
grid: (u32, u32, u32),
block: (u32, u32, u32),
shared_mem: u32,
timing_ns: u64,
arithmetic_intensity: f32,
achieved_tflops: f32,
) -> ExecutionNodeId {
let kernel = ExecutionNode::Kernel {
name: name.to_string(),
ptx_hash,
grid,
block,
shared_mem,
timing_ns: Some(timing_ns),
arithmetic_intensity: Some(arithmetic_intensity),
achieved_tflops: Some(achieved_tflops),
};
let kernel_id = self.add_node(kernel);
if let Some(&parent) = self.scope_stack.last() {
self.add_edge(parent, kernel_id, EdgeType::Launches);
}
kernel_id
}
pub fn record_transfer(
&mut self,
src: &str,
dst: &str,
bytes: u64,
direction: TransferDirection,
timing_ns: Option<u64>,
) -> ExecutionNodeId {
let transfer = ExecutionNode::Transfer {
src: src.to_string(),
dst: dst.to_string(),
bytes,
direction,
timing_ns,
};
let transfer_id = self.add_node(transfer);
if let Some(&parent) = self.scope_stack.last() {
self.add_edge(parent, transfer_id, EdgeType::Contains);
}
transfer_id
}
pub fn add_dependency(&mut self, from: ExecutionNodeId, to: ExecutionNodeId) {
self.add_edge(from, to, EdgeType::DependsOn);
}
pub fn node(&self, id: ExecutionNodeId) -> Option<&ExecutionNode> {
self.nodes.get(id.0 as usize)
}
pub fn node_by_name(&self, name: &str) -> Option<(ExecutionNodeId, &ExecutionNode)> {
self.name_to_id.get(name).and_then(|&id| self.nodes.get(id.0 as usize).map(|n| (id, n)))
}
pub fn nodes(&self) -> &[ExecutionNode] {
&self.nodes
}
pub fn edges(&self) -> &[ExecutionEdge] {
&self.edges
}
pub fn num_nodes(&self) -> usize {
self.nodes.len()
}
pub fn num_edges(&self) -> usize {
self.edges.len()
}
pub fn outgoing_edges(&self, node: ExecutionNodeId) -> impl Iterator<Item = &ExecutionEdge> {
self.edges.iter().filter(move |e| e.src == node)
}
pub fn incoming_edges(&self, node: ExecutionNodeId) -> impl Iterator<Item = &ExecutionEdge> {
self.edges.iter().filter(move |e| e.dst == node)
}
pub fn kernel_nodes(&self) -> impl Iterator<Item = (ExecutionNodeId, &ExecutionNode)> {
self.nodes
.iter()
.enumerate()
.filter(|(_, n)| n.is_kernel())
.map(|(i, n)| (ExecutionNodeId(i as u32), n))
}
pub fn slowest_kernel(&self) -> Option<(ExecutionNodeId, &ExecutionNode, u64)> {
let mut slowest: Option<(ExecutionNodeId, &ExecutionNode, u64)> = None;
for (id, node) in self.nodes.iter().enumerate() {
if let ExecutionNode::Brick { timing_ns, .. } = node {
let node_id = ExecutionNodeId(id as u32);
let has_kernel =
self.outgoing_edges(node_id).any(|e| e.edge_type == EdgeType::Launches);
if has_kernel {
match &slowest {
None => slowest = Some((node_id, node, *timing_ns)),
Some((_, _, t)) if *timing_ns > *t => {
slowest = Some((node_id, node, *timing_ns))
}
Some(_) => {} }
}
}
}
slowest
}
pub fn clear(&mut self) {
self.nodes.clear();
self.edges.clear();
self.scope_stack.clear();
self.name_to_id.clear();
}
pub fn is_scope_balanced(&self) -> bool {
self.scope_stack.is_empty()
}
}