/// Renders a graph holding three `Function` nodes — one with both `file` and
/// `line`, one with neither, and one with only `file` — then checks the ASCII
/// tree text and that every returned id can be looked up again.
#[test]
fn test_execution_node_function_formatting() {
    use crate::brick::exec_graph::{ExecutionGraph, ExecutionNode};

    let fully_located = ExecutionNode::Function {
        name: String::from("test_func"),
        file: Some(String::from("src/main.rs")),
        line: Some(42),
    };
    let unlocated = ExecutionNode::Function {
        name: String::from("anonymous"),
        file: None,
        line: None,
    };
    let file_only = ExecutionNode::Function {
        name: String::from("partial"),
        file: Some(String::from("lib.rs")),
        line: None,
    };

    let mut graph = ExecutionGraph::new();
    let fully_located_id = graph.add_node(fully_located);
    let unlocated_id = graph.add_node(unlocated);
    let file_only_id = graph.add_node(file_only);

    let rendered = graph.to_ascii_tree();
    assert!(rendered.contains("test_func"), "Should contain function name");
    assert!(rendered.contains("anonymous"), "Should contain anonymous function");
    // NOTE(review): "partial" is only checked through the id lookup below,
    // never against the rendered text — confirm whether that is intentional.

    // Every id handed back by `add_node` must resolve to a stored node.
    for id in [fully_located_id, unlocated_id, file_only_id] {
        assert!(graph.node(id).is_some());
    }
}
/// Renders a graph holding three `Transfer` nodes (one per `H2D`, `D2H` and
/// `D2D` direction, with and without a recorded `timing_ns`) and checks that
/// the ASCII tree mentions the endpoints and that each id can be looked up.
#[test]
fn test_execution_node_transfer_formatting() {
    use crate::brick::exec_graph::{ExecutionGraph, ExecutionNode, TransferDirection};

    let host_to_device = ExecutionNode::Transfer {
        src: String::from("CPU"),
        dst: String::from("GPU"),
        bytes: 1024 * 1024,
        direction: TransferDirection::H2D,
        timing_ns: Some(5000),
    };
    let device_to_host = ExecutionNode::Transfer {
        src: String::from("GPU"),
        dst: String::from("CPU"),
        bytes: 512,
        direction: TransferDirection::D2H,
        timing_ns: None,
    };
    let device_to_device = ExecutionNode::Transfer {
        src: String::from("GPU0"),
        dst: String::from("GPU1"),
        bytes: 256,
        direction: TransferDirection::D2D,
        timing_ns: Some(100),
    };

    let mut graph = ExecutionGraph::new();
    let h2d_id = graph.add_node(host_to_device);
    let d2h_id = graph.add_node(device_to_host);
    let d2d_id = graph.add_node(device_to_device);

    let rendered = graph.to_ascii_tree();
    assert!(rendered.contains("CPU"), "Should contain CPU");
    assert!(rendered.contains("GPU"), "Should contain GPU");

    // Every id handed back by `add_node` must resolve to a stored node.
    for id in [h2d_id, d2h_id, d2d_id] {
        assert!(graph.node(id).is_some());
    }
}
/// Checks that `slowest_kernel` yields `None` for a graph that contains only
/// `Brick` and `Layer` nodes, with no `Kernel` nodes and no edges.
#[test]
fn test_slowest_kernel_edge_cases() {
    use crate::brick::exec_graph::{BrickId, ExecutionGraph, ExecutionNode};

    let mut graph = ExecutionGraph::new();

    // Three bricks with distinct timings, inserted in this order.
    let bricks = [
        (BrickId::RmsNorm, 100),
        (BrickId::AttentionScore, 50),
        (BrickId::GateProjection, 200),
    ];
    for (id, timing_ns) in bricks {
        graph.add_node(ExecutionNode::Brick {
            id,
            timing_ns,
            elements: 1,
        });
    }
    graph.add_node(ExecutionNode::Layer { index: 0 });

    let result = graph.slowest_kernel();
    assert!(result.is_none(), "No kernels added, should be None");
}
/// Renders a graph holding two `AsyncTask` nodes and checks that the ASCII
/// tree produces output and that each returned id can be looked up.
#[test]
fn test_execution_node_async_task_formatting() {
    use crate::brick::exec_graph::{ExecutionGraph, ExecutionNode};

    let loader = ExecutionNode::AsyncTask {
        name: String::from("load_weights"),
        poll_count: 5,
        yield_count: 3,
        total_poll_ns: 10000,
    };
    let prefetcher = ExecutionNode::AsyncTask {
        name: String::from("prefetch"),
        poll_count: 1,
        yield_count: 0,
        total_poll_ns: 500,
    };

    let mut graph = ExecutionGraph::new();
    let loader_id = graph.add_node(loader);
    let prefetcher_id = graph.add_node(prefetcher);

    let rendered = graph.to_ascii_tree();
    // NOTE(review): any non-empty output satisfies this disjunction, so the
    // task name is not actually required to appear — confirm whether a
    // stricter check is wanted.
    assert!(rendered.contains("load_weights") || !rendered.is_empty());

    // Every id handed back by `add_node` must resolve to a stored node.
    for id in [loader_id, prefetcher_id] {
        assert!(graph.node(id).is_some());
    }
}
/// Builds a `Layer` scope containing one `Brick`, `Kernel`, `Function`,
/// `Transfer` and `AsyncTask` node, then checks that the DOT export carries
/// the digraph header, the layer label and the kernel name.
#[test]
fn test_to_dot_all_node_types() {
    use crate::brick::exec_graph::{BrickId, ExecutionGraph, ExecutionNode, TransferDirection};

    let brick_node = ExecutionNode::Brick {
        id: BrickId::DownProjection,
        timing_ns: 5000,
        elements: 1024,
    };
    let kernel_node = ExecutionNode::Kernel {
        name: String::from("matmul_f32"),
        ptx_hash: 0x1234567890abcdef,
        grid: (32, 1, 1),
        block: (256, 1, 1),
        shared_mem: 1024,
        timing_ns: Some(2500),
        arithmetic_intensity: Some(1.5),
        achieved_tflops: Some(0.8),
    };
    let function_node = ExecutionNode::Function {
        name: String::from("compute"),
        file: Some(String::from("src/ops.rs")),
        line: Some(100),
    };
    let transfer_node = ExecutionNode::Transfer {
        src: String::from("RAM"),
        dst: String::from("VRAM"),
        bytes: 4096,
        direction: TransferDirection::H2D,
        timing_ns: Some(1000),
    };
    let async_node = ExecutionNode::AsyncTask {
        name: String::from("io_wait"),
        poll_count: 3,
        yield_count: 1,
        total_poll_ns: 500,
    };

    let mut graph = ExecutionGraph::new();

    // Open the layer scope, insert the five nodes inside it, then close it.
    let layer_id = graph.push_scope(ExecutionNode::Layer { index: 0 });
    let brick_id = graph.add_node_in_scope(brick_node);
    let kernel_id = graph.add_node_in_scope(kernel_node);
    let function_id = graph.add_node_in_scope(function_node);
    let transfer_id = graph.add_node_in_scope(transfer_node);
    let async_id = graph.add_node_in_scope(async_node);
    graph.pop_scope();

    let rendered = graph.to_dot();
    assert!(rendered.contains("digraph ExecutionGraph"), "Should have digraph header");
    assert!(rendered.contains("Layer 0"), "Should contain layer");
    assert!(rendered.contains("matmul_f32"), "Should contain kernel name");

    // The ids are not inspected; consume them so they do not warn as unused.
    let _ = (layer_id, brick_id, kernel_id, function_id, transfer_id, async_id);
}
/// Checks `slowest_kernel` on a graph where three bricks each launch one kernel.
///
/// The bricks carry `timing_ns` values of 100, 500 and 200 and are linked to
/// kernels with `timing_ns` of 50, 250 and 100 through `EdgeType::Launches`
/// edges. The test expects the reported node to be a brick and the reported
/// timing to be 500, i.e. the largest brick timing rather than any kernel
/// timing.
#[test]
fn test_slowest_kernel_with_kernels() {
    use crate::brick::exec_graph::{BrickId, EdgeType, ExecutionGraph, ExecutionNode};

    // Local builders: the nodes below differ only in the values passed here.
    let brick = |id, timing_ns| ExecutionNode::Brick {
        id,
        timing_ns,
        elements: 1,
    };
    let kernel = |name: &str, ptx_hash, timing_ns| ExecutionNode::Kernel {
        name: name.to_string(),
        ptx_hash,
        grid: (1, 1, 1),
        block: (32, 1, 1),
        shared_mem: 0,
        timing_ns: Some(timing_ns),
        arithmetic_intensity: None,
        achieved_tflops: None,
    };

    let mut graph = ExecutionGraph::new();

    let brick1 = graph.add_node(brick(BrickId::RmsNorm, 100));
    let brick2 = graph.add_node(brick(BrickId::AttentionScore, 500));
    let brick3 = graph.add_node(brick(BrickId::GateProjection, 200));

    let kernel1 = graph.add_node(kernel("kernel_fast", 0x1111, 50));
    let kernel2 = graph.add_node(kernel("kernel_slow", 0x2222, 250));
    let kernel3 = graph.add_node(kernel("kernel_medium", 0x3333, 100));

    graph.add_edge(brick1, kernel1, EdgeType::Launches);
    graph.add_edge(brick2, kernel2, EdgeType::Launches);
    graph.add_edge(brick3, kernel3, EdgeType::Launches);

    // `expect` replaces the former `is_some()` assertion followed by `unwrap()`;
    // it fails with the same message when nothing is returned.
    let (_, node, timing) = graph
        .slowest_kernel()
        .expect("Should find slowest brick with kernel");
    assert_eq!(timing, 500, "Slowest brick should have timing 500");
    assert!(node.is_brick(), "Should be a brick node (not kernel)");
}