use tracing::debug;
use crate::error::DetectionError;
use crate::system_io::{Interconnect, InterconnectKind};
use super::command::{DEFAULT_TIMEOUT, run_tool};
/// Arguments passed to `nvidia-smi` to obtain the NVLink status report
/// consumed by `parse_nvlink_output`.
const NVLINK_ARGS: &[&str] = &["nvlink", "-s"];
/// Arguments passed to `nvidia-smi` to obtain the topology matrix consumed by
/// `parse_nvswitch_topo`.
const TOPO_ARGS: &[&str] = &["topo", "-m"];
/// Arguments passed to `rocm-smi` to obtain the topology report consumed by
/// `parse_xgmi_topo`.
const ROCM_TOPO_ARGS: &[&str] = &["--showtopo"];
/// Runs every synchronous interconnect probe and returns what they found.
///
/// Probes execute in a fixed order (InfiniBand/RoCE, NVLink, NVSwitch, XGMI,
/// TPU ICI), so entries in the returned vector follow that order. Probes that
/// can fail non-fatally append their errors to `warnings`.
pub(crate) fn detect_interconnects(warnings: &mut Vec<DetectionError>) -> Vec<Interconnect> {
    let mut found: Vec<Interconnect> = Vec::new();

    // These probes share the caller's warning list.
    detect_infiniband(&mut found, warnings);
    detect_nvlink(&mut found, warnings);
    detect_nvswitch(&mut found, warnings);
    detect_xgmi(&mut found, warnings);

    // The TPU probe takes no warning sink.
    detect_tpu_ici(&mut found);

    found
}
/// Async counterpart of `detect_interconnects`.
///
/// External tools (`nvidia-smi`, `rocm-smi`) are awaited through
/// `run_tool_async`; the sysfs probes are plain synchronous calls.
///
/// The probe order and fallback policy mirror the synchronous path so both
/// entry points report the same interconnects on the same host:
///
/// 1. InfiniBand / RoCE via sysfs.
/// 2. NVLink via `nvidia-smi nvlink -s`.
/// 3. NVSwitch via sysfs, and only if that finds nothing, via
///    `nvidia-smi topo -m`.
/// 4. XGMI via sysfs, and only if that finds nothing, via
///    `rocm-smi --showtopo`.
/// 5. TPU ICI via sysfs.
///
/// Returns the detected interconnects together with any non-fatal tool
/// errors. A missing tool (`DetectionError::ToolNotFound`) is not reported as
/// a warning.
#[cfg(feature = "async-detect")]
pub(crate) async fn detect_interconnects_async() -> (Vec<Interconnect>, Vec<DetectionError>) {
    use super::command::run_tool_async;

    let mut interconnects = Vec::new();
    let mut warnings = Vec::new();

    detect_infiniband(&mut interconnects, &mut warnings);

    match run_tool_async("nvidia-smi", NVLINK_ARGS, DEFAULT_TIMEOUT).await {
        Ok(output) => parse_nvlink_output(&output.stdout, &mut interconnects),
        Err(DetectionError::ToolNotFound { .. }) => {}
        Err(e) => warnings.push(e),
    }

    // Same policy as `detect_nvswitch`: sysfs first, subprocess only when
    // sysfs reported no switches.
    if !detect_nvswitch_sysfs(&mut interconnects) {
        match run_tool_async("nvidia-smi", TOPO_ARGS, DEFAULT_TIMEOUT).await {
            Ok(output) => parse_nvswitch_topo(&output.stdout, &mut interconnects),
            Err(DetectionError::ToolNotFound { .. }) => {}
            Err(e) => warnings.push(e),
        }
    }

    // Same policy as `detect_xgmi`: sysfs first, subprocess only when sysfs
    // reported no hives.
    if !detect_xgmi_sysfs(&mut interconnects) {
        match run_tool_async("rocm-smi", ROCM_TOPO_ARGS, DEFAULT_TIMEOUT).await {
            Ok(output) => parse_xgmi_topo(&output.stdout, &mut interconnects),
            Err(DetectionError::ToolNotFound { .. }) => {}
            Err(e) => warnings.push(e),
        }
    }

    detect_tpu_ici(&mut interconnects);

    (interconnects, warnings)
}
/// Enumerates `/sys/class/infiniband` and records one interconnect per device.
///
/// Only `ports/1` of each device is inspected; devices without that directory
/// are skipped. The port's `link_layer` decides between InfiniBand and RoCE,
/// and for RoCE the GID type table decides between v1 and v2. A missing or
/// unreadable sysfs root is treated as "nothing to report".
///
/// `_warnings` is accepted for signature parity with the other probes and is
/// currently unused.
fn detect_infiniband(interconnects: &mut Vec<Interconnect>, _warnings: &mut Vec<DetectionError>) {
    let Ok(devices) = std::fs::read_dir(std::path::Path::new("/sys/class/infiniband")) else {
        return;
    };

    for device in devices.flatten() {
        let port = device.path().join("ports/1");
        if !port.exists() {
            continue;
        }
        let name = device.file_name().to_string_lossy().to_string();

        let state = super::read_sysfs_string(&port.join("state"), 256)
            .map(|raw| raw.trim().to_string());

        let rate = super::read_sysfs_string(&port.join("rate"), 256).unwrap_or_default();
        let bandwidth_gbps = parse_ib_rate(rate.trim());

        let layer = super::read_sysfs_string(&port.join("link_layer"), 256).unwrap_or_default();
        let over_ethernet = layer.trim().eq_ignore_ascii_case("Ethernet");

        // The RoCE version lookup is only performed for Ethernet link layers.
        let kind = if !over_ethernet {
            InterconnectKind::InfiniBand
        } else if detect_roce_version(&port) {
            InterconnectKind::RoCEv2
        } else {
            InterconnectKind::RoCE
        };

        debug!(name = %name, %kind, bandwidth_gbps, "interconnect detected");
        interconnects.push(Interconnect {
            kind,
            name,
            bandwidth_gbps,
            state,
        });
    }
}
#[must_use]
#[inline]
pub fn parse_ib_rate(s: &str) -> f64 {
if let Some(gb_str) = s.split_whitespace().next()
&& let Ok(gbits) = gb_str.parse::<f64>()
{
gbits / crate::units::GBITS_PER_GBYTE } else {
0.0
}
}
/// Runs `nvidia-smi nvlink -s` and feeds its stdout to `parse_nvlink_output`.
///
/// A missing `nvidia-smi` binary is silently ignored; any other tool failure
/// is appended to `warnings` and no interconnects are added.
fn detect_nvlink(interconnects: &mut Vec<Interconnect>, warnings: &mut Vec<DetectionError>) {
    match run_tool("nvidia-smi", NVLINK_ARGS, DEFAULT_TIMEOUT) {
        Ok(report) => parse_nvlink_output(&report.stdout, interconnects),
        // Tool not installed: not worth a warning.
        Err(DetectionError::ToolNotFound { .. }) => {}
        Err(failure) => warnings.push(failure),
    }
}
/// Parses NVLink status text and appends one `NVLink` interconnect per GPU
/// that lists at least one link.
///
/// Parsing rules (all matching is done on whitespace-trimmed lines):
///
/// * A line starting with `"GPU "` opens a new GPU; the whole trimmed line
///   becomes the interconnect name.
/// * A line starting with `"Link "` is one link of the current GPU. The text
///   after the first `:` is searched for a leading number, taken as that
///   link's bandwidth.
///
/// The aggregate bandwidth is the most recently reported per-link bandwidth
/// multiplied by the number of links that actually reported a bandwidth.
/// Links whose value cannot be parsed (for example `<inactive>`) still count
/// toward the `"N links"` state string but contribute no bandwidth.
///
/// The per-GPU link counters are capped at 256.
pub fn parse_nvlink_output(stdout: &str, interconnects: &mut Vec<Interconnect>) {
    const MAX_LINKS_PER_GPU: u32 = 256;

    let mut current_gpu = String::new();
    let mut link_count = 0u32;
    let mut active_links = 0u32;
    let mut link_bw = 0.0f64;

    for line in stdout.lines() {
        let trimmed = line.trim();
        if trimmed.starts_with("GPU ") {
            // Flush the GPU we were accumulating before starting the next one.
            push_nvlink_entry(
                interconnects,
                std::mem::take(&mut current_gpu),
                link_count,
                active_links,
                link_bw,
            );
            current_gpu = trimmed.to_string();
            link_count = 0;
            active_links = 0;
            link_bw = 0.0;
        } else if trimmed.starts_with("Link ") {
            link_count = link_count.saturating_add(1).min(MAX_LINKS_PER_GPU);
            let reported = trimmed
                .split(':')
                .nth(1)
                .and_then(|rest| rest.split_whitespace().next())
                .and_then(|token| token.parse::<f64>().ok());
            if let Some(bw) = reported {
                link_bw = bw;
                active_links = active_links.saturating_add(1).min(MAX_LINKS_PER_GPU);
            }
        }
    }

    // The last GPU has no following "GPU " line to trigger its flush.
    push_nvlink_entry(interconnects, current_gpu, link_count, active_links, link_bw);
}

/// Appends an `NVLink` entry for one GPU, or does nothing if it listed no links.
fn push_nvlink_entry(
    interconnects: &mut Vec<Interconnect>,
    gpu_name: String,
    link_count: u32,
    active_links: u32,
    link_bw: f64,
) {
    if link_count == 0 {
        return;
    }
    interconnects.push(Interconnect {
        kind: InterconnectKind::NVLink,
        name: gpu_name,
        bandwidth_gbps: link_bw * f64::from(active_links),
        state: Some(format!("{} links", link_count)),
    });
}
/// Detects NVSwitch fabric, preferring sysfs over spawning `nvidia-smi`.
///
/// If the sysfs probe reports at least one switch, nothing else is done.
/// Otherwise `nvidia-smi topo -m` is run and its stdout handed to
/// `parse_nvswitch_topo`. A missing binary is ignored; other tool failures
/// are appended to `warnings`.
fn detect_nvswitch(interconnects: &mut Vec<Interconnect>, warnings: &mut Vec<DetectionError>) {
    if !detect_nvswitch_sysfs(interconnects) {
        match run_tool("nvidia-smi", TOPO_ARGS, DEFAULT_TIMEOUT) {
            Ok(topo) => parse_nvswitch_topo(&topo.stdout, interconnects),
            Err(DetectionError::ToolNotFound { .. }) => {}
            Err(failure) => warnings.push(failure),
        }
    }
}
/// Lists `/sys/devices/virtual/nvidia-nvswitch` and records every entry whose
/// name starts with `nvswitch`.
///
/// Each entry is reported with a fixed bandwidth of 900.0 and the state
/// `"sysfs"`. Returns `true` if at least one entry was recorded, `false` if
/// the directory is unreadable or holds no matching entries.
fn detect_nvswitch_sysfs(interconnects: &mut Vec<Interconnect>) -> bool {
    let Ok(listing) = std::fs::read_dir(std::path::Path::new(
        "/sys/devices/virtual/nvidia-nvswitch",
    )) else {
        return false;
    };

    // Comparing lengths afterwards tells us whether anything was appended.
    let before = interconnects.len();

    let switch_names = listing
        .flatten()
        .map(|node| node.file_name().to_string_lossy().to_string())
        .filter(|node_name| node_name.starts_with("nvswitch"));

    for name in switch_names {
        let bandwidth_gbps = 900.0;
        debug!(name = %name, bandwidth_gbps, "NVSwitch detected via sysfs");
        interconnects.push(Interconnect {
            kind: InterconnectKind::NVSwitch,
            name,
            bandwidth_gbps,
            state: Some("sysfs".into()),
        });
    }

    interconnects.len() > before
}
/// Infers NVSwitch presence from a GPU topology matrix.
///
/// Parsing rules (on whitespace-trimmed lines):
///
/// * A line whose first two tokens both start with `GPU` is treated as the
///   column header and skipped.
/// * A line whose first token is `GPU` followed by one or more ASCII digits
///   is a GPU row. A bare `GPU` token is *not* a GPU row.
/// * Within a GPU row, every token of the form `NV<number>` contributes its
///   number to a running maximum.
///
/// If the maximum is at least 8 and at least two GPU rows were seen, a single
/// `NVSwitch` interconnect is appended with bandwidth `max * 25.0`, name
/// `"NVSwitch (<n> GPUs)"` and state `"NV<max>"`. Otherwise nothing is added.
pub fn parse_nvswitch_topo(stdout: &str, interconnects: &mut Vec<Interconnect>) {
    let mut max_nv_links = 0u32;
    let mut gpu_count = 0u32;

    for line in stdout.lines() {
        let trimmed = line.trim();
        let mut tokens = trimmed.split_whitespace();
        let Some(first) = tokens.next() else {
            continue;
        };

        // Header row: the second column label is itself a GPU name.
        if tokens.next().is_some_and(|second| second.starts_with("GPU")) {
            continue;
        }

        // `all()` is vacuously true on an empty suffix, so require at least
        // one digit to avoid counting a bare "GPU" token as a device row.
        let is_gpu_row = first.strip_prefix("GPU").is_some_and(|index| {
            !index.is_empty() && index.chars().all(|c| c.is_ascii_digit())
        });
        if !is_gpu_row {
            continue;
        }
        gpu_count += 1;

        let row_max = trimmed
            .split_whitespace()
            .filter_map(|token| {
                token
                    .strip_prefix("NV")
                    .and_then(|n_str| n_str.parse::<u32>().ok())
            })
            .max()
            .unwrap_or(0);
        max_nv_links = max_nv_links.max(row_max);
    }

    if max_nv_links >= 8 && gpu_count >= 2 {
        let bandwidth_gbps = f64::from(max_nv_links) * 25.0;
        debug!(
            max_nv_links,
            gpu_count, bandwidth_gbps, "NVSwitch detected via nvidia-smi topo"
        );
        interconnects.push(Interconnect {
            kind: InterconnectKind::NVSwitch,
            name: format!("NVSwitch ({gpu_count} GPUs)"),
            bandwidth_gbps,
            state: Some(format!("NV{max_nv_links}")),
        });
    }
}
/// Detects AMD XGMI links, preferring sysfs over spawning `rocm-smi`.
///
/// If the sysfs probe reports at least one hive, nothing else is done.
/// Otherwise `rocm-smi --showtopo` is run and its stdout handed to
/// `parse_xgmi_topo`. A missing binary is ignored; other tool failures are
/// appended to `warnings`.
fn detect_xgmi(interconnects: &mut Vec<Interconnect>, warnings: &mut Vec<DetectionError>) {
    if !detect_xgmi_sysfs(interconnects) {
        match run_tool("rocm-smi", ROCM_TOPO_ARGS, DEFAULT_TIMEOUT) {
            Ok(topo) => parse_xgmi_topo(&topo.stdout, interconnects),
            Err(DetectionError::ToolNotFound { .. }) => {}
            Err(failure) => warnings.push(failure),
        }
    }
}
/// Groups amdgpu-driven DRM cards by XGMI hive and records each hive that
/// contains two or more cards.
///
/// Walks `/sys/class/drm`, keeping entries named `card*` without a `-` in the
/// name whose `device/driver` symlink resolves to a target named `amdgpu`.
/// The hive identifier is read from `device/xgmi_hive_info`, falling back to
/// `device/xgmi_hive_id`; empty, `0` and `0x0` identifiers are ignored.
///
/// Every qualifying hive is reported with a fixed bandwidth of 400.0. Hives
/// are emitted in the order they were first encountered. Returns `true` if at
/// least one hive was recorded.
fn detect_xgmi_sysfs(interconnects: &mut Vec<Interconnect>) -> bool {
    let Ok(cards) = std::fs::read_dir(std::path::Path::new("/sys/class/drm")) else {
        return false;
    };

    // Insertion-ordered (hive id, member cards) pairs.
    let mut hive_gpus: Vec<(String, Vec<String>)> = Vec::with_capacity(2);

    for card in cards.flatten() {
        let file_name = card.file_name();
        let card_name = file_name.to_string_lossy();
        // NOTE(review): names containing '-' are presumably connector nodes
        // rather than GPUs — confirm against the DRM sysfs layout.
        let is_card_node = card_name.starts_with("card") && !card_name.contains('-');
        if !is_card_node {
            continue;
        }

        let device_dir = card.path().join("device");
        let bound_to_amdgpu = std::fs::read_link(device_dir.join("driver"))
            .ok()
            .and_then(|target| {
                target
                    .file_name()
                    .map(|driver| driver.to_string_lossy().into_owned())
            })
            .is_some_and(|driver| driver == "amdgpu");
        if !bound_to_amdgpu {
            continue;
        }

        let raw_hive = super::read_sysfs_string(&device_dir.join("xgmi_hive_info"), 256)
            .or_else(|| super::read_sysfs_string(&device_dir.join("xgmi_hive_id"), 256));
        let Some(raw_hive) = raw_hive else {
            continue;
        };
        let hive = raw_hive.trim().to_string();
        if hive.is_empty() || hive == "0" || hive == "0x0" {
            continue;
        }

        match hive_gpus.iter().position(|(known, _)| *known == hive) {
            Some(slot) => hive_gpus[slot].1.push(card_name.to_string()),
            None => hive_gpus.push((hive, vec![card_name.to_string()])),
        }
    }

    let before = interconnects.len();
    for (hive_id, gpus) in &hive_gpus {
        // A hive with a single visible card has no peer to link to.
        if gpus.len() >= 2 {
            let bandwidth_gbps = 400.0;
            debug!(
                hive_id,
                gpu_count = gpus.len(),
                bandwidth_gbps,
                "AMD XGMI hive detected via sysfs"
            );
            interconnects.push(Interconnect {
                kind: InterconnectKind::XgmiInfinityFabric,
                name: format!("XGMI hive {} ({} GPUs)", hive_id, gpus.len()),
                bandwidth_gbps,
                state: Some(format!("{} GPUs", gpus.len())),
            });
        }
    }
    interconnects.len() > before
}
/// Counts XGMI-connected GPUs in a topology report and, if there are at least
/// two, appends a single `XgmiInfinityFabric` interconnect.
///
/// Scanning works on whitespace-trimmed lines:
///
/// * Any line containing `Link Type` or `Topology Information` switches the
///   scanner into the link section.
/// * Inside the link section, a line starting with `====` ends the scan if
///   any XGMI row has been counted so far; otherwise it is skipped.
/// * Inside the link section, every line that starts with `GPU` and contains
///   `XGMI` counts as one GPU.
///
/// The appended entry uses a fixed bandwidth of 400.0.
pub fn parse_xgmi_topo(stdout: &str, interconnects: &mut Vec<Interconnect>) {
    let opens_link_section =
        |row: &str| row.contains("Link Type") || row.contains("Topology Information");

    let mut xgmi_gpu_count = 0u32;
    let mut inside = false;

    for row in stdout.lines().map(str::trim) {
        if opens_link_section(row) {
            inside = true;
        } else if !inside {
            // Still before the link section: nothing to look at.
        } else if row.starts_with("====") {
            // A new banner after we already counted rows closes the section.
            if xgmi_gpu_count > 0 {
                break;
            }
        } else if row.starts_with("GPU") && row.contains("XGMI") {
            xgmi_gpu_count += 1;
        }
    }

    if xgmi_gpu_count < 2 {
        return;
    }

    let bandwidth_gbps = 400.0;
    debug!(
        xgmi_gpu_count,
        bandwidth_gbps, "XGMI detected via rocm-smi --showtopo"
    );
    interconnects.push(Interconnect {
        kind: InterconnectKind::XgmiInfinityFabric,
        name: format!("XGMI ({xgmi_gpu_count} GPUs)"),
        bandwidth_gbps,
        state: Some(format!("{xgmi_gpu_count} GPUs")),
    });
}
fn detect_tpu_ici(interconnects: &mut Vec<Interconnect>) {
let mut total_chips = 0u32;
let mut version_str = String::new();
for device_id in super::iter_dev_devices("accel") {
let base = format!("/sys/class/accel/accel{device_id}/device");
let driver_link = format!("{base}/driver");
if let Ok(target) = std::fs::read_link(&driver_link)
&& target.to_string_lossy().contains("amdxdna")
{
continue;
}
let ver_path = format!("{base}/tpu_version");
let ver = super::read_sysfs_string(std::path::Path::new(&ver_path), 256);
if ver.is_none() {
continue; }
if version_str.is_empty()
&& let Some(ref v) = ver
{
version_str = v.trim().to_string();
}
let chip_path = format!("{base}/chip_count");
if let Some(count_str) = super::read_sysfs_string(std::path::Path::new(&chip_path), 64)
&& let Ok(n) = count_str.trim().parse::<u32>()
&& n > 0
{
total_chips += n;
} else {
total_chips += 1;
}
}
if total_chips < 2 {
return;
}
let per_chip_gbps = if version_str.contains("v5p") {
409.6
} else if version_str.contains("v5e") || version_str.contains("v5litepod") {
204.8
} else if version_str.contains("v4") {
192.0
} else {
204.8 };
let bandwidth_gbps = per_chip_gbps * total_chips as f64;
debug!(
total_chips,
version = %version_str,
bandwidth_gbps,
"Google ICI detected"
);
interconnects.push(Interconnect {
kind: InterconnectKind::Ici,
name: format!("ICI ({total_chips} chips)"),
bandwidth_gbps,
state: Some(format!("{total_chips} chips, {version_str}")),
});
}
fn detect_roce_version(port_dir: &std::path::Path) -> bool {
let types_dir = port_dir.join("gid_attrs").join("types");
let Ok(type_entries) = std::fs::read_dir(&types_dir) else {
return false;
};
for entry in type_entries.flatten() {
if let Some(content) = super::read_sysfs_string(&entry.path(), 256)
&& content.trim().contains("RoCE v2")
{
return true;
}
}
false
}
// Unit tests for the pure text parsers in this module. The sysfs- and
// subprocess-backed probes are not exercised here.
#[cfg(test)]
mod tests {
use super::*;
// A 200 Gb/sec rate string is expected to convert to 25.0.
#[test]
fn parse_ib_rate_hdr() {
assert!((parse_ib_rate("200 Gb/sec (4X HDR)") - 25.0).abs() < 0.01);
}
// A 400 Gb/sec rate string is expected to convert to 50.0.
#[test]
fn parse_ib_rate_ndr() {
assert!((parse_ib_rate("400 Gb/sec (4X NDR)") - 50.0).abs() < 0.01);
}
// Empty input falls back to 0.0.
#[test]
fn parse_ib_rate_empty() {
assert_eq!(parse_ib_rate(""), 0.0);
}
// Eight GPUs fully connected with NV18 yield one NVSwitch entry at 18 * 25.
#[test]
fn parse_nvswitch_topo_dgx_h100() {
let output = "\
\tGPU0\tGPU1\tGPU2\tGPU3\tGPU4\tGPU5\tGPU6\tGPU7\t
GPU0\t X \tNV18\tNV18\tNV18\tNV18\tNV18\tNV18\tNV18\t
GPU1\tNV18\t X \tNV18\tNV18\tNV18\tNV18\tNV18\tNV18\t
GPU2\tNV18\tNV18\t X \tNV18\tNV18\tNV18\tNV18\tNV18\t
GPU3\tNV18\tNV18\tNV18\t X \tNV18\tNV18\tNV18\tNV18\t
GPU4\tNV18\tNV18\tNV18\tNV18\t X \tNV18\tNV18\tNV18\t
GPU5\tNV18\tNV18\tNV18\tNV18\tNV18\t X \tNV18\tNV18\t
GPU6\tNV18\tNV18\tNV18\tNV18\tNV18\tNV18\t X \tNV18\t
GPU7\tNV18\tNV18\tNV18\tNV18\tNV18\tNV18\tNV18\t X \t
";
let mut interconnects = Vec::new();
parse_nvswitch_topo(output, &mut interconnects);
assert_eq!(interconnects.len(), 1);
assert_eq!(interconnects[0].kind, InterconnectKind::NVSwitch);
assert_eq!(interconnects[0].bandwidth_gbps, 18.0 * 25.0);
assert_eq!(interconnects[0].state.as_deref(), Some("NV18"));
}
// Eight GPUs fully connected with NV12 yield one NVSwitch entry at 12 * 25.
#[test]
fn parse_nvswitch_topo_dgx_a100() {
let output = "\
\tGPU0\tGPU1\tGPU2\tGPU3\tGPU4\tGPU5\tGPU6\tGPU7\t
GPU0\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t
GPU1\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t
GPU2\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\tNV12\t
GPU3\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\tNV12\t
GPU4\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\tNV12\t
GPU5\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\tNV12\t
GPU6\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \tNV12\t
GPU7\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\tNV12\t X \t
";
let mut interconnects = Vec::new();
parse_nvswitch_topo(output, &mut interconnects);
assert_eq!(interconnects.len(), 1);
assert_eq!(interconnects[0].kind, InterconnectKind::NVSwitch);
assert_eq!(interconnects[0].bandwidth_gbps, 12.0 * 25.0);
}
// A matrix with no NV<n> cells produces no NVSwitch entry.
#[test]
fn parse_nvswitch_topo_no_nvswitch() {
let output = "\
\tGPU0\tGPU1\t
GPU0\t X \tSYS\t
GPU1\tSYS\t X \t
";
let mut interconnects = Vec::new();
parse_nvswitch_topo(output, &mut interconnects);
assert!(interconnects.is_empty());
}
// NV2 is below the link-count threshold, so no NVSwitch entry is produced.
#[test]
fn parse_nvswitch_topo_low_nvlink_no_switch() {
let output = "\
\tGPU0\tGPU1\t
GPU0\t X \tNV2\t
GPU1\tNV2\t X \t
";
let mut interconnects = Vec::new();
parse_nvswitch_topo(output, &mut interconnects);
assert!(interconnects.is_empty());
}
// Empty input produces nothing.
#[test]
fn parse_nvswitch_topo_empty() {
let mut interconnects = Vec::new();
parse_nvswitch_topo("", &mut interconnects);
assert!(interconnects.is_empty());
}
// A single GPU row can never form an NVSwitch entry.
#[test]
fn parse_nvswitch_topo_single_gpu() {
let output = "GPU0\t X \t\n";
let mut interconnects = Vec::new();
parse_nvswitch_topo(output, &mut interconnects);
assert!(interconnects.is_empty());
}
// Two XGMI rows under a "Link Type" banner yield one entry for 2 GPUs.
#[test]
fn parse_xgmi_topo_two_gpus() {
let output = "\
========================= Link Type between two GPUs =========================
GPU0 GPU1
GPU0 0 XGMI
GPU1 XGMI 0
";
let mut interconnects = Vec::new();
parse_xgmi_topo(output, &mut interconnects);
assert_eq!(interconnects.len(), 1);
assert_eq!(interconnects[0].kind, InterconnectKind::XgmiInfinityFabric);
assert_eq!(interconnects[0].state.as_deref(), Some("2 GPUs"));
}
// Four XGMI rows under a "Topology Information" banner yield one entry for 4 GPUs.
#[test]
fn parse_xgmi_topo_four_gpus() {
let output = "\
========================= Topology Information =========================
GPU0 GPU1 GPU2 GPU3
GPU0 0 XGMI XGMI XGMI
GPU1 XGMI 0 XGMI XGMI
GPU2 XGMI XGMI 0 XGMI
GPU3 XGMI XGMI XGMI 0
";
let mut interconnects = Vec::new();
parse_xgmi_topo(output, &mut interconnects);
assert_eq!(interconnects.len(), 1);
assert_eq!(interconnects[0].state.as_deref(), Some("4 GPUs"));
}
// PCIE-only rows contain no XGMI marker, so nothing is reported.
#[test]
fn parse_xgmi_topo_no_xgmi() {
let output = "\
========================= Topology Information =========================
GPU0 GPU1
GPU0 0 PCIE
GPU1 PCIE 0
";
let mut interconnects = Vec::new();
parse_xgmi_topo(output, &mut interconnects);
assert!(interconnects.is_empty());
}
// Empty input produces nothing.
#[test]
fn parse_xgmi_topo_empty() {
let mut interconnects = Vec::new();
parse_xgmi_topo("", &mut interconnects);
assert!(interconnects.is_empty());
}
// A single GPU with no XGMI cell produces nothing.
#[test]
fn parse_xgmi_topo_single_gpu() {
let output = "\
========================= Link Type between two GPUs =========================
GPU0
GPU0 0
";
let mut interconnects = Vec::new();
parse_xgmi_topo(output, &mut interconnects);
assert!(interconnects.is_empty());
}
// With mixed NV12/NV4 cells the maximum (12) drives the reported bandwidth.
#[test]
fn parse_nvswitch_topo_mixed_nv_counts() {
let output = "\
\tGPU0\tGPU1\tGPU2\tGPU3\t
GPU0\t X \tNV12\tNV4\tNV4\t
GPU1\tNV12\t X \tNV4\tNV4\t
GPU2\tNV4\tNV4\t X \tNV12\t
GPU3\tNV4\tNV4\tNV12\t X \t
";
let mut interconnects = Vec::new();
parse_nvswitch_topo(output, &mut interconnects);
assert_eq!(interconnects.len(), 1);
assert_eq!(interconnects[0].bandwidth_gbps, 12.0 * 25.0);
}
// The column-header line must not be counted as a GPU row.
#[test]
fn parse_nvswitch_topo_header_not_counted_as_gpu() {
let output = "\
\tGPU0\tGPU1\t
GPU0\t X \tNV12\t
GPU1\tNV12\t X \t
";
let mut interconnects = Vec::new();
parse_nvswitch_topo(output, &mut interconnects);
assert_eq!(interconnects.len(), 1);
assert!(interconnects[0].name.contains("2 GPUs"));
}
// Links without a parseable bandwidth still produce an entry, at 0.0.
#[test]
fn parse_nvlink_output_zero_bandwidth() {
let output = "\
GPU 0: NVIDIA RTX 4090 (UUID: GPU-aaa)
Link 0:
Link 1: GB/s
";
let mut interconnects = Vec::new();
parse_nvlink_output(output, &mut interconnects);
assert_eq!(interconnects.len(), 1);
assert_eq!(interconnects[0].bandwidth_gbps, 0.0);
}
// Only the "Link Type" section is counted; sections before and after it are ignored.
#[test]
fn parse_xgmi_topo_multi_section_output() {
let output = "\
===================== Inter Node Access (different P2P protocols) ===========
GPU0 GPU1
GPU0 0 15
GPU1 15 0
========================= Link Type between two GPUs =========================
GPU0 GPU1
GPU0 0 XGMI
GPU1 XGMI 0
========================= Weight between two GPUs =========================
GPU0 GPU1
GPU0 0 15
GPU1 15 0
";
let mut interconnects = Vec::new();
parse_xgmi_topo(output, &mut interconnects);
assert_eq!(interconnects.len(), 1);
assert_eq!(interconnects[0].state.as_deref(), Some("2 GPUs"));
}
// A header with extra affinity columns and a trailing legend must not change the GPU count.
#[test]
fn parse_nvswitch_topo_gpu_count_correct_with_full_header() {
let output = "\
GPU0 GPU1 CPU Affinity NUMA Affinity GPU NUMA ID
GPU0 X NV12 0-15 0 N/A
GPU1 NV12 X 0-15 0 N/A
Legend:
X = Self
SYS = Connection traversing PCIe as well as the SMP interconnect
NV# = Connection traversing a bonded set of # NVLinks
";
let mut interconnects = Vec::new();
parse_nvswitch_topo(output, &mut interconnects);
assert_eq!(interconnects.len(), 1);
assert!(interconnects[0].name.contains("2 GPUs"));
}
}