use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use std::process::Command;
use crate::backend::BackendCapabilities;
use crate::backend::hardware::{DeviceCapabilities, DeviceKind, HardwareTarget, MemorySpace};
use crate::object::Representation;
use crate::{Error, Result};
/// Artifact identifier written into the `artifact` field of every
/// [`RocmHipCapabilityReport`] built in this file.
pub const ROCM_HIP_HARDWARE_CONTRACT_ARTIFACT: &str = "tokitai-rocm-hip-hardware-contract";
/// Version number written into the `version` field of every
/// [`RocmHipCapabilityReport`] built in this file.
pub const ROCM_HIP_HARDWARE_CONTRACT_VERSION: u32 = 1;
/// Toolchain and driver versions recorded for a ROCm/HIP capability report.
///
/// Derives `Hash` because the whole value is fed into the report's
/// capability fingerprint.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct RocmHipToolchain {
/// Path of the `hipcc` binary; the constructors in this file always record
/// `/opt/rocm/bin/hipcc`.
pub hipcc_path: String,
/// Value following `HIP version:` in the `hipcc` output, if present.
pub hip_version: Option<String>,
/// First `hipcc` output line containing `clang version`, if present.
pub clang_version: Option<String>,
/// Value following `Driver version:` in the `rocm-smi` output, if present.
pub driver_version: Option<String>,
}
/// One GPU assembled from `rocm-smi` and `rocminfo` output.
///
/// Derives `Hash` because the device list is fed into the report's
/// capability fingerprint.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct RocmHipDevice {
/// Number taken from the `GPU[n]` marker of a `rocm-smi` line, or a number
/// assigned while merging `rocminfo` agents that `rocm-smi` did not list.
pub index: usize,
/// Product name (`Card Series:` from `rocm-smi`, otherwise the
/// `Marketing Name:` from `rocminfo`, otherwise the gfx name).
pub marketing_name: String,
/// Architecture name such as `gfx1101`; empty when it was never reported.
pub gfx: String,
/// Value of the `Compute Unit:` line from `rocminfo`, if parsed.
pub compute_units: Option<u32>,
/// Value of the `VRAM Total Memory (B):` line from `rocm-smi`, if parsed.
pub vram_bytes: Option<u64>,
/// Value of the `Node ID:` line from `rocm-smi`, if parsed.
pub node_id: Option<u32>,
/// True for the single device chosen as the primary device of the report.
pub selected: bool,
}
/// Hardware-contract report describing what ROCm/HIP evidence was found on a host.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RocmHipCapabilityReport {
/// Artifact identifier (`ROCM_HIP_HARDWARE_CONTRACT_ARTIFACT`).
pub artifact: String,
/// Contract version (`ROCM_HIP_HARDWARE_CONTRACT_VERSION`).
pub version: u32,
/// True only when every source command succeeded, a device was selected,
/// and both the HIP version and the driver version were parsed.
pub available: bool,
/// Copy of the device chosen as primary, if any device was selectable.
pub selected_device: Option<RocmHipDevice>,
/// Every detected device, sorted by index.
pub devices: Vec<RocmHipDevice>,
/// Toolchain and driver versions parsed from the command outputs.
pub toolchain: RocmHipToolchain,
/// Command lines whose outputs the report was built from.
pub source_commands: Vec<String>,
/// `rocmhip-` followed by 16 hex digits, hashed over `available`,
/// `devices`, `toolchain` and `source_commands`.
pub capability_fingerprint: String,
/// Human-readable findings, including failed commands.
pub evidence: Vec<String>,
/// Fixed statements describing behavior when ROCm/HIP evidence is missing.
pub fallback_behavior: Vec<String>,
/// Fixed statements listing what this report does not claim.
pub non_claims: Vec<String>,
}
/// Captured result of one probing command.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RocmHipCommandOutput {
/// The command line as it was passed to the shell; also used to recognise
/// which tool (`rocminfo`, `rocm-smi`, `hipcc`) produced the output.
pub command: String,
/// Whether the process exited successfully; false when it could not be spawned.
pub status_success: bool,
/// Standard output, lossily decoded as UTF-8.
pub stdout: String,
/// Standard error, lossily decoded as UTF-8, or the spawn error text.
pub stderr: String,
}
/// Table of device/toolchain combinations and whether a portability claim may be made.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RocmPortabilityMatrix {
/// Artifact identifier of the matrix.
pub artifact: String,
/// Version number of the matrix format.
pub version: u32,
/// One row per detected device, or a single placeholder row when none was detected.
pub rows: Vec<RocmPortabilityMatrixRow>,
/// True when enough rows individually allow the claim.
pub claim_allowed: bool,
/// Explanation of what is required before the claim may be made.
pub claim_blocker: String,
/// Number of claim-allowing rows required for `claim_allowed` to be true.
pub required_reviewed_device_count: usize,
}
/// One device/toolchain combination inside a [`RocmPortabilityMatrix`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RocmPortabilityMatrixRow {
/// Marketing name of the device, or `unavailable` for the placeholder row.
pub device_name: String,
/// Architecture name of the device, or `unavailable` for the placeholder row.
pub gfx: String,
/// HIP version from the capability report, or `unknown`.
pub hip_version: String,
/// Driver version from the capability report, or `unknown`.
pub driver_version: String,
/// Fingerprint of the capability report the row was derived from.
pub capability_fingerprint: String,
/// Short status label for the row (for example `local_selected_device`).
pub status: String,
/// Whether this row on its own counts towards a portability claim.
pub claim_allowed: bool,
/// Reason the row does not allow a claim.
pub blocker: String,
}
impl RocmHipCapabilityReport {
    /// Builds a report for a host on which ROCm/HIP could not be used.
    ///
    /// The report has no devices, an empty toolchain (apart from the fixed
    /// `hipcc` path), the default source commands, and a single evidence line
    /// carrying `reason`.
    pub fn unavailable(reason: impl Into<String>) -> Self {
        let reason: String = reason.into();
        let mut report = Self {
            artifact: ROCM_HIP_HARDWARE_CONTRACT_ARTIFACT.to_string(),
            version: ROCM_HIP_HARDWARE_CONTRACT_VERSION,
            available: false,
            selected_device: None,
            devices: Vec::new(),
            toolchain: RocmHipToolchain {
                hipcc_path: "/opt/rocm/bin/hipcc".to_string(),
                hip_version: None,
                clang_version: None,
                driver_version: None,
            },
            source_commands: default_source_commands(),
            capability_fingerprint: String::new(),
            evidence: vec![format!("ROCm/HIP unavailable: {reason}")],
            fallback_behavior: fallback_behavior(),
            non_claims: non_claims(),
        };
        // The fingerprint is derived from the other fields, so it is filled in last.
        report.capability_fingerprint = report.compute_fingerprint();
        report
    }

    /// Describes this hardware contract as a backend capability record.
    ///
    /// The record is never exact or deterministic and always lists the fixed
    /// degradations; an extra `unavailable:` entry is appended when the
    /// report is not available.
    pub fn backend_capabilities(&self) -> BackendCapabilities {
        let always_present = [
            "hardware_contract_only:no_kernel_admission",
            "requires_cpu_oracle_for_future_hip_lowerings",
            "unsupported:padic:fixed_precision",
            "unsupported:sheaf:finite_site",
        ];
        let unavailable_marker = if self.available {
            None
        } else {
            Some("unavailable:rocm_hip_not_detected")
        };
        let semantic_degradations = always_present
            .iter()
            .copied()
            .chain(unavailable_marker)
            .map(str::to_string)
            .collect::<Vec<String>>();

        BackendCapabilities {
            name: "rocm_hip_hardware_contract".to_string(),
            exact: false,
            deterministic: false,
            supported_representations: vec![Representation::dense_cpu().id().0],
            supported_domains: vec!["hardware:rocm_hip".to_string()],
            semantic_degradations,
        }
    }

    /// Wraps [`Self::backend_capabilities`] in a GPU hardware target.
    ///
    /// The target id is `rocm_hip:<index>:<gfx>` for the selected device, or
    /// `rocm_hip:unavailable` when no device is selected.
    pub fn device_capabilities(&self) -> DeviceCapabilities {
        let id = match &self.selected_device {
            Some(device) => format!("rocm_hip:{}:{}", device.index, device.gfx),
            None => "rocm_hip:unavailable".to_string(),
        };
        let target = HardwareTarget {
            id,
            kind: DeviceKind::Gpu,
            memory_space: MemorySpace::Device,
        };
        DeviceCapabilities::from_backend(target, self.backend_capabilities())
    }

    /// Renders the report as Markdown: a header, a device table, and the
    /// evidence, fallback-behavior and non-claims bullet lists.
    pub fn to_markdown(&self) -> String {
        let hip_version = self.toolchain.hip_version.as_deref().unwrap_or("unknown");
        let driver_version = self
            .toolchain
            .driver_version
            .as_deref()
            .unwrap_or("unknown");

        let mut lines: Vec<String> = vec![
            "# ROCm/HIP Hardware Contract".to_string(),
            String::new(),
            format!("artifact: {}", self.artifact),
            format!("version: {}", self.version),
            format!("available: {}", self.available),
            format!("fingerprint: {}", self.capability_fingerprint),
            format!("hipcc: {}", self.toolchain.hipcc_path),
            format!("hip_version: {hip_version}"),
            format!("driver_version: {driver_version}"),
            String::new(),
            "| Index | Selected | Marketing name | GFX | CUs | VRAM bytes | Node |".to_string(),
            "| --- | --- | --- | --- | --- | --- | --- |".to_string(),
        ];

        lines.extend(self.devices.iter().map(|device| {
            format!(
                "| {} | {} | {} | {} | {} | {} | {} |",
                device.index,
                device.selected,
                md(&device.marketing_name),
                md(&device.gfx),
                option_u32(device.compute_units),
                option_u64(device.vram_bytes),
                option_u32(device.node_id)
            )
        }));

        // Each section is a blank line, a heading, then one bullet per item.
        let sections = [
            ("## Evidence", &self.evidence),
            ("## Fallback Behavior", &self.fallback_behavior),
            ("## Non-Claims", &self.non_claims),
        ];
        for &(heading, items) in &sections {
            lines.push(String::new());
            lines.push(heading.to_string());
            lines.extend(items.iter().map(|item| format!("- {item}")));
        }

        lines.join("\n")
    }

    /// Renders the report as a single-line JSON object with a fixed member order.
    pub fn to_json(&self) -> String {
        let device_objects: Vec<String> = self
            .devices
            .iter()
            .map(|device| device.to_json())
            .collect();
        let selected_device = match &self.selected_device {
            Some(device) => device.to_json(),
            None => "null".to_string(),
        };

        let members = [
            format!("\"artifact\":{}", json_string(&self.artifact)),
            format!("\"version\":{}", self.version),
            format!("\"available\":{}", self.available),
            format!("\"selected_device\":{selected_device}"),
            format!("\"devices\":[{}]", device_objects.join(",")),
            format!("\"toolchain\":{}", self.toolchain.to_json()),
            format!("\"source_commands\":{}", json_array(&self.source_commands)),
            format!(
                "\"capability_fingerprint\":{}",
                json_string(&self.capability_fingerprint)
            ),
            format!("\"evidence\":{}", json_array(&self.evidence)),
            format!(
                "\"fallback_behavior\":{}",
                json_array(&self.fallback_behavior)
            ),
            format!("\"non_claims\":{}", json_array(&self.non_claims)),
        ];
        format!("{{{}}}", members.join(","))
    }

    /// Hashes `available`, `devices`, `toolchain` and `source_commands` (in
    /// that order) into a `rocmhip-<16 hex digits>` string.
    ///
    /// NOTE(review): the standard library does not specify `DefaultHasher`'s
    /// algorithm and allows it to change between Rust releases, so
    /// fingerprints are only comparable between binaries built with the same
    /// toolchain.
    fn compute_fingerprint(&self) -> String {
        let mut state = DefaultHasher::new();
        self.available.hash(&mut state);
        self.devices.hash(&mut state);
        self.toolchain.hash(&mut state);
        self.source_commands.hash(&mut state);
        let digest = state.finish();
        format!("rocmhip-{digest:016x}")
    }
}
impl RocmPortabilityMatrix {
    /// Derives a portability matrix from a single capability report.
    ///
    /// Every detected device becomes one row; when no device was detected a
    /// single `unavailable` placeholder row is emitted instead. No row built
    /// here allows a claim, so the matrix-level `claim_allowed` stays false.
    pub fn from_capability_report(report: &RocmHipCapabilityReport) -> Self {
        let hip_version = report.toolchain.hip_version.as_deref().unwrap_or("unknown");
        let driver_version = report
            .toolchain
            .driver_version
            .as_deref()
            .unwrap_or("unknown");
        let fingerprint = report.capability_fingerprint.as_str();

        let mut rows = Vec::with_capacity(report.devices.len().max(1));
        for device in &report.devices {
            let status = if device.selected {
                "local_selected_device"
            } else {
                "detected_not_selected"
            };
            rows.push(RocmPortabilityMatrixRow {
                device_name: device.marketing_name.clone(),
                gfx: device.gfx.clone(),
                hip_version: hip_version.to_string(),
                driver_version: driver_version.to_string(),
                capability_fingerprint: fingerprint.to_string(),
                status: status.to_string(),
                claim_allowed: false,
                blocker: "single-host or unreviewed device evidence is not portable ROCm support"
                    .to_string(),
            });
        }
        if rows.is_empty() {
            // No devices at all: keep the table non-empty with a placeholder row.
            rows.push(RocmPortabilityMatrixRow {
                device_name: "unavailable".to_string(),
                gfx: "unavailable".to_string(),
                hip_version: hip_version.to_string(),
                driver_version: driver_version.to_string(),
                capability_fingerprint: fingerprint.to_string(),
                status: "unavailable".to_string(),
                claim_allowed: false,
                blocker: "ROCm/HIP unavailable; portability support cannot be claimed".to_string(),
            });
        }

        let reviewed_rows = rows.iter().filter(|row| row.claim_allowed).count();
        Self {
            artifact: "tokitai-rocm-portability-matrix".to_string(),
            version: 1,
            rows,
            claim_allowed: reviewed_rows >= 2,
            claim_blocker: "portable ROCm support requires at least two reviewed passing device/compiler combinations"
                .to_string(),
            required_reviewed_device_count: 2,
        }
    }

    /// Renders the matrix as Markdown: a short header followed by one table
    /// row per matrix row, with `|` escaped inside text cells.
    pub fn to_markdown(&self) -> String {
        let mut lines: Vec<String> = Vec::with_capacity(self.rows.len() + 9);
        lines.push("# ROCm Portability Matrix".to_string());
        lines.push(String::new());
        lines.push(format!("artifact: {}", self.artifact));
        lines.push(format!("version: {}", self.version));
        lines.push(format!("claim_allowed: {}", self.claim_allowed));
        lines.push(format!("claim_blocker: {}", self.claim_blocker));
        lines.push(String::new());
        lines.push(
            "| Device | GFX | HIP | Driver | Fingerprint | Status | Claim allowed | Blocker |"
                .to_string(),
        );
        lines.push("| --- | --- | --- | --- | --- | --- | --- | --- |".to_string());

        lines.extend(self.rows.iter().map(|row| {
            format!(
                "| {} | {} | {} | {} | {} | {} | {} | {} |",
                md(&row.device_name),
                md(&row.gfx),
                md(&row.hip_version),
                md(&row.driver_version),
                md(&row.capability_fingerprint),
                md(&row.status),
                row.claim_allowed,
                md(&row.blocker)
            )
        }));

        lines.join("\n")
    }
}
impl RocmHipDevice {
    /// Renders the device as a single-line JSON object with a fixed member
    /// order; unknown numeric fields become `null`.
    fn to_json(&self) -> String {
        let members = [
            format!("\"index\":{}", self.index),
            format!("\"marketing_name\":{}", json_string(&self.marketing_name)),
            format!("\"gfx\":{}", json_string(&self.gfx)),
            format!("\"compute_units\":{}", option_u32_json(self.compute_units)),
            format!("\"vram_bytes\":{}", option_u64_json(self.vram_bytes)),
            format!("\"node_id\":{}", option_u32_json(self.node_id)),
            format!("\"selected\":{}", self.selected),
        ];
        format!("{{{}}}", members.join(","))
    }
}
impl RocmHipToolchain {
    /// Renders the toolchain as a single-line JSON object with a fixed member
    /// order; missing versions become `null`.
    fn to_json(&self) -> String {
        let members = [
            format!("\"hipcc_path\":{}", json_string(&self.hipcc_path)),
            format!("\"hip_version\":{}", option_string_json(&self.hip_version)),
            format!(
                "\"clang_version\":{}",
                option_string_json(&self.clang_version)
            ),
            format!(
                "\"driver_version\":{}",
                option_string_json(&self.driver_version)
            ),
        ];
        format!("{{{}}}", members.join(","))
    }
}
/// Probes the local host by running each default source command through the
/// shell, in order, and building a capability report from the captured outputs.
pub fn detect_local_rocm_hip() -> RocmHipCapabilityReport {
    let mut outputs = Vec::new();
    for command in default_source_commands() {
        outputs.push(run_shell_command(&command));
    }
    capability_report_from_command_outputs(outputs)
}
/// Builds a capability report from already-captured command outputs.
///
/// Outputs are matched to tools by substring of their command line
/// (`rocminfo`, `rocm-smi`, `hipcc`). An empty `outputs` list yields the
/// `unavailable` report. The report is marked available only when every
/// command succeeded, a device could be selected, and both the HIP version
/// and the driver version were parsed.
pub fn capability_report_from_command_outputs(
    outputs: Vec<RocmHipCommandOutput>,
) -> RocmHipCapabilityReport {
    if outputs.is_empty() {
        return RocmHipCapabilityReport::unavailable("no command outputs supplied");
    }

    // Failed commands are the first evidence entries, in command order.
    let mut source_commands = Vec::with_capacity(outputs.len());
    let mut evidence = Vec::new();
    for output in &outputs {
        source_commands.push(output.command.clone());
        if !output.status_success {
            evidence.push(format!(
                "{} failed: {}",
                output.command,
                first_nonempty_line(output)
            ));
        }
    }
    let all_commands_succeeded = evidence.is_empty();

    let rocminfo_text = command_stdout(&outputs, "rocminfo").unwrap_or_default();
    let rocm_smi_text = command_stdout(&outputs, "rocm-smi").unwrap_or_default();
    let hipcc_text = command_stdout(&outputs, "hipcc").unwrap_or_default();

    let mut devices = parse_rocm_smi_devices(&rocm_smi_text);
    merge_rocminfo_devices(&mut devices, &rocminfo_text);

    let toolchain = RocmHipToolchain {
        hipcc_path: "/opt/rocm/bin/hipcc".to_string(),
        hip_version: parse_prefixed_value(&hipcc_text, "HIP version:"),
        clang_version: parse_first_line_containing(&hipcc_text, "clang version"),
        driver_version: parse_prefixed_value(&rocm_smi_text, "Driver version:"),
    };

    // Flag every device whose index equals the chosen one, then keep a copy
    // of the first such device.
    let selected_index = select_primary_device(&devices);
    for device in devices.iter_mut() {
        device.selected = selected_index == Some(device.index);
    }
    let selected_device = match selected_index {
        Some(index) => devices
            .iter()
            .find(|device| device.index == index)
            .cloned(),
        None => None,
    };

    let available = all_commands_succeeded
        && selected_device.is_some()
        && toolchain.hip_version.is_some()
        && toolchain.driver_version.is_some();

    match &selected_device {
        Some(device) => evidence.push(format!(
            "selected ROCm/HIP device GPU[{}] {} {} compute_units={} vram_bytes={}",
            device.index,
            device.marketing_name,
            device.gfx,
            option_u32(device.compute_units),
            option_u64(device.vram_bytes)
        )),
        None => evidence.push("no selectable ROCm/HIP GPU device detected".to_string()),
    }
    if let Some(version) = &toolchain.hip_version {
        evidence.push(format!("hipcc reports HIP version {version}"));
    }
    if let Some(version) = &toolchain.driver_version {
        evidence.push(format!("rocm-smi reports driver version {version}"));
    }
    let has_validation_target = devices
        .iter()
        .any(|device| device.gfx == "gfx1101" && device.marketing_name.contains("RX 7800 XT"));
    if has_validation_target {
        evidence
            .push("local validation target detected: AMD Radeon RX 7800 XT gfx1101".to_string());
    }

    let mut report = RocmHipCapabilityReport {
        artifact: ROCM_HIP_HARDWARE_CONTRACT_ARTIFACT.to_string(),
        version: ROCM_HIP_HARDWARE_CONTRACT_VERSION,
        available,
        selected_device,
        devices,
        toolchain,
        source_commands,
        capability_fingerprint: String::new(),
        evidence,
        fallback_behavior: fallback_behavior(),
        non_claims: non_claims(),
    };
    // The fingerprint is derived from the other fields, so it is filled in last.
    report.capability_fingerprint = report.compute_fingerprint();
    report
}
/// Runs `command` through `sh -c` and captures its exit status and output.
///
/// A failure to spawn the shell is reported as an unsuccessful output whose
/// `stderr` holds the spawn error text; this function never panics on it.
fn run_shell_command(command: &str) -> RocmHipCommandOutput {
    let (status_success, stdout, stderr) = match Command::new("sh").args(["-c", command]).output()
    {
        Ok(finished) => (
            finished.status.success(),
            String::from_utf8_lossy(&finished.stdout).into_owned(),
            String::from_utf8_lossy(&finished.stderr).into_owned(),
        ),
        Err(spawn_error) => (false, String::new(), spawn_error.to_string()),
    };
    RocmHipCommandOutput {
        command: command.to_string(),
        status_success,
        stdout,
        stderr,
    }
}
/// Collects devices from `rocm-smi` output.
///
/// Each line carrying a `GPU[n]` marker and one of the recognised keys
/// updates the device with index `n`, creating it on first sight. The keys
/// are tried in a fixed order and the first one found on the line wins.
/// The result is sorted by device index.
fn parse_rocm_smi_devices(text: &str) -> Vec<RocmHipDevice> {
    const CARD_SERIES: &str = "Card Series:";
    const GFX_VERSION: &str = "GFX Version:";
    const VRAM_TOTAL: &str = "VRAM Total Memory (B):";
    const NODE_ID: &str = "Node ID:";
    let recognized_keys = [CARD_SERIES, GFX_VERSION, VRAM_TOTAL, NODE_ID];

    let mut devices = Vec::new();
    for line in text.lines() {
        let hit = recognized_keys.iter().find_map(|&key| {
            parse_gpu_line_value(line, key).map(|(index, value)| (key, index, value))
        });
        let Some((key, index, value)) = hit else {
            continue;
        };
        let device = ensure_device(&mut devices, index);
        match key {
            CARD_SERIES => device.marketing_name = value,
            GFX_VERSION => device.gfx = value,
            VRAM_TOTAL => device.vram_bytes = value.parse().ok(),
            // Only NODE_ID remains in `recognized_keys`.
            _ => device.node_id = value.parse().ok(),
        }
    }
    devices.sort_by_key(|device| device.index);
    devices
}
/// Merges compute-unit counts (and missing names) from `rocminfo` output into
/// `devices`, adding devices that `rocm-smi` did not report.
///
/// The text is scanned line by line. `Name: gfx…` and `Marketing Name:` lines
/// are remembered for the current agent and forgotten at every `*******` or
/// `Agent ` line. A `Compute Unit:` line with a parseable number is applied to
/// a device with the remembered gfx name:
///
/// * a device with that gfx and no compute-unit count yet is preferred, so
///   several identical GPUs are each filled by their own agent;
/// * otherwise the first device with that gfx is updated;
/// * if no device has that gfx, a new one is appended with an index one past
///   the highest existing index, so it cannot collide with an index that
///   `rocm-smi` reported.
///
/// Identical GPUs known only from `rocminfo` still collapse into one device,
/// because agents carry no index to tell them apart. `devices` is sorted by
/// index on return.
fn merge_rocminfo_devices(devices: &mut Vec<RocmHipDevice>, text: &str) {
    let mut current_gfx: Option<String> = None;
    let mut current_name: Option<String> = None;
    for line in text.lines() {
        let clean = strip_ansi(line);
        let trimmed = clean.trim();
        if let Some(value) = trimmed.strip_prefix("Name:") {
            // Only GPU agents have a `gfx…` name; other names are ignored.
            let name = value.trim();
            if name.starts_with("gfx") {
                current_gfx = Some(name.to_string());
            }
        } else if let Some(value) = trimmed.strip_prefix("Marketing Name:") {
            current_name = Some(value.trim().to_string());
        } else if let Some(value) = trimmed.strip_prefix("Compute Unit:") {
            let compute_units = value.trim().parse::<u32>().ok();
            if let (Some(gfx), Some(compute_units)) = (&current_gfx, compute_units) {
                let position = devices
                    .iter()
                    .position(|device| device.gfx == *gfx && device.compute_units.is_none())
                    .or_else(|| devices.iter().position(|device| device.gfx == *gfx));
                match position {
                    Some(position) => {
                        let device = &mut devices[position];
                        device.compute_units = Some(compute_units);
                        if device.marketing_name.is_empty() {
                            device.marketing_name =
                                current_name.clone().unwrap_or_else(|| gfx.clone());
                        }
                    }
                    None => {
                        let next_index = devices
                            .iter()
                            .map(|device| device.index)
                            .max()
                            .map_or(0, |highest| highest + 1);
                        devices.push(RocmHipDevice {
                            index: next_index,
                            marketing_name: current_name.clone().unwrap_or_else(|| gfx.clone()),
                            gfx: gfx.clone(),
                            compute_units: Some(compute_units),
                            vram_bytes: None,
                            node_id: None,
                            selected: false,
                        });
                    }
                }
            }
        } else if trimmed.starts_with("*******") || trimmed.starts_with("Agent ") {
            // Agent boundary: forget what was remembered for the previous agent.
            current_gfx = None;
            current_name = None;
        }
    }
    devices.sort_by_key(|device| device.index);
}
/// Picks the index of the primary device.
///
/// Devices without a gfx name are not eligible. Among the rest, the device
/// with the most VRAM wins, then the most compute units (unknown values count
/// as 0), then the lowest index. Returns `None` when nothing is eligible.
fn select_primary_device(devices: &[RocmHipDevice]) -> Option<usize> {
    let mut best: Option<(u64, u32, std::cmp::Reverse<usize>)> = None;
    for device in devices {
        if device.gfx.is_empty() {
            continue;
        }
        let rank = (
            device.vram_bytes.unwrap_or(0),
            device.compute_units.unwrap_or(0),
            // Reversed so that a lower index ranks higher on ties.
            std::cmp::Reverse(device.index),
        );
        if best.map_or(true, |current| rank > current) {
            best = Some(rank);
        }
    }
    best.map(|(_, _, std::cmp::Reverse(index))| index)
}
/// Extracts `(gpu index, value)` from a line such as
/// `GPU[0] : Card Series: AMD Radeon …`.
///
/// ANSI escapes are stripped first. Returns `None` when the line has no
/// `GPU[n]` marker with a numeric `n`, or does not contain `key`. The value is
/// everything after the first occurrence of `key`, trimmed.
fn parse_gpu_line_value(line: &str, key: &str) -> Option<(usize, String)> {
    let stripped = strip_ansi(line);
    let clean = stripped.trim();
    let (_, after_marker) = clean.split_once("GPU[")?;
    let (index_text, _) = after_marker.split_once(']')?;
    let index: usize = index_text.parse().ok()?;
    let (_, value) = clean.split_once(key)?;
    Some((index, value.trim().to_string()))
}
/// Returns the trimmed remainder of the first line that, after ANSI stripping
/// and trimming, starts with `prefix` and has a non-empty remainder.
fn parse_prefixed_value(text: &str, prefix: &str) -> Option<String> {
    for line in text.lines() {
        let clean = strip_ansi(line);
        if let Some(rest) = clean.trim().strip_prefix(prefix) {
            let value = rest.trim();
            // A matching line with nothing after the prefix does not count.
            if !value.is_empty() {
                return Some(value.to_string());
            }
        }
    }
    None
}
/// Returns the first line that contains `needle`, with ANSI escapes stripped
/// and surrounding whitespace trimmed.
fn parse_first_line_containing(text: &str, needle: &str) -> Option<String> {
    for line in text.lines() {
        let clean = strip_ansi(line);
        let candidate = clean.trim();
        if candidate.contains(needle) {
            return Some(candidate.to_string());
        }
    }
    None
}
/// Returns a copy of the standard output of the first command whose command
/// line contains `needle`, or `None` when no command matches.
fn command_stdout(outputs: &[RocmHipCommandOutput], needle: &str) -> Option<String> {
    for output in outputs {
        if output.command.contains(needle) {
            return Some(output.stdout.clone());
        }
    }
    None
}
/// Returns the first non-blank line of a command's output, trimmed, looking
/// at standard error before standard output; `no diagnostic` when both are blank.
fn first_nonempty_line(output: &RocmHipCommandOutput) -> String {
    for stream in [&output.stderr, &output.stdout].iter() {
        for line in stream.lines() {
            let candidate = line.trim();
            if !candidate.is_empty() {
                return candidate.to_string();
            }
        }
    }
    "no diagnostic".to_string()
}
/// Returns the device with the given index, appending an empty, unselected
/// device with that index first when none exists yet.
fn ensure_device(devices: &mut Vec<RocmHipDevice>, index: usize) -> &mut RocmHipDevice {
    let position = match devices.iter().position(|device| device.index == index) {
        Some(existing) => existing,
        None => {
            devices.push(RocmHipDevice {
                index,
                marketing_name: String::new(),
                gfx: String::new(),
                compute_units: None,
                vram_bytes: None,
                node_id: None,
                selected: false,
            });
            // The new device is the last element.
            devices.len() - 1
        }
    };
    &mut devices[position]
}
/// Removes ANSI CSI escape sequences (`ESC [ … final`) from `text`.
///
/// A sequence starts with `ESC` immediately followed by `[` and ends at its
/// final byte, which is any character in `@`..=`~` (0x40..=0x7E). This covers
/// the letter-terminated colour codes (`ESC[31m`) as well as sequences that
/// end in a non-letter such as `ESC[1~`. A lone `ESC` not followed by `[` is
/// kept as-is; an unterminated sequence consumes the rest of the input.
fn strip_ansi(text: &str) -> String {
    let mut output = String::with_capacity(text.len());
    let mut chars = text.chars().peekable();
    while let Some(ch) = chars.next() {
        if ch == '\u{1b}' && chars.peek() == Some(&'[') {
            // Drop the `[`, then everything up to and including the final byte.
            chars.next();
            for next in chars.by_ref() {
                if matches!(next, '@'..='~') {
                    break;
                }
            }
        } else {
            output.push(ch);
        }
    }
    output
}
/// Command lines used to probe the local host, in the order they are run:
/// `rocminfo`, `hipcc --version`, then `rocm-smi` with its product, driver,
/// VBIOS and VRAM options.
fn default_source_commands() -> Vec<String> {
    const COMMANDS: [&str; 3] = [
        "/opt/rocm/bin/rocminfo",
        "/opt/rocm/bin/hipcc --version",
        "/opt/rocm/bin/rocm-smi --showproductname --showdriverversion --showvbios --showmeminfo vram",
    ];
    COMMANDS.iter().map(|&command| command.to_string()).collect()
}
/// Fixed statements, copied into every report, describing what happens when
/// ROCm/HIP evidence is missing or partial.
fn fallback_behavior() -> Vec<String> {
    const STATEMENTS: [&str; 3] = [
        "default tests and CPU semantic execution do not require ROCm/HIP",
        "missing or partial ROCm/HIP evidence keeps HIP lowerings inadmissible",
        "future HIP kernels must compare against the CPU oracle before support claims",
    ];
    STATEMENTS
        .iter()
        .map(|&statement| statement.to_string())
        .collect()
}
/// Fixed statements, copied into every report, listing what the hardware
/// contract explicitly does not claim.
fn non_claims() -> Vec<String> {
    const STATEMENTS: [&str; 4] = [
        "not a HIP kernel execution claim",
        "not portable AMD GPU support",
        "not p-adic or finite-site sheaf acceleration",
        "not production performance evidence",
    ];
    STATEMENTS
        .iter()
        .map(|&statement| statement.to_string())
        .collect()
}
/// Formats an optional `u32` for human-readable output; `None` becomes `unknown`.
fn option_u32(value: Option<u32>) -> String {
    match value {
        Some(number) => number.to_string(),
        None => "unknown".to_string(),
    }
}
/// Formats an optional `u64` for human-readable output; `None` becomes `unknown`.
fn option_u64(value: Option<u64>) -> String {
    match value {
        Some(number) => number.to_string(),
        None => "unknown".to_string(),
    }
}
/// Formats an optional `u32` as a JSON value; `None` becomes `null`.
fn option_u32_json(value: Option<u32>) -> String {
    match value {
        Some(number) => number.to_string(),
        None => "null".to_string(),
    }
}
/// Formats an optional `u64` as a JSON value; `None` becomes `null`.
fn option_u64_json(value: Option<u64>) -> String {
    match value {
        Some(number) => number.to_string(),
        None => "null".to_string(),
    }
}
/// Formats an optional string as a JSON value: a quoted, escaped string, or
/// `null` when absent.
fn option_string_json(value: &Option<String>) -> String {
    match value {
        Some(text) => json_string(text),
        None => "null".to_string(),
    }
}
/// Renders a slice of strings as a JSON array of escaped string literals,
/// separated by commas with no whitespace.
fn json_array(values: &[String]) -> String {
    let mut rendered = String::from("[");
    for (position, value) in values.iter().enumerate() {
        if position > 0 {
            rendered.push(',');
        }
        rendered.push_str(&json_string(value));
    }
    rendered.push(']');
    rendered
}
/// Renders `value` as a quoted JSON string literal.
///
/// Quotes, backslashes, newline, carriage return and tab use their short
/// escapes; every other control character is written as `\uXXXX`; all
/// remaining characters are copied unchanged.
fn json_string(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len() + 2);
    escaped.push('"');
    for ch in value.chars() {
        let short_escape = match ch {
            '"' => Some("\\\""),
            '\\' => Some("\\\\"),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };
        if let Some(sequence) = short_escape {
            escaped.push_str(sequence);
        } else if ch.is_control() {
            escaped.push_str(&format!("\\u{:04x}", ch as u32));
        } else {
            escaped.push(ch);
        }
    }
    escaped.push('"');
    escaped
}
/// Escapes `|` as `\|` so the text can sit inside a Markdown table cell.
fn md(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        if ch == '|' {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
pub fn require_selected_rx_7800_xt(report: &RocmHipCapabilityReport) -> Result<()> {
let Some(device) = &report.selected_device else {
return Err(Error::backend(
"ROCm/HIP hardware contract has no selected device",
));
};
if device.marketing_name.contains("RX 7800 XT") && device.gfx == "gfx1101" {
Ok(())
} else {
Err(Error::backend(format!(
"expected local RX 7800 XT gfx1101 validation target, selected {} {}",
device.marketing_name, device.gfx
)))
}
}