use std::time::Instant;
use crate::backend::hip_dense::run_rocm_hip_dense_i32_add;
use crate::backend::hip_padic::run_rocm_hip_padic_valuation;
use crate::backend::hip_sheaf::run_rocm_hip_sheaf_overlap_i64;
use crate::backend::rocm::detect_local_rocm_hip;
use crate::domain::PadicDomain;
use crate::object::sheaf::{Cover, FiniteSite, Inclusion, OpenId, SectionTable};
use crate::{Error, Result};
/// Artifact identifier that `generate_rocm_benchmark_report` stores in the report's `artifact` field.
pub const ROCM_BENCHMARK_REPORT_ARTIFACT: &str = "tokitai-rocm-hip-benchmark-report";
/// Version number that `generate_rocm_benchmark_report` stores in the report's `version` field.
pub const ROCM_BENCHMARK_REPORT_VERSION: u32 = 1;
/// ROCm/HIP benchmark and conformance report.
///
/// Bundles an artifact identifier, a version number, the collected benchmark
/// rows, and a list of explicit non-claims. The report can be rendered with
/// `to_markdown`, `to_csv`, and `to_json`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RocmBenchmarkReport {
/// Artifact identifier of the report.
pub artifact: String,
/// Version number of the report.
pub version: u32,
/// One entry per workload that was run, fell back, or was unavailable.
pub rows: Vec<RocmBenchmarkRow>,
/// Statements listing what the report does not claim.
pub non_claims: Vec<String>,
}
/// A single row of a [`RocmBenchmarkReport`].
///
/// Every field except `cpu_oracle_matches` and `wall_clock_ns` is free-form
/// text; the example values below are the ones the row builders in this file
/// produce.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RocmBenchmarkRow {
/// Workload name, e.g. `dense_i32_add_smoke`.
pub workload: String,
/// Domain label, e.g. `integer`, `padic:fixed_precision`, `sheaf:finite_site`, or `unavailable`.
pub domain: String,
/// Backend label; executed rows copy it from the backend's report.
pub backend: String,
/// Row status: `passed`, `fallback_captured`, or `unavailable`.
pub status: String,
/// Description of the problem size, e.g. `elements=1024`.
pub scale: String,
/// Capability fingerprint of the device report, or `not_used`.
pub device_fingerprint: String,
/// `gfx` value of the selected device, `unknown`, or `not_used`.
pub gfx: String,
/// HIP version from the toolchain report, `unknown`, or `not_used`.
pub hip_version: String,
/// Driver version from the toolchain report, `unknown`, or `not_used`.
pub driver_version: String,
/// Kernel source fingerprint from the backend's report, or `not_built`.
pub kernel_source_fingerprint: String,
/// Compiler fingerprint from the backend's report, `unknown`, or `not_used`.
pub compiler_fingerprint: String,
/// Copied from the backend's report for executed rows; `true` for unavailable rows.
pub cpu_oracle_matches: bool,
/// Why a fallback happened or why the row is unavailable; `none` otherwise.
pub fallback_reason: String,
/// Semicolon-separated labels for the transfers involved, or a `no_device_transfer` marker.
pub transfer_evidence: String,
/// `grid=<n>;block=<n>` for launched rows, `not_launched` otherwise.
pub launch_metadata: String,
/// Elapsed nanoseconds measured around the backend call; `0` for rows that were not run.
pub wall_clock_ns: u128,
/// Label describing what `wall_clock_ns` covers.
pub timing_scope: String,
}
impl RocmBenchmarkReport {
    /// Renders the report as a Markdown document.
    ///
    /// The document consists of a title, the artifact and version lines, a
    /// table with one line per row (text cells are passed through `md`), and
    /// a `## Non-Claims` bullet list. Lines are joined with `\n`; no trailing
    /// newline is appended.
    pub fn to_markdown(&self) -> String {
        const TABLE_HEADER: &str = "| Workload | Domain | Backend | Status | Scale | CPU oracle | Fallback | Kernel | Compiler | Timing ns |";
        const TABLE_RULE: &str = "| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |";

        let mut document: Vec<String> =
            Vec::with_capacity(9 + self.rows.len() + self.non_claims.len());
        document.push("# ROCm/HIP Benchmark and Conformance Report".to_string());
        document.push(String::new());
        document.push(format!("artifact: {}", self.artifact));
        document.push(format!("version: {}", self.version));
        document.push(String::new());
        document.push(TABLE_HEADER.to_string());
        document.push(TABLE_RULE.to_string());

        document.extend(self.rows.iter().map(|row| {
            let cells = [
                md(&row.workload),
                md(&row.domain),
                md(&row.backend),
                md(&row.status),
                md(&row.scale),
                row.cpu_oracle_matches.to_string(),
                md(&row.fallback_reason),
                md(&row.kernel_source_fingerprint),
                md(&row.compiler_fingerprint),
                row.wall_clock_ns.to_string(),
            ];
            format!("| {} |", cells.join(" | "))
        }));

        document.push(String::new());
        document.push("## Non-Claims".to_string());
        document.extend(self.non_claims.iter().map(|item| format!("- {item}")));
        document.join("\n")
    }

    /// Renders the report as CSV: one header record followed by one record
    /// per row, joined with `\n` and without a trailing newline.
    ///
    /// Text fields are passed through `csv`; the boolean and the timing are
    /// written with their `Display` form.
    pub fn to_csv(&self) -> String {
        const HEADER: &str = "workload,domain,backend,status,scale,device_fingerprint,gfx,hip_version,driver_version,kernel_source_fingerprint,compiler_fingerprint,cpu_oracle_matches,fallback_reason,transfer_evidence,launch_metadata,wall_clock_ns,timing_scope";

        let mut records = Vec::with_capacity(self.rows.len() + 1);
        records.push(HEADER.to_string());
        records.extend(self.rows.iter().map(|row| {
            let fields = [
                csv(&row.workload),
                csv(&row.domain),
                csv(&row.backend),
                csv(&row.status),
                csv(&row.scale),
                csv(&row.device_fingerprint),
                csv(&row.gfx),
                csv(&row.hip_version),
                csv(&row.driver_version),
                csv(&row.kernel_source_fingerprint),
                csv(&row.compiler_fingerprint),
                row.cpu_oracle_matches.to_string(),
                csv(&row.fallback_reason),
                csv(&row.transfer_evidence),
                csv(&row.launch_metadata),
                row.wall_clock_ns.to_string(),
                csv(&row.timing_scope),
            ];
            fields.join(",")
        }));
        records.join("\n")
    }

    /// Renders the report as a compact JSON object with the members
    /// `artifact`, `version`, `rows`, and `non_claims`.
    pub fn to_json(&self) -> String {
        let mut rows_json = String::new();
        for (index, row) in self.rows.iter().enumerate() {
            if index > 0 {
                rows_json.push(',');
            }
            rows_json.push_str(&row.to_json());
        }

        let artifact = json_string(&self.artifact);
        let version = self.version;
        let non_claims = json_array(&self.non_claims);
        format!(
            "{{\"artifact\":{artifact},\"version\":{version},\"rows\":[{rows_json}],\"non_claims\":{non_claims}}}"
        )
    }
}
impl RocmBenchmarkRow {
    /// Serializes this row as a compact JSON object.
    ///
    /// Members are emitted in field-declaration order. Text fields are
    /// encoded with `json_string`; `cpu_oracle_matches` and `wall_clock_ns`
    /// are written as bare JSON literals.
    fn to_json(&self) -> String {
        let members = [
            ("workload", json_string(&self.workload)),
            ("domain", json_string(&self.domain)),
            ("backend", json_string(&self.backend)),
            ("status", json_string(&self.status)),
            ("scale", json_string(&self.scale)),
            ("device_fingerprint", json_string(&self.device_fingerprint)),
            ("gfx", json_string(&self.gfx)),
            ("hip_version", json_string(&self.hip_version)),
            ("driver_version", json_string(&self.driver_version)),
            (
                "kernel_source_fingerprint",
                json_string(&self.kernel_source_fingerprint),
            ),
            ("compiler_fingerprint", json_string(&self.compiler_fingerprint)),
            ("cpu_oracle_matches", self.cpu_oracle_matches.to_string()),
            ("fallback_reason", json_string(&self.fallback_reason)),
            ("transfer_evidence", json_string(&self.transfer_evidence)),
            ("launch_metadata", json_string(&self.launch_metadata)),
            ("wall_clock_ns", self.wall_clock_ns.to_string()),
            ("timing_scope", json_string(&self.timing_scope)),
        ];
        let body = members
            .iter()
            .map(|(key, value)| format!("\"{key}\":{value}"))
            .collect::<Vec<_>>()
            .join(",");
        format!("{{{body}}}")
    }
}
/// Builds the ROCm/HIP benchmark report for the locally detected device.
///
/// When `detect_local_rocm_hip` reports the device as unavailable, the report
/// contains only the rows from `unavailable_rows`. Otherwise each workload is
/// run in a fixed order, followed by a row stating that portable support is
/// not claimed.
///
/// # Errors
///
/// Returns the first error produced by any of the row builders.
pub fn generate_rocm_benchmark_report() -> Result<RocmBenchmarkReport> {
    let device = detect_local_rocm_hip();

    let rows = if device.available {
        vec![
            dense_row("dense_i32_add_smoke", 8)?,
            dense_row("dense_i32_add_scale_1024", 1024)?,
            padic_row()?,
            sheaf_row("sheaf_overlap_compatible", true)?,
            sheaf_row("sheaf_overlap_incompatible", false)?,
            sheaf_fallback_row()?,
            portable_unavailable_row(&device),
        ]
    } else {
        unavailable_rows(&device)
    };

    let non_claims = [
        "wall-clock timings are smoke measurements, not production speedup claims",
        "compile, transfer, launch, and host orchestration are not isolated with profiler precision",
        "ROCm rows are local hardware evidence, not portable AMD GPU support",
    ]
    .iter()
    .map(|claim| claim.to_string())
    .collect();

    Ok(RocmBenchmarkReport {
        artifact: ROCM_BENCHMARK_REPORT_ARTIFACT.to_string(),
        version: ROCM_BENCHMARK_REPORT_VERSION,
        rows,
        non_claims,
    })
}
/// Runs the dense `i32` addition workload on `len` elements and records the
/// outcome as a report row named `name`.
///
/// The inputs are `lhs[i] = i` and `rhs[i] = 1000 - i`. The recorded wall
/// clock spans the whole backend call.
///
/// # Errors
///
/// Propagates any error returned by `run_rocm_hip_dense_i32_add`.
fn dense_row(name: &str, len: usize) -> Result<RocmBenchmarkRow> {
    let (lhs, rhs): (Vec<i32>, Vec<i32>) = (0..len)
        .map(|index| {
            let value = index as i32;
            (value, 1000 - value)
        })
        .unzip();

    let timer = Instant::now();
    let outcome = run_rocm_hip_dense_i32_add(&lhs, &rhs)?;
    let wall_clock_ns = timer.elapsed().as_nanos();

    let (device_fingerprint, gfx, hip_version, driver_version) =
        device_fields(&outcome.device_evidence);
    let launch_metadata = format!(
        "grid={};block={}",
        outcome.launch_grid, outcome.launch_block
    );

    // NOTE(review): status is always "passed", independent of
    // `cpu_oracle_matches`; presumably the backend returns an error on a
    // mismatch — confirm.
    let row = RocmBenchmarkRow {
        workload: name.to_string(),
        domain: "integer".to_string(),
        backend: outcome.backend,
        status: "passed".to_string(),
        scale: format!("elements={len}"),
        device_fingerprint,
        gfx,
        hip_version,
        driver_version,
        kernel_source_fingerprint: outcome.kernel_source_fingerprint,
        compiler_fingerprint: outcome.compiler_fingerprint,
        cpu_oracle_matches: outcome.cpu_oracle_matches,
        fallback_reason: "none".to_string(),
        transfer_evidence: "host_to_device_lhs;host_to_device_rhs;device_to_host_output"
            .to_string(),
        launch_metadata,
        wall_clock_ns,
        timing_scope: "compile+host_transfer+kernel+oracle_smoke_wall_clock".to_string(),
    };
    Ok(row)
}
/// Runs the p-adic valuation workload on a fixed list of residues in the
/// domain `PadicDomain::new(5, 3)` and records the outcome as a report row.
///
/// The recorded wall clock spans the whole backend call.
///
/// # Errors
///
/// Propagates errors from constructing the domain and from
/// `run_rocm_hip_padic_valuation`.
fn padic_row() -> Result<RocmBenchmarkRow> {
    let domain = PadicDomain::new(5, 3)?;
    let residues = [0, 1, 5, 25, 50, 75, 124, 125, 250, 625, 3125, 7];

    let timer = Instant::now();
    let outcome = run_rocm_hip_padic_valuation(&domain, &residues)?;
    let wall_clock_ns = timer.elapsed().as_nanos();

    let (device_fingerprint, gfx, hip_version, driver_version) =
        device_fields(&outcome.device_evidence);
    let scale = format!("residues={}", outcome.residues.len());
    let launch_metadata = format!(
        "grid={};block={}",
        outcome.launch_grid, outcome.launch_block
    );

    let row = RocmBenchmarkRow {
        workload: "padic_valuation_fixed_precision".to_string(),
        domain: "padic:fixed_precision".to_string(),
        backend: outcome.backend,
        status: "passed".to_string(),
        scale,
        device_fingerprint,
        gfx,
        hip_version,
        driver_version,
        kernel_source_fingerprint: outcome.kernel_source_fingerprint,
        compiler_fingerprint: outcome.compiler_fingerprint,
        cpu_oracle_matches: outcome.cpu_oracle_matches,
        fallback_reason: "none".to_string(),
        transfer_evidence: "host_to_device_residues;device_to_host_valuations".to_string(),
        launch_metadata,
        wall_clock_ns,
        timing_scope: "compile+host_transfer+kernel+padic_cpu_oracle_smoke_wall_clock"
            .to_string(),
    };
    Ok(row)
}
/// Runs the sheaf overlap workload on the shared fixture and records the
/// outcome as a report row named `name`.
///
/// Sections are inserted for `A`, `B`, and `A_cap_B`. With `compatible` set,
/// all three carry the value 13; otherwise the section on `B` is 17.
///
/// # Errors
///
/// Propagates any error from `run_rocm_hip_sheaf_overlap_i64`, and returns a
/// verification error when the backend report carries no device evidence.
fn sheaf_row(name: &str, compatible: bool) -> Result<RocmBenchmarkRow> {
    let (site, cover) = sheaf_fixture();
    let mut sections = SectionTable::new();
    sections.insert(open("A"), 13);
    sections.insert(open("B"), if compatible { 13 } else { 17 });
    sections.insert(open("A_cap_B"), 13);
    let start = Instant::now();
    // The section table is passed by reference, like the site and the cover.
    let report = run_rocm_hip_sheaf_overlap_i64(&site, &cover, &sections)?;
    let wall_clock_ns = start.elapsed().as_nanos();
    let evidence = report
        .device_evidence
        .as_ref()
        .ok_or_else(|| Error::verification("expected sheaf HIP row to execute"))?;
    let (device_fingerprint, gfx, hip_version, driver_version) = device_fields(evidence);
    Ok(RocmBenchmarkRow {
        workload: name.to_string(),
        domain: "sheaf:finite_site".to_string(),
        backend: report.backend,
        status: "passed".to_string(),
        scale: format!("overlaps={}", report.overlap_inputs.len()),
        device_fingerprint,
        gfx,
        hip_version,
        driver_version,
        kernel_source_fingerprint: report.kernel_source_fingerprint,
        compiler_fingerprint: report
            .compiler_fingerprint
            .unwrap_or_else(|| "unknown".to_string()),
        cpu_oracle_matches: report.cpu_oracle_matches,
        fallback_reason: "none".to_string(),
        transfer_evidence: "host_to_device_overlap_values;device_to_host_compatibility_flags"
            .to_string(),
        // Missing launch dimensions are reported as 0.
        launch_metadata: format!(
            "grid={};block={}",
            report.launch_grid.unwrap_or(0),
            report.launch_block.unwrap_or(0)
        ),
        wall_clock_ns,
        timing_scope: "compile+host_transfer+kernel+sectiontable_oracle_smoke_wall_clock"
            .to_string(),
    })
}
/// Runs the sheaf overlap workload with an incomplete section table (only
/// `A` has a section) and records the outcome as a fallback row.
///
/// Device-related fields are filled with `not_used` and the launch metadata
/// with `not_launched`; the fallback reason is taken from the backend report
/// and defaults to `none` when the report carries none.
///
/// # Errors
///
/// Propagates any error from `run_rocm_hip_sheaf_overlap_i64`.
fn sheaf_fallback_row() -> Result<RocmBenchmarkRow> {
    let (site, cover) = sheaf_fixture();
    let mut sections = SectionTable::new();
    sections.insert(open("A"), 13);
    let start = Instant::now();
    // The section table is passed by reference, like the site and the cover.
    let report = run_rocm_hip_sheaf_overlap_i64(&site, &cover, &sections)?;
    let wall_clock_ns = start.elapsed().as_nanos();
    Ok(RocmBenchmarkRow {
        workload: "sheaf_overlap_missing_section_fallback".to_string(),
        domain: "sheaf:finite_site".to_string(),
        backend: report.backend,
        status: "fallback_captured".to_string(),
        scale: "missing_section_fixture".to_string(),
        device_fingerprint: "not_used".to_string(),
        gfx: "not_used".to_string(),
        hip_version: "not_used".to_string(),
        driver_version: "not_used".to_string(),
        kernel_source_fingerprint: report.kernel_source_fingerprint,
        compiler_fingerprint: "not_used".to_string(),
        cpu_oracle_matches: report.cpu_oracle_matches,
        fallback_reason: report.fallback_reason.unwrap_or_else(|| "none".to_string()),
        transfer_evidence: "no_device_transfer;cpu_obstruction_provenance_required".to_string(),
        launch_metadata: "not_launched".to_string(),
        wall_clock_ns,
        timing_scope: "cpu_fallback_obstruction_capture_wall_clock".to_string(),
    })
}
/// Builds one `unavailable` row per workload family for a device on which
/// nothing was run.
///
/// Each row copies the capability fingerprint, the HIP and driver versions
/// (`unknown` when absent), and the device evidence joined with `"; "` as the
/// fallback reason.
fn unavailable_rows(
    device: &crate::backend::rocm::RocmHipCapabilityReport,
) -> Vec<RocmBenchmarkRow> {
    const WORKLOADS: [&str; 3] = ["dense_i32_add", "padic_valuation", "sheaf_overlap"];

    let mut rows = Vec::with_capacity(WORKLOADS.len());
    for workload in WORKLOADS.iter() {
        let hip_version = device
            .toolchain
            .hip_version
            .clone()
            .unwrap_or_else(|| "unknown".to_string());
        let driver_version = device
            .toolchain
            .driver_version
            .clone()
            .unwrap_or_else(|| "unknown".to_string());

        rows.push(RocmBenchmarkRow {
            workload: workload.to_string(),
            domain: "unavailable".to_string(),
            backend: "rocm_hip".to_string(),
            status: "unavailable".to_string(),
            scale: "not_run".to_string(),
            device_fingerprint: device.capability_fingerprint.clone(),
            gfx: "unknown".to_string(),
            hip_version,
            driver_version,
            kernel_source_fingerprint: "not_built".to_string(),
            compiler_fingerprint: "unknown".to_string(),
            cpu_oracle_matches: true,
            fallback_reason: device.evidence.join("; "),
            transfer_evidence: "no_device_transfer".to_string(),
            launch_metadata: "not_launched".to_string(),
            wall_clock_ns: 0,
            timing_scope: "unavailable_row".to_string(),
        });
    }
    rows
}
fn portable_unavailable_row(
device: &crate::backend::rocm::RocmHipCapabilityReport,
) -> RocmBenchmarkRow {
RocmBenchmarkRow {
workload: "portable_rocm_hip_support".to_string(),
domain: "unavailable".to_string(),
backend: "rocm_hip_portability_gate".to_string(),
status: "unavailable".to_string(),
scale: "portable_matrix_not_claimed".to_string(),
device_fingerprint: device.capability_fingerprint.clone(),
gfx: device
.selected_device
.as_ref()
.map(|selected| selected.gfx.clone())
.unwrap_or_else(|| "unknown".to_string()),
hip_version: device
.toolchain
.hip_version
.clone()
.unwrap_or_else(|| "unknown".to_string()),
driver_version: device
.toolchain
.driver_version
.clone()
.unwrap_or_else(|| "unknown".to_string()),
kernel_source_fingerprint: "not_built".to_string(),
compiler_fingerprint: "unknown".to_string(),
cpu_oracle_matches: true,
fallback_reason:
"portable ROCm/HIP support is not claimed from a single local RX 7800 XT run"
.to_string(),
transfer_evidence: "no_device_transfer".to_string(),
launch_metadata: "not_launched".to_string(),
wall_clock_ns: 0,
timing_scope: "portable_support_unavailable_row".to_string(),
}
}
/// Extracts `(capability fingerprint, gfx, HIP version, driver version)` from
/// a capability report, substituting `unknown` for every value that is absent.
fn device_fields(
    device: &crate::backend::rocm::RocmHipCapabilityReport,
) -> (String, String, String, String) {
    /// Placeholder used for every missing value.
    fn unknown() -> String {
        String::from("unknown")
    }

    let fingerprint = device.capability_fingerprint.clone();
    let gfx = device
        .selected_device
        .as_ref()
        .map_or_else(unknown, |selected| selected.gfx.clone());
    let hip_version = device.toolchain.hip_version.clone().unwrap_or_else(unknown);
    let driver_version = device
        .toolchain
        .driver_version
        .clone()
        .unwrap_or_else(unknown);

    (fingerprint, gfx, hip_version, driver_version)
}
fn sheaf_fixture() -> (FiniteSite, Cover) {
let site = FiniteSite::new(
vec![open("U"), open("A"), open("B"), open("A_cap_B")],
vec![
Inclusion::new("A", "U"),
Inclusion::new("B", "U"),
Inclusion::new("A_cap_B", "A"),
Inclusion::new("A_cap_B", "B"),
Inclusion::new("A_cap_B", "U"),
],
)
.with_intersection(open("A"), open("B"), open("A_cap_B"));
(site, Cover::new("U", ["A", "B"]))
}
/// Wraps `id` in an owned [`OpenId`].
fn open(id: &str) -> OpenId {
    OpenId(id.to_owned())
}
/// Encodes `value` as a single CSV field.
///
/// A field that contains a comma, a double quote, a line feed, or a carriage
/// return is wrapped in double quotes, with embedded quotes doubled. Any
/// other value is returned unchanged.
fn csv(value: &str) -> String {
    // A bare carriage return is a line break for CSV readers too, so it must
    // trigger quoting just like `\n`.
    let needs_quoting = value
        .chars()
        .any(|ch| matches!(ch, ',' | '"' | '\n' | '\r'));
    if needs_quoting {
        format!("\"{}\"", value.replace('"', "\"\""))
    } else {
        value.to_string()
    }
}
/// Renders `values` as a compact JSON array of strings, encoding every
/// element with `json_string`.
fn json_array(values: &[String]) -> String {
    let mut rendered = String::from("[");
    for (index, value) in values.iter().enumerate() {
        if index > 0 {
            rendered.push(',');
        }
        rendered.push_str(&json_string(value));
    }
    rendered.push(']');
    rendered
}
/// Encodes `value` as a double-quoted JSON string literal.
///
/// Quotes, backslashes, `\n`, `\r`, and `\t` use their short escapes; every
/// other control character is written as a four-digit lowercase `\uXXXX`
/// escape. All remaining characters are copied through unchanged.
fn json_string(value: &str) -> String {
    let mut literal = String::with_capacity(value.len() + 2);
    literal.push('"');
    for ch in value.chars() {
        let short_escape = match ch {
            '"' => Some("\\\""),
            '\\' => Some("\\\\"),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };
        if let Some(escape) = short_escape {
            literal.push_str(escape);
        } else if ch.is_control() {
            literal.push_str(&format!("\\u{:04x}", u32::from(ch)));
        } else {
            literal.push(ch);
        }
    }
    literal.push('"');
    literal
}
/// Makes `value` safe to embed in a single Markdown table cell.
///
/// Pipe characters are escaped as `\|` so they are not read as column
/// separators. Line breaks (`\r\n`, `\n`, or a lone `\r`) are replaced with
/// `<br>`, because a raw line break would end the table row.
fn md(value: &str) -> String {
    let mut cell = String::with_capacity(value.len());
    let mut chars = value.chars().peekable();
    while let Some(ch) = chars.next() {
        match ch {
            '|' => cell.push_str("\\|"),
            '\r' => {
                // Treat `\r\n` as one line break rather than two.
                if chars.peek() == Some(&'\n') {
                    chars.next();
                }
                cell.push_str("<br>");
            }
            '\n' => cell.push_str("<br>"),
            other => cell.push(other),
        }
    }
    cell
}