/// SIMD instruction-set tier reported for a CPU.
///
/// `Scalar` is the `Default` variant. The numeric meaning of each variant is
/// given by `SimdWidth::lanes` and `SimdWidth::compute_speedup`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize, Default)]
pub enum SimdWidth {
/// No SIMD (the default); `lanes()` reports 1.
#[default]
Scalar,
/// NEON tier; `lanes()` reports 4.
Neon128,
/// SSE2 tier; `lanes()` reports 4.
Sse2,
/// AVX2 tier; `lanes()` reports 8.
Avx2,
/// AVX-512 tier; `lanes()` reports 16.
Avx512,
/// WebAssembly SIMD tier; `lanes()` reports 4.
WasmSimd128,
}
impl SimdWidth {
    /// Returns the lane count associated with this SIMD tier.
    ///
    /// `Scalar` maps to 1, `Neon128`/`Sse2`/`WasmSimd128` to 4, `Avx2` to 8
    /// and `Avx512` to 16.
    /// NOTE(review): these look like 32-bit lanes per vector register — confirm.
    #[provable_contracts_macros::contract("pmat-core.yaml", equation = "check_compliance")]
    pub fn lanes(&self) -> usize {
        match *self {
            Self::Avx512 => 16,
            Self::Avx2 => 8,
            Self::Sse2 | Self::Neon128 | Self::WasmSimd128 => 4,
            Self::Scalar => 1,
        }
    }

    /// Returns the compute speedup factor assigned to this SIMD tier,
    /// relative to `Scalar` (which is 1.0).
    ///
    /// The factors are fixed constants: 4.0 for the 4-lane tiers, 10.0 for
    /// `Avx2` and 12.0 for `Avx512`. Debug builds assert the factor is at
    /// least 1.0.
    #[provable_contracts_macros::contract("pmat-core.yaml", equation = "score_range")]
    pub fn compute_speedup(&self) -> f64 {
        let factor = match *self {
            Self::Avx512 => 12.0,
            Self::Avx2 => 10.0,
            Self::Sse2 | Self::Neon128 | Self::WasmSimd128 => 4.0,
            Self::Scalar => 1.0,
        };
        debug_assert!(factor >= 1.0, "speedup must be >= 1.0: {}", factor);
        factor
    }
}
/// GPU compute backend recorded in a `GpuCapability`.
///
/// `None` is the `Default` variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize, Default)]
pub enum GpuBackend {
/// No GPU backend (the default).
#[default]
None,
/// CUDA backend.
Cuda,
/// wgpu backend.
Wgpu,
/// Metal backend.
Metal,
/// Vulkan backend.
Vulkan,
}
/// Description of a CPU: identity, parallelism, SIMD tier and peak rates.
///
/// Units below are taken from the field names; the code in this file does not
/// convert them. `Default` describes a single-core scalar CPU.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct CpuCapability {
/// Vendor name; `Default` uses `"Unknown"`.
pub vendor: String,
/// Model name; `Default` uses `"Unknown"`.
pub model: String,
/// Core count (presumably physical cores — confirm against the detector).
pub cores: usize,
/// Thread count (presumably hardware threads — confirm against the detector).
pub threads: usize,
/// SIMD tier; its `compute_speedup()` feeds `scale_budgets_for_hardware`.
pub simd: SimdWidth,
/// Base clock frequency, in GHz per the field name.
pub base_freq_ghz: f64,
/// Peak floating-point throughput, in GFLOP/s per the field name.
pub peak_gflops: f64,
/// Memory bandwidth (GB/s per the field name — confirm bytes vs. bits);
/// `scale_budgets_for_hardware` normalizes it against 25.0.
pub memory_bw_gbps: f64,
}
impl Default for CpuCapability {
fn default() -> Self {
Self {
vendor: "Unknown".to_string(),
model: "Unknown".to_string(),
cores: 1,
threads: 1,
simd: SimdWidth::Scalar,
base_freq_ghz: 3.0,
peak_gflops: 6.0, memory_bw_gbps: 25.0,
}
}
}
/// Description of a GPU: identity, backend and peak rates.
///
/// Units below are taken from the field names; nothing in this file converts
/// or validates them.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct GpuCapability {
/// Vendor name.
pub vendor: String,
/// Model name.
pub model: String,
/// Compute backend used to drive this GPU.
pub backend: GpuBackend,
/// Optional compute-capability string (format not established here —
/// presumably a CUDA-style "major.minor"; confirm against the producer).
pub compute_capability: Option<String>,
/// Peak FP32 throughput, in TFLOP/s per the field name.
pub peak_tflops_fp32: f64,
/// Peak tensor throughput, in TFLOP/s per the field name, when available.
pub peak_tflops_tensor: Option<f64>,
/// Memory bandwidth (GB/s per the field name — confirm bytes vs. bits).
pub memory_bw_gbps: f64,
/// Video memory size, in GB per the field name.
pub vram_gb: f64,
}
/// Arithmetic-intensity thresholds used by `HardwareCapability::bottleneck`.
///
/// A workload whose arithmetic intensity is below the selected threshold is
/// classified as `Bottleneck::Memory`, otherwise as `Bottleneck::Compute`.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct RooflineParams {
/// Threshold applied when `bottleneck` is called with `use_gpu == false`.
pub cpu_arithmetic_intensity: f64,
/// Threshold applied when `use_gpu == true`; when `None`, `bottleneck`
/// substitutes `f64::MAX`.
pub gpu_arithmetic_intensity: Option<f64>,
}
impl Default for RooflineParams {
fn default() -> Self {
Self {
cpu_arithmetic_intensity: 0.24, gpu_arithmetic_intensity: None,
}
}
}
/// Time budget for moving one page of memory at a given throughput.
///
/// `Default` derives `us_per_page` from `gb_per_sec` (scaled by 1e9 to
/// bytes/s) and a 4096-byte page.
#[derive(Debug, Clone, Copy, Deserialize, Serialize)]
pub struct ByteBudget {
/// Microseconds per page.
pub us_per_page: f64,
/// Throughput in GB/s (`Default` multiplies it by 1e9 to get bytes/s).
pub gb_per_sec: f64,
/// Page size in bytes; `Default` uses 4096.
pub page_size: usize,
}
impl Default for ByteBudget {
fn default() -> Self {
let gb_per_sec = 25.0;
let bytes_per_sec = gb_per_sec * 1e9;
let pages_per_sec = bytes_per_sec / 4096.0;
Self {
us_per_page: 1_000_000.0 / pages_per_sec,
gb_per_sec,
page_size: 4096,
}
}
}
/// Hardware profile of one machine, as stored in and loaded from the
/// hardware TOML file (see `load_hardware_capability`).
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct HardwareCapability {
/// Timestamp string; `Default` fills it with the current UTC time in
/// RFC 3339 form.
pub timestamp: String,
/// Host name; `Default` uses `"unknown"`.
pub hostname: String,
/// CPU description.
pub cpu: CpuCapability,
/// GPU description, if any.
pub gpu: Option<GpuCapability>,
/// Thresholds used by `bottleneck`.
pub roofline: RooflineParams,
/// Optional per-page byte budget. With `#[serde(default)]` a file lacking
/// this key deserializes to `None`, whereas `Default::default()` for the
/// whole struct sets `Some(ByteBudget::default())`.
#[serde(default)]
pub byte_budget: Option<ByteBudget>,
}
impl Default for HardwareCapability {
fn default() -> Self {
Self {
timestamp: chrono::Utc::now().to_rfc3339(),
hostname: "unknown".to_string(),
cpu: CpuCapability::default(),
gpu: None,
roofline: RooflineParams::default(),
byte_budget: Some(ByteBudget::default()),
}
}
}
/// Classification returned by `HardwareCapability::bottleneck`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum Bottleneck {
/// Arithmetic intensity is below the threshold.
Memory,
/// Arithmetic intensity is at or above the threshold.
Compute,
}
impl HardwareCapability {
    /// Classifies a workload as memory- or compute-bound.
    ///
    /// The threshold is `roofline.cpu_arithmetic_intensity` when `use_gpu`
    /// is false, otherwise `roofline.gpu_arithmetic_intensity`. When the GPU
    /// threshold is `None`, `f64::MAX` is used, so any finite intensity is
    /// classified as memory-bound.
    ///
    /// Returns `Bottleneck::Memory` when `arithmetic_intensity` is strictly
    /// below the threshold and `Bottleneck::Compute` otherwise.
    #[provable_contracts_macros::contract("pmat-core.yaml", equation = "check_compliance")]
    pub fn bottleneck(&self, arithmetic_intensity: f64, use_gpu: bool) -> Bottleneck {
        let ridge = match (use_gpu, self.roofline.gpu_arithmetic_intensity) {
            (false, _) => self.roofline.cpu_arithmetic_intensity,
            (true, Some(gpu_ridge)) => gpu_ridge,
            (true, None) => f64::MAX,
        };

        // Keep the strict `<` so a NaN intensity still falls through to
        // `Compute`, as it always has.
        let memory_bound = arithmetic_intensity < ridge;
        if memory_bound {
            Bottleneck::Memory
        } else {
            Bottleneck::Compute
        }
    }
}
/// Returns the default location of the hardware profile:
/// `<home>/.pmat/hardware.toml`.
///
/// When the home directory cannot be determined, the path is built relative
/// to the current directory (`./.pmat/hardware.toml`).
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "path_exists")]
pub fn default_hardware_path() -> PathBuf {
    let base_dir = match dirs::home_dir() {
        Some(home) => home,
        None => PathBuf::from("."),
    };
    let config_dir = base_dir.join(".pmat");
    config_dir.join("hardware.toml")
}
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "path_exists")]
pub fn load_hardware_capability(path: Option<&Path>) -> Option<HardwareCapability> {
let path = path
.map(PathBuf::from)
.unwrap_or_else(default_hardware_path);
if !path.exists() {
return None;
}
fs::read_to_string(&path)
.ok()
.and_then(|content| toml::from_str(&content).ok())
}
/// Scales per-brick time budgets to the given hardware.
///
/// The scale factor is `sqrt(simd_speedup * memory_bw_gbps / 25.0)`, where
/// `simd_speedup` comes from `SimdWidth::compute_speedup`. Each budget's
/// `max_us` is divided by that factor, so a larger factor yields tighter
/// budgets. Names are copied unchanged and the output has one entry per
/// input entry, in the same order.
///
/// If the factor is not a finite positive number (for example when
/// `memory_bw_gbps` is zero, negative, NaN or infinite in a hand-edited or
/// partially detected profile), the budgets are returned unscaled instead of
/// becoming infinite, NaN or zero.
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "check_compliance")]
pub fn scale_budgets_for_hardware(
    base_budgets: &[BrickBudget],
    hardware: &HardwareCapability,
) -> Vec<BrickBudget> {
    // Same value as the default `CpuCapability::memory_bw_gbps`; presumably
    // the bandwidth the base budgets were calibrated for — confirm.
    const BASELINE_MEMORY_BW_GBPS: f64 = 25.0;

    let simd_factor = hardware.cpu.simd.compute_speedup();
    let mem_bw_factor = hardware.cpu.memory_bw_gbps / BASELINE_MEMORY_BW_GBPS;
    let raw_scale = (simd_factor * mem_bw_factor).sqrt();

    // `sqrt` of a negative product is NaN and a zero bandwidth gives 0.0;
    // dividing by either would corrupt every budget, so fall back to 1.0.
    let scale_factor = if raw_scale.is_finite() && raw_scale > 0.0 {
        raw_scale
    } else {
        1.0
    };

    base_budgets
        .iter()
        .map(|b| BrickBudget {
            name: b.name.clone(),
            max_us: b.max_us / scale_factor,
        })
        .collect()
}