hardware 0.0.9

A no_std bare-metal hardware abstraction layer — all port I/O, memory, and swap allocations are guarded at runtime. Do not consider this dependency stable before version x.1.x.
Documentation
use core::sync::atomic::{AtomicU32, AtomicUsize, Ordering};

/// Hardware component classes tracked by this module.
///
/// Each variant maps to one slot of the per-component state tables through
/// `comp_idx`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Component {
    /// Processor. Temperature is read from thermal zone 0 and frequency
    /// changes are forwarded to `crate::power::dvfs`.
    Cpu,
    /// System memory. This module has no hardware backend for it; only the
    /// stored values are tracked.
    Ram,
    /// GPU accelerator, driven through the per-architecture register accessors.
    Gpu,
    /// TPU accelerator, driven through the per-architecture register accessors.
    Tpu,
    /// LPU accelerator, driven through the per-architecture register accessors.
    Lpu,
}

/// Numeric precision mode that can be assigned to a component.
///
/// Stored in the state tables as a small integer code (see
/// `precision_to_u32` / `u32_to_precision`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Precision {
    /// 32-bit floating point; also the mode every component starts in.
    Fp32,
    /// 16-bit floating point.
    Fp16,
    /// 16-bit "bfloat" floating point.
    Bf16,
    /// 8-bit integer.
    Int8,
    /// 4-bit integer.
    Int4,
}

/// Number of `Component` variants; the length of every per-component state table.
const COMP_COUNT: usize = 5;

fn comp_idx(c: Component) -> usize {
    match c {
        Component::Cpu => 0,
        Component::Ram => 1,
        Component::Gpu => 2,
        Component::Tpu => 3,
        Component::Lpu => 4,
    }
}

/// Last temperature stored per component, indexed by `comp_idx`. The name
/// indicates millidegrees; the temperature scale is not shown in this file.
/// Zero means nothing has been stored yet.
static TEMP_MILLIDEG: [AtomicU32; COMP_COUNT] = [const { AtomicU32::new(0) }; COMP_COUNT];
/// Per-component throttle threshold, in the same unit as `TEMP_MILLIDEG`.
/// Starts at 105_000.
static TEMP_LIMIT: [AtomicU32; COMP_COUNT] = [const { AtomicU32::new(105_000) }; COMP_COUNT];

/// Last frequency stored per component by `set_frequency`; zero means unset.
static FREQ_HZ: [AtomicUsize; COMP_COUNT] = [const { AtomicUsize::new(0) }; COMP_COUNT];
/// Lower clamp bound applied by `set_frequency`; starts at 0 (no lower bound).
static FREQ_MIN: [AtomicUsize; COMP_COUNT] = [const { AtomicUsize::new(0) }; COMP_COUNT];
/// Upper clamp bound applied by `set_frequency`; starts at `usize::MAX`
/// (no upper bound).
static FREQ_MAX: [AtomicUsize; COMP_COUNT] = [const { AtomicUsize::new(usize::MAX) }; COMP_COUNT];

/// Per-component cycle counters (see `record_cycles` / `read_cycles`).
static CYCLES: [AtomicUsize; COMP_COUNT] = [const { AtomicUsize::new(0) }; COMP_COUNT];
/// Per-component operation counters (see `record_ops` / `read_ops`).
static OPS_COUNT: [AtomicUsize; COMP_COUNT] = [const { AtomicUsize::new(0) }; COMP_COUNT];

/// Per-component precision, stored as the code produced by `precision_to_u32`.
/// The initial value 0 decodes to `Precision::Fp32`.
static PRECISION: [AtomicU32; COMP_COUNT] = [const { AtomicU32::new(0) }; COMP_COUNT];

fn precision_to_u32(p: Precision) -> u32 {
    match p {
        Precision::Fp32 => 0,
        Precision::Fp16 => 1,
        Precision::Bf16 => 2,
        Precision::Int8 => 3,
        Precision::Int4 => 4,
    }
}

/// Decodes an integer code from the `PRECISION` table back into a `Precision`.
///
/// Codes 1 through 4 select the reduced-precision modes; 0 and any
/// unrecognised code decode to `Precision::Fp32`.
fn u32_to_precision(v: u32) -> Precision {
    match v {
        0 => Precision::Fp32,
        1 => Precision::Fp16,
        2 => Precision::Bf16,
        3 => Precision::Int8,
        4 => Precision::Int4,
        // Unknown codes fall back to the default mode instead of panicking.
        _ => Precision::Fp32,
    }
}

/// Returns the current temperature of `comp`.
///
/// A non-zero hardware reading is cached in `TEMP_MILLIDEG` and returned.
/// When the hardware yields 0 (always the case for `Component::Ram`, which
/// has no sensor path here), the last stored value is returned instead.
pub fn read_temperature(comp: Component) -> u32 {
    let cached = &TEMP_MILLIDEG[comp_idx(comp)];
    let reading = match comp {
        Component::Ram => 0,
        Component::Cpu => read_cpu_temp(),
        Component::Gpu | Component::Tpu | Component::Lpu => read_accel_temp(comp),
    };
    if reading == 0 {
        // No live reading available: serve whatever was stored last.
        cached.load(Ordering::Acquire)
    } else {
        cached.store(reading, Ordering::Release);
        reading
    }
}

/// Overwrites the stored temperature of `comp` with `millideg`.
///
/// The value is what `read_temperature` falls back to when no hardware
/// reading is available.
pub fn set_temperature(comp: Component, millideg: u32) {
    let slot = comp_idx(comp);
    TEMP_MILLIDEG[slot].store(millideg, Ordering::Release);
}

/// Sets the throttle threshold of `comp` to `millideg`.
pub fn set_temp_limit(comp: Component, millideg: u32) {
    let slot = comp_idx(comp);
    TEMP_LIMIT[slot].store(millideg, Ordering::Release);
}

/// Returns the throttle threshold currently configured for `comp`.
pub fn temp_limit(comp: Component) -> u32 {
    let slot = comp_idx(comp);
    TEMP_LIMIT[slot].load(Ordering::Acquire)
}

/// Reports whether `comp` has reached its temperature limit.
///
/// A temperature of 0 is treated as "no reading" and never counts as
/// throttled, whatever the configured limit is.
pub fn is_throttled(comp: Component) -> bool {
    let temp = read_temperature(comp);
    let ceiling = temp_limit(comp);
    !(temp == 0 || temp < ceiling)
}

/// Requests frequency `hz` for `comp`.
///
/// The request is clamped to the bounds configured through `set_freq_bounds`,
/// stored in `FREQ_HZ`, and then forwarded to the hardware: the CPU through
/// `crate::power::dvfs`, accelerators through `apply_accel_freq`. For
/// `Component::Ram` only the stored value changes.
pub fn set_frequency(comp: Component, hz: usize) {
    let slot = comp_idx(comp);
    let floor = FREQ_MIN[slot].load(Ordering::Acquire);
    let ceiling = FREQ_MAX[slot].load(Ordering::Acquire);
    // The lower bound is tested first, so it wins if the bounds are inverted.
    let target = match hz {
        h if h < floor => floor,
        h if h > ceiling => ceiling,
        h => h,
    };
    FREQ_HZ[slot].store(target, Ordering::Release);
    match comp {
        Component::Cpu => crate::power::dvfs::set_frequency(target as u64),
        Component::Gpu | Component::Tpu | Component::Lpu => apply_accel_freq(comp, target),
        Component::Ram => {}
    }
}

/// Returns the frequency recorded for `comp`.
///
/// A non-zero stored value is returned as is. When nothing is stored, the
/// CPU falls back to `crate::power::dvfs::current_frequency()`; every other
/// component reports 0.
pub fn frequency(comp: Component) -> usize {
    match FREQ_HZ[comp_idx(comp)].load(Ordering::Acquire) {
        0 if matches!(comp, Component::Cpu) => crate::power::dvfs::current_frequency() as usize,
        stored => stored,
    }
}

/// Sets the frequency bounds of `comp` to `min_hz..=max_hz`.
///
/// If a frequency is already stored and falls outside the new bounds, it is
/// re-applied through `set_frequency` at the nearest bound. A stored value of
/// 0 (unset) is left alone. The bounds are stored as given; `min_hz <= max_hz`
/// is not checked.
pub fn set_freq_bounds(comp: Component, min_hz: usize, max_hz: usize) {
    let slot = comp_idx(comp);
    FREQ_MIN[slot].store(min_hz, Ordering::Release);
    FREQ_MAX[slot].store(max_hz, Ordering::Release);

    let current = FREQ_HZ[slot].load(Ordering::Acquire);
    if current == 0 {
        // Nothing has been applied yet, so there is nothing to re-clamp.
        return;
    }
    if current < min_hz {
        set_frequency(comp, min_hz);
    } else if current > max_hz {
        set_frequency(comp, max_hz);
    }
}

/// Returns the lower frequency bound configured for `comp`.
pub fn freq_min(comp: Component) -> usize {
    let slot = comp_idx(comp);
    FREQ_MIN[slot].load(Ordering::Acquire)
}

/// Returns the upper frequency bound configured for `comp`.
pub fn freq_max(comp: Component) -> usize {
    let slot = comp_idx(comp);
    FREQ_MAX[slot].load(Ordering::Acquire)
}

/// Adds `count` to the cycle counter of `comp`.
///
/// The addition is an atomic `fetch_add`, which wraps around on overflow.
pub fn record_cycles(comp: Component, count: usize) {
    let counter = &CYCLES[comp_idx(comp)];
    counter.fetch_add(count, Ordering::AcqRel);
}

/// Returns the cycle count of `comp`.
///
/// For the CPU the value comes from `crate::debug::perf::read_timestamp()`
/// and also replaces the stored counter, discarding anything accumulated via
/// `record_cycles`. Every other component returns its stored counter.
pub fn read_cycles(comp: Component) -> usize {
    let counter = &CYCLES[comp_idx(comp)];
    match comp {
        Component::Cpu => {
            let now = crate::debug::perf::read_timestamp() as usize;
            counter.store(now, Ordering::Release);
            now
        }
        Component::Ram | Component::Gpu | Component::Tpu | Component::Lpu => {
            counter.load(Ordering::Acquire)
        }
    }
}

/// Resets the stored cycle counter of `comp` to zero.
pub fn reset_cycles(comp: Component) {
    let counter = &CYCLES[comp_idx(comp)];
    counter.store(0, Ordering::Release);
}

/// Adds `count` to the operation counter of `comp`.
///
/// The addition is an atomic `fetch_add`, which wraps around on overflow.
pub fn record_ops(comp: Component, count: usize) {
    let counter = &OPS_COUNT[comp_idx(comp)];
    counter.fetch_add(count, Ordering::AcqRel);
}

/// Returns the operation counter of `comp`.
pub fn read_ops(comp: Component) -> usize {
    let counter = &OPS_COUNT[comp_idx(comp)];
    counter.load(Ordering::Acquire)
}

/// Resets the operation counter of `comp` to zero.
pub fn reset_ops(comp: Component) {
    let counter = &OPS_COUNT[comp_idx(comp)];
    counter.store(0, Ordering::Release);
}

/// Selects the precision mode `prec` for `comp`.
pub fn set_precision(comp: Component, prec: Precision) {
    let code = precision_to_u32(prec);
    PRECISION[comp_idx(comp)].store(code, Ordering::Release);
}

/// Returns the precision mode currently selected for `comp`
/// (`Precision::Fp32` until one is set).
pub fn precision(comp: Component) -> Precision {
    let code = PRECISION[comp_idx(comp)].load(Ordering::Acquire);
    u32_to_precision(code)
}

/// Returns the per-element storage size, in bytes, for the precision mode
/// currently selected on `comp`.
///
/// `Precision::Int4` is reported as a full byte, the same as `Int8`.
pub fn element_size(comp: Component) -> usize {
    match precision(comp) {
        Precision::Fp32 => 4,
        Precision::Fp16 | Precision::Bf16 => 2,
        Precision::Int8 | Precision::Int4 => 1,
    }
}

/// Point-in-time view of everything this module tracks for one component,
/// as produced by `snapshot`.
pub struct ComponentSnapshot {
    /// Component the snapshot describes.
    pub component: Component,
    /// Temperature as returned by `read_temperature`.
    pub temp_millideg: u32,
    /// Throttle threshold as returned by `temp_limit`.
    pub temp_limit: u32,
    /// Whether the component had reached its temperature limit.
    pub throttled: bool,
    /// Frequency as returned by `frequency`.
    pub freq_hz: usize,
    /// Lower frequency bound as returned by `freq_min`.
    pub freq_min: usize,
    /// Upper frequency bound as returned by `freq_max`.
    pub freq_max: usize,
    /// Cycle count as returned by `read_cycles`.
    pub cycles: usize,
    /// Operation count as returned by `read_ops`.
    pub ops: usize,
    /// Precision mode as returned by `precision`.
    pub precision: Precision,
}

/// Captures the current state of `comp` in a `ComponentSnapshot`.
///
/// Temperature and limit are sampled exactly once and the `throttled` flag is
/// derived from those same samples, so the three fields always agree with
/// each other and the sensor is not read twice per snapshot.
///
/// This has the side effects of the readers it calls: `read_temperature` may
/// refresh the cached temperature, and `read_cycles` overwrites the stored
/// CPU cycle counter.
pub fn snapshot(comp: Component) -> ComponentSnapshot {
    let temp = read_temperature(comp);
    let limit = temp_limit(comp);
    ComponentSnapshot {
        component: comp,
        temp_millideg: temp,
        temp_limit: limit,
        // Same predicate as `is_throttled`, evaluated on the values reported
        // above instead of on a second hardware read.
        throttled: temp > 0 && temp >= limit,
        freq_hz: frequency(comp),
        freq_min: freq_min(comp),
        freq_max: freq_max(comp),
        cycles: read_cycles(comp),
        ops: read_ops(comp),
        precision: precision(comp),
    }
}

pub fn snapshot_all() -> [ComponentSnapshot; COMP_COUNT] {
    [
        snapshot(Component::Cpu),
        snapshot(Component::Ram),
        snapshot(Component::Gpu),
        snapshot(Component::Tpu),
        snapshot(Component::Lpu),
    ]
}

/// Restores every per-component table to its initial value.
///
/// Only the stored state is reset; nothing is written to the hardware.
pub fn reset_all() {
    (0..COMP_COUNT).for_each(|slot| {
        // Thermal state.
        TEMP_MILLIDEG[slot].store(0, Ordering::Release);
        TEMP_LIMIT[slot].store(105_000, Ordering::Release);
        // Frequency state and bounds.
        FREQ_HZ[slot].store(0, Ordering::Release);
        FREQ_MIN[slot].store(0, Ordering::Release);
        FREQ_MAX[slot].store(usize::MAX, Ordering::Release);
        // Counters.
        CYCLES[slot].store(0, Ordering::Release);
        OPS_COUNT[slot].store(0, Ordering::Release);
        // Precision code 0 decodes to `Precision::Fp32`.
        PRECISION[slot].store(0, Ordering::Release);
    });
}

/// Reads the CPU temperature from thermal zone 0, or returns 0 when the
/// thermal API has no value for that zone.
fn read_cpu_temp() -> u32 {
    crate::thermal::api::read_thermal_zone(0).unwrap_or(0)
}

fn read_accel_temp(comp: Component) -> u32 {
    let reg_offset: usize = 0x08;
    let arch = crate::arch::detect_arch();
    let raw = match (comp, arch) {
        (Component::Gpu, crate::arch::Architecture::X86_64) => {
            crate::arch::x86_64::gpu::read_gpu_reg(reg_offset)
        }
        (Component::Gpu, crate::arch::Architecture::AArch64) => {
            crate::arch::aarch64::gpu::read_gpu_reg(reg_offset)
        }
        (Component::Tpu, crate::arch::Architecture::X86_64) => {
            crate::arch::x86_64::tpu::read_tpu_reg(reg_offset)
        }
        (Component::Tpu, crate::arch::Architecture::AArch64) => {
            crate::arch::aarch64::tpu::read_tpu_reg(reg_offset)
        }
        (Component::Lpu, crate::arch::Architecture::X86_64) => {
            crate::arch::x86_64::lpu::read_lpu_reg(reg_offset)
        }
        (Component::Lpu, crate::arch::Architecture::AArch64) => {
            crate::arch::aarch64::lpu::read_lpu_reg(reg_offset)
        }
        _ => 0,
    };
    if raw > 0 && raw < 200_000 {
        return raw;
    }
    0
}

/// Writes the requested frequency to the frequency register (offset `0x0C`)
/// of an accelerator.
///
/// The register receives `hz / 1_000_000`, i.e. the frequency in whole MHz.
/// Values that do not fit in 32 bits saturate at `u32::MAX` instead of being
/// silently truncated. This matters because the default upper bound is
/// `usize::MAX`, which on 64-bit targets would otherwise wrap to an
/// unrelated value. Component/architecture pairs without an accessor here
/// are ignored.
fn apply_accel_freq(comp: Component, hz: usize) {
    let reg_offset: usize = 0x0C;
    // Saturating narrowing: after the `min`, the cast cannot lose bits.
    let val = (hz / 1_000_000).min(u32::MAX as usize) as u32;
    let arch = crate::arch::detect_arch();
    match (comp, arch) {
        (Component::Gpu, crate::arch::Architecture::X86_64) => {
            crate::arch::x86_64::gpu::write_gpu_reg(reg_offset, val)
        }
        (Component::Gpu, crate::arch::Architecture::AArch64) => {
            crate::arch::aarch64::gpu::write_gpu_reg(reg_offset, val)
        }
        (Component::Tpu, crate::arch::Architecture::X86_64) => {
            crate::arch::x86_64::tpu::write_tpu_reg(reg_offset, val)
        }
        (Component::Tpu, crate::arch::Architecture::AArch64) => {
            crate::arch::aarch64::tpu::write_tpu_reg(reg_offset, val)
        }
        (Component::Lpu, crate::arch::Architecture::X86_64) => {
            crate::arch::x86_64::lpu::write_lpu_reg(reg_offset, val)
        }
        (Component::Lpu, crate::arch::Architecture::AArch64) => {
            crate::arch::aarch64::lpu::write_lpu_reg(reg_offset, val)
        }
        _ => {}
    }
}