tpu-sg2002 0.1.0

TPU driver in Rust for SG2002 SoC.
Documentation
//! TPU Performance Monitoring Unit (PMU) for SG2002.

use crate::{registers, PhysAddr};
use core::ptr::NonNull;

/// Sentinel word marking a PMU buffer slot that holds no record.
///
/// [`TpuPmu::reset_buffer`] stores this value (little-endian) in the first
/// four bytes of every 16-byte slot, [`TpuPmu::iter_records`] stops at the
/// first slot that still begins with it, and
/// [`TpuPmuDoubleEvent::is_guard`] compares it against the low 32 bits of a
/// record's header word.
pub const TPUPMU_BUFGUARD: u32 = 0x1234_5678;

/// Event selector for the TPU PMU.
///
/// The discriminant is the raw value that [`TpuPmu::enable`] shifts into the
/// control word (starting at bit 5), so the numbering must stay as it is.
/// [`TpuPmuEvent::TdmaBw`] is the `Default` variant.
#[repr(u32)]
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum TpuPmuEvent {
    /// Bank conflicts (selector value 0).
    BankConflict = 0,
    /// Stall count (selector value 1).
    StallCnt = 1,
    /// TDMA bandwidth (selector value 2); chosen by `Default::default()`.
    #[default]
    TdmaBw = 2,
    /// TDMA write strobe (selector value 3).
    TdmaWstrb = 3,
}

/// Kind of operation a PMU record describes.
///
/// The discriminants are the raw codes accepted by the `TryFrom<u8>`
/// conversion below, which [`TpuPmuDoubleEvent::pmu_type`] feeds with the
/// 4-bit type field of a record.
#[repr(u32)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TpuPmuType {
    /// TDMA load (raw code 1).
    TdmaLoad = 1,
    /// TDMA store (raw code 2).
    TdmaStore = 2,
    /// TDMA move (raw code 3).
    TdmaMove = 3,
    /// TIU operation (raw code 4).
    Tiu = 4,
}

/// Decodes a raw type code into a [`TpuPmuType`].
///
/// Codes 1 through 4 map to the variant with the same discriminant; every
/// other code (including 0) yields `Err(())`.
impl TryFrom<u8> for TpuPmuType {
    type Error = ();

    fn try_from(value: u8) -> Result<Self, Self::Error> {
        // Every variant, so the lookup is driven by the declared discriminants
        // instead of repeating the numbers here.
        const KNOWN: [TpuPmuType; 4] = [
            TpuPmuType::TdmaLoad,
            TpuPmuType::TdmaStore,
            TpuPmuType::TdmaMove,
            TpuPmuType::Tiu,
        ];

        let wanted = u32::from(value);
        KNOWN
            .iter()
            .copied()
            .find(|kind| *kind as u32 == wanted)
            .ok_or(())
    }
}

/// One PMU "double event" record in the hardware's record format.
///
/// `repr(C, packed)` places the three fields back to back with no padding, so
/// a record is exactly 16 bytes with alignment 1. Because the struct is
/// packed, copy fields out by value instead of borrowing them.
#[repr(C, packed)]
#[derive(Clone, Copy, Debug, Default)]
pub struct TpuPmuDoubleEvent {
    /// Bit-packed header word:
    /// - bits[3:0]: record type
    /// - bits[19:4]: descriptor ID
    /// - bits[41:20]: event count 0
    /// - bits[63:42]: event count 1
    pub type_and_desc: u64,
    /// Cycle counter value at the end of the operation.
    pub end_time: u32,
    /// Cycle counter value at the start of the operation.
    pub start_time: u32,
}

impl TpuPmuDoubleEvent {
    /// Number of bytes one record occupies.
    pub const SIZE: usize = core::mem::size_of::<Self>();

    /// Extracts `width` bits of the header word, starting at bit `shift`.
    fn header_bits(&self, shift: u32, width: u32) -> u64 {
        // Copy the packed field to a local before doing arithmetic on it.
        let header = self.type_and_desc;
        let mask = (1u64 << width) - 1;
        (header >> shift) & mask
    }

    /// Raw record type code, taken from header bits[3:0].
    pub fn event_type(&self) -> u8 {
        self.header_bits(0, 4) as u8
    }

    /// Descriptor ID, taken from header bits[19:4].
    pub fn desc_id(&self) -> u16 {
        self.header_bits(4, 16) as u16
    }

    /// First event counter, taken from header bits[41:20] (22 bits).
    pub fn event_cnt0(&self) -> u32 {
        self.header_bits(20, 22) as u32
    }

    /// Second event counter, taken from header bits[63:42] (22 bits).
    pub fn event_cnt1(&self) -> u32 {
        self.header_bits(42, 22) as u32
    }

    /// Elapsed cycles, `end_time - start_time`.
    ///
    /// The subtraction wraps, so an end value numerically below the start
    /// value still produces a result instead of panicking.
    pub fn duration(&self) -> u32 {
        let (began, finished) = (self.start_time, self.end_time);
        finished.wrapping_sub(began)
    }

    /// Decodes [`Self::event_type`] into a [`TpuPmuType`].
    ///
    /// Returns `None` when the code is not one of the known record types.
    pub fn pmu_type(&self) -> Option<TpuPmuType> {
        match TpuPmuType::try_from(self.event_type()) {
            Ok(kind) => Some(kind),
            Err(_) => None,
        }
    }

    /// Reports whether the low 32 bits of the header word equal
    /// [`TPUPMU_BUFGUARD`], i.e. the slot still holds the guard pattern
    /// rather than a record.
    pub fn is_guard(&self) -> bool {
        let low_word = self.type_and_desc as u32;
        low_word == TPUPMU_BUFGUARD
    }
}

/// Settings bundle for the TPU PMU.
///
/// NOTE(review): none of the code visible in this file reads this struct;
/// [`TpuPmu::enable`] takes its arguments individually. The per-field notes
/// below are inferred from the field names and should be confirmed against
/// whichever caller consumes the struct.
#[derive(Clone, Copy, Debug, Default)]
pub struct TpuPmuConfig {
    /// Presumably the overall PMU on/off switch; defaults to `false`.
    pub enable: bool,
    /// Presumably enables monitoring of the TPU side — TODO confirm.
    pub enable_tpu: bool,
    /// Presumably enables monitoring of the TDMA side — TODO confirm.
    pub enable_tdma: bool,
    /// Event selector; `Default` yields [`TpuPmuEvent::TdmaBw`].
    pub event: TpuPmuEvent,
    /// Presumably the base address of the record buffer — TODO confirm.
    pub buf_base_addr: u32,
    /// Presumably the size of the record buffer; units not shown here.
    pub buf_size: u32,
}

/// Stateless namespace for the TPU PMU helpers.
///
/// Every item is an associated function; the register base pointer and the
/// record buffer are supplied by the caller on each call.
pub struct TpuPmu;

impl TpuPmu {
    /// Size in bytes of one record slot in the PMU buffer.
    const SLOT_BYTES: usize = 16;

    /// Starts or stops the PMU through the registers at `tdma_base`.
    ///
    /// * `enable == true`: writes `buf_paddr >> 4` (cast to `u32`) to
    ///   `TPUPMU_BUFBASE` and `buf_size >> 4` to `TPUPMU_BUFSIZE`, then writes
    ///   a control word to `TPUPMU_CTRL` with bits 0, 3, 4, 8, 9 and 10 set and
    ///   `event` placed at bit 5.
    /// * `enable == false`: reads `TPUPMU_CTRL` and writes it back with only
    ///   bit 0 cleared; `event`, `buf_paddr` and `buf_size` are ignored.
    ///
    /// NOTE(review): this is a safe function that performs volatile accesses
    /// through `tdma_base` without validating it. Callers must pass the mapped
    /// base of the register block that the `registers` offsets refer to.
    pub fn enable(
        tdma_base: NonNull<u8>,
        enable: bool,
        event: TpuPmuEvent,
        buf_paddr: PhysAddr,
        buf_size: u32,
    ) {
        if !enable {
            // Stop path: drop bit 0 and leave every other control bit as read.
            let current = Self::read32(tdma_base, registers::TPUPMU_CTRL);
            Self::write32(tdma_base, registers::TPUPMU_CTRL, current & !0x1);
            return;
        }

        // Both buffer registers receive their value shifted right by four bits.
        let base_field = (buf_paddr >> 4) as u32;
        let size_field = buf_size >> 4;
        Self::write32(tdma_base, registers::TPUPMU_BUFBASE, base_field);
        Self::write32(tdma_base, registers::TPUPMU_BUFSIZE, size_field);

        // Bits that are set regardless of the chosen event.
        let fixed_bits: u32 = 0x1 | 0x8 | 0x10 | (0x3 << 8) | (1 << 10);
        let event_bits = (event as u32) << 5;
        Self::write32(tdma_base, registers::TPUPMU_CTRL, fixed_bits | event_bits);
    }

    /// Stamps the guard word into every record slot of `buf`.
    ///
    /// Only the first four bytes of each full 16-byte slot are overwritten
    /// (little-endian [`TPUPMU_BUFGUARD`]); the remaining twelve bytes of the
    /// slot and any trailing bytes shorter than a slot are left untouched.
    pub fn reset_buffer(buf: &mut [u8]) {
        let guard = TPUPMU_BUFGUARD.to_le_bytes();
        buf.chunks_exact_mut(Self::SLOT_BYTES)
            .for_each(|slot| slot[..4].copy_from_slice(&guard));
    }

    /// Decodes records from the start of `buf` until a guard slot is met.
    ///
    /// The buffer is read in 16-byte slots, all fields little-endian. The
    /// iterator ends at the first slot whose leading four bytes equal
    /// [`TPUPMU_BUFGUARD`], or when no full slot remains.
    pub fn iter_records(buf: &[u8]) -> impl Iterator<Item = TpuPmuDoubleEvent> + '_ {
        buf.chunks_exact(Self::SLOT_BYTES)
            .take_while(|slot| Self::le_word(slot, 0) != TPUPMU_BUFGUARD)
            .map(|slot| {
                // Rebuild the 64-bit header from its two little-endian halves.
                let low = u64::from(Self::le_word(slot, 0));
                let high = u64::from(Self::le_word(slot, 4));
                TpuPmuDoubleEvent {
                    type_and_desc: low | (high << 32),
                    end_time: Self::le_word(slot, 8),
                    start_time: Self::le_word(slot, 12),
                }
            })
    }

    /// Reads the little-endian `u32` stored at `slot[at..at + 4]`.
    fn le_word(slot: &[u8], at: usize) -> u32 {
        u32::from_le_bytes([slot[at], slot[at + 1], slot[at + 2], slot[at + 3]])
    }

    /// Volatile 32-bit read of the register `offset` bytes past `base`.
    fn read32(base: NonNull<u8>, offset: u32) -> u32 {
        // SAFETY: not verified here. Soundness depends on the caller supplying
        // a `base`/`offset` pair that addresses a mapped, 4-byte-aligned
        // register.
        unsafe {
            let reg = base.as_ptr().add(offset as usize).cast::<u32>();
            core::ptr::read_volatile(reg)
        }
    }

    /// Volatile 32-bit write of `value` to the register `offset` bytes past `base`.
    fn write32(base: NonNull<u8>, offset: u32, value: u32) {
        // SAFETY: not verified here. Soundness depends on the caller supplying
        // a `base`/`offset` pair that addresses a mapped, 4-byte-aligned
        // register.
        unsafe {
            let reg = base.as_ptr().add(offset as usize).cast::<u32>();
            core::ptr::write_volatile(reg, value);
        }
    }
}

/// Per-kind record counts and cycle totals for one PMU buffer.
///
/// Filled in by [`PmuSummary::from_buffer`]; `Default` gives all zeros.
#[derive(Clone, Copy, Debug, Default)]
pub struct PmuSummary {
    /// Number of TDMA load records seen.
    pub tdma_load_count: u32,
    /// Number of TDMA store records seen.
    pub tdma_store_count: u32,
    /// Number of TDMA move records seen.
    pub tdma_move_count: u32,
    /// Number of TIU records seen.
    pub tiu_count: u32,
    /// Sum of the durations of all TDMA load, store and move records.
    pub tdma_total_cycles: u64,
    /// Sum of the durations of all TIU records.
    pub tiu_total_cycles: u64,
}

impl PmuSummary {
    /// Tallies the records found in a PMU buffer.
    ///
    /// Records come from [`TpuPmu::iter_records`], so counting stops at the
    /// first guard slot. Each record with a known [`TpuPmuType`] bumps the
    /// matching counter and adds its duration to the TDMA or TIU cycle total;
    /// records with an unknown type code are skipped.
    pub fn from_buffer(buf: &[u8]) -> Self {
        TpuPmu::iter_records(buf).fold(Self::default(), |mut acc, rec| {
            let cycles = u64::from(rec.duration());
            if let Some(kind) = rec.pmu_type() {
                // Select the counter and the cycle total this record feeds.
                let (count, total) = match kind {
                    TpuPmuType::TdmaLoad => {
                        (&mut acc.tdma_load_count, &mut acc.tdma_total_cycles)
                    }
                    TpuPmuType::TdmaStore => {
                        (&mut acc.tdma_store_count, &mut acc.tdma_total_cycles)
                    }
                    TpuPmuType::TdmaMove => {
                        (&mut acc.tdma_move_count, &mut acc.tdma_total_cycles)
                    }
                    TpuPmuType::Tiu => (&mut acc.tiu_count, &mut acc.tiu_total_cycles),
                };
                *count += 1;
                *total += cycles;
            }
            acc
        })
    }

    /// Total number of counted records: the three TDMA counts plus the TIU count.
    pub fn total_ops(&self) -> u32 {
        let tdma_ops = self.tdma_load_count + self.tdma_store_count + self.tdma_move_count;
        tdma_ops + self.tiu_count
    }
}