use crate::{registers, PhysAddr};
use core::ptr::NonNull;
/// Guard word that marks a record slot as not (yet) holding a PMU record.
///
/// `TpuPmu::reset_buffer` stores it little-endian in the first four bytes of
/// every 16-byte slot, `TpuPmu::iter_records` stops at the first slot that
/// still starts with it, and `TpuPmuDoubleEvent::is_guard` compares it with
/// the low 32 bits of `type_and_desc`.
pub const TPUPMU_BUFGUARD: u32 = 0x1234_5678;
/// Event selector handed to `TpuPmu::enable`.
///
/// The discriminant is the raw value that `TpuPmu::enable` shifts left by five
/// bits into the control word. What each selector makes the hardware count is
/// not visible in this file; see the hardware documentation.
#[repr(u32)]
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum TpuPmuEvent {
    /// Raw selector value 0.
    BankConflict = 0,
    /// Raw selector value 1.
    StallCnt = 1,
    /// Raw selector value 2; this is the variant returned by `Default`.
    #[default]
    TdmaBw = 2,
    /// Raw selector value 3.
    TdmaWstrb = 3,
}
/// Kind of a PMU record.
///
/// `TpuPmuDoubleEvent::pmu_type` obtains it by converting the record's 4-bit
/// event-type field through the `TryFrom<u8>` impl below.
#[repr(u32)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TpuPmuType {
    /// Raw type code 1.
    TdmaLoad = 1,
    /// Raw type code 2.
    TdmaStore = 2,
    /// Raw type code 3.
    TdmaMove = 3,
    /// Raw type code 4.
    Tiu = 4,
}

impl TryFrom<u8> for TpuPmuType {
    type Error = ();

    /// Maps a raw type code (1 through 4) to its variant.
    ///
    /// Every other value, including 0, yields `Err(())`.
    fn try_from(value: u8) -> Result<Self, Self::Error> {
        // Table of every variant; the lookup compares against each discriminant.
        const KNOWN: [TpuPmuType; 4] = [
            TpuPmuType::TdmaLoad,
            TpuPmuType::TdmaStore,
            TpuPmuType::TdmaMove,
            TpuPmuType::Tiu,
        ];
        let wanted = u32::from(value);
        KNOWN
            .iter()
            .copied()
            .find(|&kind| kind as u32 == wanted)
            .ok_or(())
    }
}
/// One 16-byte PMU record, as produced by `TpuPmu::iter_records`.
///
/// `type_and_desc` packs four bit fields, least-significant first:
///
/// | bits     | accessor       |
/// |----------|----------------|
/// | 0..=3    | `event_type()` |
/// | 4..=19   | `desc_id()`    |
/// | 20..=41  | `event_cnt0()` |
/// | 42..=63  | `event_cnt1()` |
///
/// The struct is `packed`, so fields must be copied out by value; taking a
/// reference to a field is rejected by the compiler.
#[repr(C, packed)]
#[derive(Clone, Copy, Debug, Default)]
pub struct TpuPmuDoubleEvent {
    /// Packed type / descriptor / counter word (record bytes 0..8).
    pub type_and_desc: u64,
    /// End timestamp (record bytes 8..12); units are not visible in this file.
    pub end_time: u32,
    /// Start timestamp (record bytes 12..16); units are not visible in this file.
    pub start_time: u32,
}

impl TpuPmuDoubleEvent {
    /// Size of one record in bytes.
    pub const SIZE: usize = core::mem::size_of::<Self>();

    /// Returns `width` bits of `type_and_desc`, starting at bit `shift`.
    fn bits(&self, shift: u32, width: u32) -> u64 {
        // Copy the packed field to a local before doing arithmetic on it.
        let word = self.type_and_desc;
        (word >> shift) & ((1u64 << width) - 1)
    }

    /// Raw 4-bit event type (bits 0..=3).
    pub fn event_type(&self) -> u8 {
        self.bits(0, 4) as u8
    }

    /// 16-bit descriptor id (bits 4..=19).
    pub fn desc_id(&self) -> u16 {
        self.bits(4, 16) as u16
    }

    /// First 22-bit event counter (bits 20..=41).
    pub fn event_cnt0(&self) -> u32 {
        self.bits(20, 22) as u32
    }

    /// Second 22-bit event counter (bits 42..=63).
    pub fn event_cnt1(&self) -> u32 {
        self.bits(42, 22) as u32
    }

    /// `end_time - start_time`, computed with wrap-around so that a timestamp
    /// counter that rolled over between start and end still gives the elapsed
    /// count.
    pub fn duration(&self) -> u32 {
        let (started, ended) = (self.start_time, self.end_time);
        ended.wrapping_sub(started)
    }

    /// The event type decoded as a `TpuPmuType`, or `None` for an unknown code.
    pub fn pmu_type(&self) -> Option<TpuPmuType> {
        let code = self.event_type();
        TpuPmuType::try_from(code).ok()
    }

    /// Whether the low 32 bits of `type_and_desc` equal `TPUPMU_BUFGUARD`.
    pub fn is_guard(&self) -> bool {
        let low_word = self.type_and_desc as u32;
        low_word == TPUPMU_BUFGUARD
    }
}
/// Plain-data bundle of PMU settings.
///
/// NOTE(review): nothing in the visible part of this file reads this struct,
/// so the field notes below are inferred from the names only — confirm
/// against the callers before relying on them.
#[derive(Clone, Copy, Debug, Default)]
pub struct TpuPmuConfig {
    /// Presumably the master on/off switch — TODO confirm.
    pub enable: bool,
    /// Presumably enables TPU-side profiling — TODO confirm.
    pub enable_tpu: bool,
    /// Presumably enables TDMA-side profiling — TODO confirm.
    pub enable_tdma: bool,
    /// Event selector; defaults to `TpuPmuEvent::default()`.
    pub event: TpuPmuEvent,
    /// Record-buffer base address; units/encoding not visible here.
    pub buf_base_addr: u32,
    /// Record-buffer size; units not visible here.
    pub buf_size: u32,
}
/// Stateless namespace for PMU register programming and record-buffer helpers.
pub struct TpuPmu;

impl TpuPmu {
    /// Starts or stops the PMU through the register block at `tdma_base`.
    ///
    /// * `enable == true`: writes `buf_paddr >> 4` (truncated to 32 bits) to
    ///   `TPUPMU_BUFBASE`, `buf_size >> 4` to `TPUPMU_BUFSIZE`, then a control
    ///   word containing bits 0, 3, 4, 8, 9 and 10 plus `event` at bits 5 and up.
    /// * `enable == false`: reads `TPUPMU_CTRL` and writes it back with bit 0
    ///   cleared; the buffer registers and the remaining arguments are not
    ///   touched.
    ///
    /// NOTE(review): this is a safe function that performs raw volatile
    /// accesses. It is only sound when `tdma_base` points at a mapped,
    /// 4-byte-aligned register block that covers the three offsets used here.
    pub fn enable(
        tdma_base: NonNull<u8>,
        enable: bool,
        event: TpuPmuEvent,
        buf_paddr: PhysAddr,
        buf_size: u32,
    ) {
        if !enable {
            // Stop: read-modify-write so every control bit other than bit 0 survives.
            let current = Self::read32(tdma_base, registers::TPUPMU_CTRL);
            Self::write32(tdma_base, registers::TPUPMU_CTRL, current & !0x1);
            return;
        }

        // Both buffer registers take their value shifted right by four bits
        // (presumably 16-byte units, matching the record size — confirm).
        let base_field = (buf_paddr >> 4) as u32;
        let size_field = buf_size >> 4;
        Self::write32(tdma_base, registers::TPUPMU_BUFBASE, base_field);
        Self::write32(tdma_base, registers::TPUPMU_BUFSIZE, size_field);

        // The control word is written last, after the buffer is described.
        let fixed_bits: u32 = 0x1 | 0x8 | 0x10 | (0x3 << 8) | (1 << 10);
        let event_bits = (event as u32) << 5;
        Self::write32(tdma_base, registers::TPUPMU_CTRL, fixed_bits | event_bits);
    }

    /// Stamps `TPUPMU_BUFGUARD` (little-endian) into the first four bytes of
    /// every complete 16-byte slot of `buf`.
    ///
    /// The other twelve bytes of each slot and any trailing partial slot are
    /// left unchanged.
    pub fn reset_buffer(buf: &mut [u8]) {
        let guard = TPUPMU_BUFGUARD.to_le_bytes();
        buf.chunks_exact_mut(16)
            .for_each(|slot| slot[..4].copy_from_slice(&guard));
    }

    /// Lazily decodes the records at the start of `buf`.
    ///
    /// Slots are 16 bytes and little-endian. Iteration ends at the first slot
    /// whose leading 32-bit word equals `TPUPMU_BUFGUARD`, or at the end of the
    /// last complete slot, whichever comes first.
    pub fn iter_records(buf: &[u8]) -> impl Iterator<Item = TpuPmuDoubleEvent> + '_ {
        /// Decodes the first four bytes of `bytes` as a little-endian `u32`.
        fn le_u32(bytes: &[u8]) -> u32 {
            u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]])
        }

        buf.chunks_exact(16)
            .take_while(|slot| le_u32(&slot[..4]) != TPUPMU_BUFGUARD)
            .map(|slot| {
                let low = u64::from(le_u32(&slot[0..4]));
                let high = u64::from(le_u32(&slot[4..8]));
                TpuPmuDoubleEvent {
                    type_and_desc: (high << 32) | low,
                    end_time: le_u32(&slot[8..12]),
                    start_time: le_u32(&slot[12..16]),
                }
            })
    }

    /// Volatile 32-bit read of the register `offset` bytes past `base`.
    fn read32(base: NonNull<u8>, offset: u32) -> u32 {
        // SAFETY: not verified here. Soundness relies on the caller of `enable`
        // supplying a base for which `base + offset` is a valid, 4-byte-aligned
        // location that may be read.
        unsafe {
            let reg = base.as_ptr().add(offset as usize) as *const u32;
            core::ptr::read_volatile(reg)
        }
    }

    /// Volatile 32-bit write of `value` to the register `offset` bytes past `base`.
    fn write32(base: NonNull<u8>, offset: u32, value: u32) {
        // SAFETY: not verified here. Soundness relies on the caller of `enable`
        // supplying a base for which `base + offset` is a valid, 4-byte-aligned
        // location that may be written.
        unsafe {
            let reg = base.as_ptr().add(offset as usize) as *mut u32;
            core::ptr::write_volatile(reg, value);
        }
    }
}
/// Per-type record counts and accumulated durations for one PMU buffer.
#[derive(Debug, Clone, Copy, Default)]
pub struct PmuSummary {
    /// Number of `TpuPmuType::TdmaLoad` records.
    pub tdma_load_count: u32,
    /// Number of `TpuPmuType::TdmaStore` records.
    pub tdma_store_count: u32,
    /// Number of `TpuPmuType::TdmaMove` records.
    pub tdma_move_count: u32,
    /// Number of `TpuPmuType::Tiu` records.
    pub tiu_count: u32,
    /// Sum of `duration()` over all three TDMA record kinds.
    pub tdma_total_cycles: u64,
    /// Sum of `duration()` over the TIU records.
    pub tiu_total_cycles: u64,
}

impl PmuSummary {
    /// Builds a summary from every record `TpuPmu::iter_records` yields for `buf`.
    ///
    /// Records whose type code does not map to a `TpuPmuType` are skipped and
    /// contribute to neither the counts nor the totals.
    pub fn from_buffer(buf: &[u8]) -> Self {
        let mut summary = Self::default();
        for record in TpuPmu::iter_records(buf) {
            // Select the counter/total pair once, so the bookkeeping below is
            // written a single time instead of once per record kind.
            let (count, cycles) = match record.pmu_type() {
                Some(TpuPmuType::TdmaLoad) => {
                    (&mut summary.tdma_load_count, &mut summary.tdma_total_cycles)
                }
                Some(TpuPmuType::TdmaStore) => {
                    (&mut summary.tdma_store_count, &mut summary.tdma_total_cycles)
                }
                Some(TpuPmuType::TdmaMove) => {
                    (&mut summary.tdma_move_count, &mut summary.tdma_total_cycles)
                }
                Some(TpuPmuType::Tiu) => (&mut summary.tiu_count, &mut summary.tiu_total_cycles),
                None => continue,
            };
            *count += 1;
            // `u64::from` is a lossless widening; no truncating cast is involved.
            *cycles += u64::from(record.duration());
        }
        summary
    }

    /// Total number of counted records across all four kinds.
    ///
    /// The fields are public, so their sum can exceed `u32::MAX`. The result
    /// saturates at `u32::MAX` instead of panicking (debug builds) or silently
    /// wrapping (release builds).
    pub fn total_ops(&self) -> u32 {
        self.tdma_load_count
            .saturating_add(self.tdma_store_count)
            .saturating_add(self.tdma_move_count)
            .saturating_add(self.tiu_count)
    }
}