use core::{hint::spin_loop, ptr::NonNull};
use log::debug;
use crate::{
CpuSyncDesc, DmaHeader, KernelFns, PhysAddr, TpuConfig, TpuError,
platform::{TpuRegBackup, TdmaReg, TIMEOUT_US, TdmaSyncStatus, TiuCtrlStatus},
pmu::{TpuPmu, TpuPmuEvent},
registers::{self, TiuLaneNum},
TpuTdmaPioInfo,
};
/// Driver state for one TPU instance, generic over the kernel services `K`
/// (clock control, reset and a microsecond clock) it is built on.
pub struct TpuDevice<K: KernelFns> {
/// Base pointer used for every `registers::TDMA_*` register access.
tdma_base: NonNull<u8>,
/// Base pointer used for every `registers::BD_*` (TIU) register access.
tiu_base: NonNull<u8>,
/// Static configuration; `pmu_enable` reads `pmu_buf_paddr` / `pmu_buf_size` from it.
config: TpuConfig,
/// Kernel callbacks: `now_us`, `enable_clocks`, `disable_clocks`, `reset`.
kfns: K,
/// Raw `TDMA_INT_MASK` value captured by the most recent `clear_interrupt`.
last_tdma_int_mask: u32,
/// Raw `TDMA_SYNC_STATUS` value captured by the most recent `clear_interrupt`;
/// `poll_cmdbuf_done` compares its upper 16 bits against the TDMA command count.
last_tdma_sync_status: u32,
/// Register snapshot written by `suspend` and replayed by `resume`.
reg_backup: TpuRegBackup,
/// Set by `clear_interrupt` and `suspend`, cleared by `probe_setting` and
/// `run_dmabuf`; while set, `suspend` does not wait for the TDMA interrupt.
sync_backup: bool,
/// Set by `suspend` after it waited for the TDMA interrupt itself; cleared by
/// `resume`, `probe_setting` and `run_dmabuf`.
suspend_handle_int: bool,
}
impl<K: KernelFns> TpuDevice<K> {
/// Creates a device handle around the two register windows.
///
/// No hardware access happens here: the cached interrupt/sync values, the
/// register snapshot and both suspend flags all start out zeroed / `false`.
pub const fn new(
    tdma_base: NonNull<u8>,
    tiu_base: NonNull<u8>,
    config: TpuConfig,
    kfns: K,
) -> Self {
    Self {
        // Caller-supplied pieces.
        kfns,
        config,
        tiu_base,
        tdma_base,
        // Suspend/resume bookkeeping.
        suspend_handle_int: false,
        sync_backup: false,
        reg_backup: TpuRegBackup {
            tdma_ctrl: 0,
            tdma_dcm_disable: 0,
            tdma_dbg_mode: 0,
            tdma_des_base: 0,
            tdma_arraybase: [0; 10],
            tiu_ctrl: 0,
            tdma_sync_status: 0,
            tdma_int_mask: 0,
        },
        // Values cached by `clear_interrupt`.
        last_tdma_sync_status: 0,
        last_tdma_int_mask: 0,
    }
}
/// Logs that initialization was requested and reports success.
///
/// The current implementation touches no registers and cannot fail.
pub fn initialize(&mut self) -> Result<(), TpuError> {
    debug!("TPU device initializing");

    Ok(())
}
/// Resets both suspend bookkeeping flags to `false`.
pub fn probe_setting(&mut self) {
    self.suspend_handle_int = false;
    self.sync_backup = false;
}
/// Captures the TDMA interrupt and sync registers, then writes `0xFFFF_0000`
/// to `TDMA_INT_MASK`.
///
/// Returns the upper half of `TDMA_INT_MASK` with the `TDMA_MASK_INIT` bits
/// removed. Also sets `sync_backup`, which makes a later `suspend` skip its
/// own wait.
pub fn clear_interrupt(&mut self) -> u32 {
    // Register read order (INT_MASK, then SYNC_STATUS) is kept as-is.
    let raw_mask = self.read32(self.tdma_base, registers::TDMA_INT_MASK);
    let raw_sync = self.read32(self.tdma_base, registers::TDMA_SYNC_STATUS);
    self.last_tdma_int_mask = raw_mask;
    self.last_tdma_sync_status = raw_sync;

    let pending = !registers::TDMA_MASK_INIT & (raw_mask >> 16);

    // NOTE(review): presumably write-1-to-clear on the status half — confirm
    // against the hardware manual.
    self.write32(self.tdma_base, registers::TDMA_INT_MASK, 0xFFFF_0000);
    self.sync_backup = true;

    debug!("clear_interrupt: status=0x{:x}", pending);
    pending
}
/// Interrupt entry point: forwards to [`Self::clear_interrupt`] and returns
/// its status word.
pub fn irq_handle(&mut self) -> u32 {
    self.clear_interrupt()
}
/// Points the TDMA engine at a descriptor list and writes the control word
/// that has the enable bit set.
///
/// * `desc_offset` – value written to `TDMA_DES_BASE`.
/// * `num_tdma` – descriptor count, placed at `TDMA_CTRL_DESNUM_BIT`.
///
/// Debug mode and DCM-disable are zeroed and the interrupt mask is set to
/// `TDMA_MASK_INIT` before the control register is written last.
pub fn program_tdma_descriptor(&mut self, desc_offset: u32, num_tdma: u32) {
    debug!("program_tdma_descriptor: offset=0x{:x}, num={}", desc_offset, num_tdma);

    let tdma = self.tdma_base;
    self.write32(tdma, registers::TDMA_DES_BASE, desc_offset);
    self.write32(tdma, registers::TDMA_DEBUG_MODE, 0);
    self.write32(tdma, registers::TDMA_DCM_DISABLE, 0);
    self.write32(tdma, registers::TDMA_INT_MASK, registers::TDMA_MASK_INIT);

    // Flags that do not depend on the request, followed by the descriptor count.
    let fixed_flags: u32 = (1u32 << registers::TDMA_CTRL_ENABLE_BIT)
        | (1u32 << registers::TDMA_CTRL_MODESEL_BIT)
        | (0x3u32 << registers::TDMA_CTRL_BURSTLEN_BIT)
        | (1u32 << registers::TDMA_CTRL_FORCE_1ARRAY)
        | (1u32 << registers::TDMA_CTRL_INTRA_CMD_OFF)
        | (1u32 << registers::TDMA_CTRL_64BYTE_ALIGN_EN);
    let ctrl_word = fixed_flags | (num_tdma << registers::TDMA_CTRL_DESNUM_BIT);

    self.write32(tdma, registers::TDMA_CTRL, ctrl_word);
}
/// Programs the TIU descriptor address and lane field, then sets the
/// address-valid, interrupt-enable and TPU-enable bits.
///
/// * `desc_offset` – shifted left by `BDC_ENGINE_CMD_ALIGNED_BIT` to form the
///   descriptor address; the low 32 bits go to `BD_CTRL_BASE_ADDR + 0x4` and
///   bits 32..40 to the low byte of `BD_CTRL_BASE_ADDR + 0x8`.
/// * `lane` – written at `BD_LANE_NUM` after clearing mask `0x3FC0_0000`.
///
/// Each register is accessed in a fixed order; the control register is
/// re-read before the final enable write.
pub fn program_tiu_descriptor(&mut self, desc_offset: u64, lane: TiuLaneNum) {
    debug!("program_tiu_descriptor: offset=0x{:x}, lane={:?}", desc_offset, lane);

    let tiu = self.tiu_base;
    let ctrl_reg = registers::BD_CTRL_BASE_ADDR;
    let addr_low_reg = ctrl_reg + 0x4;
    let addr_high_reg = ctrl_reg + 0x8;
    let aux_reg = ctrl_reg + 0xC;

    let desc_addr = desc_offset << registers::BDC_ENGINE_CMD_ALIGNED_BIT;

    // Descriptor address: low word, then the 8 extra high bits.
    self.write32(tiu, addr_low_reg, desc_addr as u32);
    let high_word = self.read32(tiu, addr_high_reg);
    let addr_high = ((desc_addr >> 32) as u32) & 0xFF;
    self.write32(tiu, addr_high_reg, (high_word & 0xFFFF_FF00) | addr_high);

    // Bit 11 of the +0xC register (meaning not visible from this file).
    let aux_word = self.read32(tiu, aux_reg);
    self.write32(tiu, aux_reg, aux_word | (1 << 11));

    // Replace the lane field.
    let ctrl_word = self.read32(tiu, ctrl_reg);
    let lane_field = (lane as u32) << registers::BD_LANE_NUM;
    self.write32(tiu, ctrl_reg, (ctrl_word & !0x3FC0_0000) | lane_field);

    // Re-read, then set the start bits.
    let ctrl_word = self.read32(tiu, ctrl_reg);
    let start_bits = (1u32 << registers::BD_DES_ADDR_VLD)
        | (1u32 << registers::BD_INTR_ENABLE)
        | (1u32 << registers::BD_TPU_EN);
    self.write32(tiu, ctrl_reg, ctrl_word | start_bits);
}
/// Runs the register sequence used before each new command submission.
///
/// Steps, in order: pulse bit 0 of `BD_CTRL_BASE_ADDR + 0xC` (set, then
/// clear); drop `BD_TPU_EN` and `BD_DES_ADDR_VLD` in the TIU control register;
/// set bit 1 of that register; pulse `TDMA_CTRL_RESET_SYNCID_BIT` in
/// `TDMA_CTRL`; write `0xFFFF_0000` to `TDMA_INT_MASK`.
pub fn resync_cmd_id(&mut self) {
    debug!("resync_cmd_id");

    let tiu = self.tiu_base;
    let tdma = self.tdma_base;
    let tiu_ctrl = registers::BD_CTRL_BASE_ADDR;
    let tiu_aux = tiu_ctrl + 0xC;

    // Pulse bit 0; both writes derive from the same read.
    let aux_word = self.read32(tiu, tiu_aux);
    self.write32(tiu, tiu_aux, aux_word | 0x1);
    self.write32(tiu, tiu_aux, aux_word & !0x1);

    // Stop the TIU and invalidate the descriptor address.
    let run_bits = (1 << registers::BD_TPU_EN) | (1 << registers::BD_DES_ADDR_VLD);
    let ctrl_word = self.read32(tiu, tiu_ctrl);
    self.write32(tiu, tiu_ctrl, ctrl_word & !run_bits);

    // NOTE(review): bit 1 is the bit `poll_cmdbuf_done` treats as the TIU
    // interrupt flag; this write presumably acknowledges it — confirm.
    let ctrl_word = self.read32(tiu, tiu_ctrl);
    self.write32(tiu, tiu_ctrl, ctrl_word | (1 << 1));

    self.write32(tdma, registers::TDMA_CTRL, 1 << registers::TDMA_CTRL_RESET_SYNCID_BIT);
    self.write32(tdma, registers::TDMA_CTRL, 0);
    self.write32(tdma, registers::TDMA_INT_MASK, 0xFFFF_0000);
}
pub fn suspend(&mut self) -> Result<(), TpuError> {
debug!("TPU suspending");
self.reg_backup.tdma_int_mask = self.read32(self.tdma_base, registers::TDMA_INT_MASK);
self.reg_backup.tdma_sync_status = self.read32(self.tdma_base, registers::TDMA_SYNC_STATUS);
self.reg_backup.tiu_ctrl = self.read32(self.tiu_base, registers::BD_CTRL_BASE_ADDR);
const ARRAYBASE_OFFSETS: [u32; 10] = [
registers::TDMA_ARRAYBASE0_L, registers::TDMA_ARRAYBASE1_L, registers::TDMA_ARRAYBASE2_L,
registers::TDMA_ARRAYBASE3_L, registers::TDMA_ARRAYBASE4_L, registers::TDMA_ARRAYBASE5_L,
registers::TDMA_ARRAYBASE6_L, registers::TDMA_ARRAYBASE7_L,
registers::TDMA_ARRAYBASE0_H, registers::TDMA_ARRAYBASE1_H,
];
for (i, &off) in ARRAYBASE_OFFSETS.iter().enumerate() {
self.reg_backup.tdma_arraybase[i] = self.read32(self.tdma_base, off);
}
self.reg_backup.tdma_des_base = self.read32(self.tdma_base, registers::TDMA_DES_BASE);
self.reg_backup.tdma_dbg_mode = self.read32(self.tdma_base, registers::TDMA_DEBUG_MODE);
self.reg_backup.tdma_dcm_disable = self.read32(self.tdma_base, registers::TDMA_DCM_DISABLE);
self.reg_backup.tdma_ctrl = self.read32(self.tdma_base, registers::TDMA_CTRL);
if (self.reg_backup.tdma_ctrl & (1 << registers::TDMA_CTRL_ENABLE_BIT)) != 0 && !self.sync_backup {
let start = self.kfns.now_us();
while self.kfns.now_us().saturating_sub(start) < TIMEOUT_US {
let int_status = (self.read32(self.tdma_base, registers::TDMA_INT_MASK) >> 16) & !registers::TDMA_MASK_INIT;
if int_status != 0 { break; }
spin_loop();
}
self.sync_backup = true;
self.suspend_handle_int = true;
}
self.kfns.disable_clocks();
debug!("TPU suspended");
Ok(())
}
/// Re-enables clocks and writes the `reg_backup` snapshot back to hardware.
///
/// Restored, in order: `TDMA_INT_MASK`, `TDMA_SYNC_STATUS`, the TIU control
/// register, the ten array-base registers, `TDMA_DES_BASE`,
/// `TDMA_DEBUG_MODE`, `TDMA_DCM_DISABLE`. The saved `TDMA_CTRL` value is not
/// written back. `suspend_handle_int` is cleared. Always returns `Ok(())`.
pub fn resume(&mut self) -> Result<(), TpuError> {
    const ARRAYBASE_OFFSETS: [u32; 10] = [
        registers::TDMA_ARRAYBASE0_L, registers::TDMA_ARRAYBASE1_L, registers::TDMA_ARRAYBASE2_L,
        registers::TDMA_ARRAYBASE3_L, registers::TDMA_ARRAYBASE4_L, registers::TDMA_ARRAYBASE5_L,
        registers::TDMA_ARRAYBASE6_L, registers::TDMA_ARRAYBASE7_L,
        registers::TDMA_ARRAYBASE0_H, registers::TDMA_ARRAYBASE1_H,
    ];

    debug!("TPU resuming");
    self.kfns.enable_clocks();
    self.suspend_handle_int = false;

    let tdma = self.tdma_base;
    let tiu = self.tiu_base;
    let saved = &self.reg_backup;

    self.write32(tdma, registers::TDMA_INT_MASK, saved.tdma_int_mask);
    self.write32(tdma, registers::TDMA_SYNC_STATUS, saved.tdma_sync_status);
    self.write32(tiu, registers::BD_CTRL_BASE_ADDR, saved.tiu_ctrl);

    for (off, value) in ARRAYBASE_OFFSETS.iter().zip(saved.tdma_arraybase.iter()) {
        self.write32(tdma, *off, *value);
    }

    self.write32(tdma, registers::TDMA_DES_BASE, saved.tdma_des_base);
    self.write32(tdma, registers::TDMA_DEBUG_MODE, saved.tdma_dbg_mode);
    self.write32(tdma, registers::TDMA_DCM_DISABLE, saved.tdma_dcm_disable);

    debug!("TPU resumed");
    Ok(())
}
/// Open hook; currently does nothing and always succeeds.
pub fn open(&mut self) -> Result<(), TpuError> {
    Ok(())
}
/// Asks the kernel layer to reset the device via `KernelFns::reset`.
///
/// Always returns `Ok(())`.
pub fn reset(&mut self) -> Result<(), TpuError> {
    debug!("TPU reset");

    self.kfns.reset();

    Ok(())
}
/// Brings the platform up: enables clocks, then performs [`Self::reset`] and
/// returns its result.
pub fn platform_init(&mut self) -> Result<(), TpuError> {
    debug!("TPU platform_init");

    self.kfns.enable_clocks();
    let outcome = self.reset();
    outcome
}
/// Tears the platform down by disabling the clocks.
pub fn platform_deinit(&mut self) {
    debug!("TPU platform_deinit");

    self.kfns.disable_clocks();
}
/// Enables or disables the PMU using the buffer described in `config`.
///
/// # Errors
///
/// Returns [`TpuError::InvalidParameter`] when `enable` is true and the
/// configured buffer address or size is zero. Disabling is never rejected.
pub fn pmu_enable(&mut self, enable: bool, event: TpuPmuEvent) -> Result<(), TpuError> {
    debug!("pmu_enable: enable={}, event={:?}", enable, event);

    let buf_paddr = self.config.pmu_buf_paddr;
    let buf_size = self.config.pmu_buf_size;

    let buffer_missing = buf_size == 0 || buf_paddr == 0;
    if enable && buffer_missing {
        return Err(TpuError::InvalidParameter);
    }

    TpuPmu::enable(self.tdma_base, enable, event, buf_paddr, buf_size);
    Ok(())
}
/// Executes the command descriptors described by a DMA buffer header.
///
/// * `dmabuf_paddr` – physical address of the buffer; only used to locate the
///   PMU region (`dmabuf_paddr + header.pmubuf_offset`).
/// * `header` – must pass `is_valid()`.
/// * `descs` – must hold at least `header.cpu_desc_count` entries; only that
///   many are processed.
///
/// For each descriptor: resync command ids, program the TIU if it has BD
/// commands, program the TDMA and wait for it if it has TDMA commands, then
/// poll for completion unless `suspend_handle_int` is set. PMU capture is
/// switched on around the loop when the header carries a non-empty PMU
/// region.
///
/// # Errors
///
/// * [`TpuError::InvalidParameter`] – invalid header or too few descriptors.
/// * Any error from `wait_tdma_done` / `poll_cmdbuf_done` / `pmu_enable_raw`
///   is propagated immediately.
pub fn run_dmabuf(&mut self, dmabuf_paddr: PhysAddr, header: &DmaHeader, descs: &[CpuSyncDesc]) -> Result<(), TpuError> {
    debug!("run_dmabuf: paddr=0x{:x}, desc_count={}", dmabuf_paddr, header.cpu_desc_count);

    if !header.is_valid() {
        debug!("run_dmabuf: invalid header");
        return Err(TpuError::InvalidParameter);
    }
    let desc_count = header.cpu_desc_count as usize;
    if desc_count > descs.len() {
        debug!("run_dmabuf: desc count mismatch");
        return Err(TpuError::InvalidParameter);
    }

    // A fresh run invalidates any suspend bookkeeping from the previous one.
    self.sync_backup = false;
    self.suspend_handle_int = false;
    self.set_array_base(header);

    let pmu_enabled = !(header.pmubuf_offset == 0 || header.pmubuf_size == 0);
    if pmu_enabled {
        let pmu_paddr = dmabuf_paddr + header.pmubuf_offset as u64;
        self.pmu_enable_raw(true, TpuPmuEvent::TdmaBw, pmu_paddr, header.pmubuf_size)?;
    }

    for (i, desc) in descs.iter().enumerate().take(desc_count) {
        let bd_num = desc.bd_count();
        let tdma_num = desc.tdma_count();
        debug!("run_dmabuf: desc[{}] bd={}, tdma={}", i, bd_num, tdma_num);

        self.resync_cmd_id();

        if bd_num > 0 {
            self.program_tiu_descriptor(desc.offset_bd as u64, TiuLaneNum::Lane8);
        }
        if tdma_num > 0 {
            self.program_tdma_descriptor(desc.offset_gdma, tdma_num);
            self.wait_tdma_done()?;
        }
        if !self.suspend_handle_int {
            self.poll_cmdbuf_done(bd_num, tdma_num)?;
        }
    }

    if pmu_enabled {
        self.pmu_enable_raw(false, TpuPmuEvent::TdmaBw, 0, 0)?;
        if !self.suspend_handle_int {
            self.wait_tdma_done()?;
        }
    }

    debug!("run_dmabuf: completed");
    Ok(())
}
/// Issues one TDMA transfer in PIO mode and waits for it to finish.
///
/// A single descriptor is built from `info` (source/destination physical
/// addresses split into low/high words, end-of-descriptor and interrupt
/// flags set) and handed to `set_tdma_pio`.
///
/// * When `info.enable_2d != 0` the descriptor describes an `h` x `w_bytes`
///   region with per-side row strides taken from `info`; the N-stride is
///   `stride * h`, saturating on overflow.
/// * Otherwise only `info.leng_bytes` is used, stored in `src_n_stride`.
///
/// NOTE(review): the meaning of `trans_dir = 2` and of the `trans_fmt`
/// values 0/1 is not visible from this file — confirm against the TDMA
/// descriptor specification.
///
/// # Errors
///
/// Propagates [`TpuError::Timeout`] from `wait_tdma_done`.
pub fn run_pio(&mut self, info: &TpuTdmaPioInfo) -> Result<(), TpuError> {
    debug!("run_pio: src=0x{:x}, dst=0x{:x}", info.paddr_src, info.paddr_dst);

    let mut reg = TdmaReg::default();
    reg.vld = 1;
    reg.trans_dir = 2;
    reg.src_base_addr_low = info.paddr_src as u32;
    reg.src_base_addr_high = (info.paddr_src >> 32) as u32;
    reg.dst_base_addr_low = info.paddr_dst as u32;
    reg.dst_base_addr_high = (info.paddr_dst >> 32) as u32;
    reg.eod = 1;
    reg.intp_en = 1;

    if info.enable_2d != 0 {
        reg.trans_fmt = 0;
        reg.src_n = 1;
        reg.src_c = 1;
        reg.src_h = info.h;
        reg.src_w = info.w_bytes;
        reg.dst_c = 1;
        reg.dst_h = info.h;
        reg.dst_w = info.w_bytes;
        reg.src_n_stride = info.stride_bytes_src.saturating_mul(info.h);
        reg.src_h_stride = info.stride_bytes_src;
        reg.dst_n_stride = info.stride_bytes_dst.saturating_mul(info.h);
        reg.dst_h_stride = info.stride_bytes_dst;
    } else {
        reg.trans_fmt = 1;
        reg.src_n_stride = info.leng_bytes;
    }

    // The original line read `®.emit()` — `&reg` mangled into the
    // registered-trademark sign by an HTML-entity decode. Bind the emitted
    // words and pass them by reference as `set_tdma_pio` expects.
    let pio_words = reg.emit();
    self.set_tdma_pio(&pio_words);
    self.wait_tdma_done()
}
/// Returns the base pointer of the TDMA register window.
pub fn tdma_base(&self) -> NonNull<u8> {
    self.tdma_base
}
/// Returns the base pointer of the TIU register window.
pub fn tiu_base(&self) -> NonNull<u8> {
    self.tiu_base
}
/// Reads `TDMA_SYNC_STATUS` from hardware and wraps the raw word in a
/// [`TdmaSyncStatus`].
pub fn get_tdma_sync_status(&self) -> TdmaSyncStatus {
    let raw = self.read32(self.tdma_base, registers::TDMA_SYNC_STATUS);
    TdmaSyncStatus::from_raw(raw)
}
/// Reads the TIU control register (`BD_CTRL_BASE_ADDR`) from hardware and
/// wraps the raw word in a [`TiuCtrlStatus`].
pub fn get_tiu_ctrl_status(&self) -> TiuCtrlStatus {
    let raw = self.read32(self.tiu_base, registers::BD_CTRL_BASE_ADDR);
    TiuCtrlStatus::from_raw(raw)
}
/// Reports whether both engines look idle.
///
/// True when the TDMA sync status says all-idle and the TIU either has its
/// interrupt flag set or is not enabled. Performs one read of each status
/// register (TIU first, then TDMA).
pub fn is_idle(&self) -> bool {
    let tiu = self.get_tiu_ctrl_status();
    let tdma = self.get_tdma_sync_status();

    if !tdma.is_all_idle() {
        return false;
    }
    tiu.interrupt() || !tiu.enabled()
}
/// Busy-waits until [`Self::is_idle`] reports true.
///
/// # Errors
///
/// Returns [`TpuError::Timeout`] once more than `TIMEOUT_US` microseconds
/// (per `KernelFns::now_us`) have elapsed without the device becoming idle.
pub fn wait_idle(&mut self) -> Result<(), TpuError> {
    let start = self.kfns.now_us();
    loop {
        if self.is_idle() {
            return Ok(());
        }
        let elapsed = self.kfns.now_us().saturating_sub(start);
        if elapsed > TIMEOUT_US {
            debug!("wait_idle: timeout");
            return Err(TpuError::Timeout);
        }
        spin_loop();
    }
}
/// Halts both engines without waiting for outstanding work.
///
/// Writes 0 to `TDMA_CTRL`, clears `BD_TPU_EN` and `BD_DES_ADDR_VLD` in the
/// TIU control register, then writes `0xFFFF_0000` to `TDMA_INT_MASK`.
pub fn emergency_stop(&mut self) {
    debug!("emergency_stop");

    self.write32(self.tdma_base, registers::TDMA_CTRL, 0);

    let run_bits = (1 << registers::BD_TPU_EN) | (1 << registers::BD_DES_ADDR_VLD);
    let tiu_ctrl = self.read32(self.tiu_base, registers::BD_CTRL_BASE_ADDR);
    self.write32(self.tiu_base, registers::BD_CTRL_BASE_ADDR, tiu_ctrl & !run_bits);

    self.write32(self.tdma_base, registers::TDMA_INT_MASK, 0xFFFF_0000);
}
/// Loads the TDMA array-base registers from the header.
///
/// The values from `header.arraybase_l()` are written to
/// `TDMA_ARRAYBASE0_L..=TDMA_ARRAYBASE7_L` pairwise (stopping at whichever
/// list is shorter); both `_H` registers are then zeroed.
fn set_array_base(&mut self, header: &DmaHeader) {
    const OFFSETS: [u32; 8] = [
        registers::TDMA_ARRAYBASE0_L, registers::TDMA_ARRAYBASE1_L, registers::TDMA_ARRAYBASE2_L,
        registers::TDMA_ARRAYBASE3_L, registers::TDMA_ARRAYBASE4_L, registers::TDMA_ARRAYBASE5_L,
        registers::TDMA_ARRAYBASE6_L, registers::TDMA_ARRAYBASE7_L,
    ];

    let tdma = self.tdma_base;
    let bases = header.arraybase_l();

    for (value, reg_off) in bases.iter().zip(OFFSETS.iter()) {
        self.write32(tdma, *reg_off, *value);
    }

    for high_reg in [registers::TDMA_ARRAYBASE0_H, registers::TDMA_ARRAYBASE1_H].iter() {
        self.write32(tdma, *high_reg, 0);
    }
}
/// Busy-waits for a TDMA interrupt status bit, then clears it.
///
/// Polls the upper half of `TDMA_INT_MASK` (minus the `TDMA_MASK_INIT`
/// bits); on the first non-zero value it calls [`Self::clear_interrupt`].
///
/// # Errors
///
/// Returns [`TpuError::Timeout`] when no status bit shows up within
/// `TIMEOUT_US` microseconds.
fn wait_tdma_done(&mut self) -> Result<(), TpuError> {
    let start = self.kfns.now_us();
    loop {
        let raw = self.read32(self.tdma_base, registers::TDMA_INT_MASK);
        let pending = (raw >> 16) & !registers::TDMA_MASK_INIT;

        if pending == 0 {
            if self.kfns.now_us().saturating_sub(start) > TIMEOUT_US {
                debug!("wait_tdma_done: timeout");
                return Err(TpuError::Timeout);
            }
            spin_loop();
            continue;
        }

        self.clear_interrupt();
        return Ok(());
    }
}
/// Confirms that the submitted TDMA and TIU commands have completed.
///
/// * `tdma_cmd_id` – when non-zero, the upper 16 bits of the sync status
///   cached by `clear_interrupt` must be at least this value.
/// * `bd_cmd_id` – when non-zero, polls the TIU control register until bits
///   6..22 reach this value and bit 1 is set, then writes the value back
///   with bit 1 set.
///
/// # Errors
///
/// * [`TpuError::DeviceError`] – cached TDMA sync id is behind `tdma_cmd_id`.
/// * [`TpuError::Timeout`] – the TIU did not finish within `TIMEOUT_US`.
fn poll_cmdbuf_done(&mut self, bd_cmd_id: u32, tdma_cmd_id: u32) -> Result<(), TpuError> {
    if tdma_cmd_id > 0 {
        let tdma_done_id = self.last_tdma_sync_status >> 16;
        if tdma_done_id < tdma_cmd_id {
            debug!("poll_cmdbuf_done: tdma sync id mismatch");
            return Err(TpuError::DeviceError);
        }
    }

    if bd_cmd_id == 0 {
        return Ok(());
    }

    let start = self.kfns.now_us();
    loop {
        let ctrl_word = self.read32(self.tiu_base, registers::BD_CTRL_BASE_ADDR);
        let tiu_done_id = (ctrl_word >> 6) & 0xFFFF;
        let irq_raised = (ctrl_word & 2) != 0;

        if tiu_done_id >= bd_cmd_id && irq_raised {
            self.write32(self.tiu_base, registers::BD_CTRL_BASE_ADDR, ctrl_word | 2);
            return Ok(());
        }

        if self.kfns.now_us().saturating_sub(start) > TIMEOUT_US {
            debug!("poll_cmdbuf_done: tiu timeout");
            return Err(TpuError::Timeout);
        }
        spin_loop();
    }
}
/// Loads a 16-word descriptor into the TDMA command registers and starts it.
///
/// After [`Self::resync_cmd_id`], word `i` of `pio_array` is written to
/// `TDMA_CMD_ACCP0 + 4 * i`. Debug mode and DCM-disable are zeroed, the
/// interrupt mask is set to `TDMA_MASK_INIT`, and finally `TDMA_CTRL` is
/// written with the enable bit and a descriptor count of one.
fn set_tdma_pio(&mut self, pio_array: &[u32; 16]) {
    self.resync_cmd_id();

    let tdma = self.tdma_base;
    for (word_idx, word) in (0u32..).zip(pio_array.iter()) {
        let reg_off = registers::TDMA_CMD_ACCP0 + (word_idx * 4);
        self.write32(tdma, reg_off, *word);
    }

    self.write32(tdma, registers::TDMA_DEBUG_MODE, 0);
    self.write32(tdma, registers::TDMA_DCM_DISABLE, 0);
    self.write32(tdma, registers::TDMA_INT_MASK, registers::TDMA_MASK_INIT);

    let ctrl_word: u32 = (1u32 << registers::TDMA_CTRL_ENABLE_BIT)
        | (1u32 << registers::TDMA_CTRL_DESNUM_BIT)
        | (0x3u32 << registers::TDMA_CTRL_BURSTLEN_BIT)
        | (1u32 << registers::TDMA_CTRL_FORCE_1ARRAY)
        | (1u32 << registers::TDMA_CTRL_INTRA_CMD_OFF)
        | (1u32 << registers::TDMA_CTRL_64BYTE_ALIGN_EN);
    self.write32(tdma, registers::TDMA_CTRL, ctrl_word);
}
/// Forwards directly to `TpuPmu::enable` with an explicit buffer.
///
/// Unlike [`Self::pmu_enable`], nothing is validated here. Always returns
/// `Ok(())`.
fn pmu_enable_raw(&mut self, enable: bool, event: TpuPmuEvent, buf_paddr: PhysAddr, buf_size: u32) -> Result<(), TpuError> {
    let tdma = self.tdma_base;
    TpuPmu::enable(tdma, enable, event, buf_paddr, buf_size);

    Ok(())
}
/// Volatile 32-bit load from `base + offset` (offset in bytes).
fn read32(&self, base: NonNull<u8>, offset: u32) -> u32 {
    let byte_offset = offset as usize;
    // SAFETY: not checked here — this relies on `base` being one of the
    // register windows handed to `new`, and on `offset` naming a 4-byte
    // aligned register inside that window (assumption to confirm with the
    // code that maps the windows).
    unsafe {
        let reg = base.as_ptr().add(byte_offset).cast::<u32>();
        core::ptr::read_volatile(reg)
    }
}
/// Volatile 32-bit store of `value` to `base + offset` (offset in bytes).
fn write32(&self, base: NonNull<u8>, offset: u32, value: u32) {
    let byte_offset = offset as usize;
    // SAFETY: not checked here — this relies on `base` being one of the
    // register windows handed to `new`, and on `offset` naming a 4-byte
    // aligned register inside that window (assumption to confirm with the
    // code that maps the windows).
    unsafe {
        let reg = base.as_ptr().add(byte_offset).cast::<u32>();
        core::ptr::write_volatile(reg, value)
    }
}
}