Skip to main content

tpu_sg2002/
device.rs

1//! TPU device implementation for SG2002.
2
3use core::{hint::spin_loop, ptr::NonNull};
4use log::debug;
5
6use crate::{
7    CpuSyncDesc, DmaHeader, KernelFns, PhysAddr, TpuConfig, TpuError,
8    platform::{TpuRegBackup, TdmaReg, TIMEOUT_US, TdmaSyncStatus, TiuCtrlStatus},
9    pmu::{TpuPmu, TpuPmuEvent},
10    registers::{self, TiuLaneNum},
11    TpuTdmaPioInfo,
12};
13
14/// TPU device handle.
15pub struct TpuDevice<K: KernelFns> {
16    tdma_base: NonNull<u8>,
17    tiu_base: NonNull<u8>,
18    config: TpuConfig,
19    kfns: K,
20    last_tdma_int_mask: u32,
21    last_tdma_sync_status: u32,
22    reg_backup: TpuRegBackup,
23    sync_backup: bool,
24    suspend_handle_int: bool,
25}
26
27impl<K: KernelFns> TpuDevice<K> {
28    /// Create a new TPU device handle.
29    pub const fn new(
30        tdma_base: NonNull<u8>,
31        tiu_base: NonNull<u8>,
32        config: TpuConfig,
33        kfns: K,
34    ) -> Self {
35        Self {
36            tdma_base,
37            tiu_base,
38            config,
39            kfns,
40            last_tdma_int_mask: 0,
41            last_tdma_sync_status: 0,
42            reg_backup: TpuRegBackup {
43                tdma_int_mask: 0,
44                tdma_sync_status: 0,
45                tiu_ctrl: 0,
46                tdma_arraybase: [0; 10],
47                tdma_des_base: 0,
48                tdma_dbg_mode: 0,
49                tdma_dcm_disable: 0,
50                tdma_ctrl: 0,
51            },
52            sync_backup: false,
53            suspend_handle_int: false,
54        }
55    }
56
57    pub fn initialize(&mut self) -> Result<(), TpuError> {
58        debug!("TPU device initializing");
59        Ok(())
60    }
61
62    pub fn probe_setting(&mut self) {
63        self.sync_backup = false;
64        self.suspend_handle_int = false;
65    }
66
67    /// Clear TDMA interrupt and return status bits.
68    pub fn clear_interrupt(&mut self) -> u32 {
69        let reg_value = self.read32(self.tdma_base, registers::TDMA_INT_MASK);
70        self.last_tdma_int_mask = reg_value;
71        self.last_tdma_sync_status = self.read32(self.tdma_base, registers::TDMA_SYNC_STATUS);
72        let int_status = (reg_value >> 16) & !registers::TDMA_MASK_INIT;
73        self.write32(self.tdma_base, registers::TDMA_INT_MASK, 0xFFFF_0000);
74        self.sync_backup = true;
75        debug!("clear_interrupt: status=0x{:x}", int_status);
76        int_status
77    }
78
79    pub fn irq_handle(&mut self) -> u32 { self.clear_interrupt() }
80
81    /// Program TDMA descriptor base and fire.
82    pub fn program_tdma_descriptor(&mut self, desc_offset: u32, num_tdma: u32) {
83        debug!("program_tdma_descriptor: offset=0x{:x}, num={}", desc_offset, num_tdma);
84        self.write32(self.tdma_base, registers::TDMA_DES_BASE, desc_offset);
85        self.write32(self.tdma_base, registers::TDMA_DEBUG_MODE, 0);
86        self.write32(self.tdma_base, registers::TDMA_DCM_DISABLE, 0);
87        self.write32(self.tdma_base, registers::TDMA_INT_MASK, registers::TDMA_MASK_INIT);
88
89        let ctrl = (1 << registers::TDMA_CTRL_ENABLE_BIT)
90            | (1 << registers::TDMA_CTRL_MODESEL_BIT)
91            | (num_tdma << registers::TDMA_CTRL_DESNUM_BIT)
92            | (0x3 << registers::TDMA_CTRL_BURSTLEN_BIT)
93            | (1 << registers::TDMA_CTRL_FORCE_1ARRAY)
94            | (1 << registers::TDMA_CTRL_INTRA_CMD_OFF)
95            | (1 << registers::TDMA_CTRL_64BYTE_ALIGN_EN);
96        self.write32(self.tdma_base, registers::TDMA_CTRL, ctrl);
97    }
98
99    /// Program TIU descriptor base and fire.
100    pub fn program_tiu_descriptor(&mut self, desc_offset: u64, lane: TiuLaneNum) {
101        debug!("program_tiu_descriptor: offset=0x{:x}, lane={:?}", desc_offset, lane);
102        let desc_addr = desc_offset << registers::BDC_ENGINE_CMD_ALIGNED_BIT;
103
104        self.write32(self.tiu_base, registers::BD_CTRL_BASE_ADDR + 0x4, desc_addr as u32);
105
106        let reg_val = self.read32(self.tiu_base, registers::BD_CTRL_BASE_ADDR + 0x8);
107        let upper = ((desc_addr >> 32) as u32) & 0xFF;
108        self.write32(self.tiu_base, registers::BD_CTRL_BASE_ADDR + 0x8, (reg_val & 0xFFFF_FF00) | upper);
109
110        let reg_val = self.read32(self.tiu_base, registers::BD_CTRL_BASE_ADDR + 0xC);
111        self.write32(self.tiu_base, registers::BD_CTRL_BASE_ADDR + 0xC, reg_val | (1 << 11));
112
113        let mut reg_val = self.read32(self.tiu_base, registers::BD_CTRL_BASE_ADDR);
114        reg_val &= !0x3FC0_0000;
115        reg_val |= (lane as u32) << registers::BD_LANE_NUM;
116        self.write32(self.tiu_base, registers::BD_CTRL_BASE_ADDR, reg_val);
117
118        let reg_val = self.read32(self.tiu_base, registers::BD_CTRL_BASE_ADDR);
119        self.write32(self.tiu_base, registers::BD_CTRL_BASE_ADDR,
120            reg_val | (1 << registers::BD_DES_ADDR_VLD) | (1 << registers::BD_INTR_ENABLE) | (1 << registers::BD_TPU_EN));
121    }
122
123    /// Reset TIU/TDMA sync IDs.
124    pub fn resync_cmd_id(&mut self) {
125        debug!("resync_cmd_id");
126        let reg_val = self.read32(self.tiu_base, registers::BD_CTRL_BASE_ADDR + 0xC);
127        self.write32(self.tiu_base, registers::BD_CTRL_BASE_ADDR + 0xC, reg_val | 0x1);
128        self.write32(self.tiu_base, registers::BD_CTRL_BASE_ADDR + 0xC, reg_val & !0x1);
129
130        let reg_val = self.read32(self.tiu_base, registers::BD_CTRL_BASE_ADDR);
131        self.write32(self.tiu_base, registers::BD_CTRL_BASE_ADDR,
132            reg_val & !((1 << registers::BD_TPU_EN) | (1 << registers::BD_DES_ADDR_VLD)));
133
134        let reg_val = self.read32(self.tiu_base, registers::BD_CTRL_BASE_ADDR);
135        self.write32(self.tiu_base, registers::BD_CTRL_BASE_ADDR, reg_val | (1 << 1));
136
137        self.write32(self.tdma_base, registers::TDMA_CTRL, 1 << registers::TDMA_CTRL_RESET_SYNCID_BIT);
138        self.write32(self.tdma_base, registers::TDMA_CTRL, 0);
139        self.write32(self.tdma_base, registers::TDMA_INT_MASK, 0xFFFF_0000);
140    }
141
142    /// Suspend TPU.
143    pub fn suspend(&mut self) -> Result<(), TpuError> {
144        debug!("TPU suspending");
145        self.reg_backup.tdma_int_mask = self.read32(self.tdma_base, registers::TDMA_INT_MASK);
146        self.reg_backup.tdma_sync_status = self.read32(self.tdma_base, registers::TDMA_SYNC_STATUS);
147        self.reg_backup.tiu_ctrl = self.read32(self.tiu_base, registers::BD_CTRL_BASE_ADDR);
148
149        const ARRAYBASE_OFFSETS: [u32; 10] = [
150            registers::TDMA_ARRAYBASE0_L, registers::TDMA_ARRAYBASE1_L, registers::TDMA_ARRAYBASE2_L,
151            registers::TDMA_ARRAYBASE3_L, registers::TDMA_ARRAYBASE4_L, registers::TDMA_ARRAYBASE5_L,
152            registers::TDMA_ARRAYBASE6_L, registers::TDMA_ARRAYBASE7_L,
153            registers::TDMA_ARRAYBASE0_H, registers::TDMA_ARRAYBASE1_H,
154        ];
155        for (i, &off) in ARRAYBASE_OFFSETS.iter().enumerate() {
156            self.reg_backup.tdma_arraybase[i] = self.read32(self.tdma_base, off);
157        }
158
159        self.reg_backup.tdma_des_base = self.read32(self.tdma_base, registers::TDMA_DES_BASE);
160        self.reg_backup.tdma_dbg_mode = self.read32(self.tdma_base, registers::TDMA_DEBUG_MODE);
161        self.reg_backup.tdma_dcm_disable = self.read32(self.tdma_base, registers::TDMA_DCM_DISABLE);
162        self.reg_backup.tdma_ctrl = self.read32(self.tdma_base, registers::TDMA_CTRL);
163
164        if (self.reg_backup.tdma_ctrl & (1 << registers::TDMA_CTRL_ENABLE_BIT)) != 0 && !self.sync_backup {
165            let start = self.kfns.now_us();
166            while self.kfns.now_us().saturating_sub(start) < TIMEOUT_US {
167                let int_status = (self.read32(self.tdma_base, registers::TDMA_INT_MASK) >> 16) & !registers::TDMA_MASK_INIT;
168                if int_status != 0 { break; }
169                spin_loop();
170            }
171            self.sync_backup = true;
172            self.suspend_handle_int = true;
173        }
174
175        self.kfns.disable_clocks();
176        debug!("TPU suspended");
177        Ok(())
178    }
179
180    /// Resume TPU.
181    pub fn resume(&mut self) -> Result<(), TpuError> {
182        debug!("TPU resuming");
183        self.kfns.enable_clocks();
184        self.suspend_handle_int = false;
185
186        self.write32(self.tdma_base, registers::TDMA_INT_MASK, self.reg_backup.tdma_int_mask);
187        self.write32(self.tdma_base, registers::TDMA_SYNC_STATUS, self.reg_backup.tdma_sync_status);
188        self.write32(self.tiu_base, registers::BD_CTRL_BASE_ADDR, self.reg_backup.tiu_ctrl);
189
190        const ARRAYBASE_OFFSETS: [u32; 10] = [
191            registers::TDMA_ARRAYBASE0_L, registers::TDMA_ARRAYBASE1_L, registers::TDMA_ARRAYBASE2_L,
192            registers::TDMA_ARRAYBASE3_L, registers::TDMA_ARRAYBASE4_L, registers::TDMA_ARRAYBASE5_L,
193            registers::TDMA_ARRAYBASE6_L, registers::TDMA_ARRAYBASE7_L,
194            registers::TDMA_ARRAYBASE0_H, registers::TDMA_ARRAYBASE1_H,
195        ];
196        for (i, &off) in ARRAYBASE_OFFSETS.iter().enumerate() {
197            self.write32(self.tdma_base, off, self.reg_backup.tdma_arraybase[i]);
198        }
199
200        self.write32(self.tdma_base, registers::TDMA_DES_BASE, self.reg_backup.tdma_des_base);
201        self.write32(self.tdma_base, registers::TDMA_DEBUG_MODE, self.reg_backup.tdma_dbg_mode);
202        self.write32(self.tdma_base, registers::TDMA_DCM_DISABLE, self.reg_backup.tdma_dcm_disable);
203        debug!("TPU resumed");
204        Ok(())
205    }
206
207    pub fn open(&mut self) -> Result<(), TpuError> { Ok(()) }
208
209    pub fn reset(&mut self) -> Result<(), TpuError> {
210        debug!("TPU reset");
211        self.kfns.reset();
212        Ok(())
213    }
214
215    pub fn platform_init(&mut self) -> Result<(), TpuError> {
216        debug!("TPU platform_init");
217        self.kfns.enable_clocks();
218        self.reset()
219    }
220
221    pub fn platform_deinit(&mut self) {
222        debug!("TPU platform_deinit");
223        self.kfns.disable_clocks();
224    }
225
226    pub fn pmu_enable(&mut self, enable: bool, event: TpuPmuEvent) -> Result<(), TpuError> {
227        debug!("pmu_enable: enable={}, event={:?}", enable, event);
228        if enable && (self.config.pmu_buf_size == 0 || self.config.pmu_buf_paddr == 0) {
229            return Err(TpuError::InvalidParameter);
230        }
231        TpuPmu::enable(self.tdma_base, enable, event, self.config.pmu_buf_paddr, self.config.pmu_buf_size);
232        Ok(())
233    }
234
235    /// Run a DMA buffer.
236    pub fn run_dmabuf(&mut self, dmabuf_paddr: PhysAddr, header: &DmaHeader, descs: &[CpuSyncDesc]) -> Result<(), TpuError> {
237        debug!("run_dmabuf: paddr=0x{:x}, desc_count={}", dmabuf_paddr, header.cpu_desc_count);
238        
239        if !header.is_valid() {
240            debug!("run_dmabuf: invalid header");
241            return Err(TpuError::InvalidParameter);
242        }
243        if descs.len() < header.cpu_desc_count as usize {
244            debug!("run_dmabuf: desc count mismatch");
245            return Err(TpuError::InvalidParameter);
246        }
247
248        self.sync_backup = false;
249        self.suspend_handle_int = false;
250        self.set_array_base(header);
251
252        let pmu_enabled = header.pmubuf_offset != 0 && header.pmubuf_size != 0;
253        if pmu_enabled {
254            let pmu_paddr = dmabuf_paddr + header.pmubuf_offset as u64;
255            self.pmu_enable_raw(true, TpuPmuEvent::TdmaBw, pmu_paddr, header.pmubuf_size)?;
256        }
257
258        for (i, desc) in descs.iter().take(header.cpu_desc_count as usize).enumerate() {
259            let bd_num = desc.bd_count();
260            let tdma_num = desc.tdma_count();
261            debug!("run_dmabuf: desc[{}] bd={}, tdma={}", i, bd_num, tdma_num);
262
263            self.resync_cmd_id();
264
265            if bd_num > 0 {
266                self.program_tiu_descriptor(desc.offset_bd as u64, TiuLaneNum::Lane8);
267            }
268
269            if tdma_num > 0 {
270                self.program_tdma_descriptor(desc.offset_gdma, tdma_num);
271                self.wait_tdma_done()?;
272            }
273
274            if !self.suspend_handle_int {
275                self.poll_cmdbuf_done(bd_num, tdma_num)?;
276            }
277        }
278
279        if pmu_enabled {
280            self.pmu_enable_raw(false, TpuPmuEvent::TdmaBw, 0, 0)?;
281            if !self.suspend_handle_int {
282                self.wait_tdma_done()?;
283            }
284        }
285
286        debug!("run_dmabuf: completed");
287        Ok(())
288    }
289
290    /// Run a TDMA PIO transfer.
291    pub fn run_pio(&mut self, info: &TpuTdmaPioInfo) -> Result<(), TpuError> {
292        debug!("run_pio: src=0x{:x}, dst=0x{:x}", info.paddr_src, info.paddr_dst);
293        
294        let mut reg = TdmaReg::default();
295        reg.vld = 1;
296        reg.trans_dir = 2;
297        reg.src_base_addr_low = info.paddr_src as u32;
298        reg.src_base_addr_high = (info.paddr_src >> 32) as u32;
299        reg.dst_base_addr_low = info.paddr_dst as u32;
300        reg.dst_base_addr_high = (info.paddr_dst >> 32) as u32;
301        reg.eod = 1;
302        reg.intp_en = 1;
303
304        if info.enable_2d != 0 {
305            reg.trans_fmt = 0;
306            reg.src_n = 1;
307            reg.src_c = 1;
308            reg.src_h = info.h;
309            reg.src_w = info.w_bytes;
310            reg.dst_c = 1;
311            reg.dst_h = info.h;
312            reg.dst_w = info.w_bytes;
313            reg.src_n_stride = info.stride_bytes_src.saturating_mul(info.h);
314            reg.src_h_stride = info.stride_bytes_src;
315            reg.dst_n_stride = info.stride_bytes_dst.saturating_mul(info.h);
316            reg.dst_h_stride = info.stride_bytes_dst;
317        } else {
318            reg.trans_fmt = 1;
319            reg.src_n_stride = info.leng_bytes;
320        }
321
322        self.set_tdma_pio(&reg.emit());
323        self.wait_tdma_done()
324    }
325
326    pub fn tdma_base(&self) -> NonNull<u8> { self.tdma_base }
327    pub fn tiu_base(&self) -> NonNull<u8> { self.tiu_base }
328
329    pub fn get_tdma_sync_status(&self) -> TdmaSyncStatus {
330        TdmaSyncStatus::from_raw(self.read32(self.tdma_base, registers::TDMA_SYNC_STATUS))
331    }
332
333    pub fn get_tiu_ctrl_status(&self) -> TiuCtrlStatus {
334        TiuCtrlStatus::from_raw(self.read32(self.tiu_base, registers::BD_CTRL_BASE_ADDR))
335    }
336
337    pub fn is_idle(&self) -> bool {
338        let tiu = self.get_tiu_ctrl_status();
339        self.get_tdma_sync_status().is_all_idle() && (tiu.interrupt() || !tiu.enabled())
340    }
341
342    pub fn wait_idle(&mut self) -> Result<(), TpuError> {
343        let start = self.kfns.now_us();
344        while !self.is_idle() {
345            if self.kfns.now_us().saturating_sub(start) > TIMEOUT_US {
346                debug!("wait_idle: timeout");
347                return Err(TpuError::Timeout);
348            }
349            spin_loop();
350        }
351        Ok(())
352    }
353
354    pub fn emergency_stop(&mut self) {
355        debug!("emergency_stop");
356        self.write32(self.tdma_base, registers::TDMA_CTRL, 0);
357        let reg_val = self.read32(self.tiu_base, registers::BD_CTRL_BASE_ADDR);
358        self.write32(self.tiu_base, registers::BD_CTRL_BASE_ADDR,
359            reg_val & !((1 << registers::BD_TPU_EN) | (1 << registers::BD_DES_ADDR_VLD)));
360        self.write32(self.tdma_base, registers::TDMA_INT_MASK, 0xFFFF_0000);
361    }
362
363    // Private helpers
364
365    fn set_array_base(&mut self, header: &DmaHeader) {
366        let bases = header.arraybase_l();
367        const OFFSETS: [u32; 8] = [
368            registers::TDMA_ARRAYBASE0_L, registers::TDMA_ARRAYBASE1_L, registers::TDMA_ARRAYBASE2_L,
369            registers::TDMA_ARRAYBASE3_L, registers::TDMA_ARRAYBASE4_L, registers::TDMA_ARRAYBASE5_L,
370            registers::TDMA_ARRAYBASE6_L, registers::TDMA_ARRAYBASE7_L,
371        ];
372        for (off, val) in OFFSETS.iter().zip(bases.iter()) {
373            self.write32(self.tdma_base, *off, *val);
374        }
375        self.write32(self.tdma_base, registers::TDMA_ARRAYBASE0_H, 0);
376        self.write32(self.tdma_base, registers::TDMA_ARRAYBASE1_H, 0);
377    }
378
379    fn wait_tdma_done(&mut self) -> Result<(), TpuError> {
380        let start = self.kfns.now_us();
381        loop {
382            let int_status = (self.read32(self.tdma_base, registers::TDMA_INT_MASK) >> 16) & !registers::TDMA_MASK_INIT;
383            if int_status != 0 {
384                self.clear_interrupt();
385                return Ok(());
386            }
387            if self.kfns.now_us().saturating_sub(start) > TIMEOUT_US {
388                debug!("wait_tdma_done: timeout");
389                return Err(TpuError::Timeout);
390            }
391            spin_loop();
392        }
393    }
394
395    fn poll_cmdbuf_done(&mut self, bd_cmd_id: u32, tdma_cmd_id: u32) -> Result<(), TpuError> {
396        if tdma_cmd_id > 0 && (self.last_tdma_sync_status >> 16) < tdma_cmd_id {
397            debug!("poll_cmdbuf_done: tdma sync id mismatch");
398            return Err(TpuError::DeviceError);
399        }
400
401        if bd_cmd_id > 0 {
402            let start = self.kfns.now_us();
403            loop {
404                let reg_val = self.read32(self.tiu_base, registers::BD_CTRL_BASE_ADDR);
405                let done_id = (reg_val >> 6) & 0xFFFF;
406                let intr = (reg_val & 2) != 0;
407                if done_id >= bd_cmd_id && intr {
408                    self.write32(self.tiu_base, registers::BD_CTRL_BASE_ADDR, reg_val | 2);
409                    break;
410                }
411                if self.kfns.now_us().saturating_sub(start) > TIMEOUT_US {
412                    debug!("poll_cmdbuf_done: tiu timeout");
413                    return Err(TpuError::Timeout);
414                }
415                spin_loop();
416            }
417        }
418        Ok(())
419    }
420
421    fn set_tdma_pio(&mut self, pio_array: &[u32; 16]) {
422        self.resync_cmd_id();
423        for (i, value) in pio_array.iter().enumerate() {
424            self.write32(self.tdma_base, registers::TDMA_CMD_ACCP0 + (i as u32 * 4), *value);
425        }
426        self.write32(self.tdma_base, registers::TDMA_DEBUG_MODE, 0);
427        self.write32(self.tdma_base, registers::TDMA_DCM_DISABLE, 0);
428        self.write32(self.tdma_base, registers::TDMA_INT_MASK, registers::TDMA_MASK_INIT);
429
430        let ctrl = (1 << registers::TDMA_CTRL_ENABLE_BIT)
431            | (1 << registers::TDMA_CTRL_DESNUM_BIT)
432            | (0x3 << registers::TDMA_CTRL_BURSTLEN_BIT)
433            | (1 << registers::TDMA_CTRL_FORCE_1ARRAY)
434            | (1 << registers::TDMA_CTRL_INTRA_CMD_OFF)
435            | (1 << registers::TDMA_CTRL_64BYTE_ALIGN_EN);
436        self.write32(self.tdma_base, registers::TDMA_CTRL, ctrl);
437    }
438
439    fn pmu_enable_raw(&mut self, enable: bool, event: TpuPmuEvent, buf_paddr: PhysAddr, buf_size: u32) -> Result<(), TpuError> {
440        TpuPmu::enable(self.tdma_base, enable, event, buf_paddr, buf_size);
441        Ok(())
442    }
443
444    fn read32(&self, base: NonNull<u8>, offset: u32) -> u32 {
445        unsafe { core::ptr::read_volatile(base.as_ptr().add(offset as usize) as *const u32) }
446    }
447
448    fn write32(&self, base: NonNull<u8>, offset: u32, value: u32) {
449        unsafe { core::ptr::write_volatile(base.as_ptr().add(offset as usize) as *mut u32, value) }
450    }
451}