1use core::{hint::spin_loop, ptr::NonNull};
4use log::debug;
5
6use crate::{
7 CpuSyncDesc, DmaHeader, KernelFns, PhysAddr, TpuConfig, TpuError,
8 platform::{TpuRegBackup, TdmaReg, TIMEOUT_US, TdmaSyncStatus, TiuCtrlStatus},
9 pmu::{TpuPmu, TpuPmuEvent},
10 registers::{self, TiuLaneNum},
11 TpuTdmaPioInfo,
12};
13
14pub struct TpuDevice<K: KernelFns> {
16 tdma_base: NonNull<u8>,
17 tiu_base: NonNull<u8>,
18 config: TpuConfig,
19 kfns: K,
20 last_tdma_int_mask: u32,
21 last_tdma_sync_status: u32,
22 reg_backup: TpuRegBackup,
23 sync_backup: bool,
24 suspend_handle_int: bool,
25}
26
27impl<K: KernelFns> TpuDevice<K> {
28 pub const fn new(
30 tdma_base: NonNull<u8>,
31 tiu_base: NonNull<u8>,
32 config: TpuConfig,
33 kfns: K,
34 ) -> Self {
35 Self {
36 tdma_base,
37 tiu_base,
38 config,
39 kfns,
40 last_tdma_int_mask: 0,
41 last_tdma_sync_status: 0,
42 reg_backup: TpuRegBackup {
43 tdma_int_mask: 0,
44 tdma_sync_status: 0,
45 tiu_ctrl: 0,
46 tdma_arraybase: [0; 10],
47 tdma_des_base: 0,
48 tdma_dbg_mode: 0,
49 tdma_dcm_disable: 0,
50 tdma_ctrl: 0,
51 },
52 sync_backup: false,
53 suspend_handle_int: false,
54 }
55 }
56
57 pub fn initialize(&mut self) -> Result<(), TpuError> {
58 debug!("TPU device initializing");
59 Ok(())
60 }
61
62 pub fn probe_setting(&mut self) {
63 self.sync_backup = false;
64 self.suspend_handle_int = false;
65 }
66
67 pub fn clear_interrupt(&mut self) -> u32 {
69 let reg_value = self.read32(self.tdma_base, registers::TDMA_INT_MASK);
70 self.last_tdma_int_mask = reg_value;
71 self.last_tdma_sync_status = self.read32(self.tdma_base, registers::TDMA_SYNC_STATUS);
72 let int_status = (reg_value >> 16) & !registers::TDMA_MASK_INIT;
73 self.write32(self.tdma_base, registers::TDMA_INT_MASK, 0xFFFF_0000);
74 self.sync_backup = true;
75 debug!("clear_interrupt: status=0x{:x}", int_status);
76 int_status
77 }
78
79 pub fn irq_handle(&mut self) -> u32 { self.clear_interrupt() }
80
81 pub fn program_tdma_descriptor(&mut self, desc_offset: u32, num_tdma: u32) {
83 debug!("program_tdma_descriptor: offset=0x{:x}, num={}", desc_offset, num_tdma);
84 self.write32(self.tdma_base, registers::TDMA_DES_BASE, desc_offset);
85 self.write32(self.tdma_base, registers::TDMA_DEBUG_MODE, 0);
86 self.write32(self.tdma_base, registers::TDMA_DCM_DISABLE, 0);
87 self.write32(self.tdma_base, registers::TDMA_INT_MASK, registers::TDMA_MASK_INIT);
88
89 let ctrl = (1 << registers::TDMA_CTRL_ENABLE_BIT)
90 | (1 << registers::TDMA_CTRL_MODESEL_BIT)
91 | (num_tdma << registers::TDMA_CTRL_DESNUM_BIT)
92 | (0x3 << registers::TDMA_CTRL_BURSTLEN_BIT)
93 | (1 << registers::TDMA_CTRL_FORCE_1ARRAY)
94 | (1 << registers::TDMA_CTRL_INTRA_CMD_OFF)
95 | (1 << registers::TDMA_CTRL_64BYTE_ALIGN_EN);
96 self.write32(self.tdma_base, registers::TDMA_CTRL, ctrl);
97 }
98
99 pub fn program_tiu_descriptor(&mut self, desc_offset: u64, lane: TiuLaneNum) {
101 debug!("program_tiu_descriptor: offset=0x{:x}, lane={:?}", desc_offset, lane);
102 let desc_addr = desc_offset << registers::BDC_ENGINE_CMD_ALIGNED_BIT;
103
104 self.write32(self.tiu_base, registers::BD_CTRL_BASE_ADDR + 0x4, desc_addr as u32);
105
106 let reg_val = self.read32(self.tiu_base, registers::BD_CTRL_BASE_ADDR + 0x8);
107 let upper = ((desc_addr >> 32) as u32) & 0xFF;
108 self.write32(self.tiu_base, registers::BD_CTRL_BASE_ADDR + 0x8, (reg_val & 0xFFFF_FF00) | upper);
109
110 let reg_val = self.read32(self.tiu_base, registers::BD_CTRL_BASE_ADDR + 0xC);
111 self.write32(self.tiu_base, registers::BD_CTRL_BASE_ADDR + 0xC, reg_val | (1 << 11));
112
113 let mut reg_val = self.read32(self.tiu_base, registers::BD_CTRL_BASE_ADDR);
114 reg_val &= !0x3FC0_0000;
115 reg_val |= (lane as u32) << registers::BD_LANE_NUM;
116 self.write32(self.tiu_base, registers::BD_CTRL_BASE_ADDR, reg_val);
117
118 let reg_val = self.read32(self.tiu_base, registers::BD_CTRL_BASE_ADDR);
119 self.write32(self.tiu_base, registers::BD_CTRL_BASE_ADDR,
120 reg_val | (1 << registers::BD_DES_ADDR_VLD) | (1 << registers::BD_INTR_ENABLE) | (1 << registers::BD_TPU_EN));
121 }
122
123 pub fn resync_cmd_id(&mut self) {
125 debug!("resync_cmd_id");
126 let reg_val = self.read32(self.tiu_base, registers::BD_CTRL_BASE_ADDR + 0xC);
127 self.write32(self.tiu_base, registers::BD_CTRL_BASE_ADDR + 0xC, reg_val | 0x1);
128 self.write32(self.tiu_base, registers::BD_CTRL_BASE_ADDR + 0xC, reg_val & !0x1);
129
130 let reg_val = self.read32(self.tiu_base, registers::BD_CTRL_BASE_ADDR);
131 self.write32(self.tiu_base, registers::BD_CTRL_BASE_ADDR,
132 reg_val & !((1 << registers::BD_TPU_EN) | (1 << registers::BD_DES_ADDR_VLD)));
133
134 let reg_val = self.read32(self.tiu_base, registers::BD_CTRL_BASE_ADDR);
135 self.write32(self.tiu_base, registers::BD_CTRL_BASE_ADDR, reg_val | (1 << 1));
136
137 self.write32(self.tdma_base, registers::TDMA_CTRL, 1 << registers::TDMA_CTRL_RESET_SYNCID_BIT);
138 self.write32(self.tdma_base, registers::TDMA_CTRL, 0);
139 self.write32(self.tdma_base, registers::TDMA_INT_MASK, 0xFFFF_0000);
140 }
141
142 pub fn suspend(&mut self) -> Result<(), TpuError> {
144 debug!("TPU suspending");
145 self.reg_backup.tdma_int_mask = self.read32(self.tdma_base, registers::TDMA_INT_MASK);
146 self.reg_backup.tdma_sync_status = self.read32(self.tdma_base, registers::TDMA_SYNC_STATUS);
147 self.reg_backup.tiu_ctrl = self.read32(self.tiu_base, registers::BD_CTRL_BASE_ADDR);
148
149 const ARRAYBASE_OFFSETS: [u32; 10] = [
150 registers::TDMA_ARRAYBASE0_L, registers::TDMA_ARRAYBASE1_L, registers::TDMA_ARRAYBASE2_L,
151 registers::TDMA_ARRAYBASE3_L, registers::TDMA_ARRAYBASE4_L, registers::TDMA_ARRAYBASE5_L,
152 registers::TDMA_ARRAYBASE6_L, registers::TDMA_ARRAYBASE7_L,
153 registers::TDMA_ARRAYBASE0_H, registers::TDMA_ARRAYBASE1_H,
154 ];
155 for (i, &off) in ARRAYBASE_OFFSETS.iter().enumerate() {
156 self.reg_backup.tdma_arraybase[i] = self.read32(self.tdma_base, off);
157 }
158
159 self.reg_backup.tdma_des_base = self.read32(self.tdma_base, registers::TDMA_DES_BASE);
160 self.reg_backup.tdma_dbg_mode = self.read32(self.tdma_base, registers::TDMA_DEBUG_MODE);
161 self.reg_backup.tdma_dcm_disable = self.read32(self.tdma_base, registers::TDMA_DCM_DISABLE);
162 self.reg_backup.tdma_ctrl = self.read32(self.tdma_base, registers::TDMA_CTRL);
163
164 if (self.reg_backup.tdma_ctrl & (1 << registers::TDMA_CTRL_ENABLE_BIT)) != 0 && !self.sync_backup {
165 let start = self.kfns.now_us();
166 while self.kfns.now_us().saturating_sub(start) < TIMEOUT_US {
167 let int_status = (self.read32(self.tdma_base, registers::TDMA_INT_MASK) >> 16) & !registers::TDMA_MASK_INIT;
168 if int_status != 0 { break; }
169 spin_loop();
170 }
171 self.sync_backup = true;
172 self.suspend_handle_int = true;
173 }
174
175 self.kfns.disable_clocks();
176 debug!("TPU suspended");
177 Ok(())
178 }
179
180 pub fn resume(&mut self) -> Result<(), TpuError> {
182 debug!("TPU resuming");
183 self.kfns.enable_clocks();
184 self.suspend_handle_int = false;
185
186 self.write32(self.tdma_base, registers::TDMA_INT_MASK, self.reg_backup.tdma_int_mask);
187 self.write32(self.tdma_base, registers::TDMA_SYNC_STATUS, self.reg_backup.tdma_sync_status);
188 self.write32(self.tiu_base, registers::BD_CTRL_BASE_ADDR, self.reg_backup.tiu_ctrl);
189
190 const ARRAYBASE_OFFSETS: [u32; 10] = [
191 registers::TDMA_ARRAYBASE0_L, registers::TDMA_ARRAYBASE1_L, registers::TDMA_ARRAYBASE2_L,
192 registers::TDMA_ARRAYBASE3_L, registers::TDMA_ARRAYBASE4_L, registers::TDMA_ARRAYBASE5_L,
193 registers::TDMA_ARRAYBASE6_L, registers::TDMA_ARRAYBASE7_L,
194 registers::TDMA_ARRAYBASE0_H, registers::TDMA_ARRAYBASE1_H,
195 ];
196 for (i, &off) in ARRAYBASE_OFFSETS.iter().enumerate() {
197 self.write32(self.tdma_base, off, self.reg_backup.tdma_arraybase[i]);
198 }
199
200 self.write32(self.tdma_base, registers::TDMA_DES_BASE, self.reg_backup.tdma_des_base);
201 self.write32(self.tdma_base, registers::TDMA_DEBUG_MODE, self.reg_backup.tdma_dbg_mode);
202 self.write32(self.tdma_base, registers::TDMA_DCM_DISABLE, self.reg_backup.tdma_dcm_disable);
203 debug!("TPU resumed");
204 Ok(())
205 }
206
207 pub fn open(&mut self) -> Result<(), TpuError> { Ok(()) }
208
209 pub fn reset(&mut self) -> Result<(), TpuError> {
210 debug!("TPU reset");
211 self.kfns.reset();
212 Ok(())
213 }
214
215 pub fn platform_init(&mut self) -> Result<(), TpuError> {
216 debug!("TPU platform_init");
217 self.kfns.enable_clocks();
218 self.reset()
219 }
220
221 pub fn platform_deinit(&mut self) {
222 debug!("TPU platform_deinit");
223 self.kfns.disable_clocks();
224 }
225
226 pub fn pmu_enable(&mut self, enable: bool, event: TpuPmuEvent) -> Result<(), TpuError> {
227 debug!("pmu_enable: enable={}, event={:?}", enable, event);
228 if enable && (self.config.pmu_buf_size == 0 || self.config.pmu_buf_paddr == 0) {
229 return Err(TpuError::InvalidParameter);
230 }
231 TpuPmu::enable(self.tdma_base, enable, event, self.config.pmu_buf_paddr, self.config.pmu_buf_size);
232 Ok(())
233 }
234
235 pub fn run_dmabuf(&mut self, dmabuf_paddr: PhysAddr, header: &DmaHeader, descs: &[CpuSyncDesc]) -> Result<(), TpuError> {
237 debug!("run_dmabuf: paddr=0x{:x}, desc_count={}", dmabuf_paddr, header.cpu_desc_count);
238
239 if !header.is_valid() {
240 debug!("run_dmabuf: invalid header");
241 return Err(TpuError::InvalidParameter);
242 }
243 if descs.len() < header.cpu_desc_count as usize {
244 debug!("run_dmabuf: desc count mismatch");
245 return Err(TpuError::InvalidParameter);
246 }
247
248 self.sync_backup = false;
249 self.suspend_handle_int = false;
250 self.set_array_base(header);
251
252 let pmu_enabled = header.pmubuf_offset != 0 && header.pmubuf_size != 0;
253 if pmu_enabled {
254 let pmu_paddr = dmabuf_paddr + header.pmubuf_offset as u64;
255 self.pmu_enable_raw(true, TpuPmuEvent::TdmaBw, pmu_paddr, header.pmubuf_size)?;
256 }
257
258 for (i, desc) in descs.iter().take(header.cpu_desc_count as usize).enumerate() {
259 let bd_num = desc.bd_count();
260 let tdma_num = desc.tdma_count();
261 debug!("run_dmabuf: desc[{}] bd={}, tdma={}", i, bd_num, tdma_num);
262
263 self.resync_cmd_id();
264
265 if bd_num > 0 {
266 self.program_tiu_descriptor(desc.offset_bd as u64, TiuLaneNum::Lane8);
267 }
268
269 if tdma_num > 0 {
270 self.program_tdma_descriptor(desc.offset_gdma, tdma_num);
271 self.wait_tdma_done()?;
272 }
273
274 if !self.suspend_handle_int {
275 self.poll_cmdbuf_done(bd_num, tdma_num)?;
276 }
277 }
278
279 if pmu_enabled {
280 self.pmu_enable_raw(false, TpuPmuEvent::TdmaBw, 0, 0)?;
281 if !self.suspend_handle_int {
282 self.wait_tdma_done()?;
283 }
284 }
285
286 debug!("run_dmabuf: completed");
287 Ok(())
288 }
289
290 pub fn run_pio(&mut self, info: &TpuTdmaPioInfo) -> Result<(), TpuError> {
292 debug!("run_pio: src=0x{:x}, dst=0x{:x}", info.paddr_src, info.paddr_dst);
293
294 let mut reg = TdmaReg::default();
295 reg.vld = 1;
296 reg.trans_dir = 2;
297 reg.src_base_addr_low = info.paddr_src as u32;
298 reg.src_base_addr_high = (info.paddr_src >> 32) as u32;
299 reg.dst_base_addr_low = info.paddr_dst as u32;
300 reg.dst_base_addr_high = (info.paddr_dst >> 32) as u32;
301 reg.eod = 1;
302 reg.intp_en = 1;
303
304 if info.enable_2d != 0 {
305 reg.trans_fmt = 0;
306 reg.src_n = 1;
307 reg.src_c = 1;
308 reg.src_h = info.h;
309 reg.src_w = info.w_bytes;
310 reg.dst_c = 1;
311 reg.dst_h = info.h;
312 reg.dst_w = info.w_bytes;
313 reg.src_n_stride = info.stride_bytes_src.saturating_mul(info.h);
314 reg.src_h_stride = info.stride_bytes_src;
315 reg.dst_n_stride = info.stride_bytes_dst.saturating_mul(info.h);
316 reg.dst_h_stride = info.stride_bytes_dst;
317 } else {
318 reg.trans_fmt = 1;
319 reg.src_n_stride = info.leng_bytes;
320 }
321
322 self.set_tdma_pio(®.emit());
323 self.wait_tdma_done()
324 }
325
326 pub fn tdma_base(&self) -> NonNull<u8> { self.tdma_base }
327 pub fn tiu_base(&self) -> NonNull<u8> { self.tiu_base }
328
329 pub fn get_tdma_sync_status(&self) -> TdmaSyncStatus {
330 TdmaSyncStatus::from_raw(self.read32(self.tdma_base, registers::TDMA_SYNC_STATUS))
331 }
332
333 pub fn get_tiu_ctrl_status(&self) -> TiuCtrlStatus {
334 TiuCtrlStatus::from_raw(self.read32(self.tiu_base, registers::BD_CTRL_BASE_ADDR))
335 }
336
337 pub fn is_idle(&self) -> bool {
338 let tiu = self.get_tiu_ctrl_status();
339 self.get_tdma_sync_status().is_all_idle() && (tiu.interrupt() || !tiu.enabled())
340 }
341
342 pub fn wait_idle(&mut self) -> Result<(), TpuError> {
343 let start = self.kfns.now_us();
344 while !self.is_idle() {
345 if self.kfns.now_us().saturating_sub(start) > TIMEOUT_US {
346 debug!("wait_idle: timeout");
347 return Err(TpuError::Timeout);
348 }
349 spin_loop();
350 }
351 Ok(())
352 }
353
354 pub fn emergency_stop(&mut self) {
355 debug!("emergency_stop");
356 self.write32(self.tdma_base, registers::TDMA_CTRL, 0);
357 let reg_val = self.read32(self.tiu_base, registers::BD_CTRL_BASE_ADDR);
358 self.write32(self.tiu_base, registers::BD_CTRL_BASE_ADDR,
359 reg_val & !((1 << registers::BD_TPU_EN) | (1 << registers::BD_DES_ADDR_VLD)));
360 self.write32(self.tdma_base, registers::TDMA_INT_MASK, 0xFFFF_0000);
361 }
362
363 fn set_array_base(&mut self, header: &DmaHeader) {
366 let bases = header.arraybase_l();
367 const OFFSETS: [u32; 8] = [
368 registers::TDMA_ARRAYBASE0_L, registers::TDMA_ARRAYBASE1_L, registers::TDMA_ARRAYBASE2_L,
369 registers::TDMA_ARRAYBASE3_L, registers::TDMA_ARRAYBASE4_L, registers::TDMA_ARRAYBASE5_L,
370 registers::TDMA_ARRAYBASE6_L, registers::TDMA_ARRAYBASE7_L,
371 ];
372 for (off, val) in OFFSETS.iter().zip(bases.iter()) {
373 self.write32(self.tdma_base, *off, *val);
374 }
375 self.write32(self.tdma_base, registers::TDMA_ARRAYBASE0_H, 0);
376 self.write32(self.tdma_base, registers::TDMA_ARRAYBASE1_H, 0);
377 }
378
379 fn wait_tdma_done(&mut self) -> Result<(), TpuError> {
380 let start = self.kfns.now_us();
381 loop {
382 let int_status = (self.read32(self.tdma_base, registers::TDMA_INT_MASK) >> 16) & !registers::TDMA_MASK_INIT;
383 if int_status != 0 {
384 self.clear_interrupt();
385 return Ok(());
386 }
387 if self.kfns.now_us().saturating_sub(start) > TIMEOUT_US {
388 debug!("wait_tdma_done: timeout");
389 return Err(TpuError::Timeout);
390 }
391 spin_loop();
392 }
393 }
394
395 fn poll_cmdbuf_done(&mut self, bd_cmd_id: u32, tdma_cmd_id: u32) -> Result<(), TpuError> {
396 if tdma_cmd_id > 0 && (self.last_tdma_sync_status >> 16) < tdma_cmd_id {
397 debug!("poll_cmdbuf_done: tdma sync id mismatch");
398 return Err(TpuError::DeviceError);
399 }
400
401 if bd_cmd_id > 0 {
402 let start = self.kfns.now_us();
403 loop {
404 let reg_val = self.read32(self.tiu_base, registers::BD_CTRL_BASE_ADDR);
405 let done_id = (reg_val >> 6) & 0xFFFF;
406 let intr = (reg_val & 2) != 0;
407 if done_id >= bd_cmd_id && intr {
408 self.write32(self.tiu_base, registers::BD_CTRL_BASE_ADDR, reg_val | 2);
409 break;
410 }
411 if self.kfns.now_us().saturating_sub(start) > TIMEOUT_US {
412 debug!("poll_cmdbuf_done: tiu timeout");
413 return Err(TpuError::Timeout);
414 }
415 spin_loop();
416 }
417 }
418 Ok(())
419 }
420
421 fn set_tdma_pio(&mut self, pio_array: &[u32; 16]) {
422 self.resync_cmd_id();
423 for (i, value) in pio_array.iter().enumerate() {
424 self.write32(self.tdma_base, registers::TDMA_CMD_ACCP0 + (i as u32 * 4), *value);
425 }
426 self.write32(self.tdma_base, registers::TDMA_DEBUG_MODE, 0);
427 self.write32(self.tdma_base, registers::TDMA_DCM_DISABLE, 0);
428 self.write32(self.tdma_base, registers::TDMA_INT_MASK, registers::TDMA_MASK_INIT);
429
430 let ctrl = (1 << registers::TDMA_CTRL_ENABLE_BIT)
431 | (1 << registers::TDMA_CTRL_DESNUM_BIT)
432 | (0x3 << registers::TDMA_CTRL_BURSTLEN_BIT)
433 | (1 << registers::TDMA_CTRL_FORCE_1ARRAY)
434 | (1 << registers::TDMA_CTRL_INTRA_CMD_OFF)
435 | (1 << registers::TDMA_CTRL_64BYTE_ALIGN_EN);
436 self.write32(self.tdma_base, registers::TDMA_CTRL, ctrl);
437 }
438
439 fn pmu_enable_raw(&mut self, enable: bool, event: TpuPmuEvent, buf_paddr: PhysAddr, buf_size: u32) -> Result<(), TpuError> {
440 TpuPmu::enable(self.tdma_base, enable, event, buf_paddr, buf_size);
441 Ok(())
442 }
443
444 fn read32(&self, base: NonNull<u8>, offset: u32) -> u32 {
445 unsafe { core::ptr::read_volatile(base.as_ptr().add(offset as usize) as *const u32) }
446 }
447
448 fn write32(&self, base: NonNull<u8>, offset: u32, value: u32) {
449 unsafe { core::ptr::write_volatile(base.as_ptr().add(offset as usize) as *mut u32, value) }
450 }
451}