// edgefirst_tensor/dma.rs

// SPDX-FileCopyrightText: Copyright 2025 Au-Zone Technologies
// SPDX-License-Identifier: Apache-2.0

use crate::{
    error::{Error, Result},
    TensorMap, TensorMapTrait, TensorMemory, TensorTrait,
};
use log::{trace, warn};
use num_traits::Num;
use std::{
    ffi::c_void,
    fmt,
    num::NonZero,
    ops::{Deref, DerefMut},
    os::fd::{AsRawFd, OwnedFd},
    ptr::NonNull,
    sync::{Arc, Mutex},
};

/// A tensor backed by DMA (Direct Memory Access) memory.
///
/// On Linux, for self-allocated (dma_heap) buffers a DRM PRIME attachment is
/// created to enable CPU cache coherency via `DMA_BUF_IOCTL_SYNC`. Without an
/// active attachment, sync ioctls are no-ops on cached CMA heaps.
///
/// For imported (foreign) DMA-BUF fds — e.g. those exported by the Neutron
/// NPU driver — no DRM attachment is created. Cache coherency for foreign
/// buffers is the responsibility of the buffer owner (the kernel driver).
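///
/// # Example
///
/// A minimal usage sketch. It is marked `ignore` because the crate-root
/// re-exports of `DmaTensor`, `TensorTrait`, and `TensorMapTrait` are
/// assumptions here, and running it requires a `/dev/dma_heap` device:
///
/// ```ignore
/// use edgefirst_tensor::{DmaTensor, TensorMapTrait, TensorTrait};
///
/// let tensor = DmaTensor::<u8>::new(&[480, 640, 3], None).unwrap();
/// {
///     let mut map = tensor.map().unwrap(); // mmap + DMA_BUF_IOCTL_SYNC(START)
///     map.as_mut_slice().fill(0);          // CPU-visible write
/// } // dropping the map unmaps and issues DMA_BUF_IOCTL_SYNC(END)
/// let fd = tensor.clone_fd().unwrap();     // dup'd fd to hand to a device
/// ```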
#[derive(Debug)]
pub struct DmaTensor<T>
where
    T: Num + Clone + fmt::Debug + Send + Sync,
{
    pub name: String,
    pub fd: OwnedFd,
    pub shape: Vec<usize>,
    pub _marker: std::marker::PhantomData<T>,
    #[cfg(target_os = "linux")]
    _drm_attachment: Option<crate::dmabuf::DrmAttachment>,
    identity: crate::BufferIdentity,
    /// Actual buffer size in bytes (from fstat at creation time).
    /// May be larger than shape.product() * sizeof(T) for externally
    /// allocated buffers with row padding.
    pub(crate) buf_size: usize,
    /// Byte offset into the DMA buffer where the tensor data begins.
    /// Set via `Tensor::set_plane_offset` for sub-region imports.
    pub(crate) mmap_offset: usize,
    /// Whether this tensor was created via `from_fd()` (imported from an
    /// external allocator). Propagated through `try_clone()` so that DRM
    /// PRIME import failures are logged at DEBUG rather than WARN, and
    /// used to gate CPU mapping of strided tensors: self-allocated DMA
    /// tensors with pitch padding (via `new_with_byte_size`) are
    /// mappable because HAL owns the layout, but foreign V4L2/GStreamer
    /// strided imports are not — the external allocator defines the
    /// layout and HAL cannot validate what the caller expects.
    #[cfg(target_os = "linux")]
    pub(crate) is_imported: bool,
}

unsafe impl<T> Send for DmaTensor<T> where T: Num + Clone + fmt::Debug + Send + Sync {}
unsafe impl<T> Sync for DmaTensor<T> where T: Num + Clone + fmt::Debug + Send + Sync {}

impl<T> TensorTrait<T> for DmaTensor<T>
where
    T: Num + Clone + fmt::Debug + Send + Sync,
{
    #[cfg(target_os = "linux")]
    fn new(shape: &[usize], name: Option<&str>) -> Result<Self> {
        use log::debug;
        use nix::sys::stat::fstat;

        let logical_size = shape.iter().product::<usize>() * std::mem::size_of::<T>();
        let name = match name {
            Some(name) => name.to_owned(),
            None => {
                let uuid = uuid::Uuid::new_v4().as_simple().to_string();
                format!("/{}", &uuid[..16])
            }
        };

        let heap = match dma_heap::Heap::new(dma_heap::HeapKind::Cma) {
            Ok(heap) => heap,
            Err(_) => dma_heap::Heap::new(dma_heap::HeapKind::System)?,
        };

        let dma_fd = heap.allocate(logical_size)?;
        let stat = fstat(&dma_fd)?;
        debug!("DMA memory stat: {stat:?}");
        let buf_size = if stat.st_size > 0 {
            std::cmp::max(stat.st_size as usize, logical_size)
        } else {
            logical_size
        };

        let drm_attachment = crate::dmabuf::DrmAttachment::new(&dma_fd, false);

        Ok(DmaTensor::<T> {
            name,
            fd: dma_fd,
            shape: shape.to_vec(),
            _marker: std::marker::PhantomData,
            _drm_attachment: drm_attachment,
            identity: crate::BufferIdentity::new(),
            buf_size,
            mmap_offset: 0,
            is_imported: false,
        })
    }

    #[cfg(not(target_os = "linux"))]
    fn new(_shape: &[usize], _name: Option<&str>) -> Result<Self> {
        Err(Error::NotImplemented(
            "DMA tensors are not supported on this platform".to_owned(),
        ))
    }

    fn from_fd(fd: OwnedFd, shape: &[usize], name: Option<&str>) -> Result<Self> {
        if shape.is_empty() {
            return Err(Error::InvalidSize(0));
        }

        let logical_size = shape.iter().product::<usize>() * std::mem::size_of::<T>();
        if logical_size == 0 {
            return Err(Error::InvalidSize(0));
        }

        // fstat may return st_size=0 for DMA-BUF fds on some kernels;
        // fall back to logical_size in that case.
        let buf_size = {
            #[cfg(target_os = "linux")]
            {
                use nix::sys::stat::fstat;
                match fstat(&fd) {
                    Ok(stat) if stat.st_size > 0 && stat.st_size as usize >= logical_size => {
                        stat.st_size as usize
                    }
                    _ => logical_size,
                }
            }
            #[cfg(not(target_os = "linux"))]
            {
                logical_size
            }
        };

        // Do NOT attempt a DRM attachment for foreign (imported) DMA-BUF fds.
        // DRM PRIME import is only meaningful for DMA-BUF fds that were
        // allocated by the same DRM device (e.g. via the CMA/system heap).
        // For fds owned by other kernel drivers (e.g. Neutron NPU), the
        // PRIME_FD_TO_HANDLE ioctl will fail and the resulting no-op
        // attachment attempt adds unnecessary ioctl overhead on every import.
        // DMA_BUF_IOCTL_SYNC coherency for foreign buffers is the
        // responsibility of the buffer owner (the NPU driver in this case).
        #[cfg(target_os = "linux")]
        let drm_attachment = None;

        Ok(DmaTensor {
            name: name.unwrap_or("").to_owned(),
            fd,
            shape: shape.to_vec(),
            _marker: std::marker::PhantomData,
            #[cfg(target_os = "linux")]
            _drm_attachment: drm_attachment,
            identity: crate::BufferIdentity::new(),
            buf_size,
            mmap_offset: 0,
            #[cfg(target_os = "linux")]
            is_imported: true,
        })
    }

    fn clone_fd(&self) -> Result<OwnedFd> {
        Ok(self.fd.try_clone()?)
    }

    fn memory(&self) -> TensorMemory {
        TensorMemory::Dma
    }

    fn name(&self) -> String {
        self.name.clone()
    }

    fn shape(&self) -> &[usize] {
        &self.shape
    }

    fn reshape(&mut self, shape: &[usize]) -> Result<()> {
        if shape.is_empty() {
            return Err(Error::InvalidSize(0));
        }

        let new_size = shape.iter().product::<usize>() * std::mem::size_of::<T>();
        if new_size != self.size() {
            return Err(Error::ShapeMismatch(format!(
                "Cannot reshape incompatible shape: {:?} to {:?}",
                self.shape, shape
            )));
        }

        self.shape = shape.to_vec();
        Ok(())
    }

    fn map(&self) -> Result<TensorMap<T>> {
        Ok(TensorMap::Dma(DmaMap::new(
            self.fd.try_clone()?,
            &self.shape,
            self.buf_size,
            self.mmap_offset,
        )?))
    }

    fn buffer_identity(&self) -> &crate::BufferIdentity {
        &self.identity
    }
}

impl<T> AsRawFd for DmaTensor<T>
where
    T: Num + Clone + fmt::Debug + Send + Sync,
{
    fn as_raw_fd(&self) -> std::os::fd::RawFd {
        self.fd.as_raw_fd()
    }
}

impl<T> DmaTensor<T>
where
    T: Num + Clone + fmt::Debug + Send + Sync,
{
    /// Allocate a DMA-BUF with an explicit byte size that may exceed
    /// `shape.product() * sizeof(T)`.
    ///
    /// Used for image tensors that need a row-padded layout so the
    /// resulting DMA-BUF satisfies a downstream consumer's pitch
    /// alignment requirement (e.g. Mali Valhall's 64-byte EGLImage
    /// import rule). The `shape` field stores the **logical** dimensions
    /// `[height, width, channels]`, so `Tensor::width()` / `height()` /
    /// `shape()` continue to report the user-requested values; the
    /// padding is carried separately by `Tensor::row_stride` and is
    /// visible to the CPU mapping (which spans the full `byte_size`
    /// bytes) but not to the logical shape.
    ///
    /// Errors:
    /// - `InvalidArgument` if `byte_size < shape.product() * sizeof(T)`
    ///   (the request would lose data)
    /// - `IoError` if the DMA-heap allocation fails
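    ///
    /// # Example
    ///
    /// A sketch of the caller-side stride math this constructor exists
    /// to support (the 64-byte pitch matches the Mali Valhall rule cited
    /// above; the surrounding call is illustrative only):
    ///
    /// ```ignore
    /// let (height, width, channels) = (1080usize, 1920usize, 3usize);
    /// let row_bytes = width * channels * std::mem::size_of::<u8>();
    /// let stride = row_bytes.div_ceil(64) * 64; // round row pitch up to 64 bytes
    /// let byte_size = stride * height;          // padded allocation size
    /// let t = DmaTensor::<u8>::new_with_byte_size(&[height, width, channels], byte_size, None)?;
    /// ```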
    #[cfg(target_os = "linux")]
    pub(crate) fn new_with_byte_size(
        shape: &[usize],
        byte_size: usize,
        name: Option<&str>,
    ) -> Result<Self> {
        use log::debug;
        use nix::sys::stat::fstat;

        // Compute the logical byte size with checked arithmetic. A caller
        // passing an absurdly large shape (or sizeof::<T> × product) must
        // not silently wrap — the comparison below would then accept an
        // allocation that's actually smaller than the logical size.
        let logical_elems = shape
            .iter()
            .copied()
            .try_fold(1usize, |acc, dim| acc.checked_mul(dim))
            .ok_or_else(|| {
                Error::InvalidArgument(format!(
                    "DmaTensor::new_with_byte_size: shape.product() overflows usize \
                     (shape={shape:?})"
                ))
            })?;
        let logical_size = logical_elems
            .checked_mul(std::mem::size_of::<T>())
            .ok_or_else(|| {
                Error::InvalidArgument(format!(
                    "DmaTensor::new_with_byte_size: logical_elems {logical_elems} × \
                     sizeof::<T>={} overflows usize (shape={shape:?})",
                    std::mem::size_of::<T>()
                ))
            })?;
        if byte_size < logical_size {
            return Err(Error::InvalidArgument(format!(
                "DmaTensor::new_with_byte_size: byte_size {byte_size} < logical {logical_size} \
                 (shape={shape:?}, sizeof::<T>={})",
                std::mem::size_of::<T>()
            )));
        }
        let name = match name {
            Some(name) => name.to_owned(),
            None => {
                let uuid = uuid::Uuid::new_v4().as_simple().to_string();
                format!("/{}", &uuid[..16])
            }
        };

        let heap = match dma_heap::Heap::new(dma_heap::HeapKind::Cma) {
            Ok(heap) => heap,
            Err(_) => dma_heap::Heap::new(dma_heap::HeapKind::System)?,
        };

        let dma_fd = heap.allocate(byte_size)?;
        let stat = fstat(&dma_fd)?;
        debug!("DMA padded memory stat: {stat:?}");
        let buf_size = if stat.st_size > 0 {
            std::cmp::max(stat.st_size as usize, byte_size)
        } else {
            byte_size
        };

        let drm_attachment = crate::dmabuf::DrmAttachment::new(&dma_fd, false);

        Ok(DmaTensor::<T> {
            name,
            fd: dma_fd,
            shape: shape.to_vec(),
            _marker: std::marker::PhantomData,
            _drm_attachment: drm_attachment,
            identity: crate::BufferIdentity::new(),
            buf_size,
            mmap_offset: 0,
            is_imported: false,
        })
    }

    #[cfg(not(target_os = "linux"))]
    pub(crate) fn new_with_byte_size(
        _shape: &[usize],
        _byte_size: usize,
        _name: Option<&str>,
    ) -> Result<Self> {
        Err(Error::NotImplemented(
            "DMA tensors are not supported on this platform".to_owned(),
        ))
    }

    /// Map this DMA tensor with an explicit total byte size.
    ///
    /// Used by `Tensor::map()` for self-allocated strided tensors — the
    /// returned `DmaMap` exposes the full `byte_size` bytes via
    /// `as_slice()`/`as_mut_slice()`, not just the shape-derived logical
    /// count. Callers are expected to iterate rows with
    /// `Tensor::effective_row_stride()` so they don't read past the end.
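    ///
    /// # Example
    ///
    /// Illustrative row-wise iteration over the padded mapping (a sketch;
    /// `stride` and `row_bytes` would come from the owning `Tensor`'s
    /// row-stride bookkeeping, which lives outside this file):
    ///
    /// ```ignore
    /// let map = tensor.map_with_byte_size(stride * height)?;
    /// let bytes = map.as_slice();
    /// for row in 0..height {
    ///     let line = &bytes[row * stride..row * stride + row_bytes];
    ///     // `line` is one packed row; the bytes after it up to `stride` are padding
    /// }
    /// ```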
    pub(crate) fn map_with_byte_size(&self, byte_size: usize) -> Result<DmaMap<T>> {
        DmaMap::new_with_byte_size(
            self.fd.try_clone()?,
            &self.shape,
            self.buf_size,
            self.mmap_offset,
            byte_size,
        )
    }

    pub fn try_clone(&self) -> Result<Self> {
        let fd = self.clone_fd()?;
        // Preserve the imported/owned distinction: imported fds never get a
        // DRM attachment (consistent with from_fd()).
        #[cfg(target_os = "linux")]
        let drm_attachment = if self.is_imported {
            None
        } else {
            crate::dmabuf::DrmAttachment::new(&fd, false)
        };
        Ok(Self {
            name: self.name.clone(),
            fd,
            shape: self.shape.clone(),
            _marker: std::marker::PhantomData,
            #[cfg(target_os = "linux")]
            _drm_attachment: drm_attachment,
            identity: self.identity.clone(),
            buf_size: self.buf_size,
            mmap_offset: self.mmap_offset,
            #[cfg(target_os = "linux")]
            is_imported: self.is_imported,
        })
    }
}

#[derive(Debug)]
pub struct DmaMap<T>
where
    T: Num + Clone + fmt::Debug,
{
    ptr: Arc<Mutex<DmaPtr>>,
    fd: OwnedFd,
    shape: Vec<usize>,
    /// Actual mmap'd size (may be > shape.product() * sizeof(T) for padded buffers).
    mmap_size: usize,
    /// Byte offset into the mmap'd region where tensor data begins.
    offset: usize,
    /// Optional override for `as_slice().len() * sizeof(T)`. When `None`,
    /// `as_slice()` returns `shape.product()` elements (the traditional
    /// logical view). When `Some(bytes)`, `as_slice()` returns `bytes /
    /// sizeof(T)` elements, exposing the full padded buffer. Used for
    /// self-allocated strided DMA tensors where the mmap'd region has
    /// row-padding between logical rows and callers need to iterate via
    /// `row_stride` rather than a packed `width * bpp` layout.
    byte_size_override: Option<usize>,
    _marker: std::marker::PhantomData<T>,
}

impl<T> DmaMap<T>
where
    T: Num + Clone + fmt::Debug,
{
    pub fn new(fd: OwnedFd, shape: &[usize], buf_size: usize, offset: usize) -> Result<Self> {
        Self::new_internal(fd, shape, buf_size, offset, None)
    }

    /// Construct a DmaMap whose `as_slice()` exposes the full padded
    /// buffer rather than the shape-derived logical byte count. Used by
    /// `Tensor::map()` for self-allocated strided DMA tensors so CPU
    /// iteration can respect `row_stride` without going past the end
    /// of the returned slice.
    ///
    /// Crate-private: the only caller is `Tensor::map()`, which already
    /// performs the outer `stride × height <= buf_size - offset` check.
    /// Keeping this API `pub(crate)` ensures an unchecked `byte_size`
    /// can never be fed in from outside the crate.
    pub(crate) fn new_with_byte_size(
        fd: OwnedFd,
        shape: &[usize],
        buf_size: usize,
        offset: usize,
        byte_size: usize,
    ) -> Result<Self> {
        Self::new_internal(fd, shape, buf_size, offset, Some(byte_size))
    }

    fn new_internal(
        fd: OwnedFd,
        shape: &[usize],
        buf_size: usize,
        offset: usize,
        byte_size_override: Option<usize>,
    ) -> Result<Self> {
        if shape.is_empty() {
            return Err(Error::InvalidSize(0));
        }

        let logical_size = shape.iter().product::<usize>() * std::mem::size_of::<T>();
        if logical_size == 0 {
            return Err(Error::InvalidSize(0));
        }

        // Use the buffer's actual size (from fstat at DmaTensor creation).
        // as_slice() uses the logical element count from shape.
        // When an offset is present (sub-region of a larger DMA-BUF), verify
        // that offset + logical_size fits within the allocated buffer — mapping
        // beyond buf_size would cause SIGBUS on access.
        let total_needed = offset
            .checked_add(logical_size)
            .ok_or(Error::InvalidSize(0))?;
        if total_needed > buf_size {
            warn!(
                "DmaMap: offset={} + logical_size={} = {} exceeds buf_size={} (fd={})",
                offset,
                logical_size,
                total_needed,
                buf_size,
                fd.as_raw_fd()
            );
            return Err(Error::InvalidSize(total_needed));
        }
        if std::mem::size_of::<T>() > 1 && !offset.is_multiple_of(std::mem::align_of::<T>()) {
            return Err(Error::InvalidOperation(format!(
                "DmaMap: offset {} is not aligned to align_of::<T>()={}",
                offset,
                std::mem::align_of::<T>()
            )));
        }

        // Defense in depth: even though `new_with_byte_size` is crate-private
        // and its callers validate upstream, verify the override is non-zero,
        // sizeof::<T>()-aligned, and fits inside the mapped region. Any breach
        // would otherwise turn into an out-of-bounds slice in `as_slice()`.
        if let Some(byte_size) = byte_size_override {
            if byte_size == 0 {
                return Err(Error::InvalidSize(0));
            }
            let t_size = std::mem::size_of::<T>();
            if t_size > 1 && !byte_size.is_multiple_of(t_size) {
                return Err(Error::InvalidOperation(format!(
                    "DmaMap: byte_size_override {byte_size} is not a multiple of sizeof::<T>()={t_size}"
                )));
            }
            let available = buf_size.saturating_sub(offset);
            if byte_size > available {
                return Err(Error::InvalidSize(byte_size));
            }
        }

        let mmap_size = buf_size;

        #[cfg(target_os = "linux")]
        {
            trace!("DmaMap: sync start fd={} size={mmap_size}", fd.as_raw_fd());
            if let Err(e) = crate::dmabuf::start_readwrite(&fd) {
                warn!(
                    "DmaMap: DMA_BUF_IOCTL_SYNC(START) failed fd={}: {e}",
                    fd.as_raw_fd()
                );
                return Err(Error::NixError(e));
            }
        }

        let ptr = unsafe {
            nix::sys::mman::mmap(
                None,
                NonZero::new(mmap_size).ok_or(Error::InvalidSize(mmap_size))?,
                nix::sys::mman::ProtFlags::PROT_READ | nix::sys::mman::ProtFlags::PROT_WRITE,
                nix::sys::mman::MapFlags::MAP_SHARED,
                &fd,
                0,
            )?
        };

        trace!("Mapping DMA memory: {ptr:?}");
        let dma_ptr = DmaPtr(NonNull::new(ptr.as_ptr()).ok_or(Error::InvalidSize(mmap_size))?);
        Ok(DmaMap {
            ptr: Arc::new(Mutex::new(dma_ptr)),
            fd,
            shape: shape.to_vec(),
            mmap_size,
            offset,
            byte_size_override,
            _marker: std::marker::PhantomData,
        })
    }
}

impl<T> Deref for DmaMap<T>
where
    T: Num + Clone + fmt::Debug,
{
    type Target = [T];

    fn deref(&self) -> &[T] {
        self.as_slice()
    }
}

impl<T> DerefMut for DmaMap<T>
where
    T: Num + Clone + fmt::Debug,
{
    fn deref_mut(&mut self) -> &mut [T] {
        self.as_mut_slice()
    }
}

#[derive(Debug)]
struct DmaPtr(NonNull<c_void>);
impl Deref for DmaPtr {
    type Target = NonNull<c_void>;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

unsafe impl Send for DmaPtr {}

impl<T> TensorMapTrait<T> for DmaMap<T>
where
    T: Num + Clone + fmt::Debug,
{
    fn shape(&self) -> &[usize] {
        &self.shape
    }

    fn unmap(&mut self) {
        let ptr = self.ptr.lock().expect("Failed to lock DmaMap pointer");

        if let Err(e) = unsafe { nix::sys::mman::munmap(**ptr, self.mmap_size) } {
            warn!("Failed to unmap DMA memory: {e}");
        }

        #[cfg(target_os = "linux")]
        if let Err(e) = crate::dmabuf::end_readwrite(&self.fd) {
            warn!("Failed to end read/write on DMA memory: {e}");
        }
    }

    fn as_slice(&self) -> &[T] {
        let ptr = self.ptr.lock().expect("Failed to lock DmaMap pointer");
        let base = unsafe { (ptr.as_ptr() as *const u8).add(self.offset) as *const T };
        unsafe { std::slice::from_raw_parts(base, self.slice_len_elems()) }
    }

    fn as_mut_slice(&mut self) -> &mut [T] {
        let ptr = self.ptr.lock().expect("Failed to lock DmaMap pointer");
        let base = unsafe { (ptr.as_ptr() as *mut u8).add(self.offset) as *mut T };
        unsafe { std::slice::from_raw_parts_mut(base, self.slice_len_elems()) }
    }
}

impl<T> DmaMap<T>
where
    T: Num + Clone + fmt::Debug,
{
    /// Number of `T` elements exposed by `as_slice()`. Honours
    /// `byte_size_override` when set (for strided tensors the caller
    /// wants the full padded mmap exposed, not just `shape.product()`).
    /// Falls back to the shape-derived logical element count.
    fn slice_len_elems(&self) -> usize {
        match self.byte_size_override {
            Some(bytes) => bytes / std::mem::size_of::<T>(),
            None => self.shape.iter().product(),
        }
    }
}

impl<T> Drop for DmaMap<T>
where
    T: Num + Clone + fmt::Debug,
{
    fn drop(&mut self) {
        trace!("DmaMap dropped, unmapping memory");
        self.unmap();
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Returns a valid fd backed by /dev/null. The validation error paths in
    /// DmaMap::new() all fire before any fd-specific syscall (mmap,
    /// DMA_BUF_IOCTL_SYNC), so any readable fd is sufficient.
    #[cfg(target_os = "linux")]
    fn dummy_fd() -> std::os::fd::OwnedFd {
        use std::os::fd::FromRawFd;
        use std::os::unix::io::IntoRawFd;
        let f = std::fs::File::open("/dev/null").expect("open /dev/null");
        unsafe { std::os::fd::OwnedFd::from_raw_fd(f.into_raw_fd()) }
    }

    /// offset + logical_size exceeds buf_size — must return InvalidSize.
    #[test]
    #[cfg(target_os = "linux")]
    fn test_dma_map_offset_exceeds_buf_size() {
        let fd = dummy_fd();
        // shape=[4096] u8 → logical_size=4096; offset=4096 → total_needed=8192
        // buf_size=4096 < 8192 → error
        let result = DmaMap::<u8>::new(fd, &[4096], 4096, 4096);
        match result {
            Err(Error::InvalidSize(n)) => assert_eq!(n, 8192),
            other => panic!("expected InvalidSize(8192), got {:?}", other),
        }
    }

    /// Offset not aligned to align_of::<T>() — must return InvalidOperation.
    #[test]
    #[cfg(target_os = "linux")]
    fn test_dma_map_misaligned_offset() {
        let fd = dummy_fd();
        // shape=[1024] u32 → logical_size=4096; offset=3 (not aligned to 4)
        // buf_size=8192 so total_needed check passes; alignment check fires
        let result = DmaMap::<u32>::new(fd, &[1024], 8192, 3);
        assert!(
            matches!(result, Err(Error::InvalidOperation(_))),
            "expected InvalidOperation for misaligned offset, got {:?}",
            result
        );
    }

    /// offset + logical_size overflows usize — must return InvalidSize(0).
    #[test]
    #[cfg(target_os = "linux")]
    fn test_dma_map_offset_overflow() {
        let fd = dummy_fd();
        // offset=usize::MAX, shape=[1] u8 → checked_add overflows
        let result = DmaMap::<u8>::new(fd, &[1], usize::MAX, usize::MAX);
        assert!(
            matches!(result, Err(Error::InvalidSize(0))),
            "expected InvalidSize(0) on overflow, got {:?}",
            result
        );
    }

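    /// Illustrative companion to the checks above (a sketch, not part of
    /// the original suite): a byte_size_override larger than buf_size -
    /// offset must be rejected by the defense-in-depth check in
    /// new_internal(), which also fires before any fd-specific syscall.
    #[test]
    #[cfg(target_os = "linux")]
    fn test_dma_map_byte_size_override_exceeds_available() {
        let fd = dummy_fd();
        // shape=[16] u8 → logical_size=16; buf_size=4096, offset=0,
        // override=8192 > available 4096 → InvalidSize(8192)
        let result = DmaMap::<u8>::new_with_byte_size(fd, &[16], 4096, 0, 8192);
        assert!(
            matches!(result, Err(Error::InvalidSize(8192))),
            "expected InvalidSize(8192) for oversized override, got {:?}",
            result
        );
    }
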
    #[test]
    #[cfg(target_os = "linux")]
    fn test_dma_map_with_offset() {
        use crate::{Tensor, TensorMapTrait, TensorMemory, TensorTrait};

        // Skip if DMA heap not available
        let total_size: usize = 4096 * 4; // 16KB
        let offset: usize = 4096; // 4KB offset
        let data_size: usize = 4096; // 4KB of data after offset

        let large_buf = match Tensor::<u8>::new(&[total_size], Some(TensorMemory::Dma), None) {
            Ok(buf) => buf,
            Err(_) => {
                eprintln!("SKIPPED: DMA not available");
                return;
            }
        };

        // Fill entire buffer with sentinel
        {
            let mut map = large_buf.map().unwrap();
            map.as_mut_slice().fill(0xAA);
        }

        // Import at offset as a smaller tensor using clone_fd + set_plane_offset
        let fd = large_buf.clone_fd().unwrap();
        let mut offset_tensor = Tensor::<u8>::from_fd(fd, &[data_size], None).unwrap();
        offset_tensor.set_plane_offset(offset);

        // Map the offset tensor — should succeed (not rejected)
        let mut map = offset_tensor.map().unwrap();
        let slice = map.as_mut_slice();

        // Should see the sentinel at the offset position
        assert_eq!(slice.len(), data_size);
        assert!(
            slice.iter().all(|&b| b == 0xAA),
            "Offset tensor map should see sentinel data at offset"
        );

        // Write different data at offset
        slice.fill(0xBB);
        drop(map);

        // Verify via the original buffer: bytes before offset unchanged,
        // bytes at offset are 0xBB
        {
            let map = large_buf.map().unwrap();
            let buf = map.as_slice();
            assert!(
                buf[..offset].iter().all(|&b| b == 0xAA),
                "Data before offset should be unchanged"
            );
            assert!(
                buf[offset..offset + data_size].iter().all(|&b| b == 0xBB),
                "Data at offset should be 0xBB"
            );
        }
    }
}