Skip to main content

edgefirst_image/
lib.rs

1// SPDX-FileCopyrightText: Copyright 2025 Au-Zone Technologies
2// SPDX-License-Identifier: Apache-2.0
3
4/*!
5
6## EdgeFirst HAL - Image Converter
7
8The `edgefirst_image` crate is part of the EdgeFirst Hardware Abstraction
9Layer (HAL) and provides functionality for converting images between
10different formats and sizes.  The crate is designed to work with hardware
11acceleration when available, but also provides a CPU-based fallback for
12environments where hardware acceleration is not present or not suitable.
13
14The main features of the `edgefirst_image` crate include:
15- Support for various image formats, including YUYV, RGB, RGBA, and GREY.
16- Support for source crop, destination crop, rotation, and flipping.
17- Image conversion using hardware acceleration (G2D, OpenGL) when available.
18- CPU-based image conversion as a fallback option.
19
20The crate uses [`TensorDyn`] from `edgefirst_tensor` to represent images,
21with [`PixelFormat`] metadata describing the pixel layout. The
22[`ImageProcessor`] struct manages the conversion process, selecting
23the appropriate conversion method based on the available hardware.
24
25## Examples
26
27```rust
28# use edgefirst_image::{ImageProcessor, Rotation, Flip, Crop, ImageProcessorTrait, load_image};
29# use edgefirst_tensor::{PixelFormat, DType, TensorDyn};
30# fn main() -> Result<(), edgefirst_image::Error> {
31let image = include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/../../testdata/zidane.jpg"));
32let src = load_image(image, Some(PixelFormat::Rgba), None)?;
33let mut converter = ImageProcessor::new()?;
34let mut dst = converter.create_image(640, 480, PixelFormat::Rgb, DType::U8, None)?;
35converter.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::default())?;
36# Ok(())
37# }
38```
39
40## Environment Variables
41The behavior of the `edgefirst_image::ImageProcessor` struct can be influenced by the
42following environment variables:
43- `EDGEFIRST_FORCE_BACKEND`: When set to `cpu`, `g2d`, or `opengl` (case-insensitive),
44  only that single backend is initialized and no fallback chain is used. If the
45  forced backend fails to initialize, an error is returned immediately. This is
46  useful for benchmarking individual backends in isolation. When this variable is
47  set, the `EDGEFIRST_DISABLE_*` variables are ignored.
48- `EDGEFIRST_DISABLE_GL`: If set to `1`, disables the use of OpenGL for image
49  conversion, forcing the use of CPU or other available hardware methods.
50- `EDGEFIRST_DISABLE_G2D`: If set to `1`, disables the use of G2D for image
51  conversion, forcing the use of CPU or other available hardware methods.
52- `EDGEFIRST_DISABLE_CPU`: If set to `1`, disables the use of CPU for image
53  conversion, forcing the use of hardware acceleration methods. If no hardware
54  acceleration methods are available, an error will be returned when attempting
55  to create an `ImageProcessor`.
56
Additionally, the `TensorMemory` used by default allocations can be controlled using the
`EDGEFIRST_TENSOR_FORCE_MEM` environment variable. If set to `1`, default tensor memory
uses system memory. This disables the use of specialized memory regions for tensors and
hardware acceleration; however, it typically improves the performance of the CPU converter.
61*/
62#![cfg_attr(coverage_nightly, feature(coverage_attribute))]
63
/// Pitch alignment requirement for DMA-BUF tensors that may be imported as
/// EGLImages by the GL backend. Mali Valhall (i.MX 95 / G310) rejects
/// `eglCreateImageKHR` with `EGL_BAD_ALLOC` for any DMA-BUF whose row pitch
/// is not a multiple of 64 bytes; Vivante GC7000UL (i.MX 8MP) accepts any
/// pitch so the constant is harmless on that path. 64 is the smallest
/// alignment that satisfies every embedded ARM GPU we ship to.
///
/// Applied automatically inside [`ImageProcessor::create_image`] when the
/// allocation lands on `TensorMemory::Dma`. External callers that allocate
/// their own DMA-BUF tensors (e.g. GStreamer plugins, video pipelines) can
/// use [`align_width_for_gpu_pitch`] to compute a width whose resulting row
/// stride satisfies this requirement.
pub const GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES: usize = 64;
77
78/// Round `width` (in pixels) up so the resulting row stride
79/// `width * bpp` is a multiple of [`GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES`]
80/// AND a multiple of `bpp` (so the rounded width is an integer pixel count).
81///
82/// `bpp` must be the per-pixel byte count for the image's primary plane
83/// (e.g. 4 for RGBA8/BGRA8, 3 for RGB888, 1 for Grey/NV12-luma).
84///
85/// External callers — GStreamer plugins, video pipelines, anyone wrapping a
86/// foreign DMA-BUF — should call this when sizing the destination so that
87/// `eglCreateImageKHR` doesn't reject the import on Mali. Pre-aligned widths
88/// (640, 1280, 1920, 3008, 3840 …) round-trip unchanged; misaligned widths
89/// are bumped up to the next valid value.
90///
91/// # Overflow behaviour
92///
93/// All arithmetic is checked. If the alignment computation or the rounded
94/// width would overflow `usize`, the function logs a warning and returns the
95/// original `width` unchanged rather than wrapping or producing a smaller
96/// value. Callers can rely on the returned width being **at least** the
97/// requested width.
98///
99/// `bpp == 0` and `width == 0` short-circuit to return the input unchanged.
100///
101/// # Examples
102///
103/// ```
104/// use edgefirst_image::align_width_for_gpu_pitch;
105///
106/// // RGBA8 (bpp=4): width must round to a multiple of 16 pixels (64-byte stride).
107/// assert_eq!(align_width_for_gpu_pitch(1920, 4), 1920); // already aligned
108/// assert_eq!(align_width_for_gpu_pitch(3004, 4), 3008); // crowd.png case: +4 px
109/// assert_eq!(align_width_for_gpu_pitch(1281, 4), 1296); // +15 px
110///
111/// // RGB888 (bpp=3): width must round to a multiple of 64 pixels (192-byte stride).
112/// assert_eq!(align_width_for_gpu_pitch(640, 3), 640);
113/// assert_eq!(align_width_for_gpu_pitch(641, 3), 704);
114/// ```
115pub fn align_width_for_gpu_pitch(width: usize, bpp: usize) -> usize {
116    if bpp == 0 || width == 0 {
117        return width;
118    }
119
120    // The minimum aligned stride must be a common multiple of both the
121    // GPU's pitch alignment and the per-pixel byte count. Using the LCM
122    // guarantees the rounded stride is an integer multiple of `bpp`, so
123    // converting back to a pixel count is exact.
124    //
125    // Compute the alignment in pixels (`width_alignment`) so we never need
126    // to multiply `width * bpp`, which is the only operation that could
127    // realistically overflow for large caller-supplied widths.
128    let Some(lcm_alignment) = checked_num_integer_lcm(GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES, bpp)
129    else {
130        log::warn!(
131            "align_width_for_gpu_pitch: lcm({GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES}, {bpp}) \
132             overflows usize, returning unaligned width {width}"
133        );
134        return width;
135    };
136    if lcm_alignment == 0 {
137        return width;
138    }
139
140    debug_assert_eq!(lcm_alignment % bpp, 0);
141    let width_alignment = lcm_alignment / bpp;
142    if width_alignment == 0 {
143        return width;
144    }
145
146    let remainder = width % width_alignment;
147    if remainder == 0 {
148        return width;
149    }
150
151    let pad = width_alignment - remainder;
152    match width.checked_add(pad) {
153        Some(aligned) => aligned,
154        None => {
155            log::warn!(
156                "align_width_for_gpu_pitch: width {width} + pad {pad} overflows usize, \
157                 returning unaligned (caller should use a smaller width or pre-aligned size)"
158            );
159            width
160        }
161    }
162}
163
164/// Round `min_pitch_bytes` up to the next multiple of
165/// [`GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES`]. Returns `None` if the rounded
166/// value would overflow `usize`. Returns `Some(0)` for input 0.
167///
168/// Used internally by [`ImageProcessor::create_image`] to compute the
169/// padded row stride for DMA-backed image allocations. External callers
170/// that need pixel-counted alignment (instead of raw byte pitch) should
171/// use [`align_width_for_gpu_pitch`] instead.
172#[cfg(target_os = "linux")]
173pub(crate) fn align_pitch_bytes_to_gpu_alignment(min_pitch_bytes: usize) -> Option<usize> {
174    let alignment = GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES;
175    if min_pitch_bytes == 0 {
176        return Some(0);
177    }
178    let remainder = min_pitch_bytes % alignment;
179    if remainder == 0 {
180        return Some(min_pitch_bytes);
181    }
182    min_pitch_bytes.checked_add(alignment - remainder)
183}
184
185/// Overflow-safe least common multiple. Returns `None` when `(a / gcd) * b`
186/// would wrap.
187fn checked_num_integer_lcm(a: usize, b: usize) -> Option<usize> {
188    if a == 0 || b == 0 {
189        return Some(0);
190    }
191    let g = num_integer_gcd(a, b);
192    // a / g is exact (g divides a by definition) and at most a, so this
193    // division never panics. Only the subsequent multiply can overflow.
194    (a / g).checked_mul(b)
195}
196
/// Greatest common divisor via the iterative Euclidean algorithm.
/// `gcd(x, 0) == x`, so `gcd(0, 0) == 0`.
fn num_integer_gcd(a: usize, b: usize) -> usize {
    let (mut x, mut y) = (a, b);
    while y != 0 {
        let rem = x % y;
        x = y;
        y = rem;
    }
    x
}
204
205/// Bytes-per-pixel for the primary plane of `format` at element size `elem`.
206/// Returns `None` for formats that don't have a single packed BPP (semi-planar
207/// chroma is handled separately, returning the luma-plane bpp).
208///
209/// External callers can use this together with [`align_width_for_gpu_pitch`]
210/// to size their own DMA-BUFs without having to remember per-format BPPs:
211///
212/// ```
213/// use edgefirst_image::{align_width_for_gpu_pitch, primary_plane_bpp};
214/// use edgefirst_tensor::PixelFormat;
215///
216/// let bpp = primary_plane_bpp(PixelFormat::Rgba, 1).unwrap();
217/// let aligned = align_width_for_gpu_pitch(3004, bpp);
218/// assert_eq!(aligned, 3008);
219/// ```
220pub fn primary_plane_bpp(format: PixelFormat, elem: usize) -> Option<usize> {
221    use edgefirst_tensor::PixelLayout;
222    match format.layout() {
223        PixelLayout::Packed => Some(format.channels() * elem),
224        PixelLayout::Planar => Some(elem),
225        // For NV12/NV16 the luma plane is single-channel so the pitch
226        // matches `elem`; the chroma plane uses the same pitch in bytes
227        // (UV is half-width but two interleaved channels = same pitch).
228        PixelLayout::SemiPlanar => Some(elem),
229        // `PixelLayout` is non-exhaustive — fall through unaligned for
230        // any future variant we don't yet recognise.
231        _ => None,
232    }
233}
234
235/// Return the GPU-aligned pitch in bytes when a DMA-backed image of
236/// `width × fmt` would need row-stride padding, or `None` when the
237/// natural pitch already satisfies `GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES`
238/// or the caller has explicitly requested non-DMA memory.
239///
240/// Mali G310 (i.MX 95) rejects `eglCreateImage` from DMA-BUFs whose
241/// `PLANE0_PITCH_EXT` is not a multiple of 64 bytes, surfacing as
242/// `EGL_BAD_ALLOC`. Decoders like [`load_jpeg`]/[`load_png`] use this
243/// helper to decide whether to route through the two-buffer padded
244/// decode path.
245#[cfg(target_os = "linux")]
246pub(crate) fn padded_dma_pitch_for(
247    fmt: PixelFormat,
248    width: usize,
249    memory: &Option<TensorMemory>,
250) -> Option<usize> {
251    // Only pad when the caller explicitly requested DMA, or when they
252    // left memory selection to the allocator AND DMA is actually
253    // available. `Tensor::image_with_stride(..., None)` always routes
254    // through DMA allocation, so treating `None` as "DMA wanted"
255    // unconditionally would convert a normally-working image load into
256    // a hard failure on systems where DMA is unavailable (sandboxed
257    // CI, missing `/dev/dma_heap`, permission-denied containers) —
258    // whereas `Tensor::image(..., None)` would have fallen back to
259    // SHM/Mem there.
260    match memory {
261        Some(TensorMemory::Dma) => {}
262        None if edgefirst_tensor::is_dma_available() => {}
263        _ => return None,
264    }
265    // Padding only applies to packed layouts — `Tensor::image_with_stride`
266    // rejects semi-planar / planar formats, and those take their own
267    // per-plane pitches on import anyway.
268    if fmt.layout() != PixelLayout::Packed {
269        return None;
270    }
271    let bpp = primary_plane_bpp(fmt, 1)?;
272    let natural = width.checked_mul(bpp)?;
273    let aligned = align_pitch_bytes_to_gpu_alignment(natural)?;
274    if aligned > natural {
275        Some(aligned)
276    } else {
277        None
278    }
279}
280
281/// Row-copy a tightly-packed `src` tensor into a `dst` tensor that has a
282/// larger row stride (typically a DMA-BUF allocated with GPU-aligned pitch).
283///
284/// Both tensors must share the same width, height and pixel format. The
285/// bytes between the end of each source row and the next destination row
286/// are left untouched — EGL import doesn't read past the row's valid
287/// width, so the padding can remain whatever the allocator produced.
288#[cfg(target_os = "linux")]
289pub(crate) fn copy_packed_to_padded_dma(src: &Tensor<u8>, dst: &mut Tensor<u8>) -> Result<()> {
290    let width = dst.width().ok_or(Error::NotAnImage)?;
291    let height = dst.height().ok_or(Error::NotAnImage)?;
292    let fmt = dst.format().ok_or(Error::NotAnImage)?;
293    let src_width = src.width().ok_or(Error::NotAnImage)?;
294    let src_height = src.height().ok_or(Error::NotAnImage)?;
295    let src_fmt = src.format().ok_or(Error::NotAnImage)?;
296    if src_width != width || src_height != height || src_fmt != fmt {
297        return Err(Error::Internal(format!(
298            "copy_packed_to_padded_dma: src and dst image metadata must match \
299             (src: {src_width}x{src_height} {src_fmt:?}, dst: {width}x{height} {fmt:?})"
300        )));
301    }
302    let bpp = primary_plane_bpp(fmt, 1).ok_or_else(|| {
303        Error::NotSupported(format!(
304            "copy_packed_to_padded_dma: unknown bpp for {fmt:?}"
305        ))
306    })?;
307    let natural = width.checked_mul(bpp).ok_or_else(|| {
308        Error::Internal(format!(
309            "copy_packed_to_padded_dma: width {width} × bpp {bpp} overflows"
310        ))
311    })?;
312    let dst_stride = dst.effective_row_stride().ok_or_else(|| {
313        Error::Internal("copy_packed_to_padded_dma: dst has no effective row stride".into())
314    })?;
315
316    // `TensorMap` derefs to `[T]`, which gives us the slice without
317    // needing to import the `TensorMapTrait` at this call site.
318    let src_map = src.map()?;
319    let src_bytes: &[u8] = &src_map;
320    let mut dst_map = dst.map()?;
321    let dst_bytes: &mut [u8] = &mut dst_map;
322
323    if src_bytes.len() < natural.saturating_mul(height) {
324        return Err(Error::Internal(format!(
325            "copy_packed_to_padded_dma: src has {} bytes, need {} ({}x{} @ {} bpp)",
326            src_bytes.len(),
327            natural.saturating_mul(height),
328            width,
329            height,
330            bpp,
331        )));
332    }
333    if dst_bytes.len() < dst_stride.saturating_mul(height) {
334        return Err(Error::Internal(format!(
335            "copy_packed_to_padded_dma: dst has {} bytes, need {} ({} stride × {} rows)",
336            dst_bytes.len(),
337            dst_stride.saturating_mul(height),
338            dst_stride,
339            height,
340        )));
341    }
342
343    for row in 0..height {
344        let s = row * natural;
345        let d = row * dst_stride;
346        dst_bytes[d..d + natural].copy_from_slice(&src_bytes[s..s + natural]);
347    }
348    Ok(())
349}
350
351use edgefirst_decoder::{DetectBox, ProtoData, Segmentation};
352use edgefirst_tensor::{
353    DType, PixelFormat, PixelLayout, Tensor, TensorDyn, TensorMemory, TensorTrait as _,
354};
355use enum_dispatch::enum_dispatch;
356use std::{fmt::Display, time::Instant};
357use zune_jpeg::{
358    zune_core::{colorspace::ColorSpace, options::DecoderOptions},
359    JpegDecoder,
360};
361use zune_png::PngDecoder;
362
363pub use cpu::CPUProcessor;
364pub use error::{Error, Result};
365#[cfg(target_os = "linux")]
366pub use g2d::G2DProcessor;
367#[cfg(target_os = "linux")]
368#[cfg(feature = "opengl")]
369pub use opengl_headless::GLProcessorThreaded;
370#[cfg(target_os = "linux")]
371#[cfg(feature = "opengl")]
372pub use opengl_headless::Int8InterpolationMode;
373#[cfg(target_os = "linux")]
374#[cfg(feature = "opengl")]
375pub use opengl_headless::{probe_egl_displays, EglDisplayInfo, EglDisplayKind};
376
377mod cpu;
378mod error;
379mod g2d;
380#[path = "gl/mod.rs"]
381mod opengl_headless;
382
383// Use `edgefirst_tensor::PixelFormat` variants (Rgb, Rgba, Grey, etc.) and
384// `TensorDyn` / `Tensor<u8>` with `.format()` metadata instead.
385
386/// Flips the image data, then rotates it. Returns a new `TensorDyn`.
387fn rotate_flip_to_dyn(
388    src: &Tensor<u8>,
389    src_fmt: PixelFormat,
390    rotation: Rotation,
391    flip: Flip,
392    memory: Option<TensorMemory>,
393) -> Result<TensorDyn, Error> {
394    let src_w = src.width().unwrap();
395    let src_h = src.height().unwrap();
396    let channels = src_fmt.channels();
397
398    let (dst_w, dst_h) = match rotation {
399        Rotation::None | Rotation::Rotate180 => (src_w, src_h),
400        Rotation::Clockwise90 | Rotation::CounterClockwise90 => (src_h, src_w),
401    };
402
403    // Rotate/flip into Mem staging then row-copy into padded DMA when the
404    // caller wants DMA and the destination width would produce an
405    // unaligned pitch (see [`padded_dma_pitch_for`]).
406    #[cfg(target_os = "linux")]
407    if let Some(aligned_pitch) = padded_dma_pitch_for(src_fmt, dst_w, &memory) {
408        let tmp = Tensor::<u8>::image(dst_w, dst_h, src_fmt, Some(TensorMemory::Mem))?;
409        let src_map = src.map()?;
410        let mut tmp_map = tmp.map()?;
411        CPUProcessor::flip_rotate_ndarray_pf(
412            &src_map,
413            &mut tmp_map,
414            dst_w,
415            dst_h,
416            channels,
417            rotation,
418            flip,
419        )?;
420        drop(tmp_map);
421        drop(src_map);
422        let mut dma = Tensor::<u8>::image_with_stride(
423            dst_w,
424            dst_h,
425            src_fmt,
426            aligned_pitch,
427            Some(TensorMemory::Dma),
428        )?;
429        copy_packed_to_padded_dma(&tmp, &mut dma)?;
430        return Ok(TensorDyn::from(dma));
431    }
432
433    let dst = Tensor::<u8>::image(dst_w, dst_h, src_fmt, memory)?;
434    let src_map = src.map()?;
435    let mut dst_map = dst.map()?;
436
437    CPUProcessor::flip_rotate_ndarray_pf(
438        &src_map,
439        &mut dst_map,
440        dst_w,
441        dst_h,
442        channels,
443        rotation,
444        flip,
445    )?;
446    drop(dst_map);
447    drop(src_map);
448
449    Ok(TensorDyn::from(dst))
450}
451
/// Rotation applied to the destination image, in clockwise 90° steps.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Rotation {
    None = 0,
    Clockwise90 = 1,
    Rotate180 = 2,
    CounterClockwise90 = 3,
}

impl Rotation {
    /// Creates a Rotation enum from an angle in degrees. The angle must be a
    /// multiple of 90.
    ///
    /// # Panics
    /// Panics if the angle is not a multiple of 90.
    ///
    /// # Examples
    /// ```rust
    /// # use edgefirst_image::Rotation;
    /// let rotation = Rotation::from_degrees_clockwise(270);
    /// assert_eq!(rotation, Rotation::CounterClockwise90);
    /// ```
    pub fn from_degrees_clockwise(angle: usize) -> Rotation {
        // Normalize to [0, 360) first, then map the four quarter turns.
        let quarter_turns = angle.rem_euclid(360);
        if quarter_turns == 0 {
            Rotation::None
        } else if quarter_turns == 90 {
            Rotation::Clockwise90
        } else if quarter_turns == 180 {
            Rotation::Rotate180
        } else if quarter_turns == 270 {
            Rotation::CounterClockwise90
        } else {
            panic!("rotation angle is not a multiple of 90")
        }
    }
}
482
/// Flip applied to the image during conversion (the image is flipped before
/// it is rotated — see [`ImageProcessorTrait::convert`]).
///
/// NOTE(review): the exact axis convention (whether `Vertical` mirrors
/// top↔bottom or left↔right) is defined by the backend implementations —
/// confirm against the CPU converter before documenting it as fact.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Flip {
    /// No flip.
    None = 0,
    /// Vertical flip.
    Vertical = 1,
    /// Horizontal flip.
    Horizontal = 2,
}
489
/// Controls how the color palette index is chosen for each detected object.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum ColorMode {
    /// Color is chosen by object class label (`det.label`). Default.
    ///
    /// Preserves backward compatibility and is correct for semantic
    /// segmentation where colors carry class meaning.
    #[default]
    Class,
    /// Color is chosen by instance order (loop index, zero-based).
    ///
    /// Each detected object gets a unique color regardless of class,
    /// useful for instance segmentation.
    Instance,
    /// Color is chosen by track ID (future use; currently behaves like
    /// [`Instance`](Self::Instance)).
    Track,
}

impl ColorMode {
    /// Return the palette index for a detection given its loop index and label.
    ///
    /// `Class` keys colors on the label; `Instance` and `Track` both key on
    /// the zero-based detection index.
    #[inline]
    pub fn index(self, idx: usize, label: usize) -> usize {
        if self == ColorMode::Class {
            label
        } else {
            idx
        }
    }
}
519
/// Controls the resolution and coordinate frame of masks produced by
/// [`ImageProcessor::materialize_masks`].
///
/// - [`Proto`](Self::Proto) returns per-detection tiles at proto-plane
///   resolution (e.g. 48×32 u8 for a typical COCO bbox on a 160×160 proto
///   plane). This is the historical behavior of `materialize_masks` and the
///   fastest path because no upsample runs inside HAL. Mask values are
///   continuous sigmoid output quantized to `uint8 [0, 255]`.
/// - [`Scaled`](Self::Scaled) returns per-detection tiles at caller-specified
///   pixel resolution by upsampling the full proto plane once and cropping by
///   bbox after sigmoid. The upsample uses bilinear interpolation with
///   edge-clamp sampling — semantically equivalent to Ultralytics'
///   `process_masks_retina` reference. When a `letterbox` is also passed to
///   [`materialize_masks`], the inverse letterbox transform is applied during
///   the upsample so mask pixels land in original-content coordinates
///   (drop-in for overlay on the original image). Mask values are binary
///   `uint8 {0, 255}` after thresholding sigmoid > 0.5 — interchangeable
///   with `Proto` output via the same `> 127` test.
///
/// [`materialize_masks`]: ImageProcessor::materialize_masks
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum MaskResolution {
    /// Per-detection tile at proto-plane resolution (default).
    #[default]
    Proto,
    /// Per-detection tile at `(width, height)` pixel resolution in the
    /// coordinate frame determined by the `letterbox` parameter of
    /// [`ImageProcessor::materialize_masks`].
    Scaled {
        /// Target pixel width of the output coordinate frame.
        width: u32,
        /// Target pixel height of the output coordinate frame.
        height: u32,
    },
}
555
/// Options for mask overlay rendering.
///
/// Controls how segmentation masks are composited onto the destination image:
/// - `background`: when set, the background image is drawn first and masks
///   are composited over it (result written to `dst`). When `None`, `dst` is
///   cleared to `0x00000000` (fully transparent) before masks are drawn.
///   **`dst` is always fully overwritten — its prior contents are never
///   preserved.** Callers who used to pre-load an image into `dst` before
///   calling `draw_decoded_masks` / `draw_proto_masks` must now supply that
///   image via `background` instead (behaviour changed in v0.16.4).
/// - `opacity`: scales the alpha of rendered mask colors. `1.0` (default)
///   preserves the class color's alpha unchanged; `0.5` makes masks
///   semi-transparent.
/// - `color_mode`: controls whether colors are assigned by class label,
///   instance index, or track ID. Defaults to [`ColorMode::Class`].
#[derive(Debug, Clone, Copy)]
pub struct MaskOverlay<'a> {
    /// Compositing source image. Must have the same dimensions and pixel
    /// format as `dst`. When `Some`, the output is `background + masks`.
    /// When `None`, `dst` is cleared to `0x00000000` before masks are drawn.
    pub background: Option<&'a TensorDyn>,
    /// Alpha scale applied to rendered mask colors; `1.0` keeps the class
    /// color's alpha, lower values make masks more transparent.
    pub opacity: f32,
    /// Normalized letterbox region `[xmin, ymin, xmax, ymax]` in model-input
    /// space that contains actual image content (the rest is padding).
    ///
    /// When set, bounding boxes and mask coordinates from the decoder (which
    /// are in model-input normalized space) are mapped back to the original
    /// image coordinate space before rendering.
    ///
    /// Use [`with_letterbox_crop`](Self::with_letterbox_crop) to compute this
    /// from the [`Crop`] that was used in the model input [`convert`](crate::ImageProcessorTrait::convert) call.
    pub letterbox: Option<[f32; 4]>,
    /// Palette-index selection strategy; defaults to [`ColorMode::Class`].
    pub color_mode: ColorMode,
}
590
591impl Default for MaskOverlay<'_> {
592    fn default() -> Self {
593        Self {
594            background: None,
595            opacity: 1.0,
596            letterbox: None,
597            color_mode: ColorMode::Class,
598        }
599    }
600}
601
602impl<'a> MaskOverlay<'a> {
603    pub fn new() -> Self {
604        Self::default()
605    }
606
607    /// Set the compositing source image.
608    ///
609    /// `bg` must have the same dimensions and pixel format as the `dst` passed
610    /// to [`draw_decoded_masks`](crate::ImageProcessorTrait::draw_decoded_masks) /
611    /// [`draw_proto_masks`](crate::ImageProcessorTrait::draw_proto_masks).
612    /// The output will be `bg + masks`. Without a background, `dst` is cleared
613    /// to `0x00000000`.
614    pub fn with_background(mut self, bg: &'a TensorDyn) -> Self {
615        self.background = Some(bg);
616        self
617    }
618
619    pub fn with_opacity(mut self, opacity: f32) -> Self {
620        self.opacity = opacity.clamp(0.0, 1.0);
621        self
622    }
623
624    pub fn with_color_mode(mut self, mode: ColorMode) -> Self {
625        self.color_mode = mode;
626        self
627    }
628
629    /// Set the letterbox transform from the [`Crop`] used when preparing the
630    /// model input, so that bounding boxes and masks are correctly mapped back
631    /// to the original image coordinate space during rendering.
632    ///
633    /// Pass the same `crop` that was given to
634    /// [`convert`](crate::ImageProcessorTrait::convert) along with the model
635    /// input dimensions (`model_w` × `model_h`).
636    ///
637    /// Has no effect when `crop.dst_rect` is `None` (no letterbox applied).
638    pub fn with_letterbox_crop(mut self, crop: &Crop, model_w: usize, model_h: usize) -> Self {
639        if let Some(r) = crop.dst_rect {
640            self.letterbox = Some([
641                r.left as f32 / model_w as f32,
642                r.top as f32 / model_h as f32,
643                (r.left + r.width) as f32 / model_w as f32,
644                (r.top + r.height) as f32 / model_h as f32,
645            ]);
646        }
647        self
648    }
649}
650
651/// Apply the inverse letterbox transform to a bounding box.
652///
653/// `letterbox` is `[lx0, ly0, lx1, ly1]` — the normalized region of the model
654/// input that contains actual image content (output of
655/// [`MaskOverlay::with_letterbox_crop`]).
656///
657/// Converts model-input-normalized coords to output-image-normalized coords,
658/// clamped to `[0.0, 1.0]`. Also canonicalises the bbox (ensures xmin ≤ xmax).
659#[inline]
660fn unletter_bbox(bbox: DetectBox, lb: [f32; 4]) -> DetectBox {
661    let b = bbox.bbox.to_canonical();
662    let [lx0, ly0, lx1, ly1] = lb;
663    let inv_w = if lx1 > lx0 { 1.0 / (lx1 - lx0) } else { 1.0 };
664    let inv_h = if ly1 > ly0 { 1.0 / (ly1 - ly0) } else { 1.0 };
665    DetectBox {
666        bbox: edgefirst_decoder::BoundingBox {
667            xmin: ((b.xmin - lx0) * inv_w).clamp(0.0, 1.0),
668            ymin: ((b.ymin - ly0) * inv_h).clamp(0.0, 1.0),
669            xmax: ((b.xmax - lx0) * inv_w).clamp(0.0, 1.0),
670            ymax: ((b.ymax - ly0) * inv_h).clamp(0.0, 1.0),
671        },
672        ..bbox
673    }
674}
675
/// Crop configuration for a conversion: an optional source read region, an
/// optional destination write region, and an optional fill color.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Crop {
    /// Region of the source image to read from; `None` uses the full source.
    pub src_rect: Option<Rect>,
    /// Region of the destination image to write into; `None` uses the full
    /// destination. Letterboxing is expressed through this rectangle (see
    /// [`MaskOverlay::with_letterbox_crop`]).
    pub dst_rect: Option<Rect>,
    /// Fill color for destination areas outside the cropped region.
    pub dst_color: Option<[u8; 4]>,
}
682
683impl Default for Crop {
684    fn default() -> Self {
685        Crop::new()
686    }
687}
688impl Crop {
689    // Creates a new Crop with default values (no cropping).
690    pub fn new() -> Self {
691        Crop {
692            src_rect: None,
693            dst_rect: None,
694            dst_color: None,
695        }
696    }
697
698    // Sets the source rectangle for cropping.
699    pub fn with_src_rect(mut self, src_rect: Option<Rect>) -> Self {
700        self.src_rect = src_rect;
701        self
702    }
703
704    // Sets the destination rectangle for cropping.
705    pub fn with_dst_rect(mut self, dst_rect: Option<Rect>) -> Self {
706        self.dst_rect = dst_rect;
707        self
708    }
709
710    // Sets the destination color for areas outside the cropped region.
711    pub fn with_dst_color(mut self, dst_color: Option<[u8; 4]>) -> Self {
712        self.dst_color = dst_color;
713        self
714    }
715
716    // Creates a new Crop with no cropping.
717    pub fn no_crop() -> Self {
718        Crop::new()
719    }
720
721    /// Validate crop rectangles against explicit dimensions.
722    pub(crate) fn check_crop_dims(
723        &self,
724        src_w: usize,
725        src_h: usize,
726        dst_w: usize,
727        dst_h: usize,
728    ) -> Result<(), Error> {
729        let src_ok = self
730            .src_rect
731            .is_none_or(|r| r.left + r.width <= src_w && r.top + r.height <= src_h);
732        let dst_ok = self
733            .dst_rect
734            .is_none_or(|r| r.left + r.width <= dst_w && r.top + r.height <= dst_h);
735        match (src_ok, dst_ok) {
736            (true, true) => Ok(()),
737            (true, false) => Err(Error::CropInvalid(format!(
738                "Dest crop invalid: {:?}",
739                self.dst_rect
740            ))),
741            (false, true) => Err(Error::CropInvalid(format!(
742                "Src crop invalid: {:?}",
743                self.src_rect
744            ))),
745            (false, false) => Err(Error::CropInvalid(format!(
746                "Dest and Src crop invalid: {:?} {:?}",
747                self.dst_rect, self.src_rect
748            ))),
749        }
750    }
751
752    /// Validate crop rectangles against TensorDyn source and destination.
753    pub fn check_crop_dyn(
754        &self,
755        src: &edgefirst_tensor::TensorDyn,
756        dst: &edgefirst_tensor::TensorDyn,
757    ) -> Result<(), Error> {
758        self.check_crop_dims(
759            src.width().unwrap_or(0),
760            src.height().unwrap_or(0),
761            dst.width().unwrap_or(0),
762            dst.height().unwrap_or(0),
763        )
764    }
765}
766
767#[derive(Debug, Clone, Copy, PartialEq, Eq)]
768pub struct Rect {
769    pub left: usize,
770    pub top: usize,
771    pub width: usize,
772    pub height: usize,
773}
774
775impl Rect {
776    // Creates a new Rect with the specified left, top, width, and height.
777    pub fn new(left: usize, top: usize, width: usize, height: usize) -> Self {
778        Self {
779            left,
780            top,
781            width,
782            height,
783        }
784    }
785
786    // Checks if the rectangle is valid for the given TensorDyn image.
787    pub fn check_rect_dyn(&self, image: &TensorDyn) -> bool {
788        let w = image.width().unwrap_or(0);
789        let h = image.height().unwrap_or(0);
790        self.left + self.width <= w && self.top + self.height <= h
791    }
792}
793
/// Common interface implemented by the image-processing backends and
/// dispatched through [`ImageProcessor`].
#[enum_dispatch(ImageProcessor)]
pub trait ImageProcessorTrait {
    /// Converts the source image to the destination image format and size. The
    /// image is cropped first, then flipped, then rotated
    ///
    /// # Arguments
    ///
    /// * `dst` - The destination image to be converted to.
    /// * `src` - The source image to convert from.
    /// * `rotation` - The rotation to apply to the destination image.
    /// * `flip` - Flips the image
    /// * `crop` - An optional rectangle specifying the area to crop from the
    ///   source image
    ///
    /// # Returns
    ///
    /// A `Result` indicating success or failure of the conversion.
    fn convert(
        &mut self,
        src: &TensorDyn,
        dst: &mut TensorDyn,
        rotation: Rotation,
        flip: Flip,
        crop: Crop,
    ) -> Result<()>;

    /// Draw pre-decoded detection boxes and segmentation masks onto `dst`.
    ///
    /// Supports two segmentation modes based on the mask channel count:
    /// - **Instance segmentation** (`C=1`): one `Segmentation` per detection,
    ///   `segmentation` and `detect` are zipped.
    /// - **Semantic segmentation** (`C>1`): a single `Segmentation` covering
    ///   all classes; only the first element is used.
    ///
    /// # Format requirements
    ///
    /// - CPU backend: `dst` must be `RGBA` or `RGB`.
    /// - OpenGL backend: `dst` must be `RGBA`, `BGRA`, or `RGB`.
    /// - G2D backend: only produces the base frame (empty detections);
    ///   returns `NotImplemented` when any detection or segmentation is
    ///   supplied.
    ///
    /// # Output contract
    ///
    /// This function always fully writes `dst` — it never relies on the
    /// caller having pre-cleared the destination. The four cases are:
    ///
    /// | detections | background | output                              |
    /// |------------|------------|-------------------------------------|
    /// | none       | none       | dst cleared to `0x00000000`         |
    /// | none       | set        | dst ← background                    |
    /// | set        | none       | masks drawn over cleared dst        |
    /// | set        | set        | masks drawn over background         |
    ///
    /// Each backend implements this with its native primitives: G2D uses
    /// `g2d_clear` / `g2d_blit`, OpenGL uses `glClear` / DMA-BUF GPU blit
    /// plus the mask program, and CPU uses direct buffer fill / memcpy as
    /// the terminal fallback. CPU-memcpy of DMA buffers is avoided on the
    /// accelerated paths.
    ///
    /// An empty `segmentation` slice is valid — only bounding boxes are drawn.
    ///
    /// `overlay` controls compositing: `background` is the compositing source
    /// (must match `dst` in size and format); `opacity` scales mask alpha.
    ///
    /// # Buffer aliasing
    ///
    /// `dst` and `overlay.background` must reference **distinct underlying
    /// buffers**. An aliased pair returns [`Error::AliasedBuffers`] without
    /// dispatching to any backend — the GL path would otherwise read and
    /// write the same texture in a single draw, which is undefined behaviour
    /// on most drivers. Aliasing is detected via
    /// [`TensorDyn::aliases`](edgefirst_tensor::TensorDyn::aliases), which
    /// catches both shared-allocation clones and separate imports over the
    /// same dmabuf fd.
    ///
    /// # Migration from v0.16.3 and earlier
    ///
    /// Prior to v0.16.4 the call silently preserved `dst`'s contents on empty
    /// detections. That invariant no longer holds — `dst` is always fully
    /// written. Callers who pre-loaded an image into `dst` before calling this
    /// function must now pass that image via `overlay.background` instead.
    fn draw_decoded_masks(
        &mut self,
        dst: &mut TensorDyn,
        detect: &[DetectBox],
        segmentation: &[Segmentation],
        overlay: MaskOverlay<'_>,
    ) -> Result<()>;

    /// Draw masks from proto data onto image (fused decode+draw).
    ///
    /// For YOLO segmentation models, this avoids materializing intermediate
    /// `Array3<u8>` masks. The `ProtoData` contains mask coefficients and the
    /// prototype tensor; the renderer computes `mask_coeff @ protos` directly
    /// at the output resolution using bilinear sampling.
    ///
    /// `detect` and `proto_data.mask_coefficients` should have the same
    /// length; pairing is done by zip, so excess entries on either side are
    /// silently ignored rather than rejected. An empty
    /// `detect` slice is valid and produces the base frame — cleared or
    /// background-blitted — via the selected backend's native primitive.
    ///
    /// # Format requirements and output contract
    ///
    /// Same as [`draw_decoded_masks`](Self::draw_decoded_masks), including
    /// the "always fully writes dst" guarantee across all four
    /// detection/background combinations.
    ///
    /// `overlay` controls compositing — see [`draw_decoded_masks`](Self::draw_decoded_masks).
    fn draw_proto_masks(
        &mut self,
        dst: &mut TensorDyn,
        detect: &[DetectBox],
        proto_data: &ProtoData,
        overlay: MaskOverlay<'_>,
    ) -> Result<()>;

    /// Sets the colors used for rendering segmentation masks. Up to 20 colors
    /// can be set.
    fn set_class_colors(&mut self, colors: &[[u8; 4]]) -> Result<()>;
}
915
/// Configuration for [`ImageProcessor`] construction.
///
/// Use with [`ImageProcessor::with_config`] to override the default EGL
/// display auto-detection and backend selection. The default configuration
/// preserves the existing auto-detection behaviour.
#[derive(Debug, Clone, Default)]
pub struct ImageProcessorConfig {
    /// Force OpenGL to use this EGL display type instead of auto-detecting.
    ///
    /// When `None`, the processor probes displays in priority order: GBM,
    /// PlatformDevice, Default. Use [`probe_egl_displays`] to discover
    /// which displays are available on the current system.
    ///
    /// Ignored when `EDGEFIRST_DISABLE_GL=1` is set.
    #[cfg(target_os = "linux")]
    #[cfg(feature = "opengl")]
    pub egl_display: Option<EglDisplayKind>,

    /// Preferred compute backend.
    ///
    /// When set to a specific backend (not [`ComputeBackend::Auto`]), the
    /// processor initializes that backend plus CPU; if the requested backend
    /// fails to initialize, a warning is logged and the CPU path remains
    /// available — construction does not fail.
    /// This takes precedence over `EDGEFIRST_FORCE_BACKEND` and the
    /// `EDGEFIRST_DISABLE_*` environment variables.
    ///
    /// - [`ComputeBackend::OpenGl`]: init OpenGL + CPU, skip G2D
    /// - [`ComputeBackend::G2d`]: init G2D + CPU, skip OpenGL
    /// - [`ComputeBackend::Cpu`]: init CPU only
    /// - [`ComputeBackend::Auto`]: existing env-var-driven selection
    pub backend: ComputeBackend,
}
947
/// Compute backend selection for [`ImageProcessor`].
///
/// Use with [`ImageProcessorConfig::backend`] to select which backend the
/// processor should prefer. When a specific backend is selected, the
/// processor initializes that backend plus CPU as a fallback. When `Auto`
/// is used, the existing environment-variable-driven selection applies.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum ComputeBackend {
    /// Auto-detect based on available hardware and the
    /// `EDGEFIRST_FORCE_BACKEND` / `EDGEFIRST_DISABLE_*` environment
    /// variables.
    #[default]
    Auto,
    /// CPU-only processing (no hardware acceleration).
    Cpu,
    /// Prefer G2D hardware blitter (+ CPU fallback).
    G2d,
    /// Prefer OpenGL ES (+ CPU fallback).
    OpenGl,
}
966
/// Backend forced via the `EDGEFIRST_FORCE_BACKEND` environment variable
/// or [`ImageProcessorConfig::backend`].
///
/// When set, the [`ImageProcessor`] only initializes and dispatches to the
/// selected backend — no fallback chain is used.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ForcedBackend {
    /// CPU backend only.
    Cpu,
    /// G2D hardware blitter only.
    G2d,
    /// OpenGL ES backend only.
    OpenGl,
}
978
/// Image converter that uses available hardware acceleration or CPU as a
/// fallback.
///
/// Construct via [`ImageProcessor::new`] (auto-detection) or
/// [`ImageProcessor::with_config`] (explicit backend / EGL display).
#[derive(Debug)]
pub struct ImageProcessor {
    /// CPU-based image converter as a fallback. This is only None if the
    /// EDGEFIRST_DISABLE_CPU environment variable is set.
    pub cpu: Option<CPUProcessor>,

    #[cfg(target_os = "linux")]
    /// G2D-based image converter for Linux systems. This is only available if
    /// the EDGEFIRST_DISABLE_G2D environment variable is not set and libg2d.so
    /// is available.
    pub g2d: Option<G2DProcessor>,
    #[cfg(target_os = "linux")]
    #[cfg(feature = "opengl")]
    /// OpenGL-based image converter for Linux systems. This is only available
    /// if the EDGEFIRST_DISABLE_GL environment variable is not set and OpenGL
    /// ES is available.
    pub opengl: Option<GLProcessorThreaded>,

    /// When set, only the specified backend is used — no fallback chain.
    /// Populated by `EDGEFIRST_FORCE_BACKEND` during construction.
    pub(crate) forced_backend: Option<ForcedBackend>,
}
1002
// SAFETY: NOTE(review): these blanket impls assert that every contained
// backend (CPUProcessor, G2DProcessor, GLProcessorThreaded) may be moved to
// and shared across threads. Nothing in this file demonstrates that — GPU
// contexts are frequently thread-affine. Confirm each backend is itself
// Send + Sync (or internally synchronized, e.g. the "Threaded" GL wrapper);
// otherwise these impls are unsound.
unsafe impl Send for ImageProcessor {}
unsafe impl Sync for ImageProcessor {}
1005
1006impl ImageProcessor {
1007    /// Creates a new `ImageProcessor` instance, initializing available
1008    /// hardware converters based on the system capabilities and environment
1009    /// variables.
1010    ///
1011    /// # Examples
1012    /// ```rust
1013    /// # use edgefirst_image::{ImageProcessor, Rotation, Flip, Crop, ImageProcessorTrait, load_image};
1014    /// # use edgefirst_tensor::{PixelFormat, DType, TensorDyn};
1015    /// # fn main() -> Result<(), edgefirst_image::Error> {
1016    /// let image = include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/../../testdata/zidane.jpg"));
1017    /// let src = load_image(image, Some(PixelFormat::Rgba), None)?;
1018    /// let mut converter = ImageProcessor::new()?;
1019    /// let mut dst = converter.create_image(640, 480, PixelFormat::Rgb, DType::U8, None)?;
1020    /// converter.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::default())?;
1021    /// # Ok(())
1022    /// # }
1023    /// ```
1024    pub fn new() -> Result<Self> {
1025        Self::with_config(ImageProcessorConfig::default())
1026    }
1027
    /// Creates a new `ImageProcessor` with the given configuration.
    ///
    /// When [`ImageProcessorConfig::backend`] is set to a specific backend,
    /// environment variables are ignored and the processor initializes the
    /// requested backend plus CPU as a fallback.
    ///
    /// When `Auto`, the existing `EDGEFIRST_FORCE_BACKEND` and
    /// `EDGEFIRST_DISABLE_*` environment variables apply.
    ///
    /// # Errors
    ///
    /// Returns [`Error::ForcedBackendUnavailable`] when
    /// `EDGEFIRST_FORCE_BACKEND` names an unknown value or the forced
    /// backend fails to initialize. In contrast, a config-selected backend
    /// (`G2d` / `OpenGl`) that fails to initialize only logs a warning and
    /// leaves the CPU fallback in place.
    #[allow(unused_variables)]
    pub fn with_config(config: ImageProcessorConfig) -> Result<Self> {
        // ── Config-driven backend selection ──────────────────────────
        // When the caller explicitly requests a backend via the config,
        // skip all environment variable logic.
        match config.backend {
            ComputeBackend::Cpu => {
                log::info!("ComputeBackend::Cpu — CPU only");
                return Ok(Self {
                    cpu: Some(CPUProcessor::new()),
                    #[cfg(target_os = "linux")]
                    g2d: None,
                    #[cfg(target_os = "linux")]
                    #[cfg(feature = "opengl")]
                    opengl: None,
                    forced_backend: None,
                });
            }
            ComputeBackend::G2d => {
                log::info!("ComputeBackend::G2d — G2D + CPU fallback");
                #[cfg(target_os = "linux")]
                {
                    // Init failure is non-fatal here: warn and fall back to
                    // CPU (unlike the env-var forced path below, which errors).
                    let g2d = match G2DProcessor::new() {
                        Ok(g) => Some(g),
                        Err(e) => {
                            log::warn!("G2D requested but failed to initialize: {e:?}");
                            None
                        }
                    };
                    return Ok(Self {
                        cpu: Some(CPUProcessor::new()),
                        g2d,
                        #[cfg(feature = "opengl")]
                        opengl: None,
                        forced_backend: None,
                    });
                }
                #[cfg(not(target_os = "linux"))]
                {
                    log::warn!("G2D requested but not available on this platform, using CPU");
                    return Ok(Self {
                        cpu: Some(CPUProcessor::new()),
                        forced_backend: None,
                    });
                }
            }
            ComputeBackend::OpenGl => {
                log::info!("ComputeBackend::OpenGl — OpenGL + CPU fallback");
                #[cfg(target_os = "linux")]
                {
                    // Same non-fatal policy as G2d above: warn + CPU fallback.
                    #[cfg(feature = "opengl")]
                    let opengl = match GLProcessorThreaded::new(config.egl_display) {
                        Ok(gl) => Some(gl),
                        Err(e) => {
                            log::warn!("OpenGL requested but failed to initialize: {e:?}");
                            None
                        }
                    };
                    return Ok(Self {
                        cpu: Some(CPUProcessor::new()),
                        g2d: None,
                        #[cfg(feature = "opengl")]
                        opengl,
                        forced_backend: None,
                    });
                }
                #[cfg(not(target_os = "linux"))]
                {
                    log::warn!("OpenGL requested but not available on this platform, using CPU");
                    return Ok(Self {
                        cpu: Some(CPUProcessor::new()),
                        forced_backend: None,
                    });
                }
            }
            ComputeBackend::Auto => { /* fall through to env-var logic below */ }
        }

        // ── EDGEFIRST_FORCE_BACKEND ──────────────────────────────────
        // When set, only the requested backend is initialised and no
        // fallback chain is used. Accepted values (case-insensitive):
        //   "cpu", "g2d", "opengl"
        if let Ok(val) = std::env::var("EDGEFIRST_FORCE_BACKEND") {
            let val_lower = val.to_lowercase();
            let forced = match val_lower.as_str() {
                "cpu" => ForcedBackend::Cpu,
                "g2d" => ForcedBackend::G2d,
                "opengl" => ForcedBackend::OpenGl,
                other => {
                    return Err(Error::ForcedBackendUnavailable(format!(
                        "unknown EDGEFIRST_FORCE_BACKEND value: {other:?} (expected cpu, g2d, or opengl)"
                    )));
                }
            };

            log::info!("EDGEFIRST_FORCE_BACKEND={val} — only initializing {val_lower} backend");

            // Forced backends are strict: initialization failure is an
            // error, and `cpu` is left None for g2d/opengl so no silent
            // CPU fallback can occur at dispatch time.
            return match forced {
                ForcedBackend::Cpu => Ok(Self {
                    cpu: Some(CPUProcessor::new()),
                    #[cfg(target_os = "linux")]
                    g2d: None,
                    #[cfg(target_os = "linux")]
                    #[cfg(feature = "opengl")]
                    opengl: None,
                    forced_backend: Some(ForcedBackend::Cpu),
                }),
                ForcedBackend::G2d => {
                    #[cfg(target_os = "linux")]
                    {
                        let g2d = G2DProcessor::new().map_err(|e| {
                            Error::ForcedBackendUnavailable(format!(
                                "g2d forced but failed to initialize: {e:?}"
                            ))
                        })?;
                        Ok(Self {
                            cpu: None,
                            g2d: Some(g2d),
                            #[cfg(feature = "opengl")]
                            opengl: None,
                            forced_backend: Some(ForcedBackend::G2d),
                        })
                    }
                    #[cfg(not(target_os = "linux"))]
                    {
                        Err(Error::ForcedBackendUnavailable(
                            "g2d backend is only available on Linux".into(),
                        ))
                    }
                }
                ForcedBackend::OpenGl => {
                    #[cfg(target_os = "linux")]
                    #[cfg(feature = "opengl")]
                    {
                        let opengl = GLProcessorThreaded::new(config.egl_display).map_err(|e| {
                            Error::ForcedBackendUnavailable(format!(
                                "opengl forced but failed to initialize: {e:?}"
                            ))
                        })?;
                        Ok(Self {
                            cpu: None,
                            g2d: None,
                            opengl: Some(opengl),
                            forced_backend: Some(ForcedBackend::OpenGl),
                        })
                    }
                    #[cfg(not(all(target_os = "linux", feature = "opengl")))]
                    {
                        Err(Error::ForcedBackendUnavailable(
                            "opengl backend requires Linux with the 'opengl' feature enabled"
                                .into(),
                        ))
                    }
                }
            };
        }

        // ── Existing DISABLE logic (unchanged) ──────────────────────
        // A DISABLE variable counts as "set" for any value other than
        // "0" or "false" (case-insensitive).
        #[cfg(target_os = "linux")]
        let g2d = if std::env::var("EDGEFIRST_DISABLE_G2D")
            .map(|x| x != "0" && x.to_lowercase() != "false")
            .unwrap_or(false)
        {
            log::debug!("EDGEFIRST_DISABLE_G2D is set");
            None
        } else {
            match G2DProcessor::new() {
                Ok(g2d_converter) => Some(g2d_converter),
                Err(err) => {
                    log::warn!("Failed to initialize G2D converter: {err:?}");
                    None
                }
            }
        };

        #[cfg(target_os = "linux")]
        #[cfg(feature = "opengl")]
        let opengl = if std::env::var("EDGEFIRST_DISABLE_GL")
            .map(|x| x != "0" && x.to_lowercase() != "false")
            .unwrap_or(false)
        {
            log::debug!("EDGEFIRST_DISABLE_GL is set");
            None
        } else {
            match GLProcessorThreaded::new(config.egl_display) {
                Ok(gl_converter) => Some(gl_converter),
                Err(err) => {
                    log::warn!("Failed to initialize GL converter: {err:?}");
                    None
                }
            }
        };

        let cpu = if std::env::var("EDGEFIRST_DISABLE_CPU")
            .map(|x| x != "0" && x.to_lowercase() != "false")
            .unwrap_or(false)
        {
            log::debug!("EDGEFIRST_DISABLE_CPU is set");
            None
        } else {
            Some(CPUProcessor::new())
        };
        Ok(Self {
            cpu,
            #[cfg(target_os = "linux")]
            g2d,
            #[cfg(target_os = "linux")]
            #[cfg(feature = "opengl")]
            opengl,
            forced_backend: None,
        })
    }
1248
1249    /// Sets the interpolation mode for int8 proto textures on the OpenGL
1250    /// backend. No-op if OpenGL is not available.
1251    #[cfg(target_os = "linux")]
1252    #[cfg(feature = "opengl")]
1253    pub fn set_int8_interpolation_mode(&mut self, mode: Int8InterpolationMode) -> Result<()> {
1254        if let Some(ref mut gl) = self.opengl {
1255            gl.set_int8_interpolation_mode(mode)?;
1256        }
1257        Ok(())
1258    }
1259
    /// Create a [`TensorDyn`] image with the best available memory backend.
    ///
    /// Priority: DMA-buf → PBO (byte-sized types: u8, i8) → system memory.
    ///
    /// Use this method instead of [`TensorDyn::image()`] when the tensor will
    /// be used with [`ImageProcessor::convert()`]. It selects the optimal
    /// memory backing (including PBO for GPU zero-copy) which direct
    /// allocation cannot achieve.
    ///
    /// This method is on [`ImageProcessor`] rather than [`ImageProcessorTrait`]
    /// because optimal allocation requires knowledge of the active compute
    /// backends (e.g. the GL context handle for PBO allocation). Individual
    /// backend implementations ([`CPUProcessor`], etc.) do not have this
    /// cross-backend visibility.
    ///
    /// # Arguments
    ///
    /// * `width` - Image width in pixels
    /// * `height` - Image height in pixels
    /// * `format` - Pixel format
    /// * `dtype` - Element data type (e.g. `DType::U8`, `DType::I8`)
    /// * `memory` - Optional memory type override; when `None`, the best
    ///   available backend is selected automatically.
    ///
    /// # Returns
    ///
    /// A [`TensorDyn`] backed by the highest-performance memory type
    /// available on this system.
    ///
    /// # Pitch alignment for DMA-backed allocations
    ///
    /// DMA-BUF imports into the GL backend (Mali Valhall on i.MX 95
    /// specifically) require every row pitch to be a multiple of
    /// [`GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES`] (currently 64). When this
    /// method lands on `TensorMemory::Dma`, the underlying allocation is
    /// silently padded so the row stride satisfies that requirement.
    ///
    /// **The user-requested `width` is preserved** — `tensor.width()`
    /// returns the same value you passed in. The padding is carried by
    /// [`TensorDyn::row_stride`] / `effective_row_stride()`, which the
    /// GL backend reads when importing the buffer as an EGLImage.
    /// Callers that compute byte offsets from the tensor must use the
    /// stride, not `width × bytes_per_pixel`; the CPU mapping spans the
    /// full `stride × height` bytes.
    ///
    /// Pre-aligned widths (640, 1280, 1920, 3008, 3840 …) allocate
    /// exactly `width × bpp × height` bytes with no padding. PBO and
    /// Mem fallbacks never pad — they don't go through EGLImage import.
    ///
    /// See also [`align_width_for_gpu_pitch`] for an advisory helper
    /// that external callers (GStreamer plugins, video pipelines) can
    /// use to size their own DMA-BUFs for GL compatibility.
    ///
    /// # Errors
    ///
    /// Returns an error if all allocation strategies fail.
    pub fn create_image(
        &self,
        width: usize,
        height: usize,
        format: PixelFormat,
        dtype: DType,
        memory: Option<TensorMemory>,
    ) -> Result<TensorDyn> {
        // Compute the GPU-aligned row stride in bytes for this image.
        // `None` means either the format has no defined primary-plane bpp
        // (unknown future layout) or the stride calculation would overflow
        // — in both cases we fall back to the natural layout via the plain
        // `TensorDyn::image` constructor, and the slow-path warning inside
        // `draw_*_masks` will fire if the subsequent GL import fails.
        //
        // DMA allocation is Linux-only (see `TensorMemory::Dma` cfg gate),
        // so both the stride computation and the helper closure are gated
        // accordingly — the callers below are already Linux-only.
        #[cfg(target_os = "linux")]
        let dma_stride_bytes: Option<usize> = primary_plane_bpp(format, dtype.size())
            .and_then(|bpp| width.checked_mul(bpp))
            .and_then(align_pitch_bytes_to_gpu_alignment);

        // Helper: allocate a DMA image, using the padded-stride constructor
        // when the computed stride exceeds the natural pitch, otherwise the
        // plain constructor (byte-identical result in the common case).
        #[cfg(target_os = "linux")]
        let try_dma = || -> Result<TensorDyn> {
            // Stride padding is only meaningful for packed pixel layouts
            // (RGBA8, BGRA8, RGB888, Grey) — the formats the GL backend
            // renders into. Semi-planar (NV12, NV16) and planar (PlanarRgb,
            // PlanarRgba) tensors go through `TensorDyn::image(...)` with
            // their natural layout; they're imported from camera capture
            // via `from_fd` far more often than allocated here, and
            // `Tensor::image_with_stride` explicitly rejects them.
            let packed = format.layout() == edgefirst_tensor::PixelLayout::Packed;
            match dma_stride_bytes {
                // Padded path: packed layout AND the aligned stride is
                // strictly wider than the natural `width × bpp` pitch.
                Some(stride)
                    if packed
                        && primary_plane_bpp(format, dtype.size())
                            .and_then(|bpp| width.checked_mul(bpp))
                            .is_some_and(|natural| stride > natural) =>
                {
                    log::debug!(
                        "create_image: padding row stride for {format:?} {width}x{height} \
                         from natural pitch to {stride} bytes for GPU alignment"
                    );
                    Ok(TensorDyn::image_with_stride(
                        width,
                        height,
                        format,
                        dtype,
                        stride,
                        Some(edgefirst_tensor::TensorMemory::Dma),
                    )?)
                }
                _ => Ok(TensorDyn::image(
                    width,
                    height,
                    format,
                    dtype,
                    Some(edgefirst_tensor::TensorMemory::Dma),
                )?),
            }
        };

        // If an explicit memory type is requested, honour it directly.
        // On Linux, `TensorMemory::Dma` gets the padded-stride treatment;
        // other memory types take the user-requested width verbatim.
        match memory {
            #[cfg(target_os = "linux")]
            Some(TensorMemory::Dma) => {
                return try_dma();
            }
            // Non-DMA explicit request: allocate verbatim, no stride padding.
            Some(mem) => {
                return Ok(TensorDyn::image(width, height, format, dtype, Some(mem))?);
            }
            None => {}
        }

        // Try DMA first on Linux — skip only when GL has explicitly selected PBO
        // as the preferred transfer path (PBO is better than DMA in that case).
        #[cfg(target_os = "linux")]
        {
            #[cfg(feature = "opengl")]
            let gl_uses_pbo = self
                .opengl
                .as_ref()
                .is_some_and(|gl| gl.transfer_backend() == opengl_headless::TransferBackend::Pbo);
            #[cfg(not(feature = "opengl"))]
            let gl_uses_pbo = false;

            if !gl_uses_pbo {
                if let Ok(img) = try_dma() {
                    return Ok(img);
                }
            }
        }

        // Try PBO (if GL available).
        // PBO buffers are u8-sized; the int8 shader emulates i8 output via
        // XOR 0x80 on the same underlying buffer, so both U8 and I8 work.
        #[cfg(target_os = "linux")]
        #[cfg(feature = "opengl")]
        if dtype.size() == 1 {
            if let Some(gl) = &self.opengl {
                match gl.create_pbo_image(width, height, format) {
                    Ok(t) => {
                        if dtype == DType::I8 {
                            // SAFETY: Tensor<u8> and Tensor<i8> are layout-
                            // identical (same element size, no T-dependent
                            // drop glue). The int8 shader applies XOR 0x80
                            // on the same PBO buffer. Same rationale as
                            // gl::processor::tensor_i8_as_u8_mut.
                            // Invariant: PBO tensors never have chroma
                            // (create_pbo_image → Tensor::wrap sets it None).
                            debug_assert!(
                                t.chroma().is_none(),
                                "PBO i8 transmute requires chroma == None"
                            );
                            let t_i8: Tensor<i8> = unsafe { std::mem::transmute(t) };
                            return Ok(TensorDyn::from(t_i8));
                        }
                        return Ok(TensorDyn::from(t));
                    }
                    Err(e) => log::debug!("PBO image creation failed, falling back to Mem: {e:?}"),
                }
            }
        }

        // Terminal fallback: plain system memory — always available.
        Ok(TensorDyn::image(
            width,
            height,
            format,
            dtype,
            Some(edgefirst_tensor::TensorMemory::Mem),
        )?)
    }
1455
    /// Import an external DMA-BUF image.
    ///
    /// Each [`PlaneDescriptor`] owns an already-duped fd; this method
    /// consumes the descriptors and takes ownership of those fds (whether
    /// the call succeeds or fails).
    ///
    /// The caller must ensure the DMA-BUF allocation is large enough for the
    /// specified width, height, format, and any stride/offset on the plane
    /// descriptors. No buffer-size validation is performed; an undersized
    /// buffer may cause GPU faults or EGL import failure.
    ///
    /// # Arguments
    ///
    /// * `image` - Plane descriptor for the primary (or only) plane
    /// * `chroma` - Optional plane descriptor for the UV chroma plane
    ///   (required for multiplane NV12)
    /// * `width` - Image width in pixels
    /// * `height` - Image height in pixels
    /// * `format` - Pixel format of the buffer
    /// * `dtype` - Element data type (e.g. `DType::U8`)
    ///
    /// # Returns
    ///
    /// A `TensorDyn` configured as an image.
    ///
    /// # Errors
    ///
    /// * [`Error::NotSupported`] if `chroma` is `Some` for a non-semi-planar
    ///   format, or multiplane NV16 (not yet supported), or the fd is not
    ///   DMA-backed
    /// * [`Error::InvalidShape`] if NV12 height is odd
    ///
    /// # Platform
    ///
    /// Linux only.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use edgefirst_tensor::PlaneDescriptor;
    ///
    /// // Single-plane RGBA
    /// let pd = PlaneDescriptor::new(fd.as_fd())?;
    /// let src = proc.import_image(pd, None, 1920, 1080, PixelFormat::Rgba, DType::U8)?;
    ///
    /// // Multi-plane NV12 with stride
    /// let y_pd = PlaneDescriptor::new(y_fd.as_fd())?.with_stride(2048);
    /// let uv_pd = PlaneDescriptor::new(uv_fd.as_fd())?.with_stride(2048);
    /// let src = proc.import_image(y_pd, Some(uv_pd), 1920, 1080,
    ///                             PixelFormat::Nv12, DType::U8)?;
    /// ```
    #[cfg(target_os = "linux")]
    pub fn import_image(
        &self,
        image: edgefirst_tensor::PlaneDescriptor,
        chroma: Option<edgefirst_tensor::PlaneDescriptor>,
        width: usize,
        height: usize,
        format: PixelFormat,
        dtype: DType,
    ) -> Result<TensorDyn> {
        use edgefirst_tensor::{Tensor, TensorMemory};

        // Capture stride/offset from descriptors before consuming them
        // (into_fd() below moves the descriptors, so read these now).
        let image_stride = image.stride();
        let image_offset = image.offset();
        let chroma_stride = chroma.as_ref().and_then(|c| c.stride());
        let chroma_offset = chroma.as_ref().and_then(|c| c.offset());

        if let Some(chroma_pd) = chroma {
            // ── Multiplane path ──────────────────────────────────────
            // Multiplane tensors are backed by Tensor<u8> (or transmuted to
            // Tensor<i8>). Reject other dtypes to avoid silently returning a
            // tensor with the wrong element type.
            if dtype != DType::U8 && dtype != DType::I8 {
                return Err(Error::NotSupported(format!(
                    "multiplane import only supports U8/I8, got {dtype:?}"
                )));
            }
            if format.layout() != PixelLayout::SemiPlanar {
                return Err(Error::NotSupported(format!(
                    "import_image with chroma requires a semi-planar format, got {format:?}"
                )));
            }

            // Chroma plane height depends on the subsampling of the format.
            let chroma_h = match format {
                PixelFormat::Nv12 => {
                    // NV12 (4:2:0): UV plane is half the luma height, so an
                    // odd luma height cannot be represented.
                    if !height.is_multiple_of(2) {
                        return Err(Error::InvalidShape(format!(
                            "NV12 requires even height, got {height}"
                        )));
                    }
                    height / 2
                }
                // NV16 multiplane will be supported in a future release;
                // the GL backend currently only handles NV12 plane1 attributes.
                PixelFormat::Nv16 => {
                    return Err(Error::NotSupported(
                        "multiplane NV16 is not yet supported; use contiguous NV16 instead".into(),
                    ))
                }
                _ => {
                    return Err(Error::NotSupported(format!(
                        "unsupported semi-planar format: {format:?}"
                    )))
                }
            };

            // from_fd takes ownership of the descriptor's fd.
            let luma = Tensor::<u8>::from_fd(image.into_fd(), &[height, width], Some("luma"))?;
            if luma.memory() != TensorMemory::Dma {
                // Error contract: non-DMA fds are rejected as NotSupported.
                return Err(Error::NotSupported(format!(
                    "luma fd must be DMA-backed, got {:?}",
                    luma.memory()
                )));
            }

            // Chroma plane is a raw [chroma_h, width] byte tensor (interleaved
            // UV samples occupy `width` bytes per row for NV12).
            let chroma_tensor =
                Tensor::<u8>::from_fd(chroma_pd.into_fd(), &[chroma_h, width], Some("chroma"))?;
            if chroma_tensor.memory() != TensorMemory::Dma {
                return Err(Error::NotSupported(format!(
                    "chroma fd must be DMA-backed, got {:?}",
                    chroma_tensor.memory()
                )));
            }

            // from_planes creates the combined tensor with format set,
            // preserving luma's row_stride (currently None since luma was raw).
            let mut tensor = Tensor::<u8>::from_planes(luma, chroma_tensor, format)?;

            // Apply stride/offset to the combined tensor (luma plane)
            if let Some(s) = image_stride {
                // set_row_stride validates against the format's minimum.
                tensor.set_row_stride(s)?;
            }
            if let Some(o) = image_offset {
                tensor.set_plane_offset(o);
            }

            // Apply stride/offset to the chroma sub-tensor.
            // The chroma tensor is a raw 2D [chroma_h, width] tensor without
            // format metadata, so we validate stride manually rather than
            // using set_row_stride (which requires format).
            if let Some(chroma_ref) = tensor.chroma_mut() {
                if let Some(s) = chroma_stride {
                    if s < width {
                        return Err(Error::InvalidShape(format!(
                            "chroma stride {s} < minimum {width} for {format:?}"
                        )));
                    }
                    chroma_ref.set_row_stride_unchecked(s);
                }
                if let Some(o) = chroma_offset {
                    chroma_ref.set_plane_offset(o);
                }
            }

            if dtype == DType::I8 {
                // SAFETY: Tensor<u8> and Tensor<i8> have identical layout because
                // the struct contains only type-erased storage (OwnedFd, shape, name),
                // no inline T values. This assertion catches layout drift at compile time.
                const {
                    assert!(std::mem::size_of::<Tensor<u8>>() == std::mem::size_of::<Tensor<i8>>());
                    assert!(
                        std::mem::align_of::<Tensor<u8>>() == std::mem::align_of::<Tensor<i8>>()
                    );
                }
                let tensor_i8: Tensor<i8> = unsafe { std::mem::transmute(tensor) };
                return Ok(TensorDyn::from(tensor_i8));
            }
            Ok(TensorDyn::from(tensor))
        } else {
            // ── Single-plane path ────────────────────────────────────
            // The shape describes one contiguous allocation per layout kind.
            let shape = match format.layout() {
                // Interleaved channels: height × width × channels (HWC).
                PixelLayout::Packed => vec![height, width, format.channels()],
                // One plane per channel: channels × height × width (CHW).
                PixelLayout::Planar => vec![format.channels(), height, width],
                PixelLayout::SemiPlanar => {
                    // Contiguous semi-planar: luma rows followed by chroma
                    // rows in the same buffer, so only the total row count
                    // varies by format.
                    let total_h = match format {
                        PixelFormat::Nv12 => {
                            if !height.is_multiple_of(2) {
                                return Err(Error::InvalidShape(format!(
                                    "NV12 requires even height, got {height}"
                                )));
                            }
                            height * 3 / 2
                        }
                        PixelFormat::Nv16 => height * 2,
                        _ => {
                            return Err(Error::InvalidShape(format!(
                                "unknown semi-planar height multiplier for {format:?}"
                            )))
                        }
                    };
                    vec![total_h, width]
                }
                _ => {
                    return Err(Error::NotSupported(format!(
                        "unsupported pixel layout for import_image: {:?}",
                        format.layout()
                    )));
                }
            };
            let tensor = TensorDyn::from_fd(image.into_fd(), &shape, dtype, None)?;
            if tensor.memory() != TensorMemory::Dma {
                return Err(Error::NotSupported(format!(
                    "import_image requires DMA-backed fd, got {:?}",
                    tensor.memory()
                )));
            }
            let mut tensor = tensor.with_format(format)?;
            if let Some(s) = image_stride {
                tensor.set_row_stride(s)?;
            }
            if let Some(o) = image_offset {
                tensor.set_plane_offset(o);
            }
            Ok(tensor)
        }
    }
1673
1674    /// Decode model outputs and draw segmentation masks onto `dst`.
1675    ///
1676    /// This is the primary mask rendering API. The processor decodes via the
1677    /// provided [`Decoder`], selects the optimal rendering path (hybrid
1678    /// CPU+GL or fused GPU), and composites masks onto `dst`.
1679    ///
1680    /// Returns the detected bounding boxes.
1681    pub fn draw_masks(
1682        &mut self,
1683        decoder: &edgefirst_decoder::Decoder,
1684        outputs: &[&TensorDyn],
1685        dst: &mut TensorDyn,
1686        overlay: MaskOverlay<'_>,
1687    ) -> Result<Vec<DetectBox>> {
1688        let mut output_boxes = Vec::with_capacity(100);
1689
1690        // Try proto path first (fused rendering without materializing masks)
1691        let proto_result = decoder
1692            .decode_proto(outputs, &mut output_boxes)
1693            .map_err(|e| Error::Internal(format!("decode_proto: {e:#?}")))?;
1694
1695        if let Some(proto_data) = proto_result {
1696            self.draw_proto_masks(dst, &output_boxes, &proto_data, overlay)?;
1697        } else {
1698            // Detection-only or unsupported model: full decode + render
1699            let mut output_masks = Vec::with_capacity(100);
1700            decoder
1701                .decode(outputs, &mut output_boxes, &mut output_masks)
1702                .map_err(|e| Error::Internal(format!("decode: {e:#?}")))?;
1703            self.draw_decoded_masks(dst, &output_boxes, &output_masks, overlay)?;
1704        }
1705        Ok(output_boxes)
1706    }
1707
1708    /// Decode tracked model outputs and draw segmentation masks onto `dst`.
1709    ///
1710    /// Like [`draw_masks`](Self::draw_masks) but integrates a tracker for
1711    /// maintaining object identities across frames. The tracker runs after
1712    /// NMS but before mask extraction.
1713    ///
1714    /// Returns detected boxes and track info.
1715    #[cfg(feature = "tracker")]
1716    pub fn draw_masks_tracked<TR: edgefirst_tracker::Tracker<DetectBox>>(
1717        &mut self,
1718        decoder: &edgefirst_decoder::Decoder,
1719        tracker: &mut TR,
1720        timestamp: u64,
1721        outputs: &[&TensorDyn],
1722        dst: &mut TensorDyn,
1723        overlay: MaskOverlay<'_>,
1724    ) -> Result<(Vec<DetectBox>, Vec<edgefirst_tracker::TrackInfo>)> {
1725        let mut output_boxes = Vec::with_capacity(100);
1726        let mut output_tracks = Vec::new();
1727
1728        let proto_result = decoder
1729            .decode_proto_tracked(
1730                tracker,
1731                timestamp,
1732                outputs,
1733                &mut output_boxes,
1734                &mut output_tracks,
1735            )
1736            .map_err(|e| Error::Internal(format!("decode_proto_tracked: {e:#?}")))?;
1737
1738        if let Some(proto_data) = proto_result {
1739            self.draw_proto_masks(dst, &output_boxes, &proto_data, overlay)?;
1740        } else {
1741            // Note: decode_proto_tracked returns None for detection-only/ModelPack
1742            // models WITHOUT calling the tracker. The else branch below is the
1743            // first (and only) tracker call for those model types.
1744            let mut output_masks = Vec::with_capacity(100);
1745            decoder
1746                .decode_tracked(
1747                    tracker,
1748                    timestamp,
1749                    outputs,
1750                    &mut output_boxes,
1751                    &mut output_masks,
1752                    &mut output_tracks,
1753                )
1754                .map_err(|e| Error::Internal(format!("decode_tracked: {e:#?}")))?;
1755            self.draw_decoded_masks(dst, &output_boxes, &output_masks, overlay)?;
1756        }
1757        Ok((output_boxes, output_tracks))
1758    }
1759
1760    /// Materialize per-instance segmentation masks from raw prototype data.
1761    ///
1762    /// Computes `mask_coeff @ protos` with sigmoid activation for each detection,
1763    /// producing compact masks at prototype resolution (e.g., 160×160 crops).
1764    /// Mask values are continuous sigmoid confidence outputs quantized to u8
1765    /// (0 = background, 255 = full confidence), NOT binary thresholded.
1766    ///
1767    /// The returned [`Vec<Segmentation>`] can be:
1768    /// - Inspected or exported for analytics, IoU computation, etc.
1769    /// - Passed directly to [`ImageProcessorTrait::draw_decoded_masks`] for
1770    ///   GPU-interpolated rendering.
1771    ///
1772    /// # Performance Note
1773    ///
1774    /// Calling `materialize_masks` + `draw_decoded_masks` separately prevents
1775    /// the HAL from using its internal fused optimization path. For render-only
1776    /// use cases, prefer [`ImageProcessorTrait::draw_proto_masks`] which selects
1777    /// the fastest path automatically (currently 1.6×–27× faster on tested
1778    /// platforms). Use this method when you need access to the intermediate masks.
1779    ///
1780    /// # Errors
1781    ///
1782    /// Returns [`Error::NoConverter`] if the CPU backend is not available.
1783    pub fn materialize_masks(
1784        &mut self,
1785        detect: &[DetectBox],
1786        proto_data: &ProtoData,
1787        letterbox: Option<[f32; 4]>,
1788        resolution: MaskResolution,
1789    ) -> Result<Vec<Segmentation>> {
1790        let cpu = self.cpu.as_mut().ok_or(Error::NoConverter)?;
1791        match resolution {
1792            MaskResolution::Proto => cpu.materialize_segmentations(detect, proto_data, letterbox),
1793            MaskResolution::Scaled { width, height } => {
1794                cpu.materialize_scaled_segmentations(detect, proto_data, letterbox, width, height)
1795            }
1796        }
1797    }
1798}
1799
1800impl ImageProcessorTrait for ImageProcessor {
    /// Converts the source image to the destination image format and size.
    /// The image is cropped first, then flipped, then rotated.
    ///
    /// Prefers hardware accelerators when available (OpenGL, then G2D),
    /// falling back to CPU if necessary. When a backend was forced (see the
    /// `EDGEFIRST_FORCE_BACKEND` environment variable), only that backend is
    /// tried and no fallback occurs.
    ///
    /// # Errors
    ///
    /// * [`Error::ForcedBackendUnavailable`] if the forced backend was not
    ///   initialized on this platform/build.
    /// * [`Error::NoConverter`] if no backend at all is available.
    /// * Otherwise, the error of the backend that performed the conversion
    ///   (in the auto chain only the CPU error is propagated; hardware
    ///   failures merely trigger fallback).
    fn convert(
        &mut self,
        src: &TensorDyn,
        dst: &mut TensorDyn,
        rotation: Rotation,
        flip: Flip,
        crop: Crop,
    ) -> Result<()> {
        // Timestamp for the trace logs; `start.elapsed()` is reported on
        // every exit path below.
        let start = Instant::now();
        let src_fmt = src.format();
        let dst_fmt = dst.format();
        log::trace!(
            "convert: {src_fmt:?}({:?}/{:?}) → {dst_fmt:?}({:?}/{:?}), \
             rotation={rotation:?}, flip={flip:?}, backend={:?}",
            src.dtype(),
            src.memory(),
            dst.dtype(),
            dst.memory(),
            self.forced_backend,
        );

        // ── Forced backend: no fallback chain ────────────────────────
        // Each arm returns the backend's own result; a missing backend is a
        // hard error rather than silently falling through to another one.
        if let Some(forced) = self.forced_backend {
            return match forced {
                ForcedBackend::Cpu => {
                    if let Some(cpu) = self.cpu.as_mut() {
                        let r = cpu.convert(src, dst, rotation, flip, crop);
                        log::trace!(
                            "convert: forced=cpu result={} ({:?})",
                            if r.is_ok() { "ok" } else { "err" },
                            start.elapsed()
                        );
                        return r;
                    }
                    Err(Error::ForcedBackendUnavailable("cpu".into()))
                }
                ForcedBackend::G2d => {
                    // On non-Linux builds the cfg'd block is compiled out and
                    // this arm evaluates to the error below.
                    #[cfg(target_os = "linux")]
                    if let Some(g2d) = self.g2d.as_mut() {
                        let r = g2d.convert(src, dst, rotation, flip, crop);
                        log::trace!(
                            "convert: forced=g2d result={} ({:?})",
                            if r.is_ok() { "ok" } else { "err" },
                            start.elapsed()
                        );
                        return r;
                    }
                    Err(Error::ForcedBackendUnavailable("g2d".into()))
                }
                ForcedBackend::OpenGl => {
                    // Requires both Linux and the "opengl" feature.
                    #[cfg(target_os = "linux")]
                    #[cfg(feature = "opengl")]
                    if let Some(opengl) = self.opengl.as_mut() {
                        let r = opengl.convert(src, dst, rotation, flip, crop);
                        log::trace!(
                            "convert: forced=opengl result={} ({:?})",
                            if r.is_ok() { "ok" } else { "err" },
                            start.elapsed()
                        );
                        return r;
                    }
                    Err(Error::ForcedBackendUnavailable("opengl".into()))
                }
            };
        }

        // ── Auto fallback chain: OpenGL → G2D → CPU ──────────────────
        // Hardware backends may decline a conversion; only the terminal CPU
        // backend's error is propagated to the caller.
        #[cfg(target_os = "linux")]
        #[cfg(feature = "opengl")]
        if let Some(opengl) = self.opengl.as_mut() {
            match opengl.convert(src, dst, rotation, flip, crop) {
                Ok(_) => {
                    log::trace!(
                        "convert: auto selected=opengl for {src_fmt:?}→{dst_fmt:?} ({:?})",
                        start.elapsed()
                    );
                    return Ok(());
                }
                Err(e) => {
                    log::trace!("convert: auto opengl declined {src_fmt:?}→{dst_fmt:?}: {e}");
                }
            }
        }

        #[cfg(target_os = "linux")]
        if let Some(g2d) = self.g2d.as_mut() {
            match g2d.convert(src, dst, rotation, flip, crop) {
                Ok(_) => {
                    log::trace!(
                        "convert: auto selected=g2d for {src_fmt:?}→{dst_fmt:?} ({:?})",
                        start.elapsed()
                    );
                    return Ok(());
                }
                Err(e) => {
                    log::trace!("convert: auto g2d declined {src_fmt:?}→{dst_fmt:?}: {e}");
                }
            }
        }

        if let Some(cpu) = self.cpu.as_mut() {
            match cpu.convert(src, dst, rotation, flip, crop) {
                Ok(_) => {
                    log::trace!(
                        "convert: auto selected=cpu for {src_fmt:?}→{dst_fmt:?} ({:?})",
                        start.elapsed()
                    );
                    return Ok(());
                }
                Err(e) => {
                    log::trace!("convert: auto cpu failed {src_fmt:?}→{dst_fmt:?}: {e}");
                    return Err(e);
                }
            }
        }
        // No backend present at all (not even CPU).
        Err(Error::NoConverter)
    }
1923
    /// Composite pre-materialized segmentation masks (and optional overlay
    /// background) onto `dst`.
    ///
    /// When `overlay.letterbox` is set, detect boxes — and, for instance
    /// segmentation with a 1:1 box↔mask correspondence, the segmentation
    /// bboxes — are un-letterboxed into output-image space before rendering.
    ///
    /// Dispatch order: forced backend (if configured) → G2D for empty frames
    /// → OpenGL → CPU.
    ///
    /// # Errors
    ///
    /// * [`Error::AliasedBuffers`] if `overlay.background` references the
    ///   same buffer as `dst`.
    /// * [`Error::ForcedBackendUnavailable`] if the forced backend was not
    ///   initialized.
    /// * [`Error::NoConverter`] if no backend is available.
    fn draw_decoded_masks(
        &mut self,
        dst: &mut TensorDyn,
        detect: &[DetectBox],
        segmentation: &[Segmentation],
        overlay: MaskOverlay<'_>,
    ) -> Result<()> {
        let start = Instant::now();

        // Reading the background while writing dst would be undefined from
        // the renderer's point of view; reject aliased buffers up front.
        if let Some(bg) = overlay.background {
            if bg.aliases(dst) {
                return Err(Error::AliasedBuffers(
                    "background must not reference the same buffer as dst".to_string(),
                ));
            }
        }

        // Un-letterbox detect boxes and segmentation bboxes for rendering when
        // a letterbox was applied to prepare the model input.
        // (Declared in the enclosing scope so the slices borrowed below
        // outlive the if/else.)
        let lb_boxes: Vec<DetectBox>;
        let lb_segs: Vec<Segmentation>;
        let (detect, segmentation) = if let Some(lb) = overlay.letterbox {
            lb_boxes = detect.iter().map(|&d| unletter_bbox(d, lb)).collect();
            // Keep segmentation bboxes in sync with the transformed detect boxes
            // when we have a 1:1 correspondence (instance segmentation).
            lb_segs = if segmentation.len() == lb_boxes.len() {
                segmentation
                    .iter()
                    .zip(lb_boxes.iter())
                    .map(|(s, d)| Segmentation {
                        xmin: d.bbox.xmin,
                        ymin: d.bbox.ymin,
                        xmax: d.bbox.xmax,
                        ymax: d.bbox.ymax,
                        segmentation: s.segmentation.clone(),
                    })
                    .collect()
            } else {
                // No 1:1 mapping — leave the segmentation bboxes untouched.
                segmentation.to_vec()
            };
            (lb_boxes.as_slice(), lb_segs.as_slice())
        } else {
            (detect, segmentation)
        };
        // Only consumed by the Linux-only G2D fast path below.
        #[cfg(target_os = "linux")]
        let is_empty_frame = detect.is_empty() && segmentation.is_empty();

        // ── Forced backend: no fallback chain ────────────────────────
        if let Some(forced) = self.forced_backend {
            return match forced {
                ForcedBackend::Cpu => {
                    if let Some(cpu) = self.cpu.as_mut() {
                        return cpu.draw_decoded_masks(dst, detect, segmentation, overlay);
                    }
                    Err(Error::ForcedBackendUnavailable("cpu".into()))
                }
                ForcedBackend::G2d => {
                    // G2D can only produce empty frames (clear / bg blit).
                    // For populated frames it has no rasterizer — fail loudly.
                    #[cfg(target_os = "linux")]
                    if let Some(g2d) = self.g2d.as_mut() {
                        return g2d.draw_decoded_masks(dst, detect, segmentation, overlay);
                    }
                    Err(Error::ForcedBackendUnavailable("g2d".into()))
                }
                ForcedBackend::OpenGl => {
                    // GL handles background natively via GPU blit, and now
                    // actively clears when there is no background.
                    #[cfg(target_os = "linux")]
                    #[cfg(feature = "opengl")]
                    if let Some(opengl) = self.opengl.as_mut() {
                        return opengl.draw_decoded_masks(dst, detect, segmentation, overlay);
                    }
                    Err(Error::ForcedBackendUnavailable("opengl".into()))
                }
            };
        }

        // ── Auto dispatch ──────────────────────────────────────────
        // Empty frames prefer G2D when available — a single g2d_clear or
        // g2d_blit is the cheapest HW path to produce the correct output
        // and avoids spinning up the GL pipeline every zero-detection
        // frame in a triple-buffered display loop.
        #[cfg(target_os = "linux")]
        if is_empty_frame {
            if let Some(g2d) = self.g2d.as_mut() {
                match g2d.draw_decoded_masks(dst, detect, segmentation, overlay) {
                    Ok(_) => {
                        log::trace!(
                            "draw_decoded_masks empty frame via g2d in {:?}",
                            start.elapsed()
                        );
                        return Ok(());
                    }
                    // Non-fatal: fall through to the GL/CPU path below.
                    Err(e) => log::trace!("g2d empty-frame path unavailable: {e:?}"),
                }
            }
        }

        // Populated frames (or G2D unavailable): GL first, CPU fallback.
        // Both backends now own their own base-layer handling (bg blit
        // or clear), so we hand the overlay through untouched.
        #[cfg(target_os = "linux")]
        #[cfg(feature = "opengl")]
        if let Some(opengl) = self.opengl.as_mut() {
            log::trace!(
                "draw_decoded_masks started with opengl in {:?}",
                start.elapsed()
            );
            match opengl.draw_decoded_masks(dst, detect, segmentation, overlay) {
                Ok(_) => {
                    log::trace!("draw_decoded_masks with opengl in {:?}", start.elapsed());
                    return Ok(());
                }
                Err(e) => {
                    log::trace!("draw_decoded_masks didn't work with opengl: {e:?}")
                }
            }
        }

        log::trace!(
            "draw_decoded_masks started with cpu in {:?}",
            start.elapsed()
        );
        if let Some(cpu) = self.cpu.as_mut() {
            match cpu.draw_decoded_masks(dst, detect, segmentation, overlay) {
                Ok(_) => {
                    log::trace!("draw_decoded_masks with cpu in {:?}", start.elapsed());
                    return Ok(());
                }
                Err(e) => {
                    // CPU is the terminal backend — its error is propagated.
                    log::trace!("draw_decoded_masks didn't work with cpu: {e:?}");
                    return Err(e);
                }
            }
        }
        // No backend present at all (not even CPU).
        Err(Error::NoConverter)
    }
2062
    /// Render instance masks directly from prototype data onto `dst`.
    ///
    /// Detect boxes are un-letterboxed for rendering when `overlay.letterbox`
    /// is set, while the original model-space boxes are still used for
    /// prototype cropping.
    ///
    /// Dispatch order: forced backend (if configured) → G2D for empty frames
    /// → hybrid CPU materialize + GL overlay → CPU-only fallback.
    ///
    /// # Errors
    ///
    /// * [`Error::AliasedBuffers`] if `overlay.background` references the
    ///   same buffer as `dst`.
    /// * [`Error::ForcedBackendUnavailable`] if the forced backend was not
    ///   initialized.
    /// * [`Error::Internal`] if the CPU backend is missing when the fallback
    ///   path is reached.
    fn draw_proto_masks(
        &mut self,
        dst: &mut TensorDyn,
        detect: &[DetectBox],
        proto_data: &ProtoData,
        overlay: MaskOverlay<'_>,
    ) -> Result<()> {
        let start = Instant::now();

        // Reject aliased background/dst buffers up front.
        if let Some(bg) = overlay.background {
            if bg.aliases(dst) {
                return Err(Error::AliasedBuffers(
                    "background must not reference the same buffer as dst".to_string(),
                ));
            }
        }

        // Un-letterbox detect boxes for rendering when a letterbox was applied
        // to prepare the model input.  The original `detect` coords are still
        // passed to `materialize_segmentations` (which needs model-space coords
        // to correctly crop the proto tensor) alongside `overlay.letterbox` so
        // it can emit `Segmentation` structs in output-image space.
        let lb_boxes: Vec<DetectBox>;
        let render_detect = if let Some(lb) = overlay.letterbox {
            lb_boxes = detect.iter().map(|&d| unletter_bbox(d, lb)).collect();
            lb_boxes.as_slice()
        } else {
            detect
        };
        // Only consumed by the Linux-only G2D fast path below.
        #[cfg(target_os = "linux")]
        let is_empty_frame = detect.is_empty();

        // ── Forced backend: no fallback chain ────────────────────────
        if let Some(forced) = self.forced_backend {
            return match forced {
                ForcedBackend::Cpu => {
                    if let Some(cpu) = self.cpu.as_mut() {
                        return cpu.draw_proto_masks(dst, render_detect, proto_data, overlay);
                    }
                    Err(Error::ForcedBackendUnavailable("cpu".into()))
                }
                ForcedBackend::G2d => {
                    #[cfg(target_os = "linux")]
                    if let Some(g2d) = self.g2d.as_mut() {
                        return g2d.draw_proto_masks(dst, render_detect, proto_data, overlay);
                    }
                    Err(Error::ForcedBackendUnavailable("g2d".into()))
                }
                ForcedBackend::OpenGl => {
                    #[cfg(target_os = "linux")]
                    #[cfg(feature = "opengl")]
                    if let Some(opengl) = self.opengl.as_mut() {
                        return opengl.draw_proto_masks(dst, render_detect, proto_data, overlay);
                    }
                    Err(Error::ForcedBackendUnavailable("opengl".into()))
                }
            };
        }

        // ── Auto dispatch ──────────────────────────────────────────
        // Empty frames: prefer G2D — cheapest HW path (clear or bg blit).
        #[cfg(target_os = "linux")]
        if is_empty_frame {
            if let Some(g2d) = self.g2d.as_mut() {
                match g2d.draw_proto_masks(dst, render_detect, proto_data, overlay) {
                    Ok(_) => {
                        log::trace!(
                            "draw_proto_masks empty frame via g2d in {:?}",
                            start.elapsed()
                        );
                        return Ok(());
                    }
                    // Non-fatal: fall through to the hybrid/CPU path below.
                    Err(e) => log::trace!("g2d empty-frame path unavailable: {e:?}"),
                }
            }
        }

        // Hybrid path: CPU materialize + GL overlay (benchmarked faster than
        // full-GPU draw_proto_masks on all tested platforms: 27× on imx8mp,
        // 4× on imx95, 2.5× on rpi5, 1.6× on x86).
        // GL owns its own bg-blit / glClear — we pass the overlay through.
        //
        // CPU materialize needs `&mut` for its MaskScratch buffers; GL also
        // needs `&mut`. The CPU borrow is scoped to its block so the
        // subsequent GL borrow is free to take over `self`.
        #[cfg(target_os = "linux")]
        #[cfg(feature = "opengl")]
        if let (Some(_), Some(_)) = (self.cpu.as_ref(), self.opengl.as_ref()) {
            let segmentation = match self.cpu.as_mut() {
                Some(cpu) => {
                    log::trace!(
                        "draw_proto_masks started with hybrid (cpu+opengl) in {:?}",
                        start.elapsed()
                    );
                    // Note: model-space `detect` here, not `render_detect` —
                    // proto cropping needs the original coords.
                    cpu.materialize_segmentations(detect, proto_data, overlay.letterbox)?
                }
                None => unreachable!("cpu presence checked above"),
            };
            if let Some(opengl) = self.opengl.as_mut() {
                match opengl.draw_decoded_masks(dst, render_detect, &segmentation, overlay) {
                    Ok(_) => {
                        log::trace!(
                            "draw_proto_masks with hybrid (cpu+opengl) in {:?}",
                            start.elapsed()
                        );
                        return Ok(());
                    }
                    Err(e) => {
                        log::trace!(
                            "draw_proto_masks hybrid path failed, falling back to cpu: {e:?}"
                        );
                    }
                }
            }
        }

        // Terminal fallback: CPU renders the proto masks end-to-end.
        let Some(cpu) = self.cpu.as_mut() else {
            return Err(Error::Internal(
                "draw_proto_masks requires CPU backend for fallback path".into(),
            ));
        };
        log::trace!("draw_proto_masks started with cpu in {:?}", start.elapsed());
        cpu.draw_proto_masks(dst, render_detect, proto_data, overlay)
    }
2187
2188    fn set_class_colors(&mut self, colors: &[[u8; 4]]) -> Result<()> {
2189        let start = Instant::now();
2190
2191        // ── Forced backend: no fallback chain ────────────────────────
2192        if let Some(forced) = self.forced_backend {
2193            return match forced {
2194                ForcedBackend::Cpu => {
2195                    if let Some(cpu) = self.cpu.as_mut() {
2196                        return cpu.set_class_colors(colors);
2197                    }
2198                    Err(Error::ForcedBackendUnavailable("cpu".into()))
2199                }
2200                ForcedBackend::G2d => Err(Error::NotSupported(
2201                    "g2d does not support set_class_colors".into(),
2202                )),
2203                ForcedBackend::OpenGl => {
2204                    #[cfg(target_os = "linux")]
2205                    #[cfg(feature = "opengl")]
2206                    if let Some(opengl) = self.opengl.as_mut() {
2207                        return opengl.set_class_colors(colors);
2208                    }
2209                    Err(Error::ForcedBackendUnavailable("opengl".into()))
2210                }
2211            };
2212        }
2213
2214        // skip G2D as it doesn't support rendering to image
2215
2216        #[cfg(target_os = "linux")]
2217        #[cfg(feature = "opengl")]
2218        if let Some(opengl) = self.opengl.as_mut() {
2219            log::trace!("image started with opengl in {:?}", start.elapsed());
2220            match opengl.set_class_colors(colors) {
2221                Ok(_) => {
2222                    log::trace!("colors set with opengl in {:?}", start.elapsed());
2223                    return Ok(());
2224                }
2225                Err(e) => {
2226                    log::trace!("colors didn't set with opengl: {e:?}")
2227                }
2228            }
2229        }
2230        log::trace!("image started with cpu in {:?}", start.elapsed());
2231        if let Some(cpu) = self.cpu.as_mut() {
2232            match cpu.set_class_colors(colors) {
2233                Ok(_) => {
2234                    log::trace!("colors set with cpu in {:?}", start.elapsed());
2235                    return Ok(());
2236                }
2237                Err(e) => {
2238                    log::trace!("colors didn't set with cpu: {e:?}");
2239                    return Err(e);
2240                }
2241            }
2242        }
2243        Err(Error::NoConverter)
2244    }
2245}
2246
2247// ---------------------------------------------------------------------------
2248// Image loading / saving helpers
2249// ---------------------------------------------------------------------------
2250
2251/// Read EXIF orientation from raw EXIF bytes and return (Rotation, Flip).
2252fn read_exif_orientation(exif_bytes: &[u8]) -> (Rotation, Flip) {
2253    let exifreader = exif::Reader::new();
2254    let Ok(exif_) = exifreader.read_raw(exif_bytes.to_vec()) else {
2255        return (Rotation::None, Flip::None);
2256    };
2257    let Some(orientation) = exif_.get_field(exif::Tag::Orientation, exif::In::PRIMARY) else {
2258        return (Rotation::None, Flip::None);
2259    };
2260    match orientation.value.get_uint(0) {
2261        Some(1) => (Rotation::None, Flip::None),
2262        Some(2) => (Rotation::None, Flip::Horizontal),
2263        Some(3) => (Rotation::Rotate180, Flip::None),
2264        Some(4) => (Rotation::Rotate180, Flip::Horizontal),
2265        Some(5) => (Rotation::Clockwise90, Flip::Horizontal),
2266        Some(6) => (Rotation::Clockwise90, Flip::None),
2267        Some(7) => (Rotation::CounterClockwise90, Flip::Horizontal),
2268        Some(8) => (Rotation::CounterClockwise90, Flip::None),
2269        Some(v) => {
2270            log::warn!("broken orientation EXIF value: {v}");
2271            (Rotation::None, Flip::None)
2272        }
2273        None => (Rotation::None, Flip::None),
2274    }
2275}
2276
2277/// Map a [`PixelFormat`] to the zune-jpeg `ColorSpace` for decoding.
2278/// Returns `None` for formats that the JPEG decoder cannot output directly.
2279fn pixelfmt_to_colorspace(fmt: PixelFormat) -> Option<ColorSpace> {
2280    match fmt {
2281        PixelFormat::Rgb => Some(ColorSpace::RGB),
2282        PixelFormat::Rgba => Some(ColorSpace::RGBA),
2283        PixelFormat::Grey => Some(ColorSpace::Luma),
2284        _ => None,
2285    }
2286}
2287
2288/// Map a zune-jpeg `ColorSpace` to a [`PixelFormat`].
2289fn colorspace_to_pixelfmt(cs: ColorSpace) -> Option<PixelFormat> {
2290    match cs {
2291        ColorSpace::RGB => Some(PixelFormat::Rgb),
2292        ColorSpace::RGBA => Some(PixelFormat::Rgba),
2293        ColorSpace::Luma => Some(PixelFormat::Grey),
2294        _ => None,
2295    }
2296}
2297
/// Load a JPEG image from raw bytes and return a [`TensorDyn`].
///
/// * `image`  — raw JPEG file bytes.
/// * `format` — desired output pixel format; `None` decodes to RGB.
/// * `memory` — tensor memory domain for the result; `None` uses the
///   platform default.
///
/// EXIF orientation, when present, is applied via a CPU rotate/flip pass
/// before the final tensor is produced.
// TODO: evaluate replacing zune-jpeg with libjpeg-turbo (via `turbojpeg`
// crate). `tjDecompress2` accepts an explicit `pitch` parameter, which
// would let us decode directly into a pitch-padded DMA-BUF and drop the
// Mem-staging + row-copy introduced below for Mali G310 pitch alignment.
// Dropping zune-jpeg also gets us a 2-4× faster SIMD decode on AArch64.
// Blockers: adds a C dep (mozjpeg-sys / libturbojpeg) to the build;
// cross-compilation story needs validating with zigbuild.
fn load_jpeg(
    image: &[u8],
    format: Option<PixelFormat>,
    memory: Option<TensorMemory>,
) -> Result<TensorDyn> {
    // Ask the decoder to emit the requested colorspace directly when it
    // can; unsupported target formats fail fast here.
    let colour = match format {
        Some(f) => pixelfmt_to_colorspace(f)
            .ok_or_else(|| Error::NotSupported(format!("Unsupported image format {f:?}")))?,
        None => ColorSpace::RGB,
    };
    let options = DecoderOptions::default().jpeg_set_out_colorspace(colour);
    let mut decoder = JpegDecoder::new_with_options(image, options);
    decoder.decode_headers()?;

    let image_info = decoder.info().ok_or(Error::Internal(
        "JPEG did not return decoded image info".to_string(),
    ))?;

    // `converted_cs` is the colorspace the decoder reports it will
    // actually emit — treat it as the source of truth, not the request.
    let converted_cs = decoder
        .get_output_colorspace()
        .ok_or(Error::Internal("No output colorspace".to_string()))?;

    let converted_fmt = colorspace_to_pixelfmt(converted_cs).ok_or(Error::NotSupported(
        "Unsupported JPEG decoder output".to_string(),
    ))?;

    let dest_fmt = format.unwrap_or(converted_fmt);

    let (rotation, flip) = decoder
        .exif()
        .map(|x| read_exif_orientation(x))
        .unwrap_or((Rotation::None, Flip::None));

    let w = image_info.width as usize;
    let h = image_info.height as usize;

    if (rotation, flip) == (Rotation::None, Flip::None) {
        // When caller wants DMA and the natural pitch would be rejected by
        // the GPU's DMA-BUF import (Mali G310 needs 64-byte pitch), decode
        // into a tightly-packed Mem staging buffer and row-copy into a
        // pitch-padded DMA tensor. zune-jpeg has no stride-aware decode,
        // so the Mem intermediate is unavoidable until we swap decoders
        // (see the TODO above this function).
        #[cfg(target_os = "linux")]
        if let Some(aligned_pitch) = padded_dma_pitch_for(dest_fmt, w, &memory) {
            let staging = Tensor::<u8>::image(w, h, converted_fmt, Some(TensorMemory::Mem))?;
            decoder.decode_into(&mut staging.map()?)?;
            // Convert to the destination format in Mem first (if needed),
            // then row-copy into the padded DMA buffer.
            let packed = if converted_fmt != dest_fmt {
                let mut tmp = Tensor::<u8>::image(w, h, dest_fmt, Some(TensorMemory::Mem))?;
                CPUProcessor::convert_format_pf(&staging, &mut tmp, converted_fmt, dest_fmt)?;
                tmp
            } else {
                staging
            };
            let mut dma = Tensor::<u8>::image_with_stride(
                w,
                h,
                dest_fmt,
                aligned_pitch,
                Some(TensorMemory::Dma),
            )?;
            copy_packed_to_padded_dma(&packed, &mut dma)?;
            return Ok(TensorDyn::from(dma));
        }

        let mut img = Tensor::<u8>::image(w, h, dest_fmt, memory)?;

        // Decoder output differs from the requested format: decode into a
        // Mem staging tensor and convert on the CPU.
        if converted_fmt != dest_fmt {
            let tmp = Tensor::<u8>::image(w, h, converted_fmt, Some(TensorMemory::Mem))?;
            decoder.decode_into(&mut tmp.map()?)?;
            CPUProcessor::convert_format_pf(&tmp, &mut img, converted_fmt, dest_fmt)?;
            return Ok(TensorDyn::from(img));
        }
        // Fast path: decode straight into the destination tensor.
        decoder.decode_into(&mut img.map()?)?;
        return Ok(TensorDyn::from(img));
    }

    // EXIF says the image is rotated/flipped: decode (and convert) into a
    // Mem tensor, then let the rotate/flip pass place the result in the
    // requested memory domain.
    let mut tmp = Tensor::<u8>::image(w, h, dest_fmt, Some(TensorMemory::Mem))?;

    if converted_fmt != dest_fmt {
        let tmp2 = Tensor::<u8>::image(w, h, converted_fmt, Some(TensorMemory::Mem))?;
        decoder.decode_into(&mut tmp2.map()?)?;
        CPUProcessor::convert_format_pf(&tmp2, &mut tmp, converted_fmt, dest_fmt)?;
    } else {
        decoder.decode_into(&mut tmp.map()?)?;
    }

    rotate_flip_to_dyn(&tmp, dest_fmt, rotation, flip, memory)
}
2395
/// Load a PNG image from raw bytes and return a [`TensorDyn`].
///
/// * `image`  — raw PNG file bytes.
/// * `format` — desired output pixel format; `None` defaults to RGB.
/// * `memory` — tensor memory domain for the result; `None` uses the
///   platform default.
///
/// Supports the same destination formats as the CPU backend's format
/// converter (`Rgb`, `Rgba`, `Bgra`, `Grey`, etc.). Earlier revisions only
/// accepted `Rgb`/`Rgba`; greyscale PNGs decoded to `Grey` now work through
/// the same pitch-aware DMA path as JPEG. LumaA PNGs are normalised to
/// `Grey` inline (alpha stripped) before going through the shared CPU
/// converter.
fn load_png(
    image: &[u8],
    format: Option<PixelFormat>,
    memory: Option<TensorMemory>,
) -> Result<TensorDyn> {
    let dest_fmt = format.unwrap_or(PixelFormat::Rgb);

    // Decode with add_alpha=false — any alpha upgrade/strip happens via
    // the CPU converter downstream so we share one code path with
    // load_jpeg instead of duplicating promotion logic here.
    let options = DecoderOptions::default()
        .png_set_add_alpha_channel(false)
        .png_set_decode_animated(false);
    let mut decoder = PngDecoder::new_with_options(image, options);
    decoder.decode_headers()?;

    // Pull size + EXIF orientation out of the header info in one scope so
    // the decoder borrow ends before we decode.
    let (width, height, rotation, flip) = {
        let info = decoder
            .get_info()
            .ok_or_else(|| Error::Internal("PNG did not return decoded image info".to_string()))?;
        let (rot, flip) = info
            .exif
            .as_ref()
            .map(|x| read_exif_orientation(x))
            .unwrap_or((Rotation::None, Flip::None));
        (info.width, info.height, rot, flip)
    };

    // Map the decoder's native colorspace onto a PixelFormat that the CPU
    // converter understands. LumaA has no direct PixelFormat variant so we
    // decode as LumaA and then strip alpha inline to get Grey.
    let decoder_cs = decoder
        .get_colorspace()
        .ok_or_else(|| Error::Internal("PNG decoder did not return colorspace".to_string()))?;
    let (decoded_fmt, strip_luma_alpha) = match decoder_cs {
        ColorSpace::Luma => (PixelFormat::Grey, false),
        ColorSpace::LumaA => (PixelFormat::Grey, true),
        ColorSpace::RGB => (PixelFormat::Rgb, false),
        ColorSpace::RGBA => (PixelFormat::Rgba, false),
        other => {
            return Err(Error::NotSupported(format!(
                "PNG decoder produced unsupported colorspace {other:?}"
            )));
        }
    };

    // Reject destinations the CPU converter can't reach from the decoder's
    // output so callers get a precise error rather than a downstream map
    // failure. (`Grey → Grey` / `Rgb → Rgb` / etc. are identity pairs and
    // are always valid.)
    if decoded_fmt != dest_fmt
        && !crate::cpu::CPUProcessor::support_conversion_pf(decoded_fmt, dest_fmt)
    {
        return Err(Error::NotSupported(format!(
            "load_png: cannot convert decoder output {decoded_fmt:?} to {dest_fmt:?}"
        )));
    }

    // Decode into a Mem staging buffer in the decoder's native format. For
    // LumaA we allocate an extra byte-pair-per-pixel buffer since our Tensor
    // API only knows 1-channel (Grey); after decode we compact to Grey.
    let staging = if strip_luma_alpha {
        // LumaA is 2 bytes per pixel in the raw decode; allocate a flat
        // Tensor large enough to hold it, then compact to Grey in place.
        let raw = Tensor::<u8>::new(&[height, width, 2], Some(TensorMemory::Mem), None)?;
        decoder.decode_into(&mut raw.map()?)?;
        let grey = Tensor::<u8>::image(width, height, PixelFormat::Grey, Some(TensorMemory::Mem))?;
        {
            let raw_map = raw.map()?;
            let mut grey_map = grey.map()?;
            let raw_bytes: &[u8] = &raw_map;
            let grey_bytes: &mut [u8] = &mut grey_map;
            // Keep the luma byte of each (luma, alpha) pair; drop alpha.
            for (pair, out) in raw_bytes.chunks_exact(2).zip(grey_bytes.iter_mut()) {
                *out = pair[0];
            }
        }
        grey
    } else {
        let staging = Tensor::<u8>::image(width, height, decoded_fmt, Some(TensorMemory::Mem))?;
        decoder.decode_into(&mut staging.map()?)?;
        staging
    };

    // Optional CPU format conversion before the final memory placement.
    let packed = if decoded_fmt != dest_fmt {
        let mut tmp = Tensor::<u8>::image(width, height, dest_fmt, Some(TensorMemory::Mem))?;
        CPUProcessor::convert_format_pf(&staging, &mut tmp, decoded_fmt, dest_fmt)?;
        tmp
    } else {
        staging
    };

    if (rotation, flip) != (Rotation::None, Flip::None) {
        return rotate_flip_to_dyn(&packed, dest_fmt, rotation, flip, memory);
    }

    // Final placement. When the caller wants DMA and the natural pitch
    // would be rejected by the GPU's DMA-BUF import (see
    // `padded_dma_pitch_for`), allocate a pitch-padded DMA tensor and
    // row-copy. Otherwise allocate in the requested memory domain and
    // linear-copy — or, when the caller asked for Mem, just return the
    // staging tensor directly.
    #[cfg(target_os = "linux")]
    if let Some(aligned_pitch) = padded_dma_pitch_for(dest_fmt, width, &memory) {
        let mut dma = Tensor::<u8>::image_with_stride(
            width,
            height,
            dest_fmt,
            aligned_pitch,
            Some(TensorMemory::Dma),
        )?;
        copy_packed_to_padded_dma(&packed, &mut dma)?;
        return Ok(TensorDyn::from(dma));
    }

    if matches!(memory, Some(TensorMemory::Mem)) {
        return Ok(TensorDyn::from(packed));
    }
    // DMA (default on Linux) or Shm with naturally-aligned pitch.
    let out = Tensor::<u8>::image(width, height, dest_fmt, memory)?;
    {
        let src_map = packed.map()?;
        let mut dst_map = out.map()?;
        let src_bytes: &[u8] = &src_map;
        let dst_bytes: &mut [u8] = &mut dst_map;
        dst_bytes.copy_from_slice(src_bytes);
    }
    Ok(TensorDyn::from(out))
}
2533
2534/// Load an image from raw bytes (JPEG or PNG) and return a [`TensorDyn`].
2535///
2536/// The optional `format` specifies the desired output pixel format (e.g.,
2537/// [`PixelFormat::Rgb`], [`PixelFormat::Rgba`]); if `None`, the native
2538/// format of the file is used (typically RGB for JPEG).
2539///
2540/// # Examples
2541/// ```rust
2542/// use edgefirst_image::load_image;
2543/// use edgefirst_tensor::PixelFormat;
2544/// # fn main() -> Result<(), edgefirst_image::Error> {
2545/// let jpeg = include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/../../testdata/zidane.jpg"));
2546/// let img = load_image(jpeg, Some(PixelFormat::Rgb), None)?;
2547/// assert_eq!(img.width(), Some(1280));
2548/// assert_eq!(img.height(), Some(720));
2549/// # Ok(())
2550/// # }
2551/// ```
2552pub fn load_image(
2553    image: &[u8],
2554    format: Option<PixelFormat>,
2555    memory: Option<TensorMemory>,
2556) -> Result<TensorDyn> {
2557    if let Ok(i) = load_jpeg(image, format, memory) {
2558        return Ok(i);
2559    }
2560    if let Ok(i) = load_png(image, format, memory) {
2561        return Ok(i);
2562    }
2563    Err(Error::NotSupported(
2564        "Could not decode as jpeg or png".to_string(),
2565    ))
2566}
2567
2568/// Save a [`TensorDyn`] image as a JPEG file.
2569///
2570/// Only packed RGB and RGBA formats are supported.
2571pub fn save_jpeg(tensor: &TensorDyn, path: impl AsRef<std::path::Path>, quality: u8) -> Result<()> {
2572    let t = tensor.as_u8().ok_or(Error::UnsupportedFormat(
2573        "save_jpeg requires u8 tensor".to_string(),
2574    ))?;
2575    let fmt = t.format().ok_or(Error::NotAnImage)?;
2576    if fmt.layout() != PixelLayout::Packed {
2577        return Err(Error::NotImplemented(
2578            "Saving planar images is not supported".to_string(),
2579        ));
2580    }
2581
2582    let colour = match fmt {
2583        PixelFormat::Rgb => jpeg_encoder::ColorType::Rgb,
2584        PixelFormat::Rgba => jpeg_encoder::ColorType::Rgba,
2585        _ => {
2586            return Err(Error::NotImplemented(
2587                "Unsupported image format for saving".to_string(),
2588            ));
2589        }
2590    };
2591
2592    let w = t.width().ok_or(Error::NotAnImage)?;
2593    let h = t.height().ok_or(Error::NotAnImage)?;
2594    let encoder = jpeg_encoder::Encoder::new_file(path, quality)?;
2595    let tensor_map = t.map()?;
2596
2597    encoder.encode(&tensor_map, w as u16, h as u16, colour)?;
2598
2599    Ok(())
2600}
2601
/// Scope timer: records its creation instant and, via its `Drop` impl,
/// logs the elapsed duration under `name` when it goes out of scope.
pub(crate) struct FunctionTimer<T: Display> {
    name: T,
    start: std::time::Instant,
}
2606
2607impl<T: Display> FunctionTimer<T> {
2608    pub fn new(name: T) -> Self {
2609        Self {
2610            name,
2611            start: std::time::Instant::now(),
2612        }
2613    }
2614}
2615
impl<T: Display> Drop for FunctionTimer<T> {
    fn drop(&mut self) {
        // Trace level: timing output is free unless explicitly enabled.
        log::trace!("{} elapsed: {:?}", self.name, self.start.elapsed())
    }
}
2621
/// Default overlay palette: 20 RGBA colors with components normalised to
/// 0..1 and alpha fixed at 0.7. Converted to 8-bit as `DEFAULT_COLORS_U8`.
const DEFAULT_COLORS: [[f32; 4]; 20] = [
    [0., 1., 0., 0.7],
    [1., 0.5568628, 0., 0.7],
    [0.25882353, 0.15294118, 0.13333333, 0.7],
    [0.8, 0.7647059, 0.78039216, 0.7],
    [0.3137255, 0.3137255, 0.3137255, 0.7],
    [0.1411765, 0.3098039, 0.1215686, 0.7],
    [1., 0.95686275, 0.5137255, 0.7],
    [0.3529412, 0.32156863, 0., 0.7],
    [0.4235294, 0.6235294, 0.6509804, 0.7],
    [0.5098039, 0.5098039, 0.7294118, 0.7],
    [0.00784314, 0.18823529, 0.29411765, 0.7],
    [0.0, 0.2706, 1.0, 0.7],
    [0.0, 0.0, 0.0, 0.7],
    [0.0, 0.5, 0.0, 0.7],
    [1.0, 0.0, 0.0, 0.7],
    [0.0, 0.0, 1.0, 0.7],
    [1.0, 0.5, 0.5, 0.7],
    [0.1333, 0.5451, 0.1333, 0.7],
    [0.1176, 0.4118, 0.8235, 0.7],
    [1., 1., 1., 0.7],
];
2644
2645const fn denorm<const M: usize, const N: usize>(a: [[f32; M]; N]) -> [[u8; M]; N] {
2646    let mut result = [[0; M]; N];
2647    let mut i = 0;
2648    while i < N {
2649        let mut j = 0;
2650        while j < M {
2651            result[i][j] = (a[i][j] * 255.0).round() as u8;
2652            j += 1;
2653        }
2654        i += 1;
2655    }
2656    result
2657}
2658
/// 8-bit form of `DEFAULT_COLORS`, computed at compile time.
const DEFAULT_COLORS_U8: [[u8; 4]; 20] = denorm(DEFAULT_COLORS);
2660
#[cfg(test)]
#[cfg_attr(coverage_nightly, coverage(off))]
mod alignment_tests {
    // Unit tests for the GPU pitch-alignment helpers
    // (`align_width_for_gpu_pitch`, `checked_num_integer_lcm`,
    // `primary_plane_bpp`) defined earlier in this file.
    use super::*;

    #[test]
    fn align_width_rgba8_common_widths() {
        // RGBA8 (bpp=4, lcm(64,4)=64, so width must round to multiple of 16 px).
        assert_eq!(align_width_for_gpu_pitch(640, 4), 640); // 2560 byte pitch — already aligned
        assert_eq!(align_width_for_gpu_pitch(1280, 4), 1280); // 5120
        assert_eq!(align_width_for_gpu_pitch(1920, 4), 1920); // 7680
        assert_eq!(align_width_for_gpu_pitch(3840, 4), 3840); // 15360
        // crowd.png case from the imx95 investigation:
        assert_eq!(align_width_for_gpu_pitch(3004, 4), 3008); // 12016 → 12032
        assert_eq!(align_width_for_gpu_pitch(3000, 4), 3008); // 12000 → 12032
        assert_eq!(align_width_for_gpu_pitch(17, 4), 32); // 68 → 128
        assert_eq!(align_width_for_gpu_pitch(1, 4), 16); // 4 → 64
    }

    #[test]
    fn align_width_rgb888_packed() {
        // RGB888 (bpp=3, lcm(64,3)=192, so width must round to multiple of 64 px).
        assert_eq!(align_width_for_gpu_pitch(64, 3), 64); // 192 byte pitch
        assert_eq!(align_width_for_gpu_pitch(640, 3), 640); // 1920
        assert_eq!(align_width_for_gpu_pitch(1, 3), 64); // 3 → 192
        assert_eq!(align_width_for_gpu_pitch(65, 3), 128); // 195 → 384
        // Verify the rounded width × bpp is a clean multiple of the LCM.
        for w in [3004usize, 1281, 100, 17] {
            let padded = align_width_for_gpu_pitch(w, 3);
            assert!(padded >= w);
            assert_eq!((padded * 3) % 64, 0);
            assert_eq!((padded * 3) % 3, 0);
        }
    }

    #[test]
    fn align_width_grey_u8() {
        // Grey (bpp=1, lcm(64,1)=64, so width must round to multiple of 64 px).
        assert_eq!(align_width_for_gpu_pitch(64, 1), 64);
        assert_eq!(align_width_for_gpu_pitch(640, 1), 640);
        assert_eq!(align_width_for_gpu_pitch(1, 1), 64);
        assert_eq!(align_width_for_gpu_pitch(65, 1), 128);
    }

    #[test]
    fn align_width_zero_inputs() {
        // Degenerate inputs must pass through unchanged, not panic.
        assert_eq!(align_width_for_gpu_pitch(0, 4), 0);
        assert_eq!(align_width_for_gpu_pitch(640, 0), 640);
    }

    #[test]
    fn align_width_never_returns_smaller_than_input() {
        // Spot-check the "returned width >= input width" contract across a
        // range of values that would previously have hit `width * bpp`
        // overflow paths.
        for &bpp in &[1usize, 2, 3, 4, 8] {
            for &w in &[
                1usize,
                17,
                64,
                65,
                100,
                1280,
                1281,
                1920,
                3004,
                3072,
                3840,
                usize::MAX / 8,
                usize::MAX / 4,
                usize::MAX / 2,
                usize::MAX - 1,
                usize::MAX,
            ] {
                let aligned = align_width_for_gpu_pitch(w, bpp);
                assert!(
                    aligned >= w,
                    "align_width_for_gpu_pitch({w}, {bpp}) = {aligned} < {w}"
                );
            }
        }
    }

    #[test]
    fn align_width_overflow_returns_unaligned_not_smaller() {
        // For width values close to usize::MAX, padding up would wrap. The
        // function must return the original width rather than wrapping or
        // panicking. A pre-aligned width round-trips unchanged even at the
        // extreme.
        let aligned_extreme = usize::MAX - 15; // 16-pixel boundary for RGBA8
        assert_eq!(
            align_width_for_gpu_pitch(aligned_extreme, 4),
            aligned_extreme
        );
        // A misaligned extreme value cannot be rounded up — the function
        // returns the original.
        let misaligned_extreme = usize::MAX - 1;
        let result = align_width_for_gpu_pitch(misaligned_extreme, 4);
        assert!(
            result == misaligned_extreme || result >= misaligned_extreme,
            "extreme misaligned width must not be rounded down to {result}"
        );
    }

    #[test]
    fn checked_lcm_basic_and_overflow() {
        // Basic LCM identities plus the zero convention (lcm(x, 0) == 0).
        assert_eq!(checked_num_integer_lcm(64, 4), Some(64));
        assert_eq!(checked_num_integer_lcm(64, 3), Some(192));
        assert_eq!(checked_num_integer_lcm(64, 1), Some(64));
        assert_eq!(checked_num_integer_lcm(0, 4), Some(0));
        assert_eq!(checked_num_integer_lcm(64, 0), Some(0));
        // Coprime values whose product exceeds usize::MAX must return None.
        assert_eq!(
            checked_num_integer_lcm(usize::MAX, usize::MAX - 1),
            None,
            "coprime extreme values must overflow detect, not panic"
        );
    }

    #[test]
    fn primary_plane_bpp_known_formats() {
        // Packed formats use channels × elem_size.
        assert_eq!(primary_plane_bpp(PixelFormat::Rgba, 1), Some(4));
        assert_eq!(primary_plane_bpp(PixelFormat::Bgra, 1), Some(4));
        assert_eq!(primary_plane_bpp(PixelFormat::Rgb, 1), Some(3));
        assert_eq!(primary_plane_bpp(PixelFormat::Grey, 1), Some(1));
        // Semi-planar (NV12) reports the luma plane's bpp.
        assert_eq!(primary_plane_bpp(PixelFormat::Nv12, 1), Some(1));
    }
}
2791
2792#[cfg(test)]
2793#[cfg_attr(coverage_nightly, coverage(off))]
2794mod image_tests {
2795    use super::*;
2796    use crate::{CPUProcessor, Rotation};
2797    #[cfg(target_os = "linux")]
2798    use edgefirst_tensor::is_dma_available;
2799    use edgefirst_tensor::{TensorMapTrait, TensorMemory, TensorTrait};
2800    use image::buffer::ConvertBuffer;
2801
2802    /// Test helper: call `ImageProcessorTrait::convert()` on two `TensorDyn`s
2803    /// by going through the `TensorDyn` API.
2804    ///
2805    /// Returns the `(src_image, dst_image)` reconstructed from the TensorDyn
2806    /// round-trip so the caller can feed them to `compare_images` etc.
2807    fn convert_img(
2808        proc: &mut dyn ImageProcessorTrait,
2809        src: TensorDyn,
2810        dst: TensorDyn,
2811        rotation: Rotation,
2812        flip: Flip,
2813        crop: Crop,
2814    ) -> (Result<()>, TensorDyn, TensorDyn) {
2815        let src_fourcc = src.format().unwrap();
2816        let dst_fourcc = dst.format().unwrap();
2817        let src_dyn = src;
2818        let mut dst_dyn = dst;
2819        let result = proc.convert(&src_dyn, &mut dst_dyn, rotation, flip, crop);
2820        let src_back = {
2821            let mut __t = src_dyn.into_u8().unwrap();
2822            __t.set_format(src_fourcc).unwrap();
2823            TensorDyn::from(__t)
2824        };
2825        let dst_back = {
2826            let mut __t = dst_dyn.into_u8().unwrap();
2827            __t.set_format(dst_fourcc).unwrap();
2828            TensorDyn::from(__t)
2829        };
2830        (result, src_back, dst_back)
2831    }
2832
    // Runs once before any test in the binary: install env_logger so the
    // backends' `log::trace!` output is visible via RUST_LOG.
    #[ctor::ctor]
    fn init() {
        env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
    }
2837
    // Expands to the short name of the enclosing function (module path
    // stripped), obtained via `type_name` of a nested fn. Used to label
    // image-comparison output in the tests below.
    macro_rules! function {
        () => {{
            fn f() {}
            fn type_name_of<T>(_: T) -> &'static str {
                std::any::type_name::<T>()
            }
            let name = type_name_of(f);

            // Find and cut the rest of the path
            match &name[..name.len() - 3].rfind(':') {
                Some(pos) => &name[pos + 1..name.len() - 3],
                None => &name[..name.len() - 3],
            }
        }};
    }
2853
    // `Crop::check_crop_dyn` must reject rectangles that exceed the source
    // or destination bounds, with an error message naming the side(s) at
    // fault, and accept in-bounds rectangles.
    #[test]
    fn test_invalid_crop() {
        let src = TensorDyn::image(100, 100, PixelFormat::Rgb, DType::U8, None).unwrap();
        let dst = TensorDyn::image(100, 100, PixelFormat::Rgb, DType::U8, None).unwrap();

        // Both rects out of bounds (src 50+60 > 100, dst 150 > 100).
        let crop = Crop::new()
            .with_src_rect(Some(Rect::new(50, 50, 60, 60)))
            .with_dst_rect(Some(Rect::new(0, 0, 150, 150)));

        let result = crop.check_crop_dyn(&src, &dst);
        assert!(matches!(
            result,
            Err(Error::CropInvalid(e)) if e.starts_with("Dest and Src crop invalid")
        ));

        // Only the destination rect is out of bounds.
        let crop = crop.with_src_rect(Some(Rect::new(0, 0, 10, 10)));
        let result = crop.check_crop_dyn(&src, &dst);
        assert!(matches!(
            result,
            Err(Error::CropInvalid(e)) if e.starts_with("Dest crop invalid")
        ));

        // Only the source rect is out of bounds.
        let crop = crop
            .with_src_rect(Some(Rect::new(50, 50, 60, 60)))
            .with_dst_rect(Some(Rect::new(0, 0, 50, 50)));
        let result = crop.check_crop_dyn(&src, &dst);
        assert!(matches!(
            result,
            Err(Error::CropInvalid(e)) if e.starts_with("Src crop invalid")
        ));

        // 50+50 == 100 touches the edge exactly — still valid.
        let crop = crop.with_src_rect(Some(Rect::new(50, 50, 50, 50)));

        let result = crop.check_crop_dyn(&src, &dst);
        assert!(result.is_ok());
    }
2890
    // `Tensor::set_format` must reject tensors whose rank or channel count
    // does not match the requested pixel format.
    #[test]
    fn test_invalid_tensor_format() -> Result<(), Error> {
        // 4D tensor cannot be set to a 3-channel pixel format
        let mut tensor = Tensor::<u8>::new(&[720, 1280, 4, 1], None, None)?;
        let result = tensor.set_format(PixelFormat::Rgb);
        assert!(result.is_err(), "4D tensor should reject set_format");

        // Tensor with wrong channel count for the format
        let mut tensor = Tensor::<u8>::new(&[720, 1280, 4], None, None)?;
        let result = tensor.set_format(PixelFormat::Rgb);
        assert!(result.is_err(), "4-channel tensor should reject RGB format");

        Ok(())
    }
2905
    // Bytes that are neither JPEG nor PNG must produce the combined
    // "could not decode" error from `load_image`.
    #[test]
    fn test_invalid_image_file() -> Result<(), Error> {
        let result = crate::load_image(&[123; 5000], None, None);
        assert!(matches!(
            result,
            Err(Error::NotSupported(e)) if e == "Could not decode as jpeg or png"));

        Ok(())
    }
2915
    // Requesting an unsupported output format (Yuyv) on undecodable bytes
    // still surfaces as the combined decode error — per-decoder errors are
    // collapsed by `load_image`.
    #[test]
    fn test_invalid_jpeg_format() -> Result<(), Error> {
        let result = crate::load_image(&[123; 5000], Some(PixelFormat::Yuyv), None);
        assert!(matches!(
            result,
            Err(Error::NotSupported(e)) if e == "Could not decode as jpeg or png"));

        Ok(())
    }
2925
2926    #[test]
2927    fn test_load_resize_save() {
2928        let file = include_bytes!(concat!(
2929            env!("CARGO_MANIFEST_DIR"),
2930            "/../../testdata/zidane.jpg"
2931        ));
2932        let img = crate::load_image(file, Some(PixelFormat::Rgba), None).unwrap();
2933        assert_eq!(img.width(), Some(1280));
2934        assert_eq!(img.height(), Some(720));
2935
2936        let dst = TensorDyn::image(640, 360, PixelFormat::Rgba, DType::U8, None).unwrap();
2937        let mut converter = CPUProcessor::new();
2938        let (result, _img, dst) = convert_img(
2939            &mut converter,
2940            img,
2941            dst,
2942            Rotation::None,
2943            Flip::None,
2944            Crop::no_crop(),
2945        );
2946        result.unwrap();
2947        assert_eq!(dst.width(), Some(640));
2948        assert_eq!(dst.height(), Some(360));
2949
2950        crate::save_jpeg(&dst, "zidane_resized.jpg", 80).unwrap();
2951
2952        let file = std::fs::read("zidane_resized.jpg").unwrap();
2953        let img = crate::load_image(&file, None, None).unwrap();
2954        assert_eq!(img.width(), Some(640));
2955        assert_eq!(img.height(), Some(360));
2956        assert_eq!(img.format().unwrap(), PixelFormat::Rgb);
2957    }
2958
2959    #[test]
2960    fn test_from_tensor_planar() -> Result<(), Error> {
2961        let mut tensor = Tensor::new(&[3, 720, 1280], None, None)?;
2962        tensor.map()?.copy_from_slice(include_bytes!(concat!(
2963            env!("CARGO_MANIFEST_DIR"),
2964            "/../../testdata/camera720p.8bps"
2965        )));
2966        let planar = {
2967            tensor
2968                .set_format(PixelFormat::PlanarRgb)
2969                .map_err(|e| crate::Error::Internal(e.to_string()))?;
2970            TensorDyn::from(tensor)
2971        };
2972
2973        let rbga = load_bytes_to_tensor(
2974            1280,
2975            720,
2976            PixelFormat::Rgba,
2977            None,
2978            include_bytes!(concat!(
2979                env!("CARGO_MANIFEST_DIR"),
2980                "/../../testdata/camera720p.rgba"
2981            )),
2982        )?;
2983        compare_images_convert_to_rgb(&planar, &rbga, 0.98, function!());
2984
2985        Ok(())
2986    }
2987
    #[test]
    fn test_from_tensor_invalid_format() {
        // PixelFormat::from_fourcc returns None for unknown FourCC codes.
        // Since there's no "TEST" pixel format, this validates graceful handling.
        assert!(PixelFormat::from_fourcc(u32::from_le_bytes(*b"TEST")).is_none());
    }
2994
2995    #[test]
2996    #[should_panic(expected = "Failed to save planar RGB image")]
2997    fn test_save_planar() {
2998        let planar_img = load_bytes_to_tensor(
2999            1280,
3000            720,
3001            PixelFormat::PlanarRgb,
3002            None,
3003            include_bytes!(concat!(
3004                env!("CARGO_MANIFEST_DIR"),
3005                "/../../testdata/camera720p.8bps"
3006            )),
3007        )
3008        .unwrap();
3009
3010        let save_path = "/tmp/planar_rgb.jpg";
3011        crate::save_jpeg(&planar_img, save_path, 90).expect("Failed to save planar RGB image");
3012    }
3013
3014    #[test]
3015    #[should_panic(expected = "Failed to save YUYV image")]
3016    fn test_save_yuyv() {
3017        let planar_img = load_bytes_to_tensor(
3018            1280,
3019            720,
3020            PixelFormat::Yuyv,
3021            None,
3022            include_bytes!(concat!(
3023                env!("CARGO_MANIFEST_DIR"),
3024                "/../../testdata/camera720p.yuyv"
3025            )),
3026        )
3027        .unwrap();
3028
3029        let save_path = "/tmp/yuyv.jpg";
3030        crate::save_jpeg(&planar_img, save_path, 90).expect("Failed to save YUYV image");
3031    }
3032
3033    #[test]
3034    fn test_rotation_angle() {
3035        assert_eq!(Rotation::from_degrees_clockwise(0), Rotation::None);
3036        assert_eq!(Rotation::from_degrees_clockwise(90), Rotation::Clockwise90);
3037        assert_eq!(Rotation::from_degrees_clockwise(180), Rotation::Rotate180);
3038        assert_eq!(
3039            Rotation::from_degrees_clockwise(270),
3040            Rotation::CounterClockwise90
3041        );
3042        assert_eq!(Rotation::from_degrees_clockwise(360), Rotation::None);
3043        assert_eq!(Rotation::from_degrees_clockwise(450), Rotation::Clockwise90);
3044        assert_eq!(Rotation::from_degrees_clockwise(540), Rotation::Rotate180);
3045        assert_eq!(
3046            Rotation::from_degrees_clockwise(630),
3047            Rotation::CounterClockwise90
3048        );
3049    }
3050
    #[test]
    #[should_panic(expected = "rotation angle is not a multiple of 90")]
    fn test_rotation_angle_panic() {
        // An angle that is not a multiple of 90 degrees panics rather
        // than being rounded to the nearest cardinal rotation.
        Rotation::from_degrees_clockwise(361);
    }
3056
    #[test]
    fn test_disable_env_var() -> Result<(), Error> {
        // EDGEFIRST_FORCE_BACKEND takes precedence over EDGEFIRST_DISABLE_*,
        // so clear it for the duration of this test to avoid races with
        // test_force_backend_cpu running in parallel.
        let saved_force = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
        unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") };

        // Disabling G2D must leave the processor without a g2d backend.
        // The original value is restored immediately after construction,
        // before the assert, to keep the mutation window minimal.
        #[cfg(target_os = "linux")]
        {
            let original = std::env::var("EDGEFIRST_DISABLE_G2D").ok();
            unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", "1") };
            let converter = ImageProcessor::new()?;
            match original {
                Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", s) },
                None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_G2D") },
            }
            assert!(converter.g2d.is_none());
        }

        // Same check for the OpenGL backend (only built with the feature).
        #[cfg(target_os = "linux")]
        #[cfg(feature = "opengl")]
        {
            let original = std::env::var("EDGEFIRST_DISABLE_GL").ok();
            unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", "1") };
            let converter = ImageProcessor::new()?;
            match original {
                Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", s) },
                None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_GL") },
            }
            assert!(converter.opengl.is_none());
        }

        // And for the CPU backend, which exists on every platform.
        let original = std::env::var("EDGEFIRST_DISABLE_CPU").ok();
        unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", "1") };
        let converter = ImageProcessor::new()?;
        match original {
            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", s) },
            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_CPU") },
        }
        assert!(converter.cpu.is_none());

        // With all three backends disabled at once, construction still
        // succeeds but any conversion must fail with NoConverter.
        let original_cpu = std::env::var("EDGEFIRST_DISABLE_CPU").ok();
        unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", "1") };
        let original_gl = std::env::var("EDGEFIRST_DISABLE_GL").ok();
        unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", "1") };
        let original_g2d = std::env::var("EDGEFIRST_DISABLE_G2D").ok();
        unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", "1") };
        let mut converter = ImageProcessor::new()?;

        let src = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None)?;
        let dst = TensorDyn::image(640, 360, PixelFormat::Rgba, DType::U8, None)?;
        let (result, _src, _dst) = convert_img(
            &mut converter,
            src,
            dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        assert!(matches!(result, Err(Error::NoConverter)));

        // Restore every variable touched above, including the
        // force-backend value saved at the top.
        // NOTE(review): if an assert above fails, these restores are
        // skipped — confirm that is acceptable for parallel test runs.
        match original_cpu {
            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", s) },
            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_CPU") },
        }
        match original_gl {
            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", s) },
            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_GL") },
        }
        match original_g2d {
            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", s) },
            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_G2D") },
        }
        match saved_force {
            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
        }

        Ok(())
    }
3138
3139    #[test]
3140    fn test_unsupported_conversion() {
3141        let src = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
3142        let dst = TensorDyn::image(640, 360, PixelFormat::Nv12, DType::U8, None).unwrap();
3143        let mut converter = ImageProcessor::new().unwrap();
3144        let (result, _src, _dst) = convert_img(
3145            &mut converter,
3146            src,
3147            dst,
3148            Rotation::None,
3149            Flip::None,
3150            Crop::no_crop(),
3151        );
3152        log::debug!("result: {:?}", result);
3153        assert!(matches!(
3154            result,
3155            Err(Error::NotSupported(e)) if e.starts_with("Conversion from NV12 to NV12")
3156        ));
3157    }
3158
3159    #[test]
3160    fn test_load_grey() {
3161        let grey_img = crate::load_image(
3162            include_bytes!(concat!(
3163                env!("CARGO_MANIFEST_DIR"),
3164                "/../../testdata/grey.jpg"
3165            )),
3166            Some(PixelFormat::Rgba),
3167            None,
3168        )
3169        .unwrap();
3170
3171        let grey_but_rgb_img = crate::load_image(
3172            include_bytes!(concat!(
3173                env!("CARGO_MANIFEST_DIR"),
3174                "/../../testdata/grey-rgb.jpg"
3175            )),
3176            Some(PixelFormat::Rgba),
3177            None,
3178        )
3179        .unwrap();
3180
3181        compare_images(&grey_img, &grey_but_rgb_img, 0.99, function!());
3182    }
3183
3184    #[test]
3185    fn test_new_nv12() {
3186        let nv12 = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
3187        assert_eq!(nv12.height(), Some(720));
3188        assert_eq!(nv12.width(), Some(1280));
3189        assert_eq!(nv12.format().unwrap(), PixelFormat::Nv12);
3190        // PixelFormat::Nv12.channels() returns 1 (luma plane channel count)
3191        assert_eq!(nv12.format().unwrap().channels(), 1);
3192        assert!(nv12.format().is_some_and(
3193            |f| f.layout() == PixelLayout::Planar || f.layout() == PixelLayout::SemiPlanar
3194        ))
3195    }
3196
3197    #[test]
3198    #[cfg(target_os = "linux")]
3199    fn test_new_image_converter() {
3200        let dst_width = 640;
3201        let dst_height = 360;
3202        let file = include_bytes!(concat!(
3203            env!("CARGO_MANIFEST_DIR"),
3204            "/../../testdata/zidane.jpg"
3205        ))
3206        .to_vec();
3207        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
3208
3209        let mut converter = ImageProcessor::new().unwrap();
3210        let converter_dst = converter
3211            .create_image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None)
3212            .unwrap();
3213        let (result, src, converter_dst) = convert_img(
3214            &mut converter,
3215            src,
3216            converter_dst,
3217            Rotation::None,
3218            Flip::None,
3219            Crop::no_crop(),
3220        );
3221        result.unwrap();
3222
3223        let cpu_dst =
3224            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3225        let mut cpu_converter = CPUProcessor::new();
3226        let (result, _src, cpu_dst) = convert_img(
3227            &mut cpu_converter,
3228            src,
3229            cpu_dst,
3230            Rotation::None,
3231            Flip::None,
3232            Crop::no_crop(),
3233        );
3234        result.unwrap();
3235
3236        compare_images(&converter_dst, &cpu_dst, 0.98, function!());
3237    }
3238
3239    #[test]
3240    #[cfg(target_os = "linux")]
3241    fn test_create_image_dtype_i8() {
3242        let mut converter = ImageProcessor::new().unwrap();
3243
3244        // I8 image should allocate successfully via create_image
3245        let dst = converter
3246            .create_image(320, 240, PixelFormat::Rgb, DType::I8, None)
3247            .unwrap();
3248        assert_eq!(dst.dtype(), DType::I8);
3249        assert!(dst.width() == Some(320));
3250        assert!(dst.height() == Some(240));
3251        assert_eq!(dst.format(), Some(PixelFormat::Rgb));
3252
3253        // U8 for comparison
3254        let dst_u8 = converter
3255            .create_image(320, 240, PixelFormat::Rgb, DType::U8, None)
3256            .unwrap();
3257        assert_eq!(dst_u8.dtype(), DType::U8);
3258
3259        // Convert into I8 dst should succeed
3260        let file = include_bytes!(concat!(
3261            env!("CARGO_MANIFEST_DIR"),
3262            "/../../testdata/zidane.jpg"
3263        ))
3264        .to_vec();
3265        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
3266        let mut dst_i8 = converter
3267            .create_image(320, 240, PixelFormat::Rgb, DType::I8, None)
3268            .unwrap();
3269        converter
3270            .convert(
3271                &src,
3272                &mut dst_i8,
3273                Rotation::None,
3274                Flip::None,
3275                Crop::no_crop(),
3276            )
3277            .unwrap();
3278    }
3279
3280    #[test]
3281    #[cfg(target_os = "linux")]
3282    fn test_create_image_nv12_dma_non_aligned_width() {
3283        // Regression for C2: create_image must not apply stride padding to
3284        // non-packed formats. NV12 is semi-planar (PixelLayout::SemiPlanar),
3285        // so the try_dma path should fall through to the plain
3286        // TensorDyn::image allocation for any width, regardless of the
3287        // 64-byte GPU pitch alignment.
3288        let converter = ImageProcessor::new().unwrap();
3289
3290        // 100 is intentionally not a multiple of 64 (the Mali pitch
3291        // alignment) to prove that non-packed layouts do not take the
3292        // stride-padded branch.
3293        let result = converter.create_image(
3294            100,
3295            64,
3296            PixelFormat::Nv12,
3297            DType::U8,
3298            Some(TensorMemory::Dma),
3299        );
3300
3301        match result {
3302            Ok(img) => {
3303                assert_eq!(img.width(), Some(100));
3304                assert_eq!(img.height(), Some(64));
3305                assert_eq!(img.format(), Some(PixelFormat::Nv12));
3306                // Non-packed formats must never carry a row_stride override.
3307                assert!(
3308                    img.row_stride().is_none(),
3309                    "NV12 must not be stride-padded by create_image",
3310                );
3311            }
3312            Err(e) => {
3313                // Accept skip on hosts without a dma-heap, but never the
3314                // "NotImplemented" we used to return for non-packed layouts.
3315                let msg = format!("{e}");
3316                assert!(
3317                    !msg.contains("image_with_stride"),
3318                    "NV12 should not hit the stride-padded path: {msg}",
3319                );
3320            }
3321        }
3322    }
3323
3324    #[test]
3325    #[ignore] // Hangs on desktop platforms where DMA-buf is unavailable and PBO
3326              // fallback triggers a GPU driver hang during SHM→texture upload (e.g.,
3327              // NVIDIA without /dev/dma_heap permissions). Works on embedded targets.
3328    fn test_crop_skip() {
3329        let file = include_bytes!(concat!(
3330            env!("CARGO_MANIFEST_DIR"),
3331            "/../../testdata/zidane.jpg"
3332        ))
3333        .to_vec();
3334        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
3335
3336        let mut converter = ImageProcessor::new().unwrap();
3337        let converter_dst = converter
3338            .create_image(1280, 720, PixelFormat::Rgba, DType::U8, None)
3339            .unwrap();
3340        let crop = Crop::new()
3341            .with_src_rect(Some(Rect::new(0, 0, 640, 640)))
3342            .with_dst_rect(Some(Rect::new(0, 0, 640, 640)));
3343        let (result, src, converter_dst) = convert_img(
3344            &mut converter,
3345            src,
3346            converter_dst,
3347            Rotation::None,
3348            Flip::None,
3349            crop,
3350        );
3351        result.unwrap();
3352
3353        let cpu_dst = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
3354        let mut cpu_converter = CPUProcessor::new();
3355        let (result, _src, cpu_dst) = convert_img(
3356            &mut cpu_converter,
3357            src,
3358            cpu_dst,
3359            Rotation::None,
3360            Flip::None,
3361            crop,
3362        );
3363        result.unwrap();
3364
3365        compare_images(&converter_dst, &cpu_dst, 0.99999, function!());
3366    }
3367
3368    #[test]
3369    fn test_invalid_pixel_format() {
3370        // PixelFormat::from_fourcc returns None for unknown formats,
3371        // so TensorDyn::image cannot be called with an invalid format.
3372        assert!(PixelFormat::from_fourcc(u32::from_le_bytes(*b"TEST")).is_none());
3373    }
3374
    // Cached probe result for G2D availability (Linux/i.MX8 only); the
    // probe constructs a G2DProcessor once and remembers whether that
    // succeeded.
    #[cfg(target_os = "linux")]
    static G2D_AVAILABLE: std::sync::OnceLock<bool> = std::sync::OnceLock::new();

    // Helper function to check if G2D library is available (Linux/i.MX8 only)
    #[cfg(target_os = "linux")]
    fn is_g2d_available() -> bool {
        *G2D_AVAILABLE.get_or_init(|| G2DProcessor::new().is_ok())
    }
3383
3384    #[cfg(target_os = "linux")]
3385    #[cfg(feature = "opengl")]
3386    static GL_AVAILABLE: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
3387
3388    #[cfg(target_os = "linux")]
3389    #[cfg(feature = "opengl")]
3390    // Helper function to check if OpenGL is available
3391    fn is_opengl_available() -> bool {
3392        #[cfg(all(target_os = "linux", feature = "opengl"))]
3393        {
3394            *GL_AVAILABLE.get_or_init(|| GLProcessorThreaded::new(None).is_ok())
3395        }
3396
3397        #[cfg(not(all(target_os = "linux", feature = "opengl")))]
3398        {
3399            false
3400        }
3401    }
3402
3403    #[test]
3404    fn test_load_jpeg_with_exif() {
3405        let file = include_bytes!(concat!(
3406            env!("CARGO_MANIFEST_DIR"),
3407            "/../../testdata/zidane_rotated_exif.jpg"
3408        ))
3409        .to_vec();
3410        let loaded = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
3411
3412        assert_eq!(loaded.height(), Some(1280));
3413        assert_eq!(loaded.width(), Some(720));
3414
3415        let file = include_bytes!(concat!(
3416            env!("CARGO_MANIFEST_DIR"),
3417            "/../../testdata/zidane.jpg"
3418        ))
3419        .to_vec();
3420        let cpu_src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
3421
3422        let (dst_width, dst_height) = (cpu_src.height().unwrap(), cpu_src.width().unwrap());
3423
3424        let cpu_dst =
3425            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3426        let mut cpu_converter = CPUProcessor::new();
3427
3428        let (result, _cpu_src, cpu_dst) = convert_img(
3429            &mut cpu_converter,
3430            cpu_src,
3431            cpu_dst,
3432            Rotation::Clockwise90,
3433            Flip::None,
3434            Crop::no_crop(),
3435        );
3436        result.unwrap();
3437
3438        compare_images(&loaded, &cpu_dst, 0.98, function!());
3439    }
3440
3441    #[test]
3442    fn test_load_png_with_exif() {
3443        let file = include_bytes!(concat!(
3444            env!("CARGO_MANIFEST_DIR"),
3445            "/../../testdata/zidane_rotated_exif_180.png"
3446        ))
3447        .to_vec();
3448        let loaded = crate::load_png(&file, Some(PixelFormat::Rgba), None).unwrap();
3449
3450        assert_eq!(loaded.height(), Some(720));
3451        assert_eq!(loaded.width(), Some(1280));
3452
3453        let file = include_bytes!(concat!(
3454            env!("CARGO_MANIFEST_DIR"),
3455            "/../../testdata/zidane.jpg"
3456        ))
3457        .to_vec();
3458        let cpu_src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
3459
3460        let cpu_dst = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
3461        let mut cpu_converter = CPUProcessor::new();
3462
3463        let (result, _cpu_src, cpu_dst) = convert_img(
3464            &mut cpu_converter,
3465            cpu_src,
3466            cpu_dst,
3467            Rotation::Rotate180,
3468            Flip::None,
3469            Crop::no_crop(),
3470        );
3471        result.unwrap();
3472
3473        compare_images(&loaded, &cpu_dst, 0.98, function!());
3474    }
3475
3476    /// Synthesise an RGB JPEG with a deterministic pattern at `(width, height)`
3477    /// using the workspace's `jpeg-encoder` crate (the `image` crate is
3478    /// compiled without its JPEG feature). Used to exercise the decoder /
3479    /// pitch-padding paths for arbitrary dimensions without having to bundle
3480    /// a fixture file per test size.
3481    #[cfg(target_os = "linux")]
3482    fn make_rgb_jpeg(width: u32, height: u32) -> Vec<u8> {
3483        let mut bytes = Vec::with_capacity((width * height * 3) as usize);
3484        for y in 0..height {
3485            for x in 0..width {
3486                bytes.push(((x + y) & 0xFF) as u8);
3487                bytes.push(((x.wrapping_mul(3)) & 0xFF) as u8);
3488                bytes.push(((y.wrapping_mul(5)) & 0xFF) as u8);
3489            }
3490        }
3491        let mut out = Vec::new();
3492        let encoder = jpeg_encoder::Encoder::new(&mut out, 85);
3493        encoder
3494            .encode(
3495                &bytes,
3496                width as u16,
3497                height as u16,
3498                jpeg_encoder::ColorType::Rgb,
3499            )
3500            .expect("jpeg-encoder must succeed on trivial input");
3501        out
3502    }
3503
3504    /// End-to-end: a 375×333 RGBA JPEG (width NOT divisible by 4) loaded
3505    /// via the pitch-padded DMA path and letterboxed through the GL
3506    /// backend must produce correct output. Before the Rgba/Bgra
3507    /// width%4 relaxation in `DmaImportAttrs::from_tensor`, this case
3508    /// failed the pre-check and forced a CPU texture upload fallback;
3509    /// with the relaxation, EGL import succeeds at the driver level and
3510    /// the GL fast path runs. Output correctness is checked against a
3511    /// CPU reference (convert ran with `EDGEFIRST_FORCE_BACKEND=cpu`).
3512    #[test]
3513    #[cfg(target_os = "linux")]
3514    #[cfg(feature = "opengl")]
3515    fn test_convert_rgba_non_4_aligned_width_end_to_end() {
3516        use edgefirst_tensor::is_dma_available;
3517        if !is_dma_available() {
3518            eprintln!(
3519                "SKIPPED: test_convert_rgba_non_4_aligned_width_end_to_end — DMA not available"
3520            );
3521            return;
3522        }
3523        // 375 is the canonical failure width from dataset loaders —
3524        // 375 * 4 = 1500 bytes/row, pitch-padded to 1536. Width%4 = 3,
3525        // so the old pre-check rejected it; new code accepts it.
3526        let jpeg = make_rgb_jpeg(375, 333);
3527        let src_gl = crate::load_jpeg(&jpeg, Some(PixelFormat::Rgba), None).unwrap();
3528        assert_eq!(src_gl.width(), Some(375));
3529        // Row stride must still be pitch-padded (separate concern from width).
3530        let stride = src_gl.row_stride().unwrap();
3531        assert_eq!(stride, 1536, "expected padded pitch 1536, got {stride}");
3532
3533        // GL-backed convert into a pitch-aligned 640×640 Rgba dest.
3534        let mut gl_proc = ImageProcessor::new().unwrap();
3535        let gl_dst = gl_proc
3536            .create_image(640, 640, PixelFormat::Rgba, DType::U8, None)
3537            .unwrap();
3538        let (r_gl, _src_gl, gl_dst) = convert_img(
3539            &mut gl_proc,
3540            src_gl,
3541            gl_dst,
3542            Rotation::None,
3543            Flip::None,
3544            Crop::no_crop(),
3545        );
3546        r_gl.expect("GL-backed convert must succeed for 375x333 Rgba src");
3547
3548        // CPU reference via a fresh load so the two paths start from
3549        // byte-identical inputs. `with_config(backend=Cpu)` forces the
3550        // CPU-only processor regardless of which backends the host has
3551        // available.
3552        let src_cpu =
3553            crate::load_jpeg(&jpeg, Some(PixelFormat::Rgba), Some(TensorMemory::Mem)).unwrap();
3554        let mut cpu_proc = ImageProcessor::with_config(ImageProcessorConfig {
3555            backend: ComputeBackend::Cpu,
3556            ..Default::default()
3557        })
3558        .unwrap();
3559        let cpu_dst = TensorDyn::image(
3560            640,
3561            640,
3562            PixelFormat::Rgba,
3563            DType::U8,
3564            Some(TensorMemory::Mem),
3565        )
3566        .unwrap();
3567        let (r_cpu, _src_cpu, cpu_dst) = convert_img(
3568            &mut cpu_proc,
3569            src_cpu,
3570            cpu_dst,
3571            Rotation::None,
3572            Flip::None,
3573            Crop::no_crop(),
3574        );
3575        r_cpu.unwrap();
3576
3577        // Structural similarity: the GL path may have gone through EGL
3578        // import OR fallen back to CPU texture upload — either way, the
3579        // output must match the CPU reference closely.
3580        compare_images(&gl_dst, &cpu_dst, 0.95, function!());
3581    }
3582
3583    /// Regression lock: loading a JPEG at a non-64-aligned RGBA pitch (e.g.
3584    /// 500×333 → natural pitch 2000, needs to be padded to 2048) must go
3585    /// through `image_with_stride` and set `row_stride()` / `effective_row_stride()`
3586    /// to the padded value. The earlier pitch-padding commit fixed this in
3587    /// `load_jpeg`; a regression would surface as `row_stride == None` or
3588    /// `effective_row_stride == 2000`.
3589    #[test]
3590    #[cfg(target_os = "linux")]
3591    fn test_load_jpeg_rgba_non_aligned_pitch_padded_dma() {
3592        use edgefirst_tensor::is_dma_available;
3593        if !is_dma_available() {
3594            eprintln!(
3595                "SKIPPED: test_load_jpeg_rgba_non_aligned_pitch_padded_dma — DMA not available"
3596            );
3597            return;
3598        }
3599        // Widths that force a non-64-aligned natural RGBA pitch. All three
3600        // are divisible by 4 so the EGL width-alignment pre-check passes.
3601        // The pitch-padding fix is what makes these importable at all.
3602        for &w in &[500u32, 612, 428] {
3603            let jpeg = make_rgb_jpeg(w, 333);
3604            let loaded = crate::load_jpeg(&jpeg, Some(PixelFormat::Rgba), None).unwrap();
3605            let natural = (w as usize) * 4;
3606            let aligned = crate::align_pitch_bytes_to_gpu_alignment(natural).unwrap();
3607            assert!(
3608                aligned > natural,
3609                "test sanity: width {w} should be unaligned"
3610            );
3611            let stride = loaded
3612                .row_stride()
3613                .expect("padded DMA path must set an explicit row_stride — regression if None");
3614            assert_eq!(
3615                stride, aligned,
3616                "width {w}: expected padded stride {aligned}, got {stride} \
3617                 (regression: pitch-padding branch skipped?)"
3618            );
3619            let eff = loaded.effective_row_stride().unwrap();
3620            assert_eq!(
3621                eff, aligned,
3622                "effective_row_stride must match stored stride"
3623            );
3624            assert_eq!(loaded.width(), Some(w as usize));
3625            assert_eq!(loaded.height(), Some(333));
3626        }
3627    }
3628
3629    /// `padded_dma_pitch_for` must respect the caller's memory choice and
3630    /// must NOT route into the pitch-padded DMA path when the caller left
3631    /// the choice to the allocator (`None`) but DMA is unavailable on the
3632    /// host. The padded path requires `image_with_stride`, which always
3633    /// allocates DMA — taking it on a system without `/dev/dma_heap`
3634    /// would convert a normally-working image load into a hard failure
3635    /// (since `Tensor::image(..., None)` would have fallen back to
3636    /// SHM/Mem).
3637    #[test]
3638    #[cfg(target_os = "linux")]
3639    fn test_padded_dma_pitch_for_respects_memory_choice() {
3640        use edgefirst_tensor::{is_dma_available, TensorMemory};
3641
3642        // 500×4 = 2000 → padded to 2048 by GPU alignment. Use it for
3643        // every case so any "no padding" answer is unambiguous.
3644        let unaligned_w = 500;
3645
3646        // Caller asks for Mem / Shm: never pad, regardless of DMA.
3647        assert_eq!(
3648            crate::padded_dma_pitch_for(PixelFormat::Rgba, unaligned_w, &Some(TensorMemory::Mem),),
3649            None,
3650            "Mem must never trigger DMA padding"
3651        );
3652        assert_eq!(
3653            crate::padded_dma_pitch_for(PixelFormat::Rgba, unaligned_w, &Some(TensorMemory::Shm),),
3654            None,
3655            "Shm must never trigger DMA padding"
3656        );
3657
3658        // Caller explicitly asks for DMA: always pad if width needs it.
3659        // Even if the runtime can't actually allocate DMA, the caller
3660        // owns that decision and the resulting allocation error is
3661        // their problem, not ours.
3662        assert_eq!(
3663            crate::padded_dma_pitch_for(PixelFormat::Rgba, unaligned_w, &Some(TensorMemory::Dma),),
3664            Some(2048),
3665            "explicit Dma must pad regardless of runtime DMA availability"
3666        );
3667
3668        // Caller leaves it to the allocator: behaviour depends on
3669        // host-runtime DMA availability. This is the case the fix
3670        // guards against.
3671        let none_result = crate::padded_dma_pitch_for(PixelFormat::Rgba, unaligned_w, &None);
3672        if is_dma_available() {
3673            assert_eq!(
3674                none_result,
3675                Some(2048),
3676                "memory=None + DMA available → pad (will route through DMA)"
3677            );
3678        } else {
3679            assert_eq!(
3680                none_result, None,
3681                "memory=None + DMA unavailable → must NOT pad (would force \
3682                 image_with_stride into a DMA-only allocation that fails). \
3683                 Regression: padded_dma_pitch_for ignored is_dma_available()."
3684            );
3685        }
3686    }
3687
3688    // Synthesise a small greyscale PNG in memory at `(width, height)` with a
3689    // deterministic ramp pattern so multiple tests can cross-check output
3690    // without bundling an extra fixture file.
3691    fn make_grey_png(width: u32, height: u32) -> Vec<u8> {
3692        let mut bytes = Vec::with_capacity((width * height) as usize);
3693        for y in 0..height {
3694            for x in 0..width {
3695                bytes.push(((x + y) & 0xFF) as u8);
3696            }
3697        }
3698        let img = image::GrayImage::from_vec(width, height, bytes).unwrap();
3699        let mut buf = Vec::new();
3700        img.write_to(&mut std::io::Cursor::new(&mut buf), image::ImageFormat::Png)
3701            .unwrap();
3702        buf
3703    }
3704
3705    /// Greyscale PNG with a width that forces a pitch-misaligned natural
3706    /// row stride (612 bytes is not a multiple of the 64-byte GPU pitch
3707    /// alignment) must still load via the pitch-padded DMA path. Gated on
3708    /// DMA availability because `image_with_stride` is DMA-only.
3709    #[test]
3710    #[cfg(target_os = "linux")]
3711    fn test_load_png_grey_misaligned_width_dma() {
3712        use edgefirst_tensor::is_dma_available;
3713        if !is_dma_available() {
3714            eprintln!("SKIPPED: test_load_png_grey_misaligned_width_dma — DMA not available");
3715            return;
3716        }
3717        let png = make_grey_png(612, 388);
3718        let loaded = crate::load_png(&png, Some(PixelFormat::Grey), None).unwrap();
3719        assert_eq!(loaded.width(), Some(612));
3720        assert_eq!(loaded.height(), Some(388));
3721        assert_eq!(loaded.format(), Some(PixelFormat::Grey));
3722
3723        // Round-trip pixels — natural-pitch DMA-BUFs pad the stride so we
3724        // must indirect through row_stride() rather than assume width.
3725        let map = loaded.as_u8().unwrap().map().unwrap();
3726        let stride = loaded.row_stride().unwrap_or(612);
3727        assert!(stride >= 612);
3728        let bytes: &[u8] = &map;
3729        for y in 0..388usize {
3730            for x in 0..612usize {
3731                let expected = ((x + y) & 0xFF) as u8;
3732                let got = bytes[y * stride + x];
3733                assert_eq!(
3734                    got, expected,
3735                    "grey png mismatch at ({x},{y}): got {got} expected {expected}"
3736                );
3737            }
3738        }
3739    }
3740
3741    /// Greyscale PNG loaded with explicit Mem backing — runs on any
3742    /// platform (no DMA permission requirement) and covers the
3743    /// decoder-native Luma → Grey no-conversion path.
3744    #[test]
3745    fn test_load_png_grey_mem() {
3746        use edgefirst_tensor::TensorMemory;
3747        let png = make_grey_png(612, 100);
3748        let loaded =
3749            crate::load_png(&png, Some(PixelFormat::Grey), Some(TensorMemory::Mem)).unwrap();
3750        assert_eq!(loaded.width(), Some(612));
3751        assert_eq!(loaded.height(), Some(100));
3752        assert_eq!(loaded.format(), Some(PixelFormat::Grey));
3753        let map = loaded.as_u8().unwrap().map().unwrap();
3754        let bytes: &[u8] = &map;
3755        // Mem allocation uses the natural pitch — 612 bytes per row, exact.
3756        assert_eq!(bytes.len(), 612 * 100);
3757        for y in 0..100 {
3758            for x in 0..612 {
3759                assert_eq!(bytes[y * 612 + x], ((x + y) & 0xFF) as u8);
3760            }
3761        }
3762    }
3763
3764    /// Greyscale PNG decoded into RGB — exercises the decoder-colorspace
3765    /// mismatch path (Luma → Rgb via CPU converter). Uses Mem memory to
3766    /// stay portable to host-side test environments.
3767    #[test]
3768    fn test_load_png_grey_to_rgb_mem() {
3769        use edgefirst_tensor::TensorMemory;
3770        let png = make_grey_png(620, 240);
3771        let loaded =
3772            crate::load_png(&png, Some(PixelFormat::Rgb), Some(TensorMemory::Mem)).unwrap();
3773        assert_eq!(loaded.width(), Some(620));
3774        assert_eq!(loaded.height(), Some(240));
3775        assert_eq!(loaded.format(), Some(PixelFormat::Rgb));
3776
3777        // Greyscale promoted to RGB replicates luma into each channel.
3778        let map = loaded.as_u8().unwrap().map().unwrap();
3779        let bytes: &[u8] = &map;
3780        for (x, y) in [(0usize, 0usize), (100, 50), (619, 239)] {
3781            let expected = ((x + y) & 0xFF) as u8;
3782            let off = (y * 620 + x) * 3;
3783            assert_eq!(bytes[off], expected, "R@{x},{y}");
3784            assert_eq!(bytes[off + 1], expected, "G@{x},{y}");
3785            assert_eq!(bytes[off + 2], expected, "B@{x},{y}");
3786        }
3787    }
3788
3789    #[test]
3790    #[cfg(target_os = "linux")]
3791    fn test_g2d_resize() {
3792        if !is_g2d_available() {
3793            eprintln!("SKIPPED: test_g2d_resize - G2D library (libg2d.so.2) not available");
3794            return;
3795        }
3796        if !is_dma_available() {
3797            eprintln!(
3798                "SKIPPED: test_g2d_resize - DMA memory allocation not available (permission denied or no DMA-BUF support)"
3799            );
3800            return;
3801        }
3802
3803        let dst_width = 640;
3804        let dst_height = 360;
3805        let file = include_bytes!(concat!(
3806            env!("CARGO_MANIFEST_DIR"),
3807            "/../../testdata/zidane.jpg"
3808        ))
3809        .to_vec();
3810        let src =
3811            crate::load_image(&file, Some(PixelFormat::Rgba), Some(TensorMemory::Dma)).unwrap();
3812
3813        let g2d_dst = TensorDyn::image(
3814            dst_width,
3815            dst_height,
3816            PixelFormat::Rgba,
3817            DType::U8,
3818            Some(TensorMemory::Dma),
3819        )
3820        .unwrap();
3821        let mut g2d_converter = G2DProcessor::new().unwrap();
3822        let (result, src, g2d_dst) = convert_img(
3823            &mut g2d_converter,
3824            src,
3825            g2d_dst,
3826            Rotation::None,
3827            Flip::None,
3828            Crop::no_crop(),
3829        );
3830        result.unwrap();
3831
3832        let cpu_dst =
3833            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3834        let mut cpu_converter = CPUProcessor::new();
3835        let (result, _src, cpu_dst) = convert_img(
3836            &mut cpu_converter,
3837            src,
3838            cpu_dst,
3839            Rotation::None,
3840            Flip::None,
3841            Crop::no_crop(),
3842        );
3843        result.unwrap();
3844
3845        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
3846    }
3847
3848    #[test]
3849    #[cfg(target_os = "linux")]
3850    #[cfg(feature = "opengl")]
3851    fn test_opengl_resize() {
3852        if !is_opengl_available() {
3853            eprintln!("SKIPPED: {} - OpenGL not available", function!());
3854            return;
3855        }
3856
3857        let dst_width = 640;
3858        let dst_height = 360;
3859        let file = include_bytes!(concat!(
3860            env!("CARGO_MANIFEST_DIR"),
3861            "/../../testdata/zidane.jpg"
3862        ))
3863        .to_vec();
3864        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
3865
3866        let cpu_dst =
3867            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3868        let mut cpu_converter = CPUProcessor::new();
3869        let (result, src, cpu_dst) = convert_img(
3870            &mut cpu_converter,
3871            src,
3872            cpu_dst,
3873            Rotation::None,
3874            Flip::None,
3875            Crop::no_crop(),
3876        );
3877        result.unwrap();
3878
3879        let mut src = src;
3880        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
3881
3882        for _ in 0..5 {
3883            let gl_dst =
3884                TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None)
3885                    .unwrap();
3886            let (result, src_back, gl_dst) = convert_img(
3887                &mut gl_converter,
3888                src,
3889                gl_dst,
3890                Rotation::None,
3891                Flip::None,
3892                Crop::no_crop(),
3893            );
3894            result.unwrap();
3895            src = src_back;
3896
3897            compare_images(&gl_dst, &cpu_dst, 0.98, function!());
3898        }
3899    }
3900
3901    #[test]
3902    #[cfg(target_os = "linux")]
3903    #[cfg(feature = "opengl")]
3904    fn test_opengl_10_threads() {
3905        if !is_opengl_available() {
3906            eprintln!("SKIPPED: {} - OpenGL not available", function!());
3907            return;
3908        }
3909
3910        let handles: Vec<_> = (0..10)
3911            .map(|i| {
3912                std::thread::Builder::new()
3913                    .name(format!("Thread {i}"))
3914                    .spawn(test_opengl_resize)
3915                    .unwrap()
3916            })
3917            .collect();
3918        handles.into_iter().for_each(|h| {
3919            if let Err(e) = h.join() {
3920                std::panic::resume_unwind(e)
3921            }
3922        });
3923    }
3924
3925    #[test]
3926    #[cfg(target_os = "linux")]
3927    #[cfg(feature = "opengl")]
3928    fn test_opengl_grey() {
3929        if !is_opengl_available() {
3930            eprintln!("SKIPPED: {} - OpenGL not available", function!());
3931            return;
3932        }
3933
3934        let img = crate::load_image(
3935            include_bytes!(concat!(
3936                env!("CARGO_MANIFEST_DIR"),
3937                "/../../testdata/grey.jpg"
3938            )),
3939            Some(PixelFormat::Grey),
3940            None,
3941        )
3942        .unwrap();
3943
3944        let gl_dst = TensorDyn::image(640, 640, PixelFormat::Grey, DType::U8, None).unwrap();
3945        let cpu_dst = TensorDyn::image(640, 640, PixelFormat::Grey, DType::U8, None).unwrap();
3946
3947        let mut converter = CPUProcessor::new();
3948
3949        let (result, img, cpu_dst) = convert_img(
3950            &mut converter,
3951            img,
3952            cpu_dst,
3953            Rotation::None,
3954            Flip::None,
3955            Crop::no_crop(),
3956        );
3957        result.unwrap();
3958
3959        let mut gl = GLProcessorThreaded::new(None).unwrap();
3960        let (result, _img, gl_dst) = convert_img(
3961            &mut gl,
3962            img,
3963            gl_dst,
3964            Rotation::None,
3965            Flip::None,
3966            Crop::no_crop(),
3967        );
3968        result.unwrap();
3969
3970        compare_images(&gl_dst, &cpu_dst, 0.98, function!());
3971    }
3972
3973    #[test]
3974    #[cfg(target_os = "linux")]
3975    fn test_g2d_src_crop() {
3976        if !is_g2d_available() {
3977            eprintln!("SKIPPED: test_g2d_src_crop - G2D library (libg2d.so.2) not available");
3978            return;
3979        }
3980        if !is_dma_available() {
3981            eprintln!(
3982                "SKIPPED: test_g2d_src_crop - DMA memory allocation not available (permission denied or no DMA-BUF support)"
3983            );
3984            return;
3985        }
3986
3987        let dst_width = 640;
3988        let dst_height = 640;
3989        let file = include_bytes!(concat!(
3990            env!("CARGO_MANIFEST_DIR"),
3991            "/../../testdata/zidane.jpg"
3992        ))
3993        .to_vec();
3994        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
3995
3996        let cpu_dst =
3997            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3998        let mut cpu_converter = CPUProcessor::new();
3999        let crop = Crop {
4000            src_rect: Some(Rect {
4001                left: 0,
4002                top: 0,
4003                width: 640,
4004                height: 360,
4005            }),
4006            dst_rect: None,
4007            dst_color: None,
4008        };
4009        let (result, src, cpu_dst) = convert_img(
4010            &mut cpu_converter,
4011            src,
4012            cpu_dst,
4013            Rotation::None,
4014            Flip::None,
4015            crop,
4016        );
4017        result.unwrap();
4018
4019        let g2d_dst =
4020            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4021        let mut g2d_converter = G2DProcessor::new().unwrap();
4022        let (result, _src, g2d_dst) = convert_img(
4023            &mut g2d_converter,
4024            src,
4025            g2d_dst,
4026            Rotation::None,
4027            Flip::None,
4028            crop,
4029        );
4030        result.unwrap();
4031
4032        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
4033    }
4034
4035    #[test]
4036    #[cfg(target_os = "linux")]
4037    fn test_g2d_dst_crop() {
4038        if !is_g2d_available() {
4039            eprintln!("SKIPPED: test_g2d_dst_crop - G2D library (libg2d.so.2) not available");
4040            return;
4041        }
4042        if !is_dma_available() {
4043            eprintln!(
4044                "SKIPPED: test_g2d_dst_crop - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4045            );
4046            return;
4047        }
4048
4049        let dst_width = 640;
4050        let dst_height = 640;
4051        let file = include_bytes!(concat!(
4052            env!("CARGO_MANIFEST_DIR"),
4053            "/../../testdata/zidane.jpg"
4054        ))
4055        .to_vec();
4056        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
4057
4058        let cpu_dst =
4059            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4060        let mut cpu_converter = CPUProcessor::new();
4061        let crop = Crop {
4062            src_rect: None,
4063            dst_rect: Some(Rect::new(100, 100, 512, 288)),
4064            dst_color: None,
4065        };
4066        let (result, src, cpu_dst) = convert_img(
4067            &mut cpu_converter,
4068            src,
4069            cpu_dst,
4070            Rotation::None,
4071            Flip::None,
4072            crop,
4073        );
4074        result.unwrap();
4075
4076        let g2d_dst =
4077            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4078        let mut g2d_converter = G2DProcessor::new().unwrap();
4079        let (result, _src, g2d_dst) = convert_img(
4080            &mut g2d_converter,
4081            src,
4082            g2d_dst,
4083            Rotation::None,
4084            Flip::None,
4085            crop,
4086        );
4087        result.unwrap();
4088
4089        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
4090    }
4091
4092    #[test]
4093    #[cfg(target_os = "linux")]
4094    fn test_g2d_all_rgba() {
4095        if !is_g2d_available() {
4096            eprintln!("SKIPPED: test_g2d_all_rgba - G2D library (libg2d.so.2) not available");
4097            return;
4098        }
4099        if !is_dma_available() {
4100            eprintln!(
4101                "SKIPPED: test_g2d_all_rgba - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4102            );
4103            return;
4104        }
4105
4106        let dst_width = 640;
4107        let dst_height = 640;
4108        let file = include_bytes!(concat!(
4109            env!("CARGO_MANIFEST_DIR"),
4110            "/../../testdata/zidane.jpg"
4111        ))
4112        .to_vec();
4113        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
4114        let src_dyn = src;
4115
4116        let mut cpu_dst =
4117            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4118        let mut cpu_converter = CPUProcessor::new();
4119        let mut g2d_dst =
4120            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4121        let mut g2d_converter = G2DProcessor::new().unwrap();
4122
4123        let crop = Crop {
4124            src_rect: Some(Rect::new(50, 120, 1024, 576)),
4125            dst_rect: Some(Rect::new(100, 100, 512, 288)),
4126            dst_color: None,
4127        };
4128
4129        for rot in [
4130            Rotation::None,
4131            Rotation::Clockwise90,
4132            Rotation::Rotate180,
4133            Rotation::CounterClockwise90,
4134        ] {
4135            cpu_dst
4136                .as_u8()
4137                .unwrap()
4138                .map()
4139                .unwrap()
4140                .as_mut_slice()
4141                .fill(114);
4142            g2d_dst
4143                .as_u8()
4144                .unwrap()
4145                .map()
4146                .unwrap()
4147                .as_mut_slice()
4148                .fill(114);
4149            for flip in [Flip::None, Flip::Horizontal, Flip::Vertical] {
4150                let mut cpu_dst_dyn = cpu_dst;
4151                cpu_converter
4152                    .convert(&src_dyn, &mut cpu_dst_dyn, Rotation::None, Flip::None, crop)
4153                    .unwrap();
4154                cpu_dst = {
4155                    let mut __t = cpu_dst_dyn.into_u8().unwrap();
4156                    __t.set_format(PixelFormat::Rgba).unwrap();
4157                    TensorDyn::from(__t)
4158                };
4159
4160                let mut g2d_dst_dyn = g2d_dst;
4161                g2d_converter
4162                    .convert(&src_dyn, &mut g2d_dst_dyn, Rotation::None, Flip::None, crop)
4163                    .unwrap();
4164                g2d_dst = {
4165                    let mut __t = g2d_dst_dyn.into_u8().unwrap();
4166                    __t.set_format(PixelFormat::Rgba).unwrap();
4167                    TensorDyn::from(__t)
4168                };
4169
4170                compare_images(
4171                    &g2d_dst,
4172                    &cpu_dst,
4173                    0.98,
4174                    &format!("{} {:?} {:?}", function!(), rot, flip),
4175                );
4176            }
4177        }
4178    }
4179
4180    #[test]
4181    #[cfg(target_os = "linux")]
4182    #[cfg(feature = "opengl")]
4183    fn test_opengl_src_crop() {
4184        if !is_opengl_available() {
4185            eprintln!("SKIPPED: {} - OpenGL not available", function!());
4186            return;
4187        }
4188
4189        let dst_width = 640;
4190        let dst_height = 360;
4191        let file = include_bytes!(concat!(
4192            env!("CARGO_MANIFEST_DIR"),
4193            "/../../testdata/zidane.jpg"
4194        ))
4195        .to_vec();
4196        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
4197        let crop = Crop {
4198            src_rect: Some(Rect {
4199                left: 320,
4200                top: 180,
4201                width: 1280 - 320,
4202                height: 720 - 180,
4203            }),
4204            dst_rect: None,
4205            dst_color: None,
4206        };
4207
4208        let cpu_dst =
4209            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4210        let mut cpu_converter = CPUProcessor::new();
4211        let (result, src, cpu_dst) = convert_img(
4212            &mut cpu_converter,
4213            src,
4214            cpu_dst,
4215            Rotation::None,
4216            Flip::None,
4217            crop,
4218        );
4219        result.unwrap();
4220
4221        let gl_dst =
4222            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4223        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
4224        let (result, _src, gl_dst) = convert_img(
4225            &mut gl_converter,
4226            src,
4227            gl_dst,
4228            Rotation::None,
4229            Flip::None,
4230            crop,
4231        );
4232        result.unwrap();
4233
4234        compare_images(&gl_dst, &cpu_dst, 0.98, function!());
4235    }
4236
4237    #[test]
4238    #[cfg(target_os = "linux")]
4239    #[cfg(feature = "opengl")]
4240    fn test_opengl_dst_crop() {
4241        if !is_opengl_available() {
4242            eprintln!("SKIPPED: {} - OpenGL not available", function!());
4243            return;
4244        }
4245
4246        let dst_width = 640;
4247        let dst_height = 640;
4248        let file = include_bytes!(concat!(
4249            env!("CARGO_MANIFEST_DIR"),
4250            "/../../testdata/zidane.jpg"
4251        ))
4252        .to_vec();
4253        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
4254
4255        let cpu_dst =
4256            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4257        let mut cpu_converter = CPUProcessor::new();
4258        let crop = Crop {
4259            src_rect: None,
4260            dst_rect: Some(Rect::new(100, 100, 512, 288)),
4261            dst_color: None,
4262        };
4263        let (result, src, cpu_dst) = convert_img(
4264            &mut cpu_converter,
4265            src,
4266            cpu_dst,
4267            Rotation::None,
4268            Flip::None,
4269            crop,
4270        );
4271        result.unwrap();
4272
4273        let gl_dst =
4274            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4275        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
4276        let (result, _src, gl_dst) = convert_img(
4277            &mut gl_converter,
4278            src,
4279            gl_dst,
4280            Rotation::None,
4281            Flip::None,
4282            crop,
4283        );
4284        result.unwrap();
4285
4286        compare_images(&gl_dst, &cpu_dst, 0.98, function!());
4287    }
4288
4289    #[test]
4290    #[cfg(target_os = "linux")]
4291    #[cfg(feature = "opengl")]
4292    fn test_opengl_all_rgba() {
4293        if !is_opengl_available() {
4294            eprintln!("SKIPPED: {} - OpenGL not available", function!());
4295            return;
4296        }
4297
4298        let dst_width = 640;
4299        let dst_height = 640;
4300        let file = include_bytes!(concat!(
4301            env!("CARGO_MANIFEST_DIR"),
4302            "/../../testdata/zidane.jpg"
4303        ))
4304        .to_vec();
4305
4306        let mut cpu_converter = CPUProcessor::new();
4307
4308        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
4309
4310        let mut mem = vec![None, Some(TensorMemory::Mem), Some(TensorMemory::Shm)];
4311        if is_dma_available() {
4312            mem.push(Some(TensorMemory::Dma));
4313        }
4314        let crop = Crop {
4315            src_rect: Some(Rect::new(50, 120, 1024, 576)),
4316            dst_rect: Some(Rect::new(100, 100, 512, 288)),
4317            dst_color: None,
4318        };
4319        for m in mem {
4320            let src = crate::load_image(&file, Some(PixelFormat::Rgba), m).unwrap();
4321            let src_dyn = src;
4322
4323            for rot in [
4324                Rotation::None,
4325                Rotation::Clockwise90,
4326                Rotation::Rotate180,
4327                Rotation::CounterClockwise90,
4328            ] {
4329                for flip in [Flip::None, Flip::Horizontal, Flip::Vertical] {
4330                    let cpu_dst =
4331                        TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, m)
4332                            .unwrap();
4333                    let gl_dst =
4334                        TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, m)
4335                            .unwrap();
4336                    cpu_dst
4337                        .as_u8()
4338                        .unwrap()
4339                        .map()
4340                        .unwrap()
4341                        .as_mut_slice()
4342                        .fill(114);
4343                    gl_dst
4344                        .as_u8()
4345                        .unwrap()
4346                        .map()
4347                        .unwrap()
4348                        .as_mut_slice()
4349                        .fill(114);
4350
4351                    let mut cpu_dst_dyn = cpu_dst;
4352                    cpu_converter
4353                        .convert(&src_dyn, &mut cpu_dst_dyn, Rotation::None, Flip::None, crop)
4354                        .unwrap();
4355                    let cpu_dst = {
4356                        let mut __t = cpu_dst_dyn.into_u8().unwrap();
4357                        __t.set_format(PixelFormat::Rgba).unwrap();
4358                        TensorDyn::from(__t)
4359                    };
4360
4361                    let mut gl_dst_dyn = gl_dst;
4362                    gl_converter
4363                        .convert(&src_dyn, &mut gl_dst_dyn, Rotation::None, Flip::None, crop)
4364                        .map_err(|e| {
4365                            log::error!("error mem {m:?} rot {rot:?} error: {e:?}");
4366                            e
4367                        })
4368                        .unwrap();
4369                    let gl_dst = {
4370                        let mut __t = gl_dst_dyn.into_u8().unwrap();
4371                        __t.set_format(PixelFormat::Rgba).unwrap();
4372                        TensorDyn::from(__t)
4373                    };
4374
4375                    compare_images(
4376                        &gl_dst,
4377                        &cpu_dst,
4378                        0.98,
4379                        &format!("{} {:?} {:?}", function!(), rot, flip),
4380                    );
4381                }
4382            }
4383        }
4384    }
4385
4386    #[test]
4387    #[cfg(target_os = "linux")]
4388    fn test_cpu_rotate() {
4389        for rot in [
4390            Rotation::Clockwise90,
4391            Rotation::Rotate180,
4392            Rotation::CounterClockwise90,
4393        ] {
4394            test_cpu_rotate_(rot);
4395        }
4396    }
4397
4398    #[cfg(target_os = "linux")]
4399    fn test_cpu_rotate_(rot: Rotation) {
4400        // This test rotates the image 4 times and checks that the image was returned to
4401        // be the same Currently doesn't check if rotations actually rotated in
4402        // right direction
4403        let file = include_bytes!(concat!(
4404            env!("CARGO_MANIFEST_DIR"),
4405            "/../../testdata/zidane.jpg"
4406        ))
4407        .to_vec();
4408
4409        let unchanged_src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
4410        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
4411
4412        let (dst_width, dst_height) = match rot {
4413            Rotation::None | Rotation::Rotate180 => (src.width().unwrap(), src.height().unwrap()),
4414            Rotation::Clockwise90 | Rotation::CounterClockwise90 => {
4415                (src.height().unwrap(), src.width().unwrap())
4416            }
4417        };
4418
4419        let cpu_dst =
4420            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4421        let mut cpu_converter = CPUProcessor::new();
4422
4423        // After rotating 4 times, the image should be the same as the original
4424
4425        let (result, src, cpu_dst) = convert_img(
4426            &mut cpu_converter,
4427            src,
4428            cpu_dst,
4429            rot,
4430            Flip::None,
4431            Crop::no_crop(),
4432        );
4433        result.unwrap();
4434
4435        let (result, cpu_dst, src) = convert_img(
4436            &mut cpu_converter,
4437            cpu_dst,
4438            src,
4439            rot,
4440            Flip::None,
4441            Crop::no_crop(),
4442        );
4443        result.unwrap();
4444
4445        let (result, src, cpu_dst) = convert_img(
4446            &mut cpu_converter,
4447            src,
4448            cpu_dst,
4449            rot,
4450            Flip::None,
4451            Crop::no_crop(),
4452        );
4453        result.unwrap();
4454
4455        let (result, _cpu_dst, src) = convert_img(
4456            &mut cpu_converter,
4457            cpu_dst,
4458            src,
4459            rot,
4460            Flip::None,
4461            Crop::no_crop(),
4462        );
4463        result.unwrap();
4464
4465        compare_images(&src, &unchanged_src, 0.98, function!());
4466    }
4467
4468    #[test]
4469    #[cfg(target_os = "linux")]
4470    #[cfg(feature = "opengl")]
4471    fn test_opengl_rotate() {
4472        if !is_opengl_available() {
4473            eprintln!("SKIPPED: {} - OpenGL not available", function!());
4474            return;
4475        }
4476
4477        let size = (1280, 720);
4478        let mut mem = vec![None, Some(TensorMemory::Shm), Some(TensorMemory::Mem)];
4479
4480        if is_dma_available() {
4481            mem.push(Some(TensorMemory::Dma));
4482        }
4483        for m in mem {
4484            for rot in [
4485                Rotation::Clockwise90,
4486                Rotation::Rotate180,
4487                Rotation::CounterClockwise90,
4488            ] {
4489                test_opengl_rotate_(size, rot, m);
4490            }
4491        }
4492    }
4493
4494    #[cfg(target_os = "linux")]
4495    #[cfg(feature = "opengl")]
4496    fn test_opengl_rotate_(
4497        size: (usize, usize),
4498        rot: Rotation,
4499        tensor_memory: Option<TensorMemory>,
4500    ) {
4501        let (dst_width, dst_height) = match rot {
4502            Rotation::None | Rotation::Rotate180 => size,
4503            Rotation::Clockwise90 | Rotation::CounterClockwise90 => (size.1, size.0),
4504        };
4505
4506        let file = include_bytes!(concat!(
4507            env!("CARGO_MANIFEST_DIR"),
4508            "/../../testdata/zidane.jpg"
4509        ))
4510        .to_vec();
4511        let src = crate::load_image(&file, Some(PixelFormat::Rgba), tensor_memory).unwrap();
4512
4513        let cpu_dst =
4514            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4515        let mut cpu_converter = CPUProcessor::new();
4516
4517        let (result, mut src, cpu_dst) = convert_img(
4518            &mut cpu_converter,
4519            src,
4520            cpu_dst,
4521            rot,
4522            Flip::None,
4523            Crop::no_crop(),
4524        );
4525        result.unwrap();
4526
4527        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
4528
4529        for _ in 0..5 {
4530            let gl_dst = TensorDyn::image(
4531                dst_width,
4532                dst_height,
4533                PixelFormat::Rgba,
4534                DType::U8,
4535                tensor_memory,
4536            )
4537            .unwrap();
4538            let (result, src_back, gl_dst) = convert_img(
4539                &mut gl_converter,
4540                src,
4541                gl_dst,
4542                rot,
4543                Flip::None,
4544                Crop::no_crop(),
4545            );
4546            result.unwrap();
4547            src = src_back;
4548            compare_images(&gl_dst, &cpu_dst, 0.98, function!());
4549        }
4550    }
4551
4552    #[test]
4553    #[cfg(target_os = "linux")]
4554    fn test_g2d_rotate() {
4555        if !is_g2d_available() {
4556            eprintln!("SKIPPED: test_g2d_rotate - G2D library (libg2d.so.2) not available");
4557            return;
4558        }
4559        if !is_dma_available() {
4560            eprintln!(
4561                "SKIPPED: test_g2d_rotate - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4562            );
4563            return;
4564        }
4565
4566        let size = (1280, 720);
4567        for rot in [
4568            Rotation::Clockwise90,
4569            Rotation::Rotate180,
4570            Rotation::CounterClockwise90,
4571        ] {
4572            test_g2d_rotate_(size, rot);
4573        }
4574    }
4575
4576    #[cfg(target_os = "linux")]
4577    fn test_g2d_rotate_(size: (usize, usize), rot: Rotation) {
4578        let (dst_width, dst_height) = match rot {
4579            Rotation::None | Rotation::Rotate180 => size,
4580            Rotation::Clockwise90 | Rotation::CounterClockwise90 => (size.1, size.0),
4581        };
4582
4583        let file = include_bytes!(concat!(
4584            env!("CARGO_MANIFEST_DIR"),
4585            "/../../testdata/zidane.jpg"
4586        ))
4587        .to_vec();
4588        let src =
4589            crate::load_image(&file, Some(PixelFormat::Rgba), Some(TensorMemory::Dma)).unwrap();
4590
4591        let cpu_dst =
4592            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4593        let mut cpu_converter = CPUProcessor::new();
4594
4595        let (result, src, cpu_dst) = convert_img(
4596            &mut cpu_converter,
4597            src,
4598            cpu_dst,
4599            rot,
4600            Flip::None,
4601            Crop::no_crop(),
4602        );
4603        result.unwrap();
4604
4605        let g2d_dst = TensorDyn::image(
4606            dst_width,
4607            dst_height,
4608            PixelFormat::Rgba,
4609            DType::U8,
4610            Some(TensorMemory::Dma),
4611        )
4612        .unwrap();
4613        let mut g2d_converter = G2DProcessor::new().unwrap();
4614
4615        let (result, _src, g2d_dst) = convert_img(
4616            &mut g2d_converter,
4617            src,
4618            g2d_dst,
4619            rot,
4620            Flip::None,
4621            Crop::no_crop(),
4622        );
4623        result.unwrap();
4624
4625        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
4626    }
4627
4628    #[test]
4629    fn test_rgba_to_yuyv_resize_cpu() {
4630        let src = load_bytes_to_tensor(
4631            1280,
4632            720,
4633            PixelFormat::Rgba,
4634            None,
4635            include_bytes!(concat!(
4636                env!("CARGO_MANIFEST_DIR"),
4637                "/../../testdata/camera720p.rgba"
4638            )),
4639        )
4640        .unwrap();
4641
4642        let (dst_width, dst_height) = (640, 360);
4643
4644        let dst =
4645            TensorDyn::image(dst_width, dst_height, PixelFormat::Yuyv, DType::U8, None).unwrap();
4646
4647        let dst_through_yuyv =
4648            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4649        let dst_direct =
4650            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4651
4652        let mut cpu_converter = CPUProcessor::new();
4653
4654        let (result, src, dst) = convert_img(
4655            &mut cpu_converter,
4656            src,
4657            dst,
4658            Rotation::None,
4659            Flip::None,
4660            Crop::no_crop(),
4661        );
4662        result.unwrap();
4663
4664        let (result, _dst, dst_through_yuyv) = convert_img(
4665            &mut cpu_converter,
4666            dst,
4667            dst_through_yuyv,
4668            Rotation::None,
4669            Flip::None,
4670            Crop::no_crop(),
4671        );
4672        result.unwrap();
4673
4674        let (result, _src, dst_direct) = convert_img(
4675            &mut cpu_converter,
4676            src,
4677            dst_direct,
4678            Rotation::None,
4679            Flip::None,
4680            Crop::no_crop(),
4681        );
4682        result.unwrap();
4683
4684        compare_images(&dst_through_yuyv, &dst_direct, 0.98, function!());
4685    }
4686
4687    #[test]
4688    #[cfg(target_os = "linux")]
4689    #[cfg(feature = "opengl")]
4690    #[ignore = "opengl doesn't support rendering to PixelFormat::Yuyv texture"]
4691    fn test_rgba_to_yuyv_resize_opengl() {
4692        if !is_opengl_available() {
4693            eprintln!("SKIPPED: {} - OpenGL not available", function!());
4694            return;
4695        }
4696
4697        if !is_dma_available() {
4698            eprintln!(
4699                "SKIPPED: {} - DMA memory allocation not available (permission denied or no DMA-BUF support)",
4700                function!()
4701            );
4702            return;
4703        }
4704
4705        let src = load_bytes_to_tensor(
4706            1280,
4707            720,
4708            PixelFormat::Rgba,
4709            None,
4710            include_bytes!(concat!(
4711                env!("CARGO_MANIFEST_DIR"),
4712                "/../../testdata/camera720p.rgba"
4713            )),
4714        )
4715        .unwrap();
4716
4717        let (dst_width, dst_height) = (640, 360);
4718
4719        let dst = TensorDyn::image(
4720            dst_width,
4721            dst_height,
4722            PixelFormat::Yuyv,
4723            DType::U8,
4724            Some(TensorMemory::Dma),
4725        )
4726        .unwrap();
4727
4728        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
4729
4730        let (result, src, dst) = convert_img(
4731            &mut gl_converter,
4732            src,
4733            dst,
4734            Rotation::None,
4735            Flip::None,
4736            Crop::new()
4737                .with_dst_rect(Some(Rect::new(100, 100, 100, 100)))
4738                .with_dst_color(Some([255, 255, 255, 255])),
4739        );
4740        result.unwrap();
4741
4742        std::fs::write(
4743            "rgba_to_yuyv_opengl.yuyv",
4744            dst.as_u8().unwrap().map().unwrap().as_slice(),
4745        )
4746        .unwrap();
4747        let cpu_dst = TensorDyn::image(
4748            dst_width,
4749            dst_height,
4750            PixelFormat::Yuyv,
4751            DType::U8,
4752            Some(TensorMemory::Dma),
4753        )
4754        .unwrap();
4755        let (result, _src, cpu_dst) = convert_img(
4756            &mut CPUProcessor::new(),
4757            src,
4758            cpu_dst,
4759            Rotation::None,
4760            Flip::None,
4761            Crop::no_crop(),
4762        );
4763        result.unwrap();
4764
4765        compare_images_convert_to_rgb(&dst, &cpu_dst, 0.98, function!());
4766    }
4767
4768    #[test]
4769    #[cfg(target_os = "linux")]
4770    fn test_rgba_to_yuyv_resize_g2d() {
4771        if !is_g2d_available() {
4772            eprintln!(
4773                "SKIPPED: test_rgba_to_yuyv_resize_g2d - G2D library (libg2d.so.2) not available"
4774            );
4775            return;
4776        }
4777        if !is_dma_available() {
4778            eprintln!(
4779                "SKIPPED: test_rgba_to_yuyv_resize_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4780            );
4781            return;
4782        }
4783
4784        let src = load_bytes_to_tensor(
4785            1280,
4786            720,
4787            PixelFormat::Rgba,
4788            Some(TensorMemory::Dma),
4789            include_bytes!(concat!(
4790                env!("CARGO_MANIFEST_DIR"),
4791                "/../../testdata/camera720p.rgba"
4792            )),
4793        )
4794        .unwrap();
4795
4796        let (dst_width, dst_height) = (1280, 720);
4797
4798        let cpu_dst = TensorDyn::image(
4799            dst_width,
4800            dst_height,
4801            PixelFormat::Yuyv,
4802            DType::U8,
4803            Some(TensorMemory::Dma),
4804        )
4805        .unwrap();
4806
4807        let g2d_dst = TensorDyn::image(
4808            dst_width,
4809            dst_height,
4810            PixelFormat::Yuyv,
4811            DType::U8,
4812            Some(TensorMemory::Dma),
4813        )
4814        .unwrap();
4815
4816        let mut g2d_converter = G2DProcessor::new().unwrap();
4817        let crop = Crop {
4818            src_rect: None,
4819            dst_rect: Some(Rect::new(100, 100, 2, 2)),
4820            dst_color: None,
4821        };
4822
4823        g2d_dst
4824            .as_u8()
4825            .unwrap()
4826            .map()
4827            .unwrap()
4828            .as_mut_slice()
4829            .fill(128);
4830        let (result, src, g2d_dst) = convert_img(
4831            &mut g2d_converter,
4832            src,
4833            g2d_dst,
4834            Rotation::None,
4835            Flip::None,
4836            crop,
4837        );
4838        result.unwrap();
4839
4840        let cpu_dst_img = cpu_dst;
4841        cpu_dst_img
4842            .as_u8()
4843            .unwrap()
4844            .map()
4845            .unwrap()
4846            .as_mut_slice()
4847            .fill(128);
4848        let (result, _src, cpu_dst) = convert_img(
4849            &mut CPUProcessor::new(),
4850            src,
4851            cpu_dst_img,
4852            Rotation::None,
4853            Flip::None,
4854            crop,
4855        );
4856        result.unwrap();
4857
4858        compare_images_convert_to_rgb(&cpu_dst, &g2d_dst, 0.98, function!());
4859    }
4860
4861    #[test]
4862    fn test_yuyv_to_rgba_cpu() {
4863        let file = include_bytes!(concat!(
4864            env!("CARGO_MANIFEST_DIR"),
4865            "/../../testdata/camera720p.yuyv"
4866        ))
4867        .to_vec();
4868        let src = TensorDyn::image(1280, 720, PixelFormat::Yuyv, DType::U8, None).unwrap();
4869        src.as_u8()
4870            .unwrap()
4871            .map()
4872            .unwrap()
4873            .as_mut_slice()
4874            .copy_from_slice(&file);
4875
4876        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
4877        let mut cpu_converter = CPUProcessor::new();
4878
4879        let (result, _src, dst) = convert_img(
4880            &mut cpu_converter,
4881            src,
4882            dst,
4883            Rotation::None,
4884            Flip::None,
4885            Crop::no_crop(),
4886        );
4887        result.unwrap();
4888
4889        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
4890        target_image
4891            .as_u8()
4892            .unwrap()
4893            .map()
4894            .unwrap()
4895            .as_mut_slice()
4896            .copy_from_slice(include_bytes!(concat!(
4897                env!("CARGO_MANIFEST_DIR"),
4898                "/../../testdata/camera720p.rgba"
4899            )));
4900
4901        compare_images(&dst, &target_image, 0.98, function!());
4902    }
4903
4904    #[test]
4905    fn test_yuyv_to_rgb_cpu() {
4906        let file = include_bytes!(concat!(
4907            env!("CARGO_MANIFEST_DIR"),
4908            "/../../testdata/camera720p.yuyv"
4909        ))
4910        .to_vec();
4911        let src = TensorDyn::image(1280, 720, PixelFormat::Yuyv, DType::U8, None).unwrap();
4912        src.as_u8()
4913            .unwrap()
4914            .map()
4915            .unwrap()
4916            .as_mut_slice()
4917            .copy_from_slice(&file);
4918
4919        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
4920        let mut cpu_converter = CPUProcessor::new();
4921
4922        let (result, _src, dst) = convert_img(
4923            &mut cpu_converter,
4924            src,
4925            dst,
4926            Rotation::None,
4927            Flip::None,
4928            Crop::no_crop(),
4929        );
4930        result.unwrap();
4931
4932        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
4933        target_image
4934            .as_u8()
4935            .unwrap()
4936            .map()
4937            .unwrap()
4938            .as_mut_slice()
4939            .as_chunks_mut::<3>()
4940            .0
4941            .iter_mut()
4942            .zip(
4943                include_bytes!(concat!(
4944                    env!("CARGO_MANIFEST_DIR"),
4945                    "/../../testdata/camera720p.rgba"
4946                ))
4947                .as_chunks::<4>()
4948                .0,
4949            )
4950            .for_each(|(dst, src)| *dst = [src[0], src[1], src[2]]);
4951
4952        compare_images(&dst, &target_image, 0.98, function!());
4953    }
4954
4955    #[test]
4956    #[cfg(target_os = "linux")]
4957    fn test_yuyv_to_rgba_g2d() {
4958        if !is_g2d_available() {
4959            eprintln!("SKIPPED: test_yuyv_to_rgba_g2d - G2D library (libg2d.so.2) not available");
4960            return;
4961        }
4962        if !is_dma_available() {
4963            eprintln!(
4964                "SKIPPED: test_yuyv_to_rgba_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4965            );
4966            return;
4967        }
4968
4969        let src = load_bytes_to_tensor(
4970            1280,
4971            720,
4972            PixelFormat::Yuyv,
4973            None,
4974            include_bytes!(concat!(
4975                env!("CARGO_MANIFEST_DIR"),
4976                "/../../testdata/camera720p.yuyv"
4977            )),
4978        )
4979        .unwrap();
4980
4981        let dst = TensorDyn::image(
4982            1280,
4983            720,
4984            PixelFormat::Rgba,
4985            DType::U8,
4986            Some(TensorMemory::Dma),
4987        )
4988        .unwrap();
4989        let mut g2d_converter = G2DProcessor::new().unwrap();
4990
4991        let (result, _src, dst) = convert_img(
4992            &mut g2d_converter,
4993            src,
4994            dst,
4995            Rotation::None,
4996            Flip::None,
4997            Crop::no_crop(),
4998        );
4999        result.unwrap();
5000
5001        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
5002        target_image
5003            .as_u8()
5004            .unwrap()
5005            .map()
5006            .unwrap()
5007            .as_mut_slice()
5008            .copy_from_slice(include_bytes!(concat!(
5009                env!("CARGO_MANIFEST_DIR"),
5010                "/../../testdata/camera720p.rgba"
5011            )));
5012
5013        compare_images(&dst, &target_image, 0.98, function!());
5014    }
5015
5016    #[test]
5017    #[cfg(target_os = "linux")]
5018    #[cfg(feature = "opengl")]
5019    fn test_yuyv_to_rgba_opengl() {
5020        if !is_opengl_available() {
5021            eprintln!("SKIPPED: {} - OpenGL not available", function!());
5022            return;
5023        }
5024        if !is_dma_available() {
5025            eprintln!(
5026                "SKIPPED: {} - DMA memory allocation not available (permission denied or no DMA-BUF support)",
5027                function!()
5028            );
5029            return;
5030        }
5031
5032        let src = load_bytes_to_tensor(
5033            1280,
5034            720,
5035            PixelFormat::Yuyv,
5036            Some(TensorMemory::Dma),
5037            include_bytes!(concat!(
5038                env!("CARGO_MANIFEST_DIR"),
5039                "/../../testdata/camera720p.yuyv"
5040            )),
5041        )
5042        .unwrap();
5043
5044        let dst = TensorDyn::image(
5045            1280,
5046            720,
5047            PixelFormat::Rgba,
5048            DType::U8,
5049            Some(TensorMemory::Dma),
5050        )
5051        .unwrap();
5052        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
5053
5054        let (result, _src, dst) = convert_img(
5055            &mut gl_converter,
5056            src,
5057            dst,
5058            Rotation::None,
5059            Flip::None,
5060            Crop::no_crop(),
5061        );
5062        result.unwrap();
5063
5064        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
5065        target_image
5066            .as_u8()
5067            .unwrap()
5068            .map()
5069            .unwrap()
5070            .as_mut_slice()
5071            .copy_from_slice(include_bytes!(concat!(
5072                env!("CARGO_MANIFEST_DIR"),
5073                "/../../testdata/camera720p.rgba"
5074            )));
5075
5076        compare_images(&dst, &target_image, 0.98, function!());
5077    }
5078
5079    #[test]
5080    #[cfg(target_os = "linux")]
5081    fn test_yuyv_to_rgb_g2d() {
5082        if !is_g2d_available() {
5083            eprintln!("SKIPPED: test_yuyv_to_rgb_g2d - G2D library (libg2d.so.2) not available");
5084            return;
5085        }
5086        if !is_dma_available() {
5087            eprintln!(
5088                "SKIPPED: test_yuyv_to_rgb_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
5089            );
5090            return;
5091        }
5092
5093        let src = load_bytes_to_tensor(
5094            1280,
5095            720,
5096            PixelFormat::Yuyv,
5097            None,
5098            include_bytes!(concat!(
5099                env!("CARGO_MANIFEST_DIR"),
5100                "/../../testdata/camera720p.yuyv"
5101            )),
5102        )
5103        .unwrap();
5104
5105        let g2d_dst = TensorDyn::image(
5106            1280,
5107            720,
5108            PixelFormat::Rgb,
5109            DType::U8,
5110            Some(TensorMemory::Dma),
5111        )
5112        .unwrap();
5113        let mut g2d_converter = G2DProcessor::new().unwrap();
5114
5115        let (result, src, g2d_dst) = convert_img(
5116            &mut g2d_converter,
5117            src,
5118            g2d_dst,
5119            Rotation::None,
5120            Flip::None,
5121            Crop::no_crop(),
5122        );
5123        result.unwrap();
5124
5125        let cpu_dst = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
5126        let mut cpu_converter: CPUProcessor = CPUProcessor::new();
5127
5128        let (result, _src, cpu_dst) = convert_img(
5129            &mut cpu_converter,
5130            src,
5131            cpu_dst,
5132            Rotation::None,
5133            Flip::None,
5134            Crop::no_crop(),
5135        );
5136        result.unwrap();
5137
5138        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
5139    }
5140
5141    #[test]
5142    #[cfg(target_os = "linux")]
5143    fn test_yuyv_to_yuyv_resize_g2d() {
5144        if !is_g2d_available() {
5145            eprintln!(
5146                "SKIPPED: test_yuyv_to_yuyv_resize_g2d - G2D library (libg2d.so.2) not available"
5147            );
5148            return;
5149        }
5150        if !is_dma_available() {
5151            eprintln!(
5152                "SKIPPED: test_yuyv_to_yuyv_resize_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
5153            );
5154            return;
5155        }
5156
5157        let src = load_bytes_to_tensor(
5158            1280,
5159            720,
5160            PixelFormat::Yuyv,
5161            None,
5162            include_bytes!(concat!(
5163                env!("CARGO_MANIFEST_DIR"),
5164                "/../../testdata/camera720p.yuyv"
5165            )),
5166        )
5167        .unwrap();
5168
5169        let g2d_dst = TensorDyn::image(
5170            600,
5171            400,
5172            PixelFormat::Yuyv,
5173            DType::U8,
5174            Some(TensorMemory::Dma),
5175        )
5176        .unwrap();
5177        let mut g2d_converter = G2DProcessor::new().unwrap();
5178
5179        let (result, src, g2d_dst) = convert_img(
5180            &mut g2d_converter,
5181            src,
5182            g2d_dst,
5183            Rotation::None,
5184            Flip::None,
5185            Crop::no_crop(),
5186        );
5187        result.unwrap();
5188
5189        let cpu_dst = TensorDyn::image(600, 400, PixelFormat::Yuyv, DType::U8, None).unwrap();
5190        let mut cpu_converter: CPUProcessor = CPUProcessor::new();
5191
5192        let (result, _src, cpu_dst) = convert_img(
5193            &mut cpu_converter,
5194            src,
5195            cpu_dst,
5196            Rotation::None,
5197            Flip::None,
5198            Crop::no_crop(),
5199        );
5200        result.unwrap();
5201
5202        // TODO: compare PixelFormat::Yuyv and PixelFormat::Yuyv images without having to convert them to PixelFormat::Rgb
5203        compare_images_convert_to_rgb(&g2d_dst, &cpu_dst, 0.98, function!());
5204    }
5205
5206    #[test]
5207    fn test_yuyv_to_rgba_resize_cpu() {
5208        let src = load_bytes_to_tensor(
5209            1280,
5210            720,
5211            PixelFormat::Yuyv,
5212            None,
5213            include_bytes!(concat!(
5214                env!("CARGO_MANIFEST_DIR"),
5215                "/../../testdata/camera720p.yuyv"
5216            )),
5217        )
5218        .unwrap();
5219
5220        let (dst_width, dst_height) = (960, 540);
5221
5222        let dst =
5223            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
5224        let mut cpu_converter = CPUProcessor::new();
5225
5226        let (result, _src, dst) = convert_img(
5227            &mut cpu_converter,
5228            src,
5229            dst,
5230            Rotation::None,
5231            Flip::None,
5232            Crop::no_crop(),
5233        );
5234        result.unwrap();
5235
5236        let dst_target =
5237            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
5238        let src_target = load_bytes_to_tensor(
5239            1280,
5240            720,
5241            PixelFormat::Rgba,
5242            None,
5243            include_bytes!(concat!(
5244                env!("CARGO_MANIFEST_DIR"),
5245                "/../../testdata/camera720p.rgba"
5246            )),
5247        )
5248        .unwrap();
5249        let (result, _src_target, dst_target) = convert_img(
5250            &mut cpu_converter,
5251            src_target,
5252            dst_target,
5253            Rotation::None,
5254            Flip::None,
5255            Crop::no_crop(),
5256        );
5257        result.unwrap();
5258
5259        compare_images(&dst, &dst_target, 0.98, function!());
5260    }
5261
5262    #[test]
5263    #[cfg(target_os = "linux")]
5264    fn test_yuyv_to_rgba_crop_flip_g2d() {
5265        if !is_g2d_available() {
5266            eprintln!(
5267                "SKIPPED: test_yuyv_to_rgba_crop_flip_g2d - G2D library (libg2d.so.2) not available"
5268            );
5269            return;
5270        }
5271        if !is_dma_available() {
5272            eprintln!(
5273                "SKIPPED: test_yuyv_to_rgba_crop_flip_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
5274            );
5275            return;
5276        }
5277
5278        let src = load_bytes_to_tensor(
5279            1280,
5280            720,
5281            PixelFormat::Yuyv,
5282            Some(TensorMemory::Dma),
5283            include_bytes!(concat!(
5284                env!("CARGO_MANIFEST_DIR"),
5285                "/../../testdata/camera720p.yuyv"
5286            )),
5287        )
5288        .unwrap();
5289
5290        let (dst_width, dst_height) = (640, 640);
5291
5292        let dst_g2d = TensorDyn::image(
5293            dst_width,
5294            dst_height,
5295            PixelFormat::Rgba,
5296            DType::U8,
5297            Some(TensorMemory::Dma),
5298        )
5299        .unwrap();
5300        let mut g2d_converter = G2DProcessor::new().unwrap();
5301        let crop = Crop {
5302            src_rect: Some(Rect {
5303                left: 20,
5304                top: 15,
5305                width: 400,
5306                height: 300,
5307            }),
5308            dst_rect: None,
5309            dst_color: None,
5310        };
5311
5312        let (result, src, dst_g2d) = convert_img(
5313            &mut g2d_converter,
5314            src,
5315            dst_g2d,
5316            Rotation::None,
5317            Flip::Horizontal,
5318            crop,
5319        );
5320        result.unwrap();
5321
5322        let dst_cpu = TensorDyn::image(
5323            dst_width,
5324            dst_height,
5325            PixelFormat::Rgba,
5326            DType::U8,
5327            Some(TensorMemory::Dma),
5328        )
5329        .unwrap();
5330        let mut cpu_converter = CPUProcessor::new();
5331
5332        let (result, _src, dst_cpu) = convert_img(
5333            &mut cpu_converter,
5334            src,
5335            dst_cpu,
5336            Rotation::None,
5337            Flip::Horizontal,
5338            crop,
5339        );
5340        result.unwrap();
5341        compare_images(&dst_g2d, &dst_cpu, 0.98, function!());
5342    }
5343
5344    #[test]
5345    #[cfg(target_os = "linux")]
5346    #[cfg(feature = "opengl")]
5347    fn test_yuyv_to_rgba_crop_flip_opengl() {
5348        if !is_opengl_available() {
5349            eprintln!("SKIPPED: {} - OpenGL not available", function!());
5350            return;
5351        }
5352
5353        if !is_dma_available() {
5354            eprintln!(
5355                "SKIPPED: {} - DMA memory allocation not available (permission denied or no DMA-BUF support)",
5356                function!()
5357            );
5358            return;
5359        }
5360
5361        let src = load_bytes_to_tensor(
5362            1280,
5363            720,
5364            PixelFormat::Yuyv,
5365            Some(TensorMemory::Dma),
5366            include_bytes!(concat!(
5367                env!("CARGO_MANIFEST_DIR"),
5368                "/../../testdata/camera720p.yuyv"
5369            )),
5370        )
5371        .unwrap();
5372
5373        let (dst_width, dst_height) = (640, 640);
5374
5375        let dst_gl = TensorDyn::image(
5376            dst_width,
5377            dst_height,
5378            PixelFormat::Rgba,
5379            DType::U8,
5380            Some(TensorMemory::Dma),
5381        )
5382        .unwrap();
5383        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
5384        let crop = Crop {
5385            src_rect: Some(Rect {
5386                left: 20,
5387                top: 15,
5388                width: 400,
5389                height: 300,
5390            }),
5391            dst_rect: None,
5392            dst_color: None,
5393        };
5394
5395        let (result, src, dst_gl) = convert_img(
5396            &mut gl_converter,
5397            src,
5398            dst_gl,
5399            Rotation::None,
5400            Flip::Horizontal,
5401            crop,
5402        );
5403        result.unwrap();
5404
5405        let dst_cpu = TensorDyn::image(
5406            dst_width,
5407            dst_height,
5408            PixelFormat::Rgba,
5409            DType::U8,
5410            Some(TensorMemory::Dma),
5411        )
5412        .unwrap();
5413        let mut cpu_converter = CPUProcessor::new();
5414
5415        let (result, _src, dst_cpu) = convert_img(
5416            &mut cpu_converter,
5417            src,
5418            dst_cpu,
5419            Rotation::None,
5420            Flip::Horizontal,
5421            crop,
5422        );
5423        result.unwrap();
5424        compare_images(&dst_gl, &dst_cpu, 0.98, function!());
5425    }
5426
5427    #[test]
5428    fn test_vyuy_to_rgba_cpu() {
5429        let file = include_bytes!(concat!(
5430            env!("CARGO_MANIFEST_DIR"),
5431            "/../../testdata/camera720p.vyuy"
5432        ))
5433        .to_vec();
5434        let src = TensorDyn::image(1280, 720, PixelFormat::Vyuy, DType::U8, None).unwrap();
5435        src.as_u8()
5436            .unwrap()
5437            .map()
5438            .unwrap()
5439            .as_mut_slice()
5440            .copy_from_slice(&file);
5441
5442        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
5443        let mut cpu_converter = CPUProcessor::new();
5444
5445        let (result, _src, dst) = convert_img(
5446            &mut cpu_converter,
5447            src,
5448            dst,
5449            Rotation::None,
5450            Flip::None,
5451            Crop::no_crop(),
5452        );
5453        result.unwrap();
5454
5455        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
5456        target_image
5457            .as_u8()
5458            .unwrap()
5459            .map()
5460            .unwrap()
5461            .as_mut_slice()
5462            .copy_from_slice(include_bytes!(concat!(
5463                env!("CARGO_MANIFEST_DIR"),
5464                "/../../testdata/camera720p.rgba"
5465            )));
5466
5467        compare_images(&dst, &target_image, 0.98, function!());
5468    }
5469
5470    #[test]
5471    fn test_vyuy_to_rgb_cpu() {
5472        let file = include_bytes!(concat!(
5473            env!("CARGO_MANIFEST_DIR"),
5474            "/../../testdata/camera720p.vyuy"
5475        ))
5476        .to_vec();
5477        let src = TensorDyn::image(1280, 720, PixelFormat::Vyuy, DType::U8, None).unwrap();
5478        src.as_u8()
5479            .unwrap()
5480            .map()
5481            .unwrap()
5482            .as_mut_slice()
5483            .copy_from_slice(&file);
5484
5485        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
5486        let mut cpu_converter = CPUProcessor::new();
5487
5488        let (result, _src, dst) = convert_img(
5489            &mut cpu_converter,
5490            src,
5491            dst,
5492            Rotation::None,
5493            Flip::None,
5494            Crop::no_crop(),
5495        );
5496        result.unwrap();
5497
5498        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
5499        target_image
5500            .as_u8()
5501            .unwrap()
5502            .map()
5503            .unwrap()
5504            .as_mut_slice()
5505            .as_chunks_mut::<3>()
5506            .0
5507            .iter_mut()
5508            .zip(
5509                include_bytes!(concat!(
5510                    env!("CARGO_MANIFEST_DIR"),
5511                    "/../../testdata/camera720p.rgba"
5512                ))
5513                .as_chunks::<4>()
5514                .0,
5515            )
5516            .for_each(|(dst, src)| *dst = [src[0], src[1], src[2]]);
5517
5518        compare_images(&dst, &target_image, 0.98, function!());
5519    }
5520
5521    #[test]
5522    #[cfg(target_os = "linux")]
5523    #[ignore = "G2D does not support VYUY; re-enable when hardware support is added"]
5524    fn test_vyuy_to_rgba_g2d() {
5525        if !is_g2d_available() {
5526            eprintln!("SKIPPED: test_vyuy_to_rgba_g2d - G2D library (libg2d.so.2) not available");
5527            return;
5528        }
5529        if !is_dma_available() {
5530            eprintln!(
5531                "SKIPPED: test_vyuy_to_rgba_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
5532            );
5533            return;
5534        }
5535
5536        let src = load_bytes_to_tensor(
5537            1280,
5538            720,
5539            PixelFormat::Vyuy,
5540            None,
5541            include_bytes!(concat!(
5542                env!("CARGO_MANIFEST_DIR"),
5543                "/../../testdata/camera720p.vyuy"
5544            )),
5545        )
5546        .unwrap();
5547
5548        let dst = TensorDyn::image(
5549            1280,
5550            720,
5551            PixelFormat::Rgba,
5552            DType::U8,
5553            Some(TensorMemory::Dma),
5554        )
5555        .unwrap();
5556        let mut g2d_converter = G2DProcessor::new().unwrap();
5557
5558        let (result, _src, dst) = convert_img(
5559            &mut g2d_converter,
5560            src,
5561            dst,
5562            Rotation::None,
5563            Flip::None,
5564            Crop::no_crop(),
5565        );
5566        match result {
5567            Err(Error::G2D(_)) => {
5568                eprintln!("SKIPPED: test_vyuy_to_rgba_g2d - G2D does not support PixelFormat::Vyuy format");
5569                return;
5570            }
5571            r => r.unwrap(),
5572        }
5573
5574        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
5575        target_image
5576            .as_u8()
5577            .unwrap()
5578            .map()
5579            .unwrap()
5580            .as_mut_slice()
5581            .copy_from_slice(include_bytes!(concat!(
5582                env!("CARGO_MANIFEST_DIR"),
5583                "/../../testdata/camera720p.rgba"
5584            )));
5585
5586        compare_images(&dst, &target_image, 0.98, function!());
5587    }
5588
5589    #[test]
5590    #[cfg(target_os = "linux")]
5591    #[ignore = "G2D does not support VYUY; re-enable when hardware support is added"]
5592    fn test_vyuy_to_rgb_g2d() {
5593        if !is_g2d_available() {
5594            eprintln!("SKIPPED: test_vyuy_to_rgb_g2d - G2D library (libg2d.so.2) not available");
5595            return;
5596        }
5597        if !is_dma_available() {
5598            eprintln!(
5599                "SKIPPED: test_vyuy_to_rgb_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
5600            );
5601            return;
5602        }
5603
5604        let src = load_bytes_to_tensor(
5605            1280,
5606            720,
5607            PixelFormat::Vyuy,
5608            None,
5609            include_bytes!(concat!(
5610                env!("CARGO_MANIFEST_DIR"),
5611                "/../../testdata/camera720p.vyuy"
5612            )),
5613        )
5614        .unwrap();
5615
5616        let g2d_dst = TensorDyn::image(
5617            1280,
5618            720,
5619            PixelFormat::Rgb,
5620            DType::U8,
5621            Some(TensorMemory::Dma),
5622        )
5623        .unwrap();
5624        let mut g2d_converter = G2DProcessor::new().unwrap();
5625
5626        let (result, src, g2d_dst) = convert_img(
5627            &mut g2d_converter,
5628            src,
5629            g2d_dst,
5630            Rotation::None,
5631            Flip::None,
5632            Crop::no_crop(),
5633        );
5634        match result {
5635            Err(Error::G2D(_)) => {
5636                eprintln!(
5637                    "SKIPPED: test_vyuy_to_rgb_g2d - G2D does not support PixelFormat::Vyuy format"
5638                );
5639                return;
5640            }
5641            r => r.unwrap(),
5642        }
5643
5644        let cpu_dst = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
5645        let mut cpu_converter: CPUProcessor = CPUProcessor::new();
5646
5647        let (result, _src, cpu_dst) = convert_img(
5648            &mut cpu_converter,
5649            src,
5650            cpu_dst,
5651            Rotation::None,
5652            Flip::None,
5653            Crop::no_crop(),
5654        );
5655        result.unwrap();
5656
5657        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
5658    }
5659
5660    #[test]
5661    #[cfg(target_os = "linux")]
5662    #[cfg(feature = "opengl")]
5663    fn test_vyuy_to_rgba_opengl() {
5664        if !is_opengl_available() {
5665            eprintln!("SKIPPED: {} - OpenGL not available", function!());
5666            return;
5667        }
5668        if !is_dma_available() {
5669            eprintln!(
5670                "SKIPPED: {} - DMA memory allocation not available (permission denied or no DMA-BUF support)",
5671                function!()
5672            );
5673            return;
5674        }
5675
5676        let src = load_bytes_to_tensor(
5677            1280,
5678            720,
5679            PixelFormat::Vyuy,
5680            Some(TensorMemory::Dma),
5681            include_bytes!(concat!(
5682                env!("CARGO_MANIFEST_DIR"),
5683                "/../../testdata/camera720p.vyuy"
5684            )),
5685        )
5686        .unwrap();
5687
5688        let dst = TensorDyn::image(
5689            1280,
5690            720,
5691            PixelFormat::Rgba,
5692            DType::U8,
5693            Some(TensorMemory::Dma),
5694        )
5695        .unwrap();
5696        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
5697
5698        let (result, _src, dst) = convert_img(
5699            &mut gl_converter,
5700            src,
5701            dst,
5702            Rotation::None,
5703            Flip::None,
5704            Crop::no_crop(),
5705        );
5706        match result {
5707            Err(Error::NotSupported(_)) => {
5708                eprintln!(
5709                    "SKIPPED: {} - OpenGL does not support PixelFormat::Vyuy DMA format",
5710                    function!()
5711                );
5712                return;
5713            }
5714            r => r.unwrap(),
5715        }
5716
5717        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
5718        target_image
5719            .as_u8()
5720            .unwrap()
5721            .map()
5722            .unwrap()
5723            .as_mut_slice()
5724            .copy_from_slice(include_bytes!(concat!(
5725                env!("CARGO_MANIFEST_DIR"),
5726                "/../../testdata/camera720p.rgba"
5727            )));
5728
5729        compare_images(&dst, &target_image, 0.98, function!());
5730    }
5731
5732    #[test]
5733    fn test_nv12_to_rgba_cpu() {
5734        let file = include_bytes!(concat!(
5735            env!("CARGO_MANIFEST_DIR"),
5736            "/../../testdata/zidane.nv12"
5737        ))
5738        .to_vec();
5739        let src = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
5740        src.as_u8().unwrap().map().unwrap().as_mut_slice()[0..(1280 * 720 * 3 / 2)]
5741            .copy_from_slice(&file);
5742
5743        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
5744        let mut cpu_converter = CPUProcessor::new();
5745
5746        let (result, _src, dst) = convert_img(
5747            &mut cpu_converter,
5748            src,
5749            dst,
5750            Rotation::None,
5751            Flip::None,
5752            Crop::no_crop(),
5753        );
5754        result.unwrap();
5755
5756        let target_image = crate::load_image(
5757            include_bytes!(concat!(
5758                env!("CARGO_MANIFEST_DIR"),
5759                "/../../testdata/zidane.jpg"
5760            )),
5761            Some(PixelFormat::Rgba),
5762            None,
5763        )
5764        .unwrap();
5765
5766        compare_images(&dst, &target_image, 0.98, function!());
5767    }
5768
5769    #[test]
5770    fn test_nv12_to_rgb_cpu() {
5771        let file = include_bytes!(concat!(
5772            env!("CARGO_MANIFEST_DIR"),
5773            "/../../testdata/zidane.nv12"
5774        ))
5775        .to_vec();
5776        let src = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
5777        src.as_u8().unwrap().map().unwrap().as_mut_slice()[0..(1280 * 720 * 3 / 2)]
5778            .copy_from_slice(&file);
5779
5780        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
5781        let mut cpu_converter = CPUProcessor::new();
5782
5783        let (result, _src, dst) = convert_img(
5784            &mut cpu_converter,
5785            src,
5786            dst,
5787            Rotation::None,
5788            Flip::None,
5789            Crop::no_crop(),
5790        );
5791        result.unwrap();
5792
5793        let target_image = crate::load_image(
5794            include_bytes!(concat!(
5795                env!("CARGO_MANIFEST_DIR"),
5796                "/../../testdata/zidane.jpg"
5797            )),
5798            Some(PixelFormat::Rgb),
5799            None,
5800        )
5801        .unwrap();
5802
5803        compare_images(&dst, &target_image, 0.98, function!());
5804    }
5805
5806    #[test]
5807    fn test_nv12_to_grey_cpu() {
5808        let file = include_bytes!(concat!(
5809            env!("CARGO_MANIFEST_DIR"),
5810            "/../../testdata/zidane.nv12"
5811        ))
5812        .to_vec();
5813        let src = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
5814        src.as_u8().unwrap().map().unwrap().as_mut_slice()[0..(1280 * 720 * 3 / 2)]
5815            .copy_from_slice(&file);
5816
5817        let dst = TensorDyn::image(1280, 720, PixelFormat::Grey, DType::U8, None).unwrap();
5818        let mut cpu_converter = CPUProcessor::new();
5819
5820        let (result, _src, dst) = convert_img(
5821            &mut cpu_converter,
5822            src,
5823            dst,
5824            Rotation::None,
5825            Flip::None,
5826            Crop::no_crop(),
5827        );
5828        result.unwrap();
5829
5830        let target_image = crate::load_image(
5831            include_bytes!(concat!(
5832                env!("CARGO_MANIFEST_DIR"),
5833                "/../../testdata/zidane.jpg"
5834            )),
5835            Some(PixelFormat::Grey),
5836            None,
5837        )
5838        .unwrap();
5839
5840        compare_images(&dst, &target_image, 0.98, function!());
5841    }
5842
5843    #[test]
5844    fn test_nv12_to_yuyv_cpu() {
5845        let file = include_bytes!(concat!(
5846            env!("CARGO_MANIFEST_DIR"),
5847            "/../../testdata/zidane.nv12"
5848        ))
5849        .to_vec();
5850        let src = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
5851        src.as_u8().unwrap().map().unwrap().as_mut_slice()[0..(1280 * 720 * 3 / 2)]
5852            .copy_from_slice(&file);
5853
5854        let dst = TensorDyn::image(1280, 720, PixelFormat::Yuyv, DType::U8, None).unwrap();
5855        let mut cpu_converter = CPUProcessor::new();
5856
5857        let (result, _src, dst) = convert_img(
5858            &mut cpu_converter,
5859            src,
5860            dst,
5861            Rotation::None,
5862            Flip::None,
5863            Crop::no_crop(),
5864        );
5865        result.unwrap();
5866
5867        let target_image = crate::load_image(
5868            include_bytes!(concat!(
5869                env!("CARGO_MANIFEST_DIR"),
5870                "/../../testdata/zidane.jpg"
5871            )),
5872            Some(PixelFormat::Rgb),
5873            None,
5874        )
5875        .unwrap();
5876
5877        compare_images_convert_to_rgb(&dst, &target_image, 0.98, function!());
5878    }
5879
5880    #[test]
5881    fn test_cpu_resize_planar_rgb() {
5882        let src = TensorDyn::image(4, 4, PixelFormat::Rgba, DType::U8, None).unwrap();
5883        #[rustfmt::skip]
5884        let src_image = [
5885                    255, 0, 0, 255,     0, 255, 0, 255,     0, 0, 255, 255,     255, 255, 0, 255,
5886                    255, 0, 0, 0,       0, 0, 0, 255,       255,  0, 255, 0,    255, 0, 255, 255,
5887                    0, 0, 255, 0,       0, 255, 255, 255,   255, 255, 0, 0,     0, 0, 0, 255,
5888                    255, 0, 0, 0,       0, 0, 0, 255,       255,  0, 255, 0,    255, 0, 255, 255,
5889        ];
5890        src.as_u8()
5891            .unwrap()
5892            .map()
5893            .unwrap()
5894            .as_mut_slice()
5895            .copy_from_slice(&src_image);
5896
5897        let cpu_dst = TensorDyn::image(5, 5, PixelFormat::PlanarRgb, DType::U8, None).unwrap();
5898        let mut cpu_converter = CPUProcessor::new();
5899
5900        let (result, _src, cpu_dst) = convert_img(
5901            &mut cpu_converter,
5902            src,
5903            cpu_dst,
5904            Rotation::None,
5905            Flip::None,
5906            Crop::new()
5907                .with_dst_rect(Some(Rect {
5908                    left: 1,
5909                    top: 1,
5910                    width: 4,
5911                    height: 4,
5912                }))
5913                .with_dst_color(Some([114, 114, 114, 255])),
5914        );
5915        result.unwrap();
5916
5917        #[rustfmt::skip]
5918        let expected_dst = [
5919            114, 114, 114, 114, 114,    114, 255, 0, 0, 255,    114, 255, 0, 255, 255,      114, 0, 0, 255, 0,        114, 255, 0, 255, 255,
5920            114, 114, 114, 114, 114,    114, 0, 255, 0, 255,    114, 0, 0, 0, 0,            114, 0, 255, 255, 0,      114, 0, 0, 0, 0,
5921            114, 114, 114, 114, 114,    114, 0, 0, 255, 0,      114, 0, 0, 255, 255,        114, 255, 255, 0, 0,      114, 0, 0, 255, 255,
5922        ];
5923
5924        assert_eq!(
5925            cpu_dst.as_u8().unwrap().map().unwrap().as_slice(),
5926            &expected_dst
5927        );
5928    }
5929
5930    #[test]
5931    fn test_cpu_resize_planar_rgba() {
5932        let src = TensorDyn::image(4, 4, PixelFormat::Rgba, DType::U8, None).unwrap();
5933        #[rustfmt::skip]
5934        let src_image = [
5935                    255, 0, 0, 255,     0, 255, 0, 255,     0, 0, 255, 255,     255, 255, 0, 255,
5936                    255, 0, 0, 0,       0, 0, 0, 255,       255,  0, 255, 0,    255, 0, 255, 255,
5937                    0, 0, 255, 0,       0, 255, 255, 255,   255, 255, 0, 0,     0, 0, 0, 255,
5938                    255, 0, 0, 0,       0, 0, 0, 255,       255,  0, 255, 0,    255, 0, 255, 255,
5939        ];
5940        src.as_u8()
5941            .unwrap()
5942            .map()
5943            .unwrap()
5944            .as_mut_slice()
5945            .copy_from_slice(&src_image);
5946
5947        let cpu_dst = TensorDyn::image(5, 5, PixelFormat::PlanarRgba, DType::U8, None).unwrap();
5948        let mut cpu_converter = CPUProcessor::new();
5949
5950        let (result, _src, cpu_dst) = convert_img(
5951            &mut cpu_converter,
5952            src,
5953            cpu_dst,
5954            Rotation::None,
5955            Flip::None,
5956            Crop::new()
5957                .with_dst_rect(Some(Rect {
5958                    left: 1,
5959                    top: 1,
5960                    width: 4,
5961                    height: 4,
5962                }))
5963                .with_dst_color(Some([114, 114, 114, 255])),
5964        );
5965        result.unwrap();
5966
5967        #[rustfmt::skip]
5968        let expected_dst = [
5969            114, 114, 114, 114, 114,    114, 255, 0, 0, 255,        114, 255, 0, 255, 255,      114, 0, 0, 255, 0,        114, 255, 0, 255, 255,
5970            114, 114, 114, 114, 114,    114, 0, 255, 0, 255,        114, 0, 0, 0, 0,            114, 0, 255, 255, 0,      114, 0, 0, 0, 0,
5971            114, 114, 114, 114, 114,    114, 0, 0, 255, 0,          114, 0, 0, 255, 255,        114, 255, 255, 0, 0,      114, 0, 0, 255, 255,
5972            255, 255, 255, 255, 255,    255, 255, 255, 255, 255,    255, 0, 255, 0, 255,        255, 0, 255, 0, 255,      255, 0, 255, 0, 255,
5973        ];
5974
5975        assert_eq!(
5976            cpu_dst.as_u8().unwrap().map().unwrap().as_slice(),
5977            &expected_dst
5978        );
5979    }
5980
5981    #[test]
5982    #[cfg(target_os = "linux")]
5983    #[cfg(feature = "opengl")]
5984    fn test_opengl_resize_planar_rgb() {
5985        if !is_opengl_available() {
5986            eprintln!("SKIPPED: {} - OpenGL not available", function!());
5987            return;
5988        }
5989
5990        if !is_dma_available() {
5991            eprintln!(
5992                "SKIPPED: {} - DMA memory allocation not available (permission denied or no DMA-BUF support)",
5993                function!()
5994            );
5995            return;
5996        }
5997
5998        let dst_width = 640;
5999        let dst_height = 640;
6000        let file = include_bytes!(concat!(
6001            env!("CARGO_MANIFEST_DIR"),
6002            "/../../testdata/test_image.jpg"
6003        ))
6004        .to_vec();
6005        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
6006
6007        let cpu_dst = TensorDyn::image(
6008            dst_width,
6009            dst_height,
6010            PixelFormat::PlanarRgb,
6011            DType::U8,
6012            None,
6013        )
6014        .unwrap();
6015        let mut cpu_converter = CPUProcessor::new();
6016        let (result, src, cpu_dst) = convert_img(
6017            &mut cpu_converter,
6018            src,
6019            cpu_dst,
6020            Rotation::None,
6021            Flip::None,
6022            Crop::no_crop(),
6023        );
6024        result.unwrap();
6025        let crop_letterbox = Crop::new()
6026            .with_dst_rect(Some(Rect {
6027                left: 102,
6028                top: 102,
6029                width: 440,
6030                height: 440,
6031            }))
6032            .with_dst_color(Some([114, 114, 114, 114]));
6033        let (result, src, cpu_dst) = convert_img(
6034            &mut cpu_converter,
6035            src,
6036            cpu_dst,
6037            Rotation::None,
6038            Flip::None,
6039            crop_letterbox,
6040        );
6041        result.unwrap();
6042
6043        let gl_dst = TensorDyn::image(
6044            dst_width,
6045            dst_height,
6046            PixelFormat::PlanarRgb,
6047            DType::U8,
6048            None,
6049        )
6050        .unwrap();
6051        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
6052
6053        let (result, _src, gl_dst) = convert_img(
6054            &mut gl_converter,
6055            src,
6056            gl_dst,
6057            Rotation::None,
6058            Flip::None,
6059            crop_letterbox,
6060        );
6061        result.unwrap();
6062        compare_images(&gl_dst, &cpu_dst, 0.98, function!());
6063    }
6064
6065    #[test]
6066    fn test_cpu_resize_nv16() {
6067        let file = include_bytes!(concat!(
6068            env!("CARGO_MANIFEST_DIR"),
6069            "/../../testdata/zidane.jpg"
6070        ))
6071        .to_vec();
6072        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
6073
6074        let cpu_nv16_dst = TensorDyn::image(640, 640, PixelFormat::Nv16, DType::U8, None).unwrap();
6075        let cpu_rgb_dst = TensorDyn::image(640, 640, PixelFormat::Rgb, DType::U8, None).unwrap();
6076        let mut cpu_converter = CPUProcessor::new();
6077        let crop = Crop::new()
6078            .with_dst_rect(Some(Rect {
6079                left: 20,
6080                top: 140,
6081                width: 600,
6082                height: 360,
6083            }))
6084            .with_dst_color(Some([255, 128, 0, 255]));
6085
6086        let (result, src, cpu_nv16_dst) = convert_img(
6087            &mut cpu_converter,
6088            src,
6089            cpu_nv16_dst,
6090            Rotation::None,
6091            Flip::None,
6092            crop,
6093        );
6094        result.unwrap();
6095
6096        let (result, _src, cpu_rgb_dst) = convert_img(
6097            &mut cpu_converter,
6098            src,
6099            cpu_rgb_dst,
6100            Rotation::None,
6101            Flip::None,
6102            crop,
6103        );
6104        result.unwrap();
6105        compare_images_convert_to_rgb(&cpu_nv16_dst, &cpu_rgb_dst, 0.99, function!());
6106    }
6107
6108    fn load_bytes_to_tensor(
6109        width: usize,
6110        height: usize,
6111        format: PixelFormat,
6112        memory: Option<TensorMemory>,
6113        bytes: &[u8],
6114    ) -> Result<TensorDyn, Error> {
6115        let src = TensorDyn::image(width, height, format, DType::U8, memory)?;
6116        src.as_u8()
6117            .unwrap()
6118            .map()?
6119            .as_mut_slice()
6120            .copy_from_slice(bytes);
6121        Ok(src)
6122    }
6123
6124    fn compare_images(img1: &TensorDyn, img2: &TensorDyn, threshold: f64, name: &str) {
6125        assert_eq!(img1.height(), img2.height(), "Heights differ");
6126        assert_eq!(img1.width(), img2.width(), "Widths differ");
6127        assert_eq!(
6128            img1.format().unwrap(),
6129            img2.format().unwrap(),
6130            "PixelFormat differ"
6131        );
6132        assert!(
6133            matches!(
6134                img1.format().unwrap(),
6135                PixelFormat::Rgb | PixelFormat::Rgba | PixelFormat::Grey | PixelFormat::PlanarRgb
6136            ),
6137            "format must be Rgb or Rgba for comparison"
6138        );
6139
6140        let image1 = match img1.format().unwrap() {
6141            PixelFormat::Rgb => image::RgbImage::from_vec(
6142                img1.width().unwrap() as u32,
6143                img1.height().unwrap() as u32,
6144                img1.as_u8().unwrap().map().unwrap().to_vec(),
6145            )
6146            .unwrap(),
6147            PixelFormat::Rgba => image::RgbaImage::from_vec(
6148                img1.width().unwrap() as u32,
6149                img1.height().unwrap() as u32,
6150                img1.as_u8().unwrap().map().unwrap().to_vec(),
6151            )
6152            .unwrap()
6153            .convert(),
6154            PixelFormat::Grey => image::GrayImage::from_vec(
6155                img1.width().unwrap() as u32,
6156                img1.height().unwrap() as u32,
6157                img1.as_u8().unwrap().map().unwrap().to_vec(),
6158            )
6159            .unwrap()
6160            .convert(),
6161            PixelFormat::PlanarRgb => image::GrayImage::from_vec(
6162                img1.width().unwrap() as u32,
6163                (img1.height().unwrap() * 3) as u32,
6164                img1.as_u8().unwrap().map().unwrap().to_vec(),
6165            )
6166            .unwrap()
6167            .convert(),
6168            _ => return,
6169        };
6170
6171        let image2 = match img2.format().unwrap() {
6172            PixelFormat::Rgb => image::RgbImage::from_vec(
6173                img2.width().unwrap() as u32,
6174                img2.height().unwrap() as u32,
6175                img2.as_u8().unwrap().map().unwrap().to_vec(),
6176            )
6177            .unwrap(),
6178            PixelFormat::Rgba => image::RgbaImage::from_vec(
6179                img2.width().unwrap() as u32,
6180                img2.height().unwrap() as u32,
6181                img2.as_u8().unwrap().map().unwrap().to_vec(),
6182            )
6183            .unwrap()
6184            .convert(),
6185            PixelFormat::Grey => image::GrayImage::from_vec(
6186                img2.width().unwrap() as u32,
6187                img2.height().unwrap() as u32,
6188                img2.as_u8().unwrap().map().unwrap().to_vec(),
6189            )
6190            .unwrap()
6191            .convert(),
6192            PixelFormat::PlanarRgb => image::GrayImage::from_vec(
6193                img2.width().unwrap() as u32,
6194                (img2.height().unwrap() * 3) as u32,
6195                img2.as_u8().unwrap().map().unwrap().to_vec(),
6196            )
6197            .unwrap()
6198            .convert(),
6199            _ => return,
6200        };
6201
6202        let similarity = image_compare::rgb_similarity_structure(
6203            &image_compare::Algorithm::RootMeanSquared,
6204            &image1,
6205            &image2,
6206        )
6207        .expect("Image Comparison failed");
6208        if similarity.score < threshold {
6209            // image1.save(format!("{name}_1.png"));
6210            // image2.save(format!("{name}_2.png"));
6211            similarity
6212                .image
6213                .to_color_map()
6214                .save(format!("{name}.png"))
6215                .unwrap();
6216            panic!(
6217                "{name}: converted image and target image have similarity score too low: {} < {}",
6218                similarity.score, threshold
6219            )
6220        }
6221    }
6222
    /// Compares two images that may be in non-RGB formats by first converting
    /// both to packed RGB with the CPU processor, then checking structural
    /// similarity (RMS) against `threshold`.
    ///
    /// If either CPU conversion fails, falls back to comparing the raw tensor
    /// bytes as greyscale strips. Panics (with `name` in the message) when the
    /// similarity score is below `threshold`.
    fn compare_images_convert_to_rgb(
        img1: &TensorDyn,
        img2: &TensorDyn,
        threshold: f64,
        name: &str,
    ) {
        assert_eq!(img1.height(), img2.height(), "Heights differ");
        assert_eq!(img1.width(), img2.width(), "Widths differ");

        // Plain-memory RGB destinations sized from img1 (both images are
        // asserted above to share the same geometry).
        let mut img_rgb1 = TensorDyn::image(
            img1.width().unwrap(),
            img1.height().unwrap(),
            PixelFormat::Rgb,
            DType::U8,
            Some(TensorMemory::Mem),
        )
        .unwrap();
        let mut img_rgb2 = TensorDyn::image(
            img1.width().unwrap(),
            img1.height().unwrap(),
            PixelFormat::Rgb,
            DType::U8,
            Some(TensorMemory::Mem),
        )
        .unwrap();
        let mut __cv = CPUProcessor::default();
        let r1 = __cv.convert(
            img1,
            &mut img_rgb1,
            crate::Rotation::None,
            crate::Flip::None,
            crate::Crop::default(),
        );
        let r2 = __cv.convert(
            img2,
            &mut img_rgb2,
            crate::Rotation::None,
            crate::Flip::None,
            crate::Crop::default(),
        );
        if r1.is_err() || r2.is_err() {
            // Fallback: compare raw bytes as greyscale strip
            // Strip heights are derived from each tensor's byte length, so the
            // two strips can differ in height when the formats differ.
            let w = img1.width().unwrap() as u32;
            let data1 = img1.as_u8().unwrap().map().unwrap().to_vec();
            let data2 = img2.as_u8().unwrap().map().unwrap().to_vec();
            let h1 = (data1.len() as u32) / w;
            let h2 = (data2.len() as u32) / w;
            let g1 = image::GrayImage::from_vec(w, h1, data1).unwrap();
            let g2 = image::GrayImage::from_vec(w, h2, data2).unwrap();
            let similarity = image_compare::gray_similarity_structure(
                &image_compare::Algorithm::RootMeanSquared,
                &g1,
                &g2,
            )
            .expect("Image Comparison failed");
            if similarity.score < threshold {
                panic!(
                    "{name}: converted image and target image have similarity score too low: {} < {}",
                    similarity.score, threshold
                )
            }
            return;
        }

        // Both conversions succeeded: compare the RGB renditions structurally.
        let image1 = image::RgbImage::from_vec(
            img_rgb1.width().unwrap() as u32,
            img_rgb1.height().unwrap() as u32,
            img_rgb1.as_u8().unwrap().map().unwrap().to_vec(),
        )
        .unwrap();

        let image2 = image::RgbImage::from_vec(
            img_rgb2.width().unwrap() as u32,
            img_rgb2.height().unwrap() as u32,
            img_rgb2.as_u8().unwrap().map().unwrap().to_vec(),
        )
        .unwrap();

        let similarity = image_compare::rgb_similarity_structure(
            &image_compare::Algorithm::RootMeanSquared,
            &image1,
            &image2,
        )
        .expect("Image Comparison failed");
        if similarity.score < threshold {
            // image1.save(format!("{name}_1.png"));
            // image2.save(format!("{name}_2.png"));
            // Save the difference heat-map to aid debugging the failure.
            similarity
                .image
                .to_color_map()
                .save(format!("{name}.png"))
                .unwrap();
            panic!(
                "{name}: converted image and target image have similarity score too low: {} < {}",
                similarity.score, threshold
            )
        }
    }
6321
6322    // =========================================================================
6323    // PixelFormat::Nv12 Format Tests
6324    // =========================================================================
6325
6326    #[test]
6327    fn test_nv12_image_creation() {
6328        let width = 640;
6329        let height = 480;
6330        let img = TensorDyn::image(width, height, PixelFormat::Nv12, DType::U8, None).unwrap();
6331
6332        assert_eq!(img.width(), Some(width));
6333        assert_eq!(img.height(), Some(height));
6334        assert_eq!(img.format().unwrap(), PixelFormat::Nv12);
6335        // PixelFormat::Nv12 uses shape [H*3/2, W] to store Y plane + UV plane
6336        assert_eq!(img.as_u8().unwrap().shape(), &[height * 3 / 2, width]);
6337    }
6338
6339    #[test]
6340    fn test_nv12_channels() {
6341        let img = TensorDyn::image(640, 480, PixelFormat::Nv12, DType::U8, None).unwrap();
6342        // PixelFormat::Nv12.channels() returns 1 (luma plane)
6343        assert_eq!(img.format().unwrap().channels(), 1);
6344    }
6345
6346    // =========================================================================
6347    // Tensor Format Metadata Tests
6348    // =========================================================================
6349
6350    #[test]
6351    fn test_tensor_set_format_planar() {
6352        let mut tensor = Tensor::<u8>::new(&[3, 480, 640], None, None).unwrap();
6353        tensor.set_format(PixelFormat::PlanarRgb).unwrap();
6354        assert_eq!(tensor.format(), Some(PixelFormat::PlanarRgb));
6355        assert_eq!(tensor.width(), Some(640));
6356        assert_eq!(tensor.height(), Some(480));
6357    }
6358
6359    #[test]
6360    fn test_tensor_set_format_interleaved() {
6361        let mut tensor = Tensor::<u8>::new(&[480, 640, 4], None, None).unwrap();
6362        tensor.set_format(PixelFormat::Rgba).unwrap();
6363        assert_eq!(tensor.format(), Some(PixelFormat::Rgba));
6364        assert_eq!(tensor.width(), Some(640));
6365        assert_eq!(tensor.height(), Some(480));
6366    }
6367
6368    #[test]
6369    fn test_tensordyn_image_rgb() {
6370        let img = TensorDyn::image(640, 480, PixelFormat::Rgb, DType::U8, None).unwrap();
6371        assert_eq!(img.width(), Some(640));
6372        assert_eq!(img.height(), Some(480));
6373        assert_eq!(img.format(), Some(PixelFormat::Rgb));
6374    }
6375
6376    #[test]
6377    fn test_tensordyn_image_planar_rgb() {
6378        let img = TensorDyn::image(640, 480, PixelFormat::PlanarRgb, DType::U8, None).unwrap();
6379        assert_eq!(img.width(), Some(640));
6380        assert_eq!(img.height(), Some(480));
6381        assert_eq!(img.format(), Some(PixelFormat::PlanarRgb));
6382    }
6383
6384    #[test]
6385    fn test_rgb_int8_format() {
6386        // Int8 variant: same PixelFormat::Rgb but with DType::I8
6387        let img = TensorDyn::image(
6388            1280,
6389            720,
6390            PixelFormat::Rgb,
6391            DType::I8,
6392            Some(TensorMemory::Mem),
6393        )
6394        .unwrap();
6395        assert_eq!(img.width(), Some(1280));
6396        assert_eq!(img.height(), Some(720));
6397        assert_eq!(img.format(), Some(PixelFormat::Rgb));
6398        assert_eq!(img.dtype(), DType::I8);
6399    }
6400
6401    #[test]
6402    fn test_planar_rgb_int8_format() {
6403        let img = TensorDyn::image(
6404            1280,
6405            720,
6406            PixelFormat::PlanarRgb,
6407            DType::I8,
6408            Some(TensorMemory::Mem),
6409        )
6410        .unwrap();
6411        assert_eq!(img.width(), Some(1280));
6412        assert_eq!(img.height(), Some(720));
6413        assert_eq!(img.format(), Some(PixelFormat::PlanarRgb));
6414        assert_eq!(img.dtype(), DType::I8);
6415    }
6416
6417    #[test]
6418    fn test_rgb_from_tensor() {
6419        let mut tensor = Tensor::<u8>::new(&[720, 1280, 3], None, None).unwrap();
6420        tensor.set_format(PixelFormat::Rgb).unwrap();
6421        let img = TensorDyn::from(tensor);
6422        assert_eq!(img.width(), Some(1280));
6423        assert_eq!(img.height(), Some(720));
6424        assert_eq!(img.format(), Some(PixelFormat::Rgb));
6425    }
6426
6427    #[test]
6428    fn test_planar_rgb_from_tensor() {
6429        let mut tensor = Tensor::<u8>::new(&[3, 720, 1280], None, None).unwrap();
6430        tensor.set_format(PixelFormat::PlanarRgb).unwrap();
6431        let img = TensorDyn::from(tensor);
6432        assert_eq!(img.width(), Some(1280));
6433        assert_eq!(img.height(), Some(720));
6434        assert_eq!(img.format(), Some(PixelFormat::PlanarRgb));
6435    }
6436
6437    #[test]
6438    fn test_dtype_determines_int8() {
6439        // DType::I8 indicates int8 data
6440        let u8_img = TensorDyn::image(64, 64, PixelFormat::Rgb, DType::U8, None).unwrap();
6441        let i8_img = TensorDyn::image(64, 64, PixelFormat::Rgb, DType::I8, None).unwrap();
6442        assert_eq!(u8_img.dtype(), DType::U8);
6443        assert_eq!(i8_img.dtype(), DType::I8);
6444    }
6445
6446    #[test]
6447    fn test_pixel_layout_packed_vs_planar() {
6448        // Packed vs planar layout classification
6449        assert_eq!(PixelFormat::Rgb.layout(), PixelLayout::Packed);
6450        assert_eq!(PixelFormat::Rgba.layout(), PixelLayout::Packed);
6451        assert_eq!(PixelFormat::PlanarRgb.layout(), PixelLayout::Planar);
6452        assert_eq!(PixelFormat::Nv12.layout(), PixelLayout::SemiPlanar);
6453    }
6454
    /// Integration test that exercises the PBO-to-PBO convert path.
    /// Uses ImageProcessor::create_image() to allocate PBO-backed tensors,
    /// then converts between them. Skipped when GL is unavailable or the
    /// backend is not PBO (e.g. DMA-buf systems).
    #[cfg(target_os = "linux")]
    #[cfg(feature = "opengl")]
    #[test]
    fn test_convert_pbo_to_pbo() {
        let mut converter = ImageProcessor::new().unwrap();

        // Skip if GL is not available or backend is not PBO
        let is_pbo = converter
            .opengl
            .as_ref()
            .is_some_and(|gl| gl.transfer_backend() == opengl_headless::TransferBackend::Pbo);
        if !is_pbo {
            eprintln!("Skipping test_convert_pbo_to_pbo: backend is not PBO");
            return;
        }

        // Downscale 640x480 -> 320x240 so the convert does real resampling work.
        let src_w = 640;
        let src_h = 480;
        let dst_w = 320;
        let dst_h = 240;

        // Create PBO-backed source image
        let pbo_src = converter
            .create_image(src_w, src_h, PixelFormat::Rgba, DType::U8, None)
            .unwrap();
        assert_eq!(
            pbo_src.as_u8().unwrap().memory(),
            TensorMemory::Pbo,
            "create_image should produce a PBO tensor"
        );

        // Fill source PBO with test pattern: load JPEG then convert Mem→PBO
        let file = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../testdata/zidane.jpg"
        ))
        .to_vec();
        let jpeg_src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();

        // Resize JPEG into a Mem temp of the right size, then copy into PBO
        let mem_src = TensorDyn::image(
            src_w,
            src_h,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Mem),
        )
        .unwrap();
        // convert_img consumes both tensors by value and returns them
        // alongside the conversion result.
        let (result, _jpeg_src, mem_src) = convert_img(
            &mut CPUProcessor::new(),
            jpeg_src,
            mem_src,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        // Copy pixel data into the PBO source by mapping it.
        // NOTE(review): mapping a PBO tensor presumably synchronizes with the
        // GL pipeline — acceptable in a test; confirm against the map() impl.
        {
            let src_data = mem_src.as_u8().unwrap().map().unwrap();
            let mut pbo_map = pbo_src.as_u8().unwrap().map().unwrap();
            pbo_map.copy_from_slice(&src_data);
        }

        // Create PBO-backed destination image
        let pbo_dst = converter
            .create_image(dst_w, dst_h, PixelFormat::Rgba, DType::U8, None)
            .unwrap();
        assert_eq!(pbo_dst.as_u8().unwrap().memory(), TensorMemory::Pbo);

        // Convert PBO→PBO (this exercises convert_pbo_to_pbo)
        let mut pbo_dst = pbo_dst;
        let result = converter.convert(
            &pbo_src,
            &mut pbo_dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        // Verify: compare with CPU-only conversion of the same input
        let cpu_dst = TensorDyn::image(
            dst_w,
            dst_h,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Mem),
        )
        .unwrap();
        let (result, _mem_src, cpu_dst) = convert_img(
            &mut CPUProcessor::new(),
            mem_src,
            cpu_dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        // Re-tag the raw converted tensor as RGBA so the comparison below
        // sees a well-formed image.
        let pbo_dst_img = {
            let mut __t = pbo_dst.into_u8().unwrap();
            __t.set_format(PixelFormat::Rgba).unwrap();
            TensorDyn::from(__t)
        };
        // 0.95 — presumably a similarity threshold tolerating backend
        // filtering differences; see compare_images for exact semantics.
        compare_images(&pbo_dst_img, &cpu_dst, 0.95, function!());
        log::info!("test_convert_pbo_to_pbo: PASS — PBO-to-PBO convert matches CPU reference");
    }
6568
6569    #[test]
6570    fn test_image_bgra() {
6571        let img = TensorDyn::image(
6572            640,
6573            480,
6574            PixelFormat::Bgra,
6575            DType::U8,
6576            Some(edgefirst_tensor::TensorMemory::Mem),
6577        )
6578        .unwrap();
6579        assert_eq!(img.width(), Some(640));
6580        assert_eq!(img.height(), Some(480));
6581        assert_eq!(img.format().unwrap().channels(), 4);
6582        assert_eq!(img.format().unwrap(), PixelFormat::Bgra);
6583    }
6584
6585    // ========================================================================
6586    // Tests for EDGEFIRST_FORCE_BACKEND env var
6587    // ========================================================================
6588
6589    #[test]
6590    fn test_force_backend_cpu() {
6591        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
6592        unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", "cpu") };
6593        let result = ImageProcessor::new();
6594        match original {
6595            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
6596            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6597        }
6598        let converter = result.unwrap();
6599        assert!(converter.cpu.is_some());
6600        assert_eq!(converter.forced_backend, Some(ForcedBackend::Cpu));
6601    }
6602
6603    #[test]
6604    fn test_force_backend_invalid() {
6605        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
6606        unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", "invalid") };
6607        let result = ImageProcessor::new();
6608        match original {
6609            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
6610            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6611        }
6612        assert!(
6613            matches!(&result, Err(Error::ForcedBackendUnavailable(s)) if s.contains("unknown")),
6614            "invalid backend value should return ForcedBackendUnavailable error: {result:?}"
6615        );
6616    }
6617
6618    #[test]
6619    fn test_force_backend_unset() {
6620        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
6621        unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") };
6622        let result = ImageProcessor::new();
6623        match original {
6624            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
6625            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6626        }
6627        let converter = result.unwrap();
6628        assert!(converter.forced_backend.is_none());
6629    }
6630
6631    // ========================================================================
6632    // Tests for hybrid mask path error handling
6633    // ========================================================================
6634
6635    #[test]
6636    fn test_draw_proto_masks_no_cpu_returns_error() {
6637        // Disable CPU backend to trigger the error path
6638        let original_cpu = std::env::var("EDGEFIRST_DISABLE_CPU").ok();
6639        unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", "1") };
6640        let original_gl = std::env::var("EDGEFIRST_DISABLE_GL").ok();
6641        unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", "1") };
6642        let original_g2d = std::env::var("EDGEFIRST_DISABLE_G2D").ok();
6643        unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", "1") };
6644
6645        let result = ImageProcessor::new();
6646
6647        match original_cpu {
6648            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", s) },
6649            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_CPU") },
6650        }
6651        match original_gl {
6652            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", s) },
6653            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_GL") },
6654        }
6655        match original_g2d {
6656            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", s) },
6657            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_G2D") },
6658        }
6659
6660        let mut converter = result.unwrap();
6661        assert!(converter.cpu.is_none(), "CPU should be disabled");
6662
6663        let dst = TensorDyn::image(
6664            640,
6665            480,
6666            PixelFormat::Rgba,
6667            DType::U8,
6668            Some(TensorMemory::Mem),
6669        )
6670        .unwrap();
6671        let mut dst_dyn = dst;
6672        let det = [DetectBox {
6673            bbox: edgefirst_decoder::BoundingBox {
6674                xmin: 0.1,
6675                ymin: 0.1,
6676                xmax: 0.5,
6677                ymax: 0.5,
6678            },
6679            score: 0.9,
6680            label: 0,
6681        }];
6682        let proto_data = {
6683            use edgefirst_tensor::{Tensor, TensorDyn};
6684            let coeff_t = Tensor::<f32>::from_slice(&[0.5_f32; 4], &[1, 4]).unwrap();
6685            let protos_t =
6686                Tensor::<f32>::from_slice(&vec![0.0_f32; 8 * 8 * 4], &[8, 8, 4]).unwrap();
6687            ProtoData {
6688                mask_coefficients: TensorDyn::F32(coeff_t),
6689                protos: TensorDyn::F32(protos_t),
6690            }
6691        };
6692        let result =
6693            converter.draw_proto_masks(&mut dst_dyn, &det, &proto_data, Default::default());
6694        assert!(
6695            matches!(&result, Err(Error::Internal(s)) if s.contains("CPU backend")),
6696            "draw_proto_masks without CPU should return Internal error: {result:?}"
6697        );
6698    }
6699
6700    #[test]
6701    fn test_draw_proto_masks_cpu_fallback_works() {
6702        // Force CPU-only backend to ensure the CPU fallback path executes
6703        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
6704        unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", "cpu") };
6705        let result = ImageProcessor::new();
6706        match original {
6707            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
6708            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6709        }
6710
6711        let mut converter = result.unwrap();
6712        assert!(converter.cpu.is_some());
6713
6714        let dst = TensorDyn::image(
6715            64,
6716            64,
6717            PixelFormat::Rgba,
6718            DType::U8,
6719            Some(TensorMemory::Mem),
6720        )
6721        .unwrap();
6722        let mut dst_dyn = dst;
6723        let det = [DetectBox {
6724            bbox: edgefirst_decoder::BoundingBox {
6725                xmin: 0.1,
6726                ymin: 0.1,
6727                xmax: 0.5,
6728                ymax: 0.5,
6729            },
6730            score: 0.9,
6731            label: 0,
6732        }];
6733        let proto_data = {
6734            use edgefirst_tensor::{Tensor, TensorDyn};
6735            let coeff_t = Tensor::<f32>::from_slice(&[0.5_f32; 4], &[1, 4]).unwrap();
6736            let protos_t =
6737                Tensor::<f32>::from_slice(&vec![0.0_f32; 8 * 8 * 4], &[8, 8, 4]).unwrap();
6738            ProtoData {
6739                mask_coefficients: TensorDyn::F32(coeff_t),
6740                protos: TensorDyn::F32(protos_t),
6741            }
6742        };
6743        let result =
6744            converter.draw_proto_masks(&mut dst_dyn, &det, &proto_data, Default::default());
6745        assert!(result.is_ok(), "CPU fallback path should work: {result:?}");
6746    }
6747
6748    // ============================================================
6749    // draw_decoded_masks / draw_proto_masks — 4-scenario pixel-
6750    // verified tests. Exercises each backend against the full
6751    // output-contract matrix:
6752    //
6753    //   | detections | background | expected dst             |
6754    //   |------------|------------|--------------------------|
6755    //   | empty      | none       | fully cleared (0x00)     |
6756    //   | empty      | set        | fully equal to bg        |
6757    //   | set        | none       | cleared outside box +    |
6758    //   |            |            | mask-coloured inside     |
6759    //   | set        | set        | bg outside box + mask    |
6760    //   |            |            | blended inside           |
6761    //
6762    // Every test pre-fills dst with a non-zero "dirty" pattern so
6763    // that any silent `return Ok(())` leaks the pattern into the
6764    // asserted output and fails loudly.
6765    // ============================================================
6766
6767    /// Run `body` with `EDGEFIRST_FORCE_BACKEND` temporarily set (or
6768    /// removed), restoring the prior value afterward. Tests are mutated
6769    /// env-serialized via the process-wide `FORCE_BACKEND_MUTEX`.
6770    fn with_force_backend<R>(value: Option<&str>, body: impl FnOnce() -> R) -> R {
6771        use std::sync::{Mutex, MutexGuard, OnceLock};
6772        static LOCK: OnceLock<Mutex<()>> = OnceLock::new();
6773        let _guard: MutexGuard<()> = LOCK
6774            .get_or_init(|| Mutex::new(()))
6775            .lock()
6776            .unwrap_or_else(|e| e.into_inner());
6777        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
6778        match value {
6779            Some(v) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", v) },
6780            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6781        }
6782        let r = body();
6783        match original {
6784            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
6785            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6786        }
6787        r
6788    }
6789
6790    /// Allocate an RGBA image tensor and pre-fill every byte with a
6791    /// distinctive non-zero pattern. Any test that relies on the old
6792    /// "dst is already cleared" assumption will see this pattern leak
6793    /// through to the output and fail.
6794    fn make_dirty_dst(w: usize, h: usize, mem: Option<TensorMemory>) -> TensorDyn {
6795        let dst = TensorDyn::image(w, h, PixelFormat::Rgba, DType::U8, mem).unwrap();
6796        {
6797            use edgefirst_tensor::TensorMapTrait;
6798            let u8t = dst.as_u8().unwrap();
6799            let mut map = u8t.map().unwrap();
6800            for (i, b) in map.as_mut_slice().iter_mut().enumerate() {
6801                *b = 0xA0u8.wrapping_add((i as u8) & 0x3F);
6802            }
6803        }
6804        dst
6805    }
6806
6807    /// Allocate an RGBA background filled with a constant colour.
6808    fn make_bg(w: usize, h: usize, mem: Option<TensorMemory>, rgba: [u8; 4]) -> TensorDyn {
6809        let bg = TensorDyn::image(w, h, PixelFormat::Rgba, DType::U8, mem).unwrap();
6810        {
6811            use edgefirst_tensor::TensorMapTrait;
6812            let u8t = bg.as_u8().unwrap();
6813            let mut map = u8t.map().unwrap();
6814            for chunk in map.as_mut_slice().chunks_exact_mut(4) {
6815                chunk.copy_from_slice(&rgba);
6816            }
6817        }
6818        bg
6819    }
6820
6821    fn pixel_at(dst: &TensorDyn, x: usize, y: usize) -> [u8; 4] {
6822        use edgefirst_tensor::TensorMapTrait;
6823        let w = dst.width().unwrap();
6824        let off = (y * w + x) * 4;
6825        let u8t = dst.as_u8().unwrap();
6826        let map = u8t.map().unwrap();
6827        let s = map.as_slice();
6828        [s[off], s[off + 1], s[off + 2], s[off + 3]]
6829    }
6830
6831    fn assert_every_pixel_eq(dst: &TensorDyn, expected: [u8; 4], case: &str) {
6832        use edgefirst_tensor::TensorMapTrait;
6833        let u8t = dst.as_u8().unwrap();
6834        let map = u8t.map().unwrap();
6835        for (i, chunk) in map.as_slice().chunks_exact(4).enumerate() {
6836            assert_eq!(
6837                chunk, &expected,
6838                "{case}: pixel idx {i} = {chunk:?}, expected {expected:?}"
6839            );
6840        }
6841    }
6842
6843    /// Scenario 1: empty detections, empty segmentation, no background
6844    /// → dst must be fully cleared to 0x00000000.
6845    fn scenario_empty_no_bg(processor: &mut ImageProcessor, case: &str) {
6846        let mut dst = make_dirty_dst(64, 64, None);
6847        processor
6848            .draw_decoded_masks(&mut dst, &[], &[], MaskOverlay::default())
6849            .unwrap_or_else(|e| panic!("{case}/decoded_masks empty+no-bg failed: {e:?}"));
6850        assert_every_pixel_eq(&dst, [0, 0, 0, 0], &format!("{case}/decoded"));
6851
6852        let mut dst = make_dirty_dst(64, 64, None);
6853        let proto = {
6854            use edgefirst_tensor::{Tensor, TensorDyn};
6855            // Placeholder (no detections); shape [1, 4] to keep the tensor well-formed.
6856            let coeff_t = Tensor::<f32>::from_slice(&[0.0_f32; 4], &[1, 4]).unwrap();
6857            let protos_t =
6858                Tensor::<f32>::from_slice(&vec![0.0_f32; 8 * 8 * 4], &[8, 8, 4]).unwrap();
6859            ProtoData {
6860                mask_coefficients: TensorDyn::F32(coeff_t),
6861                protos: TensorDyn::F32(protos_t),
6862            }
6863        };
6864        processor
6865            .draw_proto_masks(&mut dst, &[], &proto, MaskOverlay::default())
6866            .unwrap_or_else(|e| panic!("{case}/proto_masks empty+no-bg failed: {e:?}"));
6867        assert_every_pixel_eq(&dst, [0, 0, 0, 0], &format!("{case}/proto"));
6868    }
6869
6870    /// Scenario 2: empty detections, empty segmentation, background set
6871    /// → dst must be fully equal to bg.
6872    fn scenario_empty_with_bg(processor: &mut ImageProcessor, case: &str) {
6873        let bg_color = [42, 99, 200, 255];
6874        let bg = make_bg(64, 64, None, bg_color);
6875        let overlay = MaskOverlay::new().with_background(&bg);
6876
6877        let mut dst = make_dirty_dst(64, 64, None);
6878        processor
6879            .draw_decoded_masks(&mut dst, &[], &[], overlay)
6880            .unwrap_or_else(|e| panic!("{case}/decoded_masks empty+bg failed: {e:?}"));
6881        assert_every_pixel_eq(&dst, bg_color, &format!("{case}/decoded bg blit"));
6882
6883        let mut dst = make_dirty_dst(64, 64, None);
6884        let proto = {
6885            use edgefirst_tensor::{Tensor, TensorDyn};
6886            // Placeholder (no detections); shape [1, 4] to keep the tensor well-formed.
6887            let coeff_t = Tensor::<f32>::from_slice(&[0.0_f32; 4], &[1, 4]).unwrap();
6888            let protos_t =
6889                Tensor::<f32>::from_slice(&vec![0.0_f32; 8 * 8 * 4], &[8, 8, 4]).unwrap();
6890            ProtoData {
6891                mask_coefficients: TensorDyn::F32(coeff_t),
6892                protos: TensorDyn::F32(protos_t),
6893            }
6894        };
6895        processor
6896            .draw_proto_masks(&mut dst, &[], &proto, overlay)
6897            .unwrap_or_else(|e| panic!("{case}/proto_masks empty+bg failed: {e:?}"));
6898        assert_every_pixel_eq(&dst, bg_color, &format!("{case}/proto bg blit"));
6899    }
6900
6901    /// Scenario 3: one detection with a fully-opaque segmentation fill,
6902    /// no background → outside the box dst must be 0x00, inside it must
6903    /// be a non-zero mask colour (the render_segmentation output).
6904    fn scenario_detect_no_bg(processor: &mut ImageProcessor, case: &str) {
6905        use edgefirst_decoder::Segmentation;
6906        use ndarray::Array3;
6907        processor
6908            .set_class_colors(&[[200, 80, 40, 255]])
6909            .expect("set_class_colors");
6910
6911        let detect = DetectBox {
6912            bbox: [0.25, 0.25, 0.75, 0.75].into(),
6913            score: 0.99,
6914            label: 0,
6915        };
6916        let seg_arr = Array3::from_shape_fn((4, 4, 1), |_| 255u8);
6917        let seg = Segmentation {
6918            segmentation: seg_arr,
6919            xmin: 0.25,
6920            ymin: 0.25,
6921            xmax: 0.75,
6922            ymax: 0.75,
6923        };
6924
6925        let mut dst = make_dirty_dst(64, 64, None);
6926        processor
6927            .draw_decoded_masks(&mut dst, &[detect], &[seg], MaskOverlay::default())
6928            .unwrap_or_else(|e| panic!("{case}/decoded_masks detect+no-bg failed: {e:?}"));
6929
6930        // Outside the bbox (corner): must be cleared black.
6931        let corner = pixel_at(&dst, 2, 2);
6932        assert_eq!(
6933            corner,
6934            [0, 0, 0, 0],
6935            "{case}/decoded: corner (2,2) leaked dirty pattern: {corner:?}"
6936        );
6937        // Inside the bbox (center): the mask colour must be visible.
6938        // Any non-zero pixel is acceptable — exact rendering varies
6939        // between backends (GL smoothstep, CPU nearest).
6940        let center = pixel_at(&dst, 32, 32);
6941        assert!(
6942            center != [0, 0, 0, 0],
6943            "{case}/decoded: center (32,32) was not coloured: {center:?}"
6944        );
6945    }
6946
6947    /// Scenario 4: detection + background. Outside the box must match
6948    /// bg; inside the box must NOT match bg (mask blended on top).
6949    fn scenario_detect_with_bg(processor: &mut ImageProcessor, case: &str) {
6950        use edgefirst_decoder::Segmentation;
6951        use ndarray::Array3;
6952        processor
6953            .set_class_colors(&[[200, 80, 40, 255]])
6954            .expect("set_class_colors");
6955        let bg_color = [10, 20, 30, 255];
6956        let bg = make_bg(64, 64, None, bg_color);
6957
6958        let detect = DetectBox {
6959            bbox: [0.25, 0.25, 0.75, 0.75].into(),
6960            score: 0.99,
6961            label: 0,
6962        };
6963        let seg_arr = Array3::from_shape_fn((4, 4, 1), |_| 255u8);
6964        let seg = Segmentation {
6965            segmentation: seg_arr,
6966            xmin: 0.25,
6967            ymin: 0.25,
6968            xmax: 0.75,
6969            ymax: 0.75,
6970        };
6971
6972        let overlay = MaskOverlay::new().with_background(&bg);
6973        let mut dst = make_dirty_dst(64, 64, None);
6974        processor
6975            .draw_decoded_masks(&mut dst, &[detect], &[seg], overlay)
6976            .unwrap_or_else(|e| panic!("{case}/decoded_masks detect+bg failed: {e:?}"));
6977
6978        // Outside the bbox (corner): bg colour.
6979        let corner = pixel_at(&dst, 2, 2);
6980        assert_eq!(
6981            corner, bg_color,
6982            "{case}/decoded: corner (2,2) should show bg {bg_color:?} got {corner:?}"
6983        );
6984        // Inside the bbox (center): mask blended on bg, must differ from
6985        // pure bg (alpha-blend with mask colour produces a distinct shade).
6986        let center = pixel_at(&dst, 32, 32);
6987        assert!(
6988            center != bg_color,
6989            "{case}/decoded: center (32,32) should differ from bg {bg_color:?}, got {center:?}"
6990        );
6991    }
6992
6993    /// Run all 4 scenarios against the processor. Skip gracefully if
6994    /// construction fails (backend unavailable on this host).
6995    fn run_all_scenarios(
6996        force_backend: Option<&'static str>,
6997        case: &'static str,
6998        require_dma_for_bg: bool,
6999    ) {
7000        if require_dma_for_bg && !edgefirst_tensor::is_dma_available() {
7001            eprintln!("SKIPPED: {case} — DMA not available on this host");
7002            return;
7003        }
7004        let processor_result = with_force_backend(force_backend, ImageProcessor::new);
7005        let mut processor = match processor_result {
7006            Ok(p) => p,
7007            Err(e) => {
7008                eprintln!("SKIPPED: {case} — backend init failed: {e:?}");
7009                return;
7010            }
7011        };
7012        scenario_empty_no_bg(&mut processor, case);
7013        scenario_empty_with_bg(&mut processor, case);
7014        scenario_detect_no_bg(&mut processor, case);
7015        scenario_detect_with_bg(&mut processor, case);
7016    }
7017
    #[test]
    fn test_draw_masks_4_scenarios_cpu() {
        // Forced CPU backend; DMA not required for the background path.
        run_all_scenarios(Some("cpu"), "cpu", false);
    }
7022
    #[test]
    fn test_draw_masks_4_scenarios_auto() {
        // No forced backend: exercises whichever backend auto-selection picks.
        run_all_scenarios(None, "auto", false);
    }
7027
    // Forced OpenGL backend; Linux-only and gated on the "opengl" feature.
    #[cfg(target_os = "linux")]
    #[cfg(feature = "opengl")]
    #[test]
    fn test_draw_masks_4_scenarios_opengl() {
        run_all_scenarios(Some("opengl"), "opengl", false);
    }
7034
    /// G2D forced backend: exercises the zero-detection empty-frame
    /// paths via `g2d_clear` and `g2d_blit`. Scenarios 3 and 4 (with
    /// detections) expect `NotImplemented` since G2D has no rasterizer
    /// for boxes / masks.
    #[cfg(target_os = "linux")]
    #[test]
    fn test_draw_masks_zero_detection_g2d_forced() {
        if !edgefirst_tensor::is_dma_available() {
            eprintln!("SKIPPED: g2d forced — DMA not available on this host");
            return;
        }
        let processor_result = with_force_backend(Some("g2d"), ImageProcessor::new);
        let mut processor = match processor_result {
            Ok(p) => p,
            Err(e) => {
                eprintln!("SKIPPED: g2d forced — init failed: {e:?}");
                return;
            }
        };

        // Case 1: empty + no bg. G2D requires DMA-backed dst.
        let mut dst = TensorDyn::image(
            64,
            64,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Dma),
        )
        .unwrap();
        // Pre-fill with a non-zero byte so a skipped clear is detected.
        {
            use edgefirst_tensor::TensorMapTrait;
            let u8t = dst.as_u8_mut().unwrap();
            let mut map = u8t.map().unwrap();
            map.as_mut_slice().fill(0xBB);
        }
        processor
            .draw_decoded_masks(&mut dst, &[], &[], MaskOverlay::default())
            .expect("g2d empty+no-bg");
        assert_every_pixel_eq(&dst, [0, 0, 0, 0], "g2d/case1 cleared");

        // Case 2: empty + bg. Both surfaces DMA-backed for g2d_blit.
        let bg_color = [7, 11, 13, 255];
        let bg = {
            let t = TensorDyn::image(
                64,
                64,
                PixelFormat::Rgba,
                DType::U8,
                Some(TensorMemory::Dma),
            )
            .unwrap();
            // Flood-fill the background with a constant colour.
            {
                use edgefirst_tensor::TensorMapTrait;
                let u8t = t.as_u8().unwrap();
                let mut map = u8t.map().unwrap();
                for chunk in map.as_mut_slice().chunks_exact_mut(4) {
                    chunk.copy_from_slice(&bg_color);
                }
            }
            t
        };
        let mut dst = TensorDyn::image(
            64,
            64,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Dma),
        )
        .unwrap();
        // Different dirty byte from case 1 to keep failures distinguishable.
        {
            use edgefirst_tensor::TensorMapTrait;
            let u8t = dst.as_u8_mut().unwrap();
            let mut map = u8t.map().unwrap();
            map.as_mut_slice().fill(0x55);
        }
        processor
            .draw_decoded_masks(&mut dst, &[], &[], MaskOverlay::new().with_background(&bg))
            .expect("g2d empty+bg");
        assert_every_pixel_eq(&dst, bg_color, "g2d/case2 bg blit");

        // Case 3 (and by extension case 4): any detection present must
        // return NotImplemented. Only the no-background variant is
        // exercised here.
        let detect = DetectBox {
            bbox: [0.25, 0.25, 0.75, 0.75].into(),
            score: 0.9,
            label: 0,
        };
        let mut dst = TensorDyn::image(
            64,
            64,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Dma),
        )
        .unwrap();
        let err = processor
            .draw_decoded_masks(&mut dst, &[detect], &[], MaskOverlay::default())
            .expect_err("g2d must reject detect-present draw_decoded_masks");
        assert!(
            matches!(err, Error::NotImplemented(_)),
            "g2d case3 wrong error: {err:?}"
        );
    }
7137
7138    #[test]
7139    fn test_set_format_then_cpu_convert() {
7140        // Force CPU backend (save/restore to avoid leaking into other tests)
7141        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
7142        unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", "cpu") };
7143        let mut processor = ImageProcessor::new().unwrap();
7144        match original {
7145            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
7146            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
7147        }
7148
7149        // Load a source image
7150        let image = include_bytes!(concat!(
7151            env!("CARGO_MANIFEST_DIR"),
7152            "/../../testdata/zidane.jpg"
7153        ));
7154        let src = load_image(image, Some(PixelFormat::Rgba), None).unwrap();
7155
7156        // Create a raw tensor, then attach format — simulating the from_fd workflow
7157        let mut dst =
7158            TensorDyn::new(&[640, 640, 3], DType::U8, Some(TensorMemory::Mem), None).unwrap();
7159        dst.set_format(PixelFormat::Rgb).unwrap();
7160
7161        // Convert should work with the set_format-annotated tensor
7162        processor
7163            .convert(&src, &mut dst, Rotation::None, Flip::None, Crop::default())
7164            .unwrap();
7165
7166        // Verify format survived conversion
7167        assert_eq!(dst.format(), Some(PixelFormat::Rgb));
7168        assert_eq!(dst.width(), Some(640));
7169        assert_eq!(dst.height(), Some(640));
7170    }
7171
7172    /// Verify that creating multiple ImageProcessors on the same thread and
7173    /// performing a resize on each does not deadlock or error.
7174    ///
7175    /// Uses automatic memory allocation (DMA → PBO → Mem fallback) so that
7176    /// hardware backends (OpenGL, G2D) are exercised on capable targets.
7177    #[test]
7178    fn test_multiple_image_processors_same_thread() {
7179        let mut processors: Vec<ImageProcessor> = (0..4)
7180            .map(|_| ImageProcessor::new().expect("ImageProcessor::new() failed"))
7181            .collect();
7182
7183        for proc in &mut processors {
7184            let src = proc
7185                .create_image(128, 128, PixelFormat::Rgb, DType::U8, None)
7186                .expect("create src failed");
7187            let mut dst = proc
7188                .create_image(64, 64, PixelFormat::Rgb, DType::U8, None)
7189                .expect("create dst failed");
7190            proc.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::default())
7191                .expect("convert failed");
7192            assert_eq!(dst.width(), Some(64));
7193            assert_eq!(dst.height(), Some(64));
7194        }
7195    }
7196
7197    /// Verify that creating ImageProcessors on separate threads and performing
7198    /// a resize on each does not deadlock or error.
7199    ///
7200    /// Uses automatic memory allocation (DMA → PBO → Mem fallback) so that
7201    /// hardware backends (OpenGL, G2D) are exercised on capable targets.
7202    /// A 60-second timeout prevents CI from hanging on deadlock regressions.
7203    #[test]
7204    fn test_multiple_image_processors_separate_threads() {
7205        use std::sync::mpsc;
7206        use std::time::Duration;
7207
7208        const TIMEOUT: Duration = Duration::from_secs(60);
7209
7210        let (tx, rx) = mpsc::channel::<()>();
7211
7212        std::thread::spawn(move || {
7213            let handles: Vec<_> = (0..4)
7214                .map(|i| {
7215                    std::thread::spawn(move || {
7216                        let mut proc = ImageProcessor::new().unwrap_or_else(|e| {
7217                            panic!("ImageProcessor::new() failed on thread {i}: {e}")
7218                        });
7219                        let src = proc
7220                            .create_image(128, 128, PixelFormat::Rgb, DType::U8, None)
7221                            .unwrap_or_else(|e| panic!("create src failed on thread {i}: {e}"));
7222                        let mut dst = proc
7223                            .create_image(64, 64, PixelFormat::Rgb, DType::U8, None)
7224                            .unwrap_or_else(|e| panic!("create dst failed on thread {i}: {e}"));
7225                        proc.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::default())
7226                            .unwrap_or_else(|e| panic!("convert failed on thread {i}: {e}"));
7227                        assert_eq!(dst.width(), Some(64));
7228                        assert_eq!(dst.height(), Some(64));
7229                    })
7230                })
7231                .collect();
7232
7233            for (i, h) in handles.into_iter().enumerate() {
7234                h.join()
7235                    .unwrap_or_else(|e| panic!("thread {i} panicked: {e:?}"));
7236            }
7237
7238            let _ = tx.send(());
7239        });
7240
7241        rx.recv_timeout(TIMEOUT).unwrap_or_else(|_| {
7242            panic!("test_multiple_image_processors_separate_threads timed out after {TIMEOUT:?}")
7243        });
7244    }
7245
7246    /// Verify that 4 fully-initialized ImageProcessors on separate threads can
7247    /// all operate concurrently without deadlocking each other.
7248    ///
7249    /// All processors are created first, then a barrier synchronizes them so
7250    /// they all start converting at the same instant — maximizing contention.
7251    /// A 60-second timeout prevents CI from hanging on deadlock regressions.
7252    #[test]
7253    fn test_image_processors_concurrent_operations() {
7254        use std::sync::{mpsc, Arc, Barrier};
7255        use std::time::Duration;
7256
7257        const N: usize = 4;
7258        const ROUNDS: usize = 10;
7259        const TIMEOUT: Duration = Duration::from_secs(60);
7260
7261        let (tx, rx) = mpsc::channel::<()>();
7262
7263        std::thread::spawn(move || {
7264            let barrier = Arc::new(Barrier::new(N));
7265
7266            let handles: Vec<_> = (0..N)
7267                .map(|i| {
7268                    let barrier = Arc::clone(&barrier);
7269                    std::thread::spawn(move || {
7270                        let mut proc = ImageProcessor::new().unwrap_or_else(|e| {
7271                            panic!("ImageProcessor::new() failed on thread {i}: {e}")
7272                        });
7273
7274                        // All threads wait here until every processor is initialized.
7275                        barrier.wait();
7276
7277                        // Now all 4 hammer the GPU concurrently.
7278                        for round in 0..ROUNDS {
7279                            let src = proc
7280                                .create_image(128, 128, PixelFormat::Rgb, DType::U8, None)
7281                                .unwrap_or_else(|e| {
7282                                    panic!("create src failed on thread {i} round {round}: {e}")
7283                                });
7284                            let mut dst = proc
7285                                .create_image(64, 64, PixelFormat::Rgb, DType::U8, None)
7286                                .unwrap_or_else(|e| {
7287                                    panic!("create dst failed on thread {i} round {round}: {e}")
7288                                });
7289                            proc.convert(
7290                                &src,
7291                                &mut dst,
7292                                Rotation::None,
7293                                Flip::None,
7294                                Crop::default(),
7295                            )
7296                            .unwrap_or_else(|e| {
7297                                panic!("convert failed on thread {i} round {round}: {e}")
7298                            });
7299                            assert_eq!(dst.width(), Some(64));
7300                            assert_eq!(dst.height(), Some(64));
7301                        }
7302                    })
7303                })
7304                .collect();
7305
7306            for (i, h) in handles.into_iter().enumerate() {
7307                h.join()
7308                    .unwrap_or_else(|e| panic!("thread {i} panicked: {e:?}"));
7309            }
7310
7311            let _ = tx.send(());
7312        });
7313
7314        rx.recv_timeout(TIMEOUT).unwrap_or_else(|_| {
7315            panic!("test_image_processors_concurrent_operations timed out after {TIMEOUT:?}")
7316        });
7317    }
7318}