Skip to main content

edgefirst_image/
lib.rs

1// SPDX-FileCopyrightText: Copyright 2025 Au-Zone Technologies
2// SPDX-License-Identifier: Apache-2.0
3
4/*!
5
6## EdgeFirst HAL - Image Converter
7
8The `edgefirst_image` crate is part of the EdgeFirst Hardware Abstraction
9Layer (HAL) and provides functionality for converting images between
10different formats and sizes.  The crate is designed to work with hardware
11acceleration when available, but also provides a CPU-based fallback for
12environments where hardware acceleration is not present or not suitable.
13
14The main features of the `edgefirst_image` crate include:
15- Support for various image formats, including YUYV, RGB, RGBA, and GREY.
16- Support for source crop, destination crop, rotation, and flipping.
17- Image conversion using hardware acceleration (G2D, OpenGL) when available.
18- CPU-based image conversion as a fallback option.
19
20The crate uses [`TensorDyn`] from `edgefirst_tensor` to represent images,
21with [`PixelFormat`] metadata describing the pixel layout. The
22[`ImageProcessor`] struct manages the conversion process, selecting
23the appropriate conversion method based on the available hardware.
24
25## Examples
26
27```rust
28# use edgefirst_image::{ImageProcessor, Rotation, Flip, Crop, ImageProcessorTrait, load_image};
29# use edgefirst_tensor::{PixelFormat, DType, TensorDyn};
30# fn main() -> Result<(), edgefirst_image::Error> {
31let image = include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/../../testdata/zidane.jpg"));
32let src = load_image(image, Some(PixelFormat::Rgba), None)?;
33let mut converter = ImageProcessor::new()?;
34let mut dst = converter.create_image(640, 480, PixelFormat::Rgb, DType::U8, None)?;
35converter.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::default())?;
36# Ok(())
37# }
38```
39
40## Environment Variables
41The behavior of the `edgefirst_image::ImageProcessor` struct can be influenced by the
42following environment variables:
43- `EDGEFIRST_FORCE_BACKEND`: When set to `cpu`, `g2d`, or `opengl` (case-insensitive),
44  only that single backend is initialized and no fallback chain is used. If the
45  forced backend fails to initialize, an error is returned immediately. This is
46  useful for benchmarking individual backends in isolation. When this variable is
47  set, the `EDGEFIRST_DISABLE_*` variables are ignored.
48- `EDGEFIRST_DISABLE_GL`: If set to `1`, disables the use of OpenGL for image
49  conversion, forcing the use of CPU or other available hardware methods.
50- `EDGEFIRST_DISABLE_G2D`: If set to `1`, disables the use of G2D for image
51  conversion, forcing the use of CPU or other available hardware methods.
52- `EDGEFIRST_DISABLE_CPU`: If set to `1`, disables the use of CPU for image
53  conversion, forcing the use of hardware acceleration methods. If no hardware
54  acceleration methods are available, an error will be returned when attempting
55  to create an `ImageProcessor`.
56
Additionally, the `TensorMemory` used by default allocations can be controlled using the
`EDGEFIRST_TENSOR_FORCE_MEM` environment variable. If set to `1`, default tensor
allocations use system memory. This disables the use of specialized memory regions for
tensors and hardware acceleration, but it increases the performance of the CPU converter.
61*/
62#![cfg_attr(coverage_nightly, feature(coverage_attribute))]
63
/// Pitch alignment requirement for DMA-BUF tensors that may be imported as
/// EGLImages by the GL backend. Mali Valhall (i.MX 95 / G310) rejects
/// `eglCreateImageKHR` with `EGL_BAD_ALLOC` for any DMA-BUF whose row pitch
/// is not a multiple of 64 bytes; Vivante GC7000UL (i.MX 8MP) accepts any
/// pitch so the constant is harmless on that path. 64 is the smallest
/// alignment that satisfies every embedded ARM GPU we ship to.
///
/// The value is a row pitch expressed in **bytes**, not pixels.
///
/// Applied automatically inside [`ImageProcessor::create_image`] when the
/// allocation lands on `TensorMemory::Dma`. External callers that allocate
/// their own DMA-BUF tensors (e.g. GStreamer plugins, video pipelines) can
/// use [`align_width_for_gpu_pitch`] to compute a width whose resulting row
/// stride satisfies this requirement.
pub const GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES: usize = 64;
77
78/// Round `width` (in pixels) up so the resulting row stride
79/// `width * bpp` is a multiple of [`GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES`]
80/// AND a multiple of `bpp` (so the rounded width is an integer pixel count).
81///
82/// `bpp` must be the per-pixel byte count for the image's primary plane
83/// (e.g. 4 for RGBA8/BGRA8, 3 for RGB888, 1 for Grey/NV12-luma).
84///
85/// External callers — GStreamer plugins, video pipelines, anyone wrapping a
86/// foreign DMA-BUF — should call this when sizing the destination so that
87/// `eglCreateImageKHR` doesn't reject the import on Mali. Pre-aligned widths
88/// (640, 1280, 1920, 3008, 3840 …) round-trip unchanged; misaligned widths
89/// are bumped up to the next valid value.
90///
91/// # Overflow behaviour
92///
93/// All arithmetic is checked. If the alignment computation or the rounded
94/// width would overflow `usize`, the function logs a warning and returns the
95/// original `width` unchanged rather than wrapping or producing a smaller
96/// value. Callers can rely on the returned width being **at least** the
97/// requested width.
98///
99/// `bpp == 0` and `width == 0` short-circuit to return the input unchanged.
100///
101/// # Examples
102///
103/// ```
104/// use edgefirst_image::align_width_for_gpu_pitch;
105///
106/// // RGBA8 (bpp=4): width must round to a multiple of 16 pixels (64-byte stride).
107/// assert_eq!(align_width_for_gpu_pitch(1920, 4), 1920); // already aligned
108/// assert_eq!(align_width_for_gpu_pitch(3004, 4), 3008); // crowd.png case: +4 px
109/// assert_eq!(align_width_for_gpu_pitch(1281, 4), 1296); // +15 px
110///
111/// // RGB888 (bpp=3): width must round to a multiple of 64 pixels (192-byte stride).
112/// assert_eq!(align_width_for_gpu_pitch(640, 3), 640);
113/// assert_eq!(align_width_for_gpu_pitch(641, 3), 704);
114/// ```
115pub fn align_width_for_gpu_pitch(width: usize, bpp: usize) -> usize {
116    if bpp == 0 || width == 0 {
117        return width;
118    }
119
120    // The minimum aligned stride must be a common multiple of both the
121    // GPU's pitch alignment and the per-pixel byte count. Using the LCM
122    // guarantees the rounded stride is an integer multiple of `bpp`, so
123    // converting back to a pixel count is exact.
124    //
125    // Compute the alignment in pixels (`width_alignment`) so we never need
126    // to multiply `width * bpp`, which is the only operation that could
127    // realistically overflow for large caller-supplied widths.
128    let Some(lcm_alignment) = checked_num_integer_lcm(GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES, bpp)
129    else {
130        log::warn!(
131            "align_width_for_gpu_pitch: lcm({GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES}, {bpp}) \
132             overflows usize, returning unaligned width {width}"
133        );
134        return width;
135    };
136    if lcm_alignment == 0 {
137        return width;
138    }
139
140    debug_assert_eq!(lcm_alignment % bpp, 0);
141    let width_alignment = lcm_alignment / bpp;
142    if width_alignment == 0 {
143        return width;
144    }
145
146    let remainder = width % width_alignment;
147    if remainder == 0 {
148        return width;
149    }
150
151    let pad = width_alignment - remainder;
152    match width.checked_add(pad) {
153        Some(aligned) => aligned,
154        None => {
155            log::warn!(
156                "align_width_for_gpu_pitch: width {width} + pad {pad} overflows usize, \
157                 returning unaligned (caller should use a smaller width or pre-aligned size)"
158            );
159            width
160        }
161    }
162}
163
164/// Round `min_pitch_bytes` up to the next multiple of
165/// [`GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES`]. Returns `None` if the rounded
166/// value would overflow `usize`. Returns `Some(0)` for input 0.
167///
168/// Used internally by [`ImageProcessor::create_image`] to compute the
169/// padded row stride for DMA-backed image allocations. External callers
170/// that need pixel-counted alignment (instead of raw byte pitch) should
171/// use [`align_width_for_gpu_pitch`] instead.
172#[cfg(target_os = "linux")]
173pub(crate) fn align_pitch_bytes_to_gpu_alignment(min_pitch_bytes: usize) -> Option<usize> {
174    let alignment = GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES;
175    if min_pitch_bytes == 0 {
176        return Some(0);
177    }
178    let remainder = min_pitch_bytes % alignment;
179    if remainder == 0 {
180        return Some(min_pitch_bytes);
181    }
182    min_pitch_bytes.checked_add(alignment - remainder)
183}
184
/// Overflow-safe least common multiple. Returns `None` when `(a / gcd) * b`
/// would wrap, and `Some(0)` when either input is zero.
fn checked_num_integer_lcm(a: usize, b: usize) -> Option<usize> {
    if a == 0 || b == 0 {
        return Some(0);
    }
    // Euclid's algorithm, iteratively: when the loop exits `x` holds
    // gcd(a, b).
    let (mut x, mut y) = (a, b);
    while y != 0 {
        let rem = x % y;
        x = y;
        y = rem;
    }
    // `a / x` is exact (the gcd divides `a`) and at most `a`, so the
    // division never panics; only the final multiply can overflow, which
    // `checked_mul` reports as `None`.
    (a / x).checked_mul(b)
}
196
/// Greatest common divisor via the iterative Euclidean algorithm.
/// `num_integer_gcd(a, 0)` returns `a` (so `gcd(0, 0) == 0`).
fn num_integer_gcd(a: usize, b: usize) -> usize {
    let (mut hi, mut lo) = (a, b);
    while lo != 0 {
        let next = hi % lo;
        hi = lo;
        lo = next;
    }
    hi
}
204
205/// Bytes-per-pixel for the primary plane of `format` at element size `elem`.
206/// Returns `None` for formats that don't have a single packed BPP (semi-planar
207/// chroma is handled separately, returning the luma-plane bpp).
208///
209/// External callers can use this together with [`align_width_for_gpu_pitch`]
210/// to size their own DMA-BUFs without having to remember per-format BPPs:
211///
212/// ```
213/// use edgefirst_image::{align_width_for_gpu_pitch, primary_plane_bpp};
214/// use edgefirst_tensor::PixelFormat;
215///
216/// let bpp = primary_plane_bpp(PixelFormat::Rgba, 1).unwrap();
217/// let aligned = align_width_for_gpu_pitch(3004, bpp);
218/// assert_eq!(aligned, 3008);
219/// ```
220pub fn primary_plane_bpp(format: PixelFormat, elem: usize) -> Option<usize> {
221    use edgefirst_tensor::PixelLayout;
222    match format.layout() {
223        PixelLayout::Packed => Some(format.channels() * elem),
224        PixelLayout::Planar => Some(elem),
225        // For NV12/NV16 the luma plane is single-channel so the pitch
226        // matches `elem`; the chroma plane uses the same pitch in bytes
227        // (UV is half-width but two interleaved channels = same pitch).
228        PixelLayout::SemiPlanar => Some(elem),
229        // `PixelLayout` is non-exhaustive — fall through unaligned for
230        // any future variant we don't yet recognise.
231        _ => None,
232    }
233}
234
235use edgefirst_decoder::{DetectBox, ProtoData, Segmentation};
236use edgefirst_tensor::{
237    DType, PixelFormat, PixelLayout, Tensor, TensorDyn, TensorMemory, TensorTrait as _,
238};
239use enum_dispatch::enum_dispatch;
240use std::{fmt::Display, time::Instant};
241use zune_jpeg::{
242    zune_core::{colorspace::ColorSpace, options::DecoderOptions},
243    JpegDecoder,
244};
245use zune_png::PngDecoder;
246
247pub use cpu::CPUProcessor;
248pub use error::{Error, Result};
249#[cfg(target_os = "linux")]
250pub use g2d::G2DProcessor;
251#[cfg(target_os = "linux")]
252#[cfg(feature = "opengl")]
253pub use opengl_headless::GLProcessorThreaded;
254#[cfg(target_os = "linux")]
255#[cfg(feature = "opengl")]
256pub use opengl_headless::Int8InterpolationMode;
257#[cfg(target_os = "linux")]
258#[cfg(feature = "opengl")]
259pub use opengl_headless::{probe_egl_displays, EglDisplayInfo, EglDisplayKind};
260
261mod cpu;
262mod error;
263mod g2d;
264#[path = "gl/mod.rs"]
265mod opengl_headless;
266
267// Use `edgefirst_tensor::PixelFormat` variants (Rgb, Rgba, Grey, etc.) and
268// `TensorDyn` / `Tensor<u8>` with `.format()` metadata instead.
269
/// Flips the image data, then rotates it. Returns a new `TensorDyn`.
///
/// * `src` - source image tensor; must carry width/height metadata.
/// * `src_fmt` - pixel format of `src`; supplies the channel count and the
///   format used for the destination allocation.
/// * `rotation` / `flip` - transforms applied by the CPU path.
/// * `memory` - memory region for the destination allocation (`None` uses
///   the tensor crate's default).
///
/// # Panics
/// Panics if `src` is missing width or height metadata (the `unwrap()`s
/// below).
fn rotate_flip_to_dyn(
    src: &Tensor<u8>,
    src_fmt: PixelFormat,
    rotation: Rotation,
    flip: Flip,
    memory: Option<TensorMemory>,
) -> Result<TensorDyn, Error> {
    let src_w = src.width().unwrap();
    let src_h = src.height().unwrap();
    let channels = src_fmt.channels();

    // Quarter-turn rotations swap the output dimensions; 0°/180° keep them.
    let (dst_w, dst_h) = match rotation {
        Rotation::None | Rotation::Rotate180 => (src_w, src_h),
        Rotation::Clockwise90 | Rotation::CounterClockwise90 => (src_h, src_w),
    };

    // Destination shares the source pixel format; both tensors are mapped
    // for direct CPU access.
    let dst = Tensor::<u8>::image(dst_w, dst_h, src_fmt, memory)?;
    let src_map = src.map()?;
    let mut dst_map = dst.map()?;

    // CPU path performs the flip+rotate directly on the mapped buffers.
    CPUProcessor::flip_rotate_ndarray_pf(
        &src_map,
        &mut dst_map,
        dst_w,
        dst_h,
        channels,
        rotation,
        flip,
    )?;
    // Release both mappings explicitly before the tensor is wrapped and
    // returned.
    drop(dst_map);
    drop(src_map);

    Ok(TensorDyn::from(dst))
}
305
/// Quarter-turn rotation applied during conversion. The discriminant is the
/// number of clockwise quarter turns.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Rotation {
    None = 0,
    Clockwise90 = 1,
    Rotate180 = 2,
    CounterClockwise90 = 3,
}
impl Rotation {
    /// Creates a `Rotation` from a clockwise angle in degrees. The angle is
    /// reduced modulo 360 and must be a multiple of 90.
    ///
    /// # Panics
    /// Panics if the angle is not a multiple of 90.
    ///
    /// # Examples
    /// ```rust
    /// # use edgefirst_image::Rotation;
    /// let rotation = Rotation::from_degrees_clockwise(270);
    /// assert_eq!(rotation, Rotation::CounterClockwise90);
    /// ```
    pub fn from_degrees_clockwise(angle: usize) -> Rotation {
        let deg = angle % 360;
        assert!(deg % 90 == 0, "rotation angle is not a multiple of 90");
        // `deg / 90` is the number of clockwise quarter turns (0..=3).
        match deg / 90 {
            0 => Rotation::None,
            1 => Rotation::Clockwise90,
            2 => Rotation::Rotate180,
            _ => Rotation::CounterClockwise90,
        }
    }
}
336
/// Mirror transform applied to the image during conversion.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Flip {
    /// No flipping.
    None = 0,
    /// Vertical flip.
    Vertical = 1,
    /// Horizontal flip.
    Horizontal = 2,
}
343
/// Controls how the color palette index is chosen for each detected object.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum ColorMode {
    /// Color is chosen by object class label (`det.label`). Default.
    ///
    /// Preserves backward compatibility and is correct for semantic
    /// segmentation where colors carry class meaning.
    #[default]
    Class,
    /// Color is chosen by instance order (loop index, zero-based).
    ///
    /// Each detected object gets a unique color regardless of class,
    /// useful for instance segmentation.
    Instance,
    /// Color is chosen by track ID (future use; currently behaves like
    /// [`Instance`](Self::Instance)).
    Track,
}

impl ColorMode {
    /// Return the palette index for a detection given its loop index and label.
    #[inline]
    pub fn index(self, idx: usize, label: usize) -> usize {
        // Only `Class` keys off the label; every other mode uses the
        // per-detection loop index.
        if matches!(self, ColorMode::Class) {
            label
        } else {
            idx
        }
    }
}
373
/// Controls the resolution and coordinate frame of masks produced by
/// [`ImageProcessor::materialize_masks`].
///
/// - [`Proto`](Self::Proto) returns per-detection tiles at proto-plane
///   resolution (e.g. 48×32 u8 for a typical COCO bbox on a 160×160 proto
///   plane). This is the historical behavior of `materialize_masks` and the
///   fastest path because no upsample runs inside HAL. Mask values are
///   continuous sigmoid output quantized to `uint8 [0, 255]`.
/// - [`Scaled`](Self::Scaled) returns per-detection tiles at caller-specified
///   pixel resolution by upsampling the full proto plane once and cropping by
///   bbox after sigmoid. The upsample uses bilinear interpolation with
///   edge-clamp sampling — semantically equivalent to Ultralytics'
///   `process_masks_retina` reference. When a `letterbox` is also passed to
///   [`materialize_masks`], the inverse letterbox transform is applied during
///   the upsample so mask pixels land in original-content coordinates
///   (drop-in for overlay on the original image). Mask values are binary
///   `uint8 {0, 255}` after thresholding sigmoid > 0.5 — interchangeable
///   with `Proto` output via the same `> 127` test.
///
/// [`materialize_masks`]: ImageProcessor::materialize_masks
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum MaskResolution {
    /// Per-detection tile at proto-plane resolution (default).
    #[default]
    Proto,
    /// Per-detection tile at `(width, height)` pixel resolution in the
    /// coordinate frame determined by the `letterbox` parameter of
    /// [`ImageProcessor::materialize_masks`].
    ///
    /// See the type-level documentation for the interpolation and
    /// thresholding semantics of this mode.
    Scaled {
        /// Target pixel width of the output coordinate frame.
        width: u32,
        /// Target pixel height of the output coordinate frame.
        height: u32,
    },
}
409
/// Options for mask overlay rendering.
///
/// Controls how segmentation masks are composited onto the destination image:
/// - `background`: when set, the background image is drawn first and masks
///   are composited over it (result written to `dst`). When `None`, `dst` is
///   cleared to `0x00000000` (fully transparent) before masks are drawn.
///   **`dst` is always fully overwritten — its prior contents are never
///   preserved.** Callers who used to pre-load an image into `dst` before
///   calling `draw_decoded_masks` / `draw_proto_masks` must now supply that
///   image via `background` instead (behaviour changed in v0.16.4).
/// - `opacity`: scales the alpha of rendered mask colors. `1.0` (default)
///   preserves the class color's alpha unchanged; `0.5` makes masks
///   semi-transparent.
/// - `color_mode`: controls whether colors are assigned by class label,
///   instance index, or track ID. Defaults to [`ColorMode::Class`].
#[derive(Debug, Clone, Copy)]
pub struct MaskOverlay<'a> {
    /// Compositing source image. Must have the same dimensions and pixel
    /// format as `dst`. When `Some`, the output is `background + masks`.
    /// When `None`, `dst` is cleared to `0x00000000` before masks are drawn.
    pub background: Option<&'a TensorDyn>,
    /// Alpha scale applied to rendered mask colors; `1.0` leaves the class
    /// color's alpha unchanged.
    pub opacity: f32,
    /// Normalized letterbox region `[xmin, ymin, xmax, ymax]` in model-input
    /// space that contains actual image content (the rest is padding).
    ///
    /// When set, bounding boxes and mask coordinates from the decoder (which
    /// are in model-input normalized space) are mapped back to the original
    /// image coordinate space before rendering.
    ///
    /// Use [`with_letterbox_crop`](Self::with_letterbox_crop) to compute this
    /// from the [`Crop`] that was used in the model input [`convert`](crate::ImageProcessorTrait::convert) call.
    pub letterbox: Option<[f32; 4]>,
    /// Palette index selection strategy; defaults to [`ColorMode::Class`].
    pub color_mode: ColorMode,
}
444
445impl Default for MaskOverlay<'_> {
446    fn default() -> Self {
447        Self {
448            background: None,
449            opacity: 1.0,
450            letterbox: None,
451            color_mode: ColorMode::Class,
452        }
453    }
454}
455
456impl<'a> MaskOverlay<'a> {
457    pub fn new() -> Self {
458        Self::default()
459    }
460
461    /// Set the compositing source image.
462    ///
463    /// `bg` must have the same dimensions and pixel format as the `dst` passed
464    /// to [`draw_decoded_masks`](crate::ImageProcessorTrait::draw_decoded_masks) /
465    /// [`draw_proto_masks`](crate::ImageProcessorTrait::draw_proto_masks).
466    /// The output will be `bg + masks`. Without a background, `dst` is cleared
467    /// to `0x00000000`.
468    pub fn with_background(mut self, bg: &'a TensorDyn) -> Self {
469        self.background = Some(bg);
470        self
471    }
472
473    pub fn with_opacity(mut self, opacity: f32) -> Self {
474        self.opacity = opacity.clamp(0.0, 1.0);
475        self
476    }
477
478    pub fn with_color_mode(mut self, mode: ColorMode) -> Self {
479        self.color_mode = mode;
480        self
481    }
482
483    /// Set the letterbox transform from the [`Crop`] used when preparing the
484    /// model input, so that bounding boxes and masks are correctly mapped back
485    /// to the original image coordinate space during rendering.
486    ///
487    /// Pass the same `crop` that was given to
488    /// [`convert`](crate::ImageProcessorTrait::convert) along with the model
489    /// input dimensions (`model_w` × `model_h`).
490    ///
491    /// Has no effect when `crop.dst_rect` is `None` (no letterbox applied).
492    pub fn with_letterbox_crop(mut self, crop: &Crop, model_w: usize, model_h: usize) -> Self {
493        if let Some(r) = crop.dst_rect {
494            self.letterbox = Some([
495                r.left as f32 / model_w as f32,
496                r.top as f32 / model_h as f32,
497                (r.left + r.width) as f32 / model_w as f32,
498                (r.top + r.height) as f32 / model_h as f32,
499            ]);
500        }
501        self
502    }
503}
504
505/// Apply the inverse letterbox transform to a bounding box.
506///
507/// `letterbox` is `[lx0, ly0, lx1, ly1]` — the normalized region of the model
508/// input that contains actual image content (output of
509/// [`MaskOverlay::with_letterbox_crop`]).
510///
511/// Converts model-input-normalized coords to output-image-normalized coords,
512/// clamped to `[0.0, 1.0]`. Also canonicalises the bbox (ensures xmin ≤ xmax).
513#[inline]
514fn unletter_bbox(bbox: DetectBox, lb: [f32; 4]) -> DetectBox {
515    let b = bbox.bbox.to_canonical();
516    let [lx0, ly0, lx1, ly1] = lb;
517    let inv_w = if lx1 > lx0 { 1.0 / (lx1 - lx0) } else { 1.0 };
518    let inv_h = if ly1 > ly0 { 1.0 / (ly1 - ly0) } else { 1.0 };
519    DetectBox {
520        bbox: edgefirst_decoder::BoundingBox {
521            xmin: ((b.xmin - lx0) * inv_w).clamp(0.0, 1.0),
522            ymin: ((b.ymin - ly0) * inv_h).clamp(0.0, 1.0),
523            xmax: ((b.xmax - lx0) * inv_w).clamp(0.0, 1.0),
524            ymax: ((b.ymax - ly0) * inv_h).clamp(0.0, 1.0),
525        },
526        ..bbox
527    }
528}
529
/// Source/destination crop configuration for image conversion calls.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Crop {
    /// Region of the source image to read from; `None` uses the full source.
    pub src_rect: Option<Rect>,
    /// Region of the destination image to write into; `None` uses the full
    /// destination.
    pub dst_rect: Option<Rect>,
    /// Optional color for destination areas outside the cropped region.
    pub dst_color: Option<[u8; 4]>,
}
536
537impl Default for Crop {
538    fn default() -> Self {
539        Crop::new()
540    }
541}
542impl Crop {
543    // Creates a new Crop with default values (no cropping).
544    pub fn new() -> Self {
545        Crop {
546            src_rect: None,
547            dst_rect: None,
548            dst_color: None,
549        }
550    }
551
552    // Sets the source rectangle for cropping.
553    pub fn with_src_rect(mut self, src_rect: Option<Rect>) -> Self {
554        self.src_rect = src_rect;
555        self
556    }
557
558    // Sets the destination rectangle for cropping.
559    pub fn with_dst_rect(mut self, dst_rect: Option<Rect>) -> Self {
560        self.dst_rect = dst_rect;
561        self
562    }
563
564    // Sets the destination color for areas outside the cropped region.
565    pub fn with_dst_color(mut self, dst_color: Option<[u8; 4]>) -> Self {
566        self.dst_color = dst_color;
567        self
568    }
569
570    // Creates a new Crop with no cropping.
571    pub fn no_crop() -> Self {
572        Crop::new()
573    }
574
575    /// Validate crop rectangles against explicit dimensions.
576    pub(crate) fn check_crop_dims(
577        &self,
578        src_w: usize,
579        src_h: usize,
580        dst_w: usize,
581        dst_h: usize,
582    ) -> Result<(), Error> {
583        let src_ok = self
584            .src_rect
585            .is_none_or(|r| r.left + r.width <= src_w && r.top + r.height <= src_h);
586        let dst_ok = self
587            .dst_rect
588            .is_none_or(|r| r.left + r.width <= dst_w && r.top + r.height <= dst_h);
589        match (src_ok, dst_ok) {
590            (true, true) => Ok(()),
591            (true, false) => Err(Error::CropInvalid(format!(
592                "Dest crop invalid: {:?}",
593                self.dst_rect
594            ))),
595            (false, true) => Err(Error::CropInvalid(format!(
596                "Src crop invalid: {:?}",
597                self.src_rect
598            ))),
599            (false, false) => Err(Error::CropInvalid(format!(
600                "Dest and Src crop invalid: {:?} {:?}",
601                self.dst_rect, self.src_rect
602            ))),
603        }
604    }
605
606    /// Validate crop rectangles against TensorDyn source and destination.
607    pub fn check_crop_dyn(
608        &self,
609        src: &edgefirst_tensor::TensorDyn,
610        dst: &edgefirst_tensor::TensorDyn,
611    ) -> Result<(), Error> {
612        self.check_crop_dims(
613            src.width().unwrap_or(0),
614            src.height().unwrap_or(0),
615            dst.width().unwrap_or(0),
616            dst.height().unwrap_or(0),
617        )
618    }
619}
620
/// Axis-aligned rectangle in pixel coordinates, positioned by its left/top
/// corner.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Rect {
    /// Horizontal offset of the left edge, in pixels.
    pub left: usize,
    /// Vertical offset of the top edge, in pixels.
    pub top: usize,
    /// Width in pixels.
    pub width: usize,
    /// Height in pixels.
    pub height: usize,
}
628
629impl Rect {
630    // Creates a new Rect with the specified left, top, width, and height.
631    pub fn new(left: usize, top: usize, width: usize, height: usize) -> Self {
632        Self {
633            left,
634            top,
635            width,
636            height,
637        }
638    }
639
640    // Checks if the rectangle is valid for the given TensorDyn image.
641    pub fn check_rect_dyn(&self, image: &TensorDyn) -> bool {
642        let w = image.width().unwrap_or(0);
643        let h = image.height().unwrap_or(0);
644        self.left + self.width <= w && self.top + self.height <= h
645    }
646}
647
#[enum_dispatch(ImageProcessor)]
pub trait ImageProcessorTrait {
    /// Converts the source image to the destination image format and size. The
    /// image is cropped first, then flipped, then rotated.
    ///
    /// # Arguments
    ///
    /// * `src` - The source image to convert from.
    /// * `dst` - The destination image to be converted to.
    /// * `rotation` - The rotation to apply to the destination image.
    /// * `flip` - Flips the image (applied before the rotation).
    /// * `crop` - Crop configuration: `src_rect` optionally selects the
    ///   source region, `dst_rect` the destination placement, and
    ///   `dst_color` the color for destination areas outside the cropped
    ///   region. Use `Crop::default()` for no cropping.
    ///
    /// # Returns
    ///
    /// A `Result` indicating success or failure of the conversion.
    fn convert(
        &mut self,
        src: &TensorDyn,
        dst: &mut TensorDyn,
        rotation: Rotation,
        flip: Flip,
        crop: Crop,
    ) -> Result<()>;

    /// Draw pre-decoded detection boxes and segmentation masks onto `dst`.
    ///
    /// Supports two segmentation modes based on the mask channel count:
    /// - **Instance segmentation** (`C=1`): one `Segmentation` per detection,
    ///   `segmentation` and `detect` are zipped.
    /// - **Semantic segmentation** (`C>1`): a single `Segmentation` covering
    ///   all classes; only the first element is used.
    ///
    /// # Format requirements
    ///
    /// - CPU backend: `dst` must be `RGBA` or `RGB`.
    /// - OpenGL backend: `dst` must be `RGBA`, `BGRA`, or `RGB`.
    /// - G2D backend: only produces the base frame (empty detections);
    ///   returns `NotImplemented` when any detection or segmentation is
    ///   supplied.
    ///
    /// # Output contract
    ///
    /// This function always fully writes `dst` — it never relies on the
    /// caller having pre-cleared the destination. The four cases are:
    ///
    /// | detections | background | output                              |
    /// |------------|------------|-------------------------------------|
    /// | none       | none       | dst cleared to `0x00000000`         |
    /// | none       | set        | dst ← background                    |
    /// | set        | none       | masks drawn over cleared dst        |
    /// | set        | set        | masks drawn over background         |
    ///
    /// Each backend implements this with its native primitives: G2D uses
    /// `g2d_clear` / `g2d_blit`, OpenGL uses `glClear` / DMA-BUF GPU blit
    /// plus the mask program, and CPU uses direct buffer fill / memcpy as
    /// the terminal fallback. CPU-memcpy of DMA buffers is avoided on the
    /// accelerated paths.
    ///
    /// An empty `segmentation` slice is valid — only bounding boxes are drawn.
    ///
    /// `overlay` controls compositing: `background` is the compositing source
    /// (must match `dst` in size and format); `opacity` scales mask alpha.
    ///
    /// # Buffer aliasing
    ///
    /// `dst` and `overlay.background` must reference **distinct underlying
    /// buffers**. An aliased pair returns [`Error::AliasedBuffers`] without
    /// dispatching to any backend — the GL path would otherwise read and
    /// write the same texture in a single draw, which is undefined behaviour
    /// on most drivers. Aliasing is detected via
    /// [`TensorDyn::aliases`](edgefirst_tensor::TensorDyn::aliases), which
    /// catches both shared-allocation clones and separate imports over the
    /// same dmabuf fd.
    ///
    /// # Migration from v0.16.3 and earlier
    ///
    /// Prior to v0.16.4 the call silently preserved `dst`'s contents on empty
    /// detections. That invariant no longer holds — `dst` is always fully
    /// written. Callers who pre-loaded an image into `dst` before calling this
    /// function must now pass that image via `overlay.background` instead.
    fn draw_decoded_masks(
        &mut self,
        dst: &mut TensorDyn,
        detect: &[DetectBox],
        segmentation: &[Segmentation],
        overlay: MaskOverlay<'_>,
    ) -> Result<()>;

    /// Draw masks from proto data onto image (fused decode+draw).
    ///
    /// For YOLO segmentation models, this avoids materializing intermediate
    /// `Array3<u8>` masks. The `ProtoData` contains mask coefficients and the
    /// prototype tensor; the renderer computes `mask_coeff @ protos` directly
    /// at the output resolution using bilinear sampling.
    ///
    /// `detect` and `proto_data.mask_coefficients` must have the same length
    /// (enforced by zip — excess entries are silently ignored). An empty
    /// `detect` slice is valid and produces the base frame — cleared or
    /// background-blitted — via the selected backend's native primitive.
    ///
    /// # Format requirements and output contract
    ///
    /// Same as [`draw_decoded_masks`](Self::draw_decoded_masks), including
    /// the "always fully writes dst" guarantee across all four
    /// detection/background combinations.
    ///
    /// `overlay` controls compositing — see [`draw_decoded_masks`](Self::draw_decoded_masks).
    fn draw_proto_masks(
        &mut self,
        dst: &mut TensorDyn,
        detect: &[DetectBox],
        proto_data: &ProtoData,
        overlay: MaskOverlay<'_>,
    ) -> Result<()>;

    /// Sets the colors used for rendering segmentation masks. Up to 20 colors
    /// can be set.
    fn set_class_colors(&mut self, colors: &[[u8; 4]]) -> Result<()>;
}
769
/// Configuration for [`ImageProcessor`] construction.
///
/// Use with [`ImageProcessor::with_config`] to override the default EGL
/// display auto-detection and backend selection. The default configuration
/// preserves the existing auto-detection behaviour; construct it with
/// `ImageProcessorConfig::default()` and override fields as needed.
#[derive(Debug, Clone, Default)]
pub struct ImageProcessorConfig {
    /// Force OpenGL to use this EGL display type instead of auto-detecting.
    ///
    /// When `None`, the processor probes displays in priority order: GBM,
    /// PlatformDevice, Default. Use [`probe_egl_displays`] to discover
    /// which displays are available on the current system.
    ///
    /// Ignored when `EDGEFIRST_DISABLE_GL=1` is set.
    #[cfg(target_os = "linux")]
    #[cfg(feature = "opengl")]
    pub egl_display: Option<EglDisplayKind>,

    /// Preferred compute backend.
    ///
    /// When set to a specific backend (not [`ComputeBackend::Auto`]), the
    /// processor initializes that backend with no fallback — returns an error if the conversion is not supported.
    /// This takes precedence over `EDGEFIRST_FORCE_BACKEND` and the
    /// `EDGEFIRST_DISABLE_*` environment variables.
    ///
    /// - [`ComputeBackend::OpenGl`]: init OpenGL + CPU, skip G2D
    /// - [`ComputeBackend::G2d`]: init G2D + CPU, skip OpenGL
    /// - [`ComputeBackend::Cpu`]: init CPU only
    /// - [`ComputeBackend::Auto`]: existing env-var-driven selection
    pub backend: ComputeBackend,
}
801
/// Compute backend selection for [`ImageProcessor`].
///
/// Use with [`ImageProcessorConfig::backend`] to select which backend the
/// processor should prefer. When a specific hardware backend is selected,
/// the processor initializes that backend plus CPU as a fallback. When
/// `Auto` (the derived [`Default`]) is used, the existing
/// environment-variable-driven selection applies.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum ComputeBackend {
    /// Auto-detect based on available hardware and environment variables.
    #[default]
    Auto,
    /// CPU-only processing (no hardware acceleration).
    Cpu,
    /// Prefer G2D hardware blitter (+ CPU fallback).
    G2d,
    /// Prefer OpenGL ES (+ CPU fallback).
    OpenGl,
}
820
/// Backend forced via the `EDGEFIRST_FORCE_BACKEND` environment variable.
///
/// When set, the [`ImageProcessor`] only initializes and dispatches to the
/// selected backend — no fallback chain is used.
///
/// NOTE(review): only the `EDGEFIRST_FORCE_BACKEND` path in `with_config`
/// produces a `Some(ForcedBackend)`; config-driven selection via
/// [`ImageProcessorConfig::backend`] deliberately leaves
/// `ImageProcessor::forced_backend` as `None` so the CPU fallback
/// remains active.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ForcedBackend {
    Cpu,
    G2d,
    OpenGl,
}
832
/// Image converter that uses available hardware acceleration or CPU as a
/// fallback.
///
/// Construct with [`ImageProcessor::new`] (auto-detection) or
/// [`ImageProcessor::with_config`] (explicit backend / EGL display).
#[derive(Debug)]
pub struct ImageProcessor {
    /// CPU-based image converter as a fallback. This is only None if the
    /// EDGEFIRST_DISABLE_CPU environment variable is set, or when a
    /// hardware backend was forced via EDGEFIRST_FORCE_BACKEND.
    pub cpu: Option<CPUProcessor>,

    #[cfg(target_os = "linux")]
    /// G2D-based image converter for Linux systems. This is only available if
    /// the EDGEFIRST_DISABLE_G2D environment variable is not set and libg2d.so
    /// is available.
    pub g2d: Option<G2DProcessor>,
    #[cfg(target_os = "linux")]
    #[cfg(feature = "opengl")]
    /// OpenGL-based image converter for Linux systems. This is only available
    /// if the EDGEFIRST_DISABLE_GL environment variable is not set and OpenGL
    /// ES is available.
    pub opengl: Option<GLProcessorThreaded>,

    /// When set, only the specified backend is used — no fallback chain.
    /// Set exclusively by the EDGEFIRST_FORCE_BACKEND path in `with_config`.
    pub(crate) forced_backend: Option<ForcedBackend>,
}
856
// SAFETY: NOTE(review) — these blanket impls assert that every backend
// handle held by `ImageProcessor` (`CPUProcessor`, `G2DProcessor`,
// `GLProcessorThreaded`) may be moved to and shared across threads. That
// guarantee is not visible from this file; confirm each backend's internal
// thread-safety (e.g. that any GL context is confined to its owning thread,
// as the `Threaded` suffix suggests) before relying on these impls.
unsafe impl Send for ImageProcessor {}
unsafe impl Sync for ImageProcessor {}
859
860impl ImageProcessor {
861    /// Creates a new `ImageProcessor` instance, initializing available
862    /// hardware converters based on the system capabilities and environment
863    /// variables.
864    ///
865    /// # Examples
866    /// ```rust
867    /// # use edgefirst_image::{ImageProcessor, Rotation, Flip, Crop, ImageProcessorTrait, load_image};
868    /// # use edgefirst_tensor::{PixelFormat, DType, TensorDyn};
869    /// # fn main() -> Result<(), edgefirst_image::Error> {
870    /// let image = include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/../../testdata/zidane.jpg"));
871    /// let src = load_image(image, Some(PixelFormat::Rgba), None)?;
872    /// let mut converter = ImageProcessor::new()?;
873    /// let mut dst = converter.create_image(640, 480, PixelFormat::Rgb, DType::U8, None)?;
874    /// converter.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::default())?;
875    /// # Ok(())
876    /// # }
877    /// ```
878    pub fn new() -> Result<Self> {
879        Self::with_config(ImageProcessorConfig::default())
880    }
881
    /// Creates a new `ImageProcessor` with the given configuration.
    ///
    /// When [`ImageProcessorConfig::backend`] is set to a specific backend,
    /// environment variables are ignored and the processor initializes the
    /// requested backend plus CPU as a fallback.
    ///
    /// When `Auto`, the existing `EDGEFIRST_FORCE_BACKEND` and
    /// `EDGEFIRST_DISABLE_*` environment variables apply.
    ///
    /// # Errors
    ///
    /// Returns [`Error::ForcedBackendUnavailable`] when
    /// `EDGEFIRST_FORCE_BACKEND` names an unknown backend, or when the
    /// forced backend fails to initialize or is compiled out for this
    /// platform / feature set. Config-driven selection never errors on
    /// hardware init failure — it logs a warning and degrades to CPU.
    // `config` (and its `egl_display` field) is unused on targets where the
    // hardware backends are compiled out, hence the allow.
    #[allow(unused_variables)]
    pub fn with_config(config: ImageProcessorConfig) -> Result<Self> {
        // ── Config-driven backend selection ──────────────────────────
        // When the caller explicitly requests a backend via the config,
        // skip all environment variable logic.
        // Note: all paths below leave `forced_backend: None`, keeping the
        // CPU fallback active in the dispatch chain.
        match config.backend {
            ComputeBackend::Cpu => {
                log::info!("ComputeBackend::Cpu — CPU only");
                return Ok(Self {
                    cpu: Some(CPUProcessor::new()),
                    #[cfg(target_os = "linux")]
                    g2d: None,
                    #[cfg(target_os = "linux")]
                    #[cfg(feature = "opengl")]
                    opengl: None,
                    forced_backend: None,
                });
            }
            ComputeBackend::G2d => {
                log::info!("ComputeBackend::G2d — G2D + CPU fallback");
                #[cfg(target_os = "linux")]
                {
                    // Init failure is non-fatal here — warn and run on CPU.
                    let g2d = match G2DProcessor::new() {
                        Ok(g) => Some(g),
                        Err(e) => {
                            log::warn!("G2D requested but failed to initialize: {e:?}");
                            None
                        }
                    };
                    return Ok(Self {
                        cpu: Some(CPUProcessor::new()),
                        g2d,
                        #[cfg(feature = "opengl")]
                        opengl: None,
                        forced_backend: None,
                    });
                }
                #[cfg(not(target_os = "linux"))]
                {
                    log::warn!("G2D requested but not available on this platform, using CPU");
                    return Ok(Self {
                        cpu: Some(CPUProcessor::new()),
                        forced_backend: None,
                    });
                }
            }
            ComputeBackend::OpenGl => {
                log::info!("ComputeBackend::OpenGl — OpenGL + CPU fallback");
                #[cfg(target_os = "linux")]
                {
                    // Init failure is non-fatal here — warn and run on CPU.
                    #[cfg(feature = "opengl")]
                    let opengl = match GLProcessorThreaded::new(config.egl_display) {
                        Ok(gl) => Some(gl),
                        Err(e) => {
                            log::warn!("OpenGL requested but failed to initialize: {e:?}");
                            None
                        }
                    };
                    return Ok(Self {
                        cpu: Some(CPUProcessor::new()),
                        g2d: None,
                        #[cfg(feature = "opengl")]
                        opengl,
                        forced_backend: None,
                    });
                }
                #[cfg(not(target_os = "linux"))]
                {
                    log::warn!("OpenGL requested but not available on this platform, using CPU");
                    return Ok(Self {
                        cpu: Some(CPUProcessor::new()),
                        forced_backend: None,
                    });
                }
            }
            ComputeBackend::Auto => { /* fall through to env-var logic below */ }
        }

        // ── EDGEFIRST_FORCE_BACKEND ──────────────────────────────────
        // When set, only the requested backend is initialised and no
        // fallback chain is used. Accepted values (case-insensitive):
        //   "cpu", "g2d", "opengl"
        // Unlike the config paths above, init failure here is a hard error.
        if let Ok(val) = std::env::var("EDGEFIRST_FORCE_BACKEND") {
            let val_lower = val.to_lowercase();
            let forced = match val_lower.as_str() {
                "cpu" => ForcedBackend::Cpu,
                "g2d" => ForcedBackend::G2d,
                "opengl" => ForcedBackend::OpenGl,
                other => {
                    return Err(Error::ForcedBackendUnavailable(format!(
                        "unknown EDGEFIRST_FORCE_BACKEND value: {other:?} (expected cpu, g2d, or opengl)"
                    )));
                }
            };

            log::info!("EDGEFIRST_FORCE_BACKEND={val} — only initializing {val_lower} backend");

            return match forced {
                ForcedBackend::Cpu => Ok(Self {
                    cpu: Some(CPUProcessor::new()),
                    #[cfg(target_os = "linux")]
                    g2d: None,
                    #[cfg(target_os = "linux")]
                    #[cfg(feature = "opengl")]
                    opengl: None,
                    forced_backend: Some(ForcedBackend::Cpu),
                }),
                ForcedBackend::G2d => {
                    #[cfg(target_os = "linux")]
                    {
                        let g2d = G2DProcessor::new().map_err(|e| {
                            Error::ForcedBackendUnavailable(format!(
                                "g2d forced but failed to initialize: {e:?}"
                            ))
                        })?;
                        Ok(Self {
                            // Forced backend: no CPU fallback is created.
                            cpu: None,
                            g2d: Some(g2d),
                            #[cfg(feature = "opengl")]
                            opengl: None,
                            forced_backend: Some(ForcedBackend::G2d),
                        })
                    }
                    #[cfg(not(target_os = "linux"))]
                    {
                        Err(Error::ForcedBackendUnavailable(
                            "g2d backend is only available on Linux".into(),
                        ))
                    }
                }
                ForcedBackend::OpenGl => {
                    #[cfg(target_os = "linux")]
                    #[cfg(feature = "opengl")]
                    {
                        let opengl = GLProcessorThreaded::new(config.egl_display).map_err(|e| {
                            Error::ForcedBackendUnavailable(format!(
                                "opengl forced but failed to initialize: {e:?}"
                            ))
                        })?;
                        Ok(Self {
                            // Forced backend: no CPU fallback is created.
                            cpu: None,
                            g2d: None,
                            opengl: Some(opengl),
                            forced_backend: Some(ForcedBackend::OpenGl),
                        })
                    }
                    #[cfg(not(all(target_os = "linux", feature = "opengl")))]
                    {
                        Err(Error::ForcedBackendUnavailable(
                            "opengl backend requires Linux with the 'opengl' feature enabled"
                                .into(),
                        ))
                    }
                }
            };
        }

        // ── Existing DISABLE logic (unchanged) ──────────────────────
        // A DISABLE variable counts as set for any value other than "0" or
        // "false" (case-insensitive) — including the empty string.
        #[cfg(target_os = "linux")]
        let g2d = if std::env::var("EDGEFIRST_DISABLE_G2D")
            .map(|x| x != "0" && x.to_lowercase() != "false")
            .unwrap_or(false)
        {
            log::debug!("EDGEFIRST_DISABLE_G2D is set");
            None
        } else {
            match G2DProcessor::new() {
                Ok(g2d_converter) => Some(g2d_converter),
                Err(err) => {
                    log::warn!("Failed to initialize G2D converter: {err:?}");
                    None
                }
            }
        };

        #[cfg(target_os = "linux")]
        #[cfg(feature = "opengl")]
        let opengl = if std::env::var("EDGEFIRST_DISABLE_GL")
            .map(|x| x != "0" && x.to_lowercase() != "false")
            .unwrap_or(false)
        {
            log::debug!("EDGEFIRST_DISABLE_GL is set");
            None
        } else {
            match GLProcessorThreaded::new(config.egl_display) {
                Ok(gl_converter) => Some(gl_converter),
                Err(err) => {
                    log::warn!("Failed to initialize GL converter: {err:?}");
                    None
                }
            }
        };

        let cpu = if std::env::var("EDGEFIRST_DISABLE_CPU")
            .map(|x| x != "0" && x.to_lowercase() != "false")
            .unwrap_or(false)
        {
            log::debug!("EDGEFIRST_DISABLE_CPU is set");
            None
        } else {
            Some(CPUProcessor::new())
        };
        Ok(Self {
            cpu,
            #[cfg(target_os = "linux")]
            g2d,
            #[cfg(target_os = "linux")]
            #[cfg(feature = "opengl")]
            opengl,
            forced_backend: None,
        })
    }
1102
1103    /// Sets the interpolation mode for int8 proto textures on the OpenGL
1104    /// backend. No-op if OpenGL is not available.
1105    #[cfg(target_os = "linux")]
1106    #[cfg(feature = "opengl")]
1107    pub fn set_int8_interpolation_mode(&mut self, mode: Int8InterpolationMode) -> Result<()> {
1108        if let Some(ref mut gl) = self.opengl {
1109            gl.set_int8_interpolation_mode(mode)?;
1110        }
1111        Ok(())
1112    }
1113
1114    /// Create a [`TensorDyn`] image with the best available memory backend.
1115    ///
1116    /// Priority: DMA-buf → PBO (byte-sized types: u8, i8) → system memory.
1117    ///
1118    /// Use this method instead of [`TensorDyn::image()`] when the tensor will
1119    /// be used with [`ImageProcessor::convert()`]. It selects the optimal
1120    /// memory backing (including PBO for GPU zero-copy) which direct
1121    /// allocation cannot achieve.
1122    ///
1123    /// This method is on [`ImageProcessor`] rather than [`ImageProcessorTrait`]
1124    /// because optimal allocation requires knowledge of the active compute
1125    /// backends (e.g. the GL context handle for PBO allocation). Individual
1126    /// backend implementations ([`CPUProcessor`], etc.) do not have this
1127    /// cross-backend visibility.
1128    ///
1129    /// # Arguments
1130    ///
1131    /// * `width` - Image width in pixels
1132    /// * `height` - Image height in pixels
1133    /// * `format` - Pixel format
1134    /// * `dtype` - Element data type (e.g. `DType::U8`, `DType::I8`)
1135    /// * `memory` - Optional memory type override; when `None`, the best
1136    ///   available backend is selected automatically.
1137    ///
1138    /// # Returns
1139    ///
1140    /// A [`TensorDyn`] backed by the highest-performance memory type
1141    /// available on this system.
1142    ///
1143    /// # Pitch alignment for DMA-backed allocations
1144    ///
1145    /// DMA-BUF imports into the GL backend (Mali Valhall on i.MX 95
1146    /// specifically) require every row pitch to be a multiple of
1147    /// [`GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES`] (currently 64). When this
1148    /// method lands on `TensorMemory::Dma`, the underlying allocation is
1149    /// silently padded so the row stride satisfies that requirement.
1150    ///
1151    /// **The user-requested `width` is preserved** — `tensor.width()`
1152    /// returns the same value you passed in. The padding is carried by
1153    /// [`TensorDyn::row_stride`] / `effective_row_stride()`, which the
1154    /// GL backend reads when importing the buffer as an EGLImage.
1155    /// Callers that compute byte offsets from the tensor must use the
1156    /// stride, not `width × bytes_per_pixel`; the CPU mapping spans the
1157    /// full `stride × height` bytes.
1158    ///
1159    /// Pre-aligned widths (640, 1280, 1920, 3008, 3840 …) allocate
1160    /// exactly `width × bpp × height` bytes with no padding. PBO and
1161    /// Mem fallbacks never pad — they don't go through EGLImage import.
1162    ///
1163    /// See also [`align_width_for_gpu_pitch`] for an advisory helper
1164    /// that external callers (GStreamer plugins, video pipelines) can
1165    /// use to size their own DMA-BUFs for GL compatibility.
1166    ///
1167    /// # Errors
1168    ///
1169    /// Returns an error if all allocation strategies fail.
1170    pub fn create_image(
1171        &self,
1172        width: usize,
1173        height: usize,
1174        format: PixelFormat,
1175        dtype: DType,
1176        memory: Option<TensorMemory>,
1177    ) -> Result<TensorDyn> {
1178        // Compute the GPU-aligned row stride in bytes for this image.
1179        // `None` means either the format has no defined primary-plane bpp
1180        // (unknown future layout) or the stride calculation would overflow
1181        // — in both cases we fall back to the natural layout via the plain
1182        // `TensorDyn::image` constructor, and the slow-path warning inside
1183        // `draw_*_masks` will fire if the subsequent GL import fails.
1184        //
1185        // DMA allocation is Linux-only (see `TensorMemory::Dma` cfg gate),
1186        // so both the stride computation and the helper closure are gated
1187        // accordingly — the callers below are already Linux-only.
1188        #[cfg(target_os = "linux")]
1189        let dma_stride_bytes: Option<usize> = primary_plane_bpp(format, dtype.size())
1190            .and_then(|bpp| width.checked_mul(bpp))
1191            .and_then(align_pitch_bytes_to_gpu_alignment);
1192
1193        // Helper: allocate a DMA image, using the padded-stride constructor
1194        // when the computed stride exceeds the natural pitch, otherwise the
1195        // plain constructor (byte-identical result in the common case).
1196        #[cfg(target_os = "linux")]
1197        let try_dma = || -> Result<TensorDyn> {
1198            // Stride padding is only meaningful for packed pixel layouts
1199            // (RGBA8, BGRA8, RGB888, Grey) — the formats the GL backend
1200            // renders into. Semi-planar (NV12, NV16) and planar (PlanarRgb,
1201            // PlanarRgba) tensors go through `TensorDyn::image(...)` with
1202            // their natural layout; they're imported from camera capture
1203            // via `from_fd` far more often than allocated here, and
1204            // `Tensor::image_with_stride` explicitly rejects them.
1205            let packed = format.layout() == edgefirst_tensor::PixelLayout::Packed;
1206            match dma_stride_bytes {
1207                Some(stride)
1208                    if packed
1209                        && primary_plane_bpp(format, dtype.size())
1210                            .and_then(|bpp| width.checked_mul(bpp))
1211                            .is_some_and(|natural| stride > natural) =>
1212                {
1213                    log::debug!(
1214                        "create_image: padding row stride for {format:?} {width}x{height} \
1215                         from natural pitch to {stride} bytes for GPU alignment"
1216                    );
1217                    Ok(TensorDyn::image_with_stride(
1218                        width,
1219                        height,
1220                        format,
1221                        dtype,
1222                        stride,
1223                        Some(edgefirst_tensor::TensorMemory::Dma),
1224                    )?)
1225                }
1226                _ => Ok(TensorDyn::image(
1227                    width,
1228                    height,
1229                    format,
1230                    dtype,
1231                    Some(edgefirst_tensor::TensorMemory::Dma),
1232                )?),
1233            }
1234        };
1235
1236        // If an explicit memory type is requested, honour it directly.
1237        // On Linux, `TensorMemory::Dma` gets the padded-stride treatment;
1238        // other memory types take the user-requested width verbatim.
1239        match memory {
1240            #[cfg(target_os = "linux")]
1241            Some(TensorMemory::Dma) => {
1242                return try_dma();
1243            }
1244            Some(mem) => {
1245                return Ok(TensorDyn::image(width, height, format, dtype, Some(mem))?);
1246            }
1247            None => {}
1248        }
1249
1250        // Try DMA first on Linux — skip only when GL has explicitly selected PBO
1251        // as the preferred transfer path (PBO is better than DMA in that case).
1252        #[cfg(target_os = "linux")]
1253        {
1254            #[cfg(feature = "opengl")]
1255            let gl_uses_pbo = self
1256                .opengl
1257                .as_ref()
1258                .is_some_and(|gl| gl.transfer_backend() == opengl_headless::TransferBackend::Pbo);
1259            #[cfg(not(feature = "opengl"))]
1260            let gl_uses_pbo = false;
1261
1262            if !gl_uses_pbo {
1263                if let Ok(img) = try_dma() {
1264                    return Ok(img);
1265                }
1266            }
1267        }
1268
1269        // Try PBO (if GL available).
1270        // PBO buffers are u8-sized; the int8 shader emulates i8 output via
1271        // XOR 0x80 on the same underlying buffer, so both U8 and I8 work.
1272        #[cfg(target_os = "linux")]
1273        #[cfg(feature = "opengl")]
1274        if dtype.size() == 1 {
1275            if let Some(gl) = &self.opengl {
1276                match gl.create_pbo_image(width, height, format) {
1277                    Ok(t) => {
1278                        if dtype == DType::I8 {
1279                            // SAFETY: Tensor<u8> and Tensor<i8> are layout-
1280                            // identical (same element size, no T-dependent
1281                            // drop glue). The int8 shader applies XOR 0x80
1282                            // on the same PBO buffer. Same rationale as
1283                            // gl::processor::tensor_i8_as_u8_mut.
1284                            // Invariant: PBO tensors never have chroma
1285                            // (create_pbo_image → Tensor::wrap sets it None).
1286                            debug_assert!(
1287                                t.chroma().is_none(),
1288                                "PBO i8 transmute requires chroma == None"
1289                            );
1290                            let t_i8: Tensor<i8> = unsafe { std::mem::transmute(t) };
1291                            return Ok(TensorDyn::from(t_i8));
1292                        }
1293                        return Ok(TensorDyn::from(t));
1294                    }
1295                    Err(e) => log::debug!("PBO image creation failed, falling back to Mem: {e:?}"),
1296                }
1297            }
1298        }
1299
1300        // Fallback to Mem
1301        Ok(TensorDyn::image(
1302            width,
1303            height,
1304            format,
1305            dtype,
1306            Some(edgefirst_tensor::TensorMemory::Mem),
1307        )?)
1308    }
1309
1310    /// Import an external DMA-BUF image.
1311    ///
1312    /// Each [`PlaneDescriptor`] owns an already-duped fd; this method
1313    /// consumes the descriptors and takes ownership of those fds (whether
1314    /// the call succeeds or fails).
1315    ///
1316    /// The caller must ensure the DMA-BUF allocation is large enough for the
1317    /// specified width, height, format, and any stride/offset on the plane
1318    /// descriptors. No buffer-size validation is performed; an undersized
1319    /// buffer may cause GPU faults or EGL import failure.
1320    ///
1321    /// # Arguments
1322    ///
1323    /// * `image` - Plane descriptor for the primary (or only) plane
1324    /// * `chroma` - Optional plane descriptor for the UV chroma plane
1325    ///   (required for multiplane NV12)
1326    /// * `width` - Image width in pixels
1327    /// * `height` - Image height in pixels
1328    /// * `format` - Pixel format of the buffer
1329    /// * `dtype` - Element data type (e.g. `DType::U8`)
1330    ///
1331    /// # Returns
1332    ///
1333    /// A `TensorDyn` configured as an image.
1334    ///
1335    /// # Errors
1336    ///
1337    /// * [`Error::NotSupported`] if `chroma` is `Some` for a non-semi-planar
1338    ///   format, or multiplane NV16 (not yet supported), or the fd is not
1339    ///   DMA-backed
1340    /// * [`Error::InvalidShape`] if NV12 height is odd
1341    ///
1342    /// # Platform
1343    ///
1344    /// Linux only.
1345    ///
1346    /// # Examples
1347    ///
1348    /// ```rust,ignore
1349    /// use edgefirst_tensor::PlaneDescriptor;
1350    ///
1351    /// // Single-plane RGBA
1352    /// let pd = PlaneDescriptor::new(fd.as_fd())?;
1353    /// let src = proc.import_image(pd, None, 1920, 1080, PixelFormat::Rgba, DType::U8)?;
1354    ///
1355    /// // Multi-plane NV12 with stride
1356    /// let y_pd = PlaneDescriptor::new(y_fd.as_fd())?.with_stride(2048);
1357    /// let uv_pd = PlaneDescriptor::new(uv_fd.as_fd())?.with_stride(2048);
1358    /// let src = proc.import_image(y_pd, Some(uv_pd), 1920, 1080,
1359    ///                             PixelFormat::Nv12, DType::U8)?;
1360    /// ```
1361    #[cfg(target_os = "linux")]
1362    pub fn import_image(
1363        &self,
1364        image: edgefirst_tensor::PlaneDescriptor,
1365        chroma: Option<edgefirst_tensor::PlaneDescriptor>,
1366        width: usize,
1367        height: usize,
1368        format: PixelFormat,
1369        dtype: DType,
1370    ) -> Result<TensorDyn> {
1371        use edgefirst_tensor::{Tensor, TensorMemory};
1372
1373        // Capture stride/offset from descriptors before consuming them
1374        let image_stride = image.stride();
1375        let image_offset = image.offset();
1376        let chroma_stride = chroma.as_ref().and_then(|c| c.stride());
1377        let chroma_offset = chroma.as_ref().and_then(|c| c.offset());
1378
1379        if let Some(chroma_pd) = chroma {
1380            // ── Multiplane path ──────────────────────────────────────
1381            // Multiplane tensors are backed by Tensor<u8> (or transmuted to
1382            // Tensor<i8>). Reject other dtypes to avoid silently returning a
1383            // tensor with the wrong element type.
1384            if dtype != DType::U8 && dtype != DType::I8 {
1385                return Err(Error::NotSupported(format!(
1386                    "multiplane import only supports U8/I8, got {dtype:?}"
1387                )));
1388            }
1389            if format.layout() != PixelLayout::SemiPlanar {
1390                return Err(Error::NotSupported(format!(
1391                    "import_image with chroma requires a semi-planar format, got {format:?}"
1392                )));
1393            }
1394
1395            let chroma_h = match format {
1396                PixelFormat::Nv12 => {
1397                    if !height.is_multiple_of(2) {
1398                        return Err(Error::InvalidShape(format!(
1399                            "NV12 requires even height, got {height}"
1400                        )));
1401                    }
1402                    height / 2
1403                }
1404                // NV16 multiplane will be supported in a future release;
1405                // the GL backend currently only handles NV12 plane1 attributes.
1406                PixelFormat::Nv16 => {
1407                    return Err(Error::NotSupported(
1408                        "multiplane NV16 is not yet supported; use contiguous NV16 instead".into(),
1409                    ))
1410                }
1411                _ => {
1412                    return Err(Error::NotSupported(format!(
1413                        "unsupported semi-planar format: {format:?}"
1414                    )))
1415                }
1416            };
1417
1418            let luma = Tensor::<u8>::from_fd(image.into_fd(), &[height, width], Some("luma"))?;
1419            if luma.memory() != TensorMemory::Dma {
1420                return Err(Error::NotSupported(format!(
1421                    "luma fd must be DMA-backed, got {:?}",
1422                    luma.memory()
1423                )));
1424            }
1425
1426            let chroma_tensor =
1427                Tensor::<u8>::from_fd(chroma_pd.into_fd(), &[chroma_h, width], Some("chroma"))?;
1428            if chroma_tensor.memory() != TensorMemory::Dma {
1429                return Err(Error::NotSupported(format!(
1430                    "chroma fd must be DMA-backed, got {:?}",
1431                    chroma_tensor.memory()
1432                )));
1433            }
1434
1435            // from_planes creates the combined tensor with format set,
1436            // preserving luma's row_stride (currently None since luma was raw).
1437            let mut tensor = Tensor::<u8>::from_planes(luma, chroma_tensor, format)?;
1438
1439            // Apply stride/offset to the combined tensor (luma plane)
1440            if let Some(s) = image_stride {
1441                tensor.set_row_stride(s)?;
1442            }
1443            if let Some(o) = image_offset {
1444                tensor.set_plane_offset(o);
1445            }
1446
1447            // Apply stride/offset to the chroma sub-tensor.
1448            // The chroma tensor is a raw 2D [chroma_h, width] tensor without
1449            // format metadata, so we validate stride manually rather than
1450            // using set_row_stride (which requires format).
1451            if let Some(chroma_ref) = tensor.chroma_mut() {
1452                if let Some(s) = chroma_stride {
1453                    if s < width {
1454                        return Err(Error::InvalidShape(format!(
1455                            "chroma stride {s} < minimum {width} for {format:?}"
1456                        )));
1457                    }
1458                    chroma_ref.set_row_stride_unchecked(s);
1459                }
1460                if let Some(o) = chroma_offset {
1461                    chroma_ref.set_plane_offset(o);
1462                }
1463            }
1464
1465            if dtype == DType::I8 {
1466                // SAFETY: Tensor<u8> and Tensor<i8> have identical layout because
1467                // the struct contains only type-erased storage (OwnedFd, shape, name),
1468                // no inline T values. This assertion catches layout drift at compile time.
1469                const {
1470                    assert!(std::mem::size_of::<Tensor<u8>>() == std::mem::size_of::<Tensor<i8>>());
1471                    assert!(
1472                        std::mem::align_of::<Tensor<u8>>() == std::mem::align_of::<Tensor<i8>>()
1473                    );
1474                }
1475                let tensor_i8: Tensor<i8> = unsafe { std::mem::transmute(tensor) };
1476                return Ok(TensorDyn::from(tensor_i8));
1477            }
1478            Ok(TensorDyn::from(tensor))
1479        } else {
1480            // ── Single-plane path ────────────────────────────────────
1481            let shape = match format.layout() {
1482                PixelLayout::Packed => vec![height, width, format.channels()],
1483                PixelLayout::Planar => vec![format.channels(), height, width],
1484                PixelLayout::SemiPlanar => {
1485                    let total_h = match format {
1486                        PixelFormat::Nv12 => {
1487                            if !height.is_multiple_of(2) {
1488                                return Err(Error::InvalidShape(format!(
1489                                    "NV12 requires even height, got {height}"
1490                                )));
1491                            }
1492                            height * 3 / 2
1493                        }
1494                        PixelFormat::Nv16 => height * 2,
1495                        _ => {
1496                            return Err(Error::InvalidShape(format!(
1497                                "unknown semi-planar height multiplier for {format:?}"
1498                            )))
1499                        }
1500                    };
1501                    vec![total_h, width]
1502                }
1503                _ => {
1504                    return Err(Error::NotSupported(format!(
1505                        "unsupported pixel layout for import_image: {:?}",
1506                        format.layout()
1507                    )));
1508                }
1509            };
1510            let tensor = TensorDyn::from_fd(image.into_fd(), &shape, dtype, None)?;
1511            if tensor.memory() != TensorMemory::Dma {
1512                return Err(Error::NotSupported(format!(
1513                    "import_image requires DMA-backed fd, got {:?}",
1514                    tensor.memory()
1515                )));
1516            }
1517            let mut tensor = tensor.with_format(format)?;
1518            if let Some(s) = image_stride {
1519                tensor.set_row_stride(s)?;
1520            }
1521            if let Some(o) = image_offset {
1522                tensor.set_plane_offset(o);
1523            }
1524            Ok(tensor)
1525        }
1526    }
1527
1528    /// Decode model outputs and draw segmentation masks onto `dst`.
1529    ///
1530    /// This is the primary mask rendering API. The processor decodes via the
1531    /// provided [`Decoder`], selects the optimal rendering path (hybrid
1532    /// CPU+GL or fused GPU), and composites masks onto `dst`.
1533    ///
1534    /// Returns the detected bounding boxes.
1535    pub fn draw_masks(
1536        &mut self,
1537        decoder: &edgefirst_decoder::Decoder,
1538        outputs: &[&TensorDyn],
1539        dst: &mut TensorDyn,
1540        overlay: MaskOverlay<'_>,
1541    ) -> Result<Vec<DetectBox>> {
1542        let mut output_boxes = Vec::with_capacity(100);
1543
1544        // Try proto path first (fused rendering without materializing masks)
1545        let proto_result = decoder
1546            .decode_proto(outputs, &mut output_boxes)
1547            .map_err(|e| Error::Internal(format!("decode_proto: {e:#?}")))?;
1548
1549        if let Some(proto_data) = proto_result {
1550            self.draw_proto_masks(dst, &output_boxes, &proto_data, overlay)?;
1551        } else {
1552            // Detection-only or unsupported model: full decode + render
1553            let mut output_masks = Vec::with_capacity(100);
1554            decoder
1555                .decode(outputs, &mut output_boxes, &mut output_masks)
1556                .map_err(|e| Error::Internal(format!("decode: {e:#?}")))?;
1557            self.draw_decoded_masks(dst, &output_boxes, &output_masks, overlay)?;
1558        }
1559        Ok(output_boxes)
1560    }
1561
1562    /// Decode tracked model outputs and draw segmentation masks onto `dst`.
1563    ///
1564    /// Like [`draw_masks`](Self::draw_masks) but integrates a tracker for
1565    /// maintaining object identities across frames. The tracker runs after
1566    /// NMS but before mask extraction.
1567    ///
1568    /// Returns detected boxes and track info.
1569    #[cfg(feature = "tracker")]
1570    pub fn draw_masks_tracked<TR: edgefirst_tracker::Tracker<DetectBox>>(
1571        &mut self,
1572        decoder: &edgefirst_decoder::Decoder,
1573        tracker: &mut TR,
1574        timestamp: u64,
1575        outputs: &[&TensorDyn],
1576        dst: &mut TensorDyn,
1577        overlay: MaskOverlay<'_>,
1578    ) -> Result<(Vec<DetectBox>, Vec<edgefirst_tracker::TrackInfo>)> {
1579        let mut output_boxes = Vec::with_capacity(100);
1580        let mut output_tracks = Vec::new();
1581
1582        let proto_result = decoder
1583            .decode_proto_tracked(
1584                tracker,
1585                timestamp,
1586                outputs,
1587                &mut output_boxes,
1588                &mut output_tracks,
1589            )
1590            .map_err(|e| Error::Internal(format!("decode_proto_tracked: {e:#?}")))?;
1591
1592        if let Some(proto_data) = proto_result {
1593            self.draw_proto_masks(dst, &output_boxes, &proto_data, overlay)?;
1594        } else {
1595            // Note: decode_proto_tracked returns None for detection-only/ModelPack
1596            // models WITHOUT calling the tracker. The else branch below is the
1597            // first (and only) tracker call for those model types.
1598            let mut output_masks = Vec::with_capacity(100);
1599            decoder
1600                .decode_tracked(
1601                    tracker,
1602                    timestamp,
1603                    outputs,
1604                    &mut output_boxes,
1605                    &mut output_masks,
1606                    &mut output_tracks,
1607                )
1608                .map_err(|e| Error::Internal(format!("decode_tracked: {e:#?}")))?;
1609            self.draw_decoded_masks(dst, &output_boxes, &output_masks, overlay)?;
1610        }
1611        Ok((output_boxes, output_tracks))
1612    }
1613
1614    /// Materialize per-instance segmentation masks from raw prototype data.
1615    ///
1616    /// Computes `mask_coeff @ protos` with sigmoid activation for each detection,
1617    /// producing compact masks at prototype resolution (e.g., 160×160 crops).
1618    /// Mask values are continuous sigmoid confidence outputs quantized to u8
1619    /// (0 = background, 255 = full confidence), NOT binary thresholded.
1620    ///
1621    /// The returned [`Vec<Segmentation>`] can be:
1622    /// - Inspected or exported for analytics, IoU computation, etc.
1623    /// - Passed directly to [`ImageProcessorTrait::draw_decoded_masks`] for
1624    ///   GPU-interpolated rendering.
1625    ///
1626    /// # Performance Note
1627    ///
1628    /// Calling `materialize_masks` + `draw_decoded_masks` separately prevents
1629    /// the HAL from using its internal fused optimization path. For render-only
1630    /// use cases, prefer [`ImageProcessorTrait::draw_proto_masks`] which selects
1631    /// the fastest path automatically (currently 1.6×–27× faster on tested
1632    /// platforms). Use this method when you need access to the intermediate masks.
1633    ///
1634    /// # Errors
1635    ///
1636    /// Returns [`Error::NoConverter`] if the CPU backend is not available.
1637    pub fn materialize_masks(
1638        &self,
1639        detect: &[DetectBox],
1640        proto_data: &ProtoData,
1641        letterbox: Option<[f32; 4]>,
1642        resolution: MaskResolution,
1643    ) -> Result<Vec<Segmentation>> {
1644        let cpu = self.cpu.as_ref().ok_or(Error::NoConverter)?;
1645        match resolution {
1646            MaskResolution::Proto => cpu.materialize_segmentations(detect, proto_data, letterbox),
1647            MaskResolution::Scaled { width, height } => {
1648                cpu.materialize_scaled_segmentations(detect, proto_data, letterbox, width, height)
1649            }
1650        }
1651    }
1652}
1653
impl ImageProcessorTrait for ImageProcessor {
    /// Converts the source image to the destination image format and size. The
    /// image is cropped first, then flipped, then rotated
    ///
    /// Prefer hardware accelerators when available, falling back to CPU if
    /// necessary.
    fn convert(
        &mut self,
        src: &TensorDyn,
        dst: &mut TensorDyn,
        rotation: Rotation,
        flip: Flip,
        crop: Crop,
    ) -> Result<()> {
        // Wall-clock timer used by the trace logs below.
        let start = Instant::now();
        let src_fmt = src.format();
        let dst_fmt = dst.format();
        log::trace!(
            "convert: {src_fmt:?}({:?}/{:?}) → {dst_fmt:?}({:?}/{:?}), \
             rotation={rotation:?}, flip={flip:?}, backend={:?}",
            src.dtype(),
            src.memory(),
            dst.dtype(),
            dst.memory(),
            self.forced_backend,
        );

        // ── Forced backend: no fallback chain ────────────────────────
        // A forced backend must handle the request itself; its error is
        // returned as-is and no other backend is attempted.
        if let Some(forced) = self.forced_backend {
            return match forced {
                ForcedBackend::Cpu => {
                    if let Some(cpu) = self.cpu.as_mut() {
                        let r = cpu.convert(src, dst, rotation, flip, crop);
                        log::trace!(
                            "convert: forced=cpu result={} ({:?})",
                            if r.is_ok() { "ok" } else { "err" },
                            start.elapsed()
                        );
                        return r;
                    }
                    Err(Error::ForcedBackendUnavailable("cpu".into()))
                }
                ForcedBackend::G2d => {
                    #[cfg(target_os = "linux")]
                    if let Some(g2d) = self.g2d.as_mut() {
                        let r = g2d.convert(src, dst, rotation, flip, crop);
                        log::trace!(
                            "convert: forced=g2d result={} ({:?})",
                            if r.is_ok() { "ok" } else { "err" },
                            start.elapsed()
                        );
                        return r;
                    }
                    Err(Error::ForcedBackendUnavailable("g2d".into()))
                }
                ForcedBackend::OpenGl => {
                    #[cfg(target_os = "linux")]
                    #[cfg(feature = "opengl")]
                    if let Some(opengl) = self.opengl.as_mut() {
                        let r = opengl.convert(src, dst, rotation, flip, crop);
                        log::trace!(
                            "convert: forced=opengl result={} ({:?})",
                            if r.is_ok() { "ok" } else { "err" },
                            start.elapsed()
                        );
                        return r;
                    }
                    Err(Error::ForcedBackendUnavailable("opengl".into()))
                }
            };
        }

        // ── Auto fallback chain: OpenGL → G2D → CPU ──────────────────
        // A backend error here means "declined"; fall through to the next.
        #[cfg(target_os = "linux")]
        #[cfg(feature = "opengl")]
        if let Some(opengl) = self.opengl.as_mut() {
            match opengl.convert(src, dst, rotation, flip, crop) {
                Ok(_) => {
                    log::trace!(
                        "convert: auto selected=opengl for {src_fmt:?}→{dst_fmt:?} ({:?})",
                        start.elapsed()
                    );
                    return Ok(());
                }
                Err(e) => {
                    log::trace!("convert: auto opengl declined {src_fmt:?}→{dst_fmt:?}: {e}");
                }
            }
        }

        #[cfg(target_os = "linux")]
        if let Some(g2d) = self.g2d.as_mut() {
            match g2d.convert(src, dst, rotation, flip, crop) {
                Ok(_) => {
                    log::trace!(
                        "convert: auto selected=g2d for {src_fmt:?}→{dst_fmt:?} ({:?})",
                        start.elapsed()
                    );
                    return Ok(());
                }
                Err(e) => {
                    log::trace!("convert: auto g2d declined {src_fmt:?}→{dst_fmt:?}: {e}");
                }
            }
        }

        // CPU is the last resort — its failure is the final error.
        if let Some(cpu) = self.cpu.as_mut() {
            match cpu.convert(src, dst, rotation, flip, crop) {
                Ok(_) => {
                    log::trace!(
                        "convert: auto selected=cpu for {src_fmt:?}→{dst_fmt:?} ({:?})",
                        start.elapsed()
                    );
                    return Ok(());
                }
                Err(e) => {
                    log::trace!("convert: auto cpu failed {src_fmt:?}→{dst_fmt:?}: {e}");
                    return Err(e);
                }
            }
        }
        Err(Error::NoConverter)
    }

    /// Composite pre-materialized segmentation masks onto `dst`.
    ///
    /// Rejects an `overlay.background` that aliases `dst`, un-letterboxes the
    /// detect boxes (keeping segmentation bboxes in sync when there is a 1:1
    /// correspondence) when `overlay.letterbox` is set, then dispatches:
    /// a forced backend is used exclusively; otherwise empty frames prefer
    /// G2D, and populated frames try OpenGL with a CPU fallback.
    fn draw_decoded_masks(
        &mut self,
        dst: &mut TensorDyn,
        detect: &[DetectBox],
        segmentation: &[Segmentation],
        overlay: MaskOverlay<'_>,
    ) -> Result<()> {
        let start = Instant::now();

        if let Some(bg) = overlay.background {
            if bg.aliases(dst) {
                return Err(Error::AliasedBuffers(
                    "background must not reference the same buffer as dst".to_string(),
                ));
            }
        }

        // Un-letterbox detect boxes and segmentation bboxes for rendering when
        // a letterbox was applied to prepare the model input.
        let lb_boxes: Vec<DetectBox>;
        let lb_segs: Vec<Segmentation>;
        let (detect, segmentation) = if let Some(lb) = overlay.letterbox {
            lb_boxes = detect.iter().map(|&d| unletter_bbox(d, lb)).collect();
            // Keep segmentation bboxes in sync with the transformed detect boxes
            // when we have a 1:1 correspondence (instance segmentation).
            lb_segs = if segmentation.len() == lb_boxes.len() {
                segmentation
                    .iter()
                    .zip(lb_boxes.iter())
                    .map(|(s, d)| Segmentation {
                        xmin: d.bbox.xmin,
                        ymin: d.bbox.ymin,
                        xmax: d.bbox.xmax,
                        ymax: d.bbox.ymax,
                        segmentation: s.segmentation.clone(),
                    })
                    .collect()
            } else {
                segmentation.to_vec()
            };
            (lb_boxes.as_slice(), lb_segs.as_slice())
        } else {
            (detect, segmentation)
        };
        #[cfg(target_os = "linux")]
        let is_empty_frame = detect.is_empty() && segmentation.is_empty();

        // ── Forced backend: no fallback chain ────────────────────────
        if let Some(forced) = self.forced_backend {
            return match forced {
                ForcedBackend::Cpu => {
                    if let Some(cpu) = self.cpu.as_mut() {
                        return cpu.draw_decoded_masks(dst, detect, segmentation, overlay);
                    }
                    Err(Error::ForcedBackendUnavailable("cpu".into()))
                }
                ForcedBackend::G2d => {
                    // G2D can only produce empty frames (clear / bg blit).
                    // For populated frames it has no rasterizer — fail loudly.
                    #[cfg(target_os = "linux")]
                    if let Some(g2d) = self.g2d.as_mut() {
                        return g2d.draw_decoded_masks(dst, detect, segmentation, overlay);
                    }
                    Err(Error::ForcedBackendUnavailable("g2d".into()))
                }
                ForcedBackend::OpenGl => {
                    // GL handles background natively via GPU blit, and now
                    // actively clears when there is no background.
                    #[cfg(target_os = "linux")]
                    #[cfg(feature = "opengl")]
                    if let Some(opengl) = self.opengl.as_mut() {
                        return opengl.draw_decoded_masks(dst, detect, segmentation, overlay);
                    }
                    Err(Error::ForcedBackendUnavailable("opengl".into()))
                }
            };
        }

        // ── Auto dispatch ──────────────────────────────────────────
        // Empty frames prefer G2D when available — a single g2d_clear or
        // g2d_blit is the cheapest HW path to produce the correct output
        // and avoids spinning up the GL pipeline every zero-detection
        // frame in a triple-buffered display loop.
        #[cfg(target_os = "linux")]
        if is_empty_frame {
            if let Some(g2d) = self.g2d.as_mut() {
                match g2d.draw_decoded_masks(dst, detect, segmentation, overlay) {
                    Ok(_) => {
                        log::trace!(
                            "draw_decoded_masks empty frame via g2d in {:?}",
                            start.elapsed()
                        );
                        return Ok(());
                    }
                    Err(e) => log::trace!("g2d empty-frame path unavailable: {e:?}"),
                }
            }
        }

        // Populated frames (or G2D unavailable): GL first, CPU fallback.
        // Both backends now own their own base-layer handling (bg blit
        // or clear), so we hand the overlay through untouched.
        #[cfg(target_os = "linux")]
        #[cfg(feature = "opengl")]
        if let Some(opengl) = self.opengl.as_mut() {
            log::trace!(
                "draw_decoded_masks started with opengl in {:?}",
                start.elapsed()
            );
            match opengl.draw_decoded_masks(dst, detect, segmentation, overlay) {
                Ok(_) => {
                    log::trace!("draw_decoded_masks with opengl in {:?}", start.elapsed());
                    return Ok(());
                }
                Err(e) => {
                    log::trace!("draw_decoded_masks didn't work with opengl: {e:?}")
                }
            }
        }

        log::trace!(
            "draw_decoded_masks started with cpu in {:?}",
            start.elapsed()
        );
        if let Some(cpu) = self.cpu.as_mut() {
            match cpu.draw_decoded_masks(dst, detect, segmentation, overlay) {
                Ok(_) => {
                    log::trace!("draw_decoded_masks with cpu in {:?}", start.elapsed());
                    return Ok(());
                }
                Err(e) => {
                    log::trace!("draw_decoded_masks didn't work with cpu: {e:?}");
                    return Err(e);
                }
            }
        }
        Err(Error::NoConverter)
    }

    /// Render masks directly from prototype data onto `dst`.
    ///
    /// Rejects an `overlay.background` that aliases `dst` and un-letterboxes
    /// the boxes used for rendering (the original model-space `detect` coords
    /// are still fed to mask materialization). Dispatch: forced backend when
    /// set; otherwise G2D for empty frames, then a hybrid CPU-materialize +
    /// GL-overlay path, then pure CPU.
    fn draw_proto_masks(
        &mut self,
        dst: &mut TensorDyn,
        detect: &[DetectBox],
        proto_data: &ProtoData,
        overlay: MaskOverlay<'_>,
    ) -> Result<()> {
        let start = Instant::now();

        if let Some(bg) = overlay.background {
            if bg.aliases(dst) {
                return Err(Error::AliasedBuffers(
                    "background must not reference the same buffer as dst".to_string(),
                ));
            }
        }

        // Un-letterbox detect boxes for rendering when a letterbox was applied
        // to prepare the model input.  The original `detect` coords are still
        // passed to `materialize_segmentations` (which needs model-space coords
        // to correctly crop the proto tensor) alongside `overlay.letterbox` so
        // it can emit `Segmentation` structs in output-image space.
        let lb_boxes: Vec<DetectBox>;
        let render_detect = if let Some(lb) = overlay.letterbox {
            lb_boxes = detect.iter().map(|&d| unletter_bbox(d, lb)).collect();
            lb_boxes.as_slice()
        } else {
            detect
        };
        #[cfg(target_os = "linux")]
        let is_empty_frame = detect.is_empty();

        // ── Forced backend: no fallback chain ────────────────────────
        if let Some(forced) = self.forced_backend {
            return match forced {
                ForcedBackend::Cpu => {
                    if let Some(cpu) = self.cpu.as_mut() {
                        return cpu.draw_proto_masks(dst, render_detect, proto_data, overlay);
                    }
                    Err(Error::ForcedBackendUnavailable("cpu".into()))
                }
                ForcedBackend::G2d => {
                    #[cfg(target_os = "linux")]
                    if let Some(g2d) = self.g2d.as_mut() {
                        return g2d.draw_proto_masks(dst, render_detect, proto_data, overlay);
                    }
                    Err(Error::ForcedBackendUnavailable("g2d".into()))
                }
                ForcedBackend::OpenGl => {
                    #[cfg(target_os = "linux")]
                    #[cfg(feature = "opengl")]
                    if let Some(opengl) = self.opengl.as_mut() {
                        return opengl.draw_proto_masks(dst, render_detect, proto_data, overlay);
                    }
                    Err(Error::ForcedBackendUnavailable("opengl".into()))
                }
            };
        }

        // ── Auto dispatch ──────────────────────────────────────────
        // Empty frames: prefer G2D — cheapest HW path (clear or bg blit).
        #[cfg(target_os = "linux")]
        if is_empty_frame {
            if let Some(g2d) = self.g2d.as_mut() {
                match g2d.draw_proto_masks(dst, render_detect, proto_data, overlay) {
                    Ok(_) => {
                        log::trace!(
                            "draw_proto_masks empty frame via g2d in {:?}",
                            start.elapsed()
                        );
                        return Ok(());
                    }
                    Err(e) => log::trace!("g2d empty-frame path unavailable: {e:?}"),
                }
            }
        }

        // Hybrid path: CPU materialize + GL overlay (benchmarked faster than
        // full-GPU draw_proto_masks on all tested platforms: 27× on imx8mp,
        // 4× on imx95, 2.5× on rpi5, 1.6× on x86).
        // GL owns its own bg-blit / glClear — we pass the overlay through.
        #[cfg(target_os = "linux")]
        #[cfg(feature = "opengl")]
        if let Some(opengl) = self.opengl.as_mut() {
            let Some(cpu) = self.cpu.as_ref() else {
                return Err(Error::Internal(
                    "draw_proto_masks requires CPU backend for hybrid path".into(),
                ));
            };
            log::trace!(
                "draw_proto_masks started with hybrid (cpu+opengl) in {:?}",
                start.elapsed()
            );
            let segmentation =
                cpu.materialize_segmentations(detect, proto_data, overlay.letterbox)?;
            match opengl.draw_decoded_masks(dst, render_detect, &segmentation, overlay) {
                Ok(_) => {
                    log::trace!(
                        "draw_proto_masks with hybrid (cpu+opengl) in {:?}",
                        start.elapsed()
                    );
                    return Ok(());
                }
                Err(e) => {
                    log::trace!("draw_proto_masks hybrid path failed, falling back to cpu: {e:?}");
                }
            }
        }

        let Some(cpu) = self.cpu.as_mut() else {
            return Err(Error::Internal(
                "draw_proto_masks requires CPU backend for fallback path".into(),
            ));
        };
        log::trace!("draw_proto_masks started with cpu in {:?}", start.elapsed());
        cpu.draw_proto_masks(dst, render_detect, proto_data, overlay)
    }

    /// Set the per-class mask colors used by subsequent draw calls.
    ///
    /// A forced backend receives the palette exclusively (G2D is rejected as
    /// it does not support rendering); otherwise OpenGL is tried first with a
    /// CPU fallback.
    fn set_class_colors(&mut self, colors: &[[u8; 4]]) -> Result<()> {
        let start = Instant::now();

        // ── Forced backend: no fallback chain ────────────────────────
        if let Some(forced) = self.forced_backend {
            return match forced {
                ForcedBackend::Cpu => {
                    if let Some(cpu) = self.cpu.as_mut() {
                        return cpu.set_class_colors(colors);
                    }
                    Err(Error::ForcedBackendUnavailable("cpu".into()))
                }
                ForcedBackend::G2d => Err(Error::NotSupported(
                    "g2d does not support set_class_colors".into(),
                )),
                ForcedBackend::OpenGl => {
                    #[cfg(target_os = "linux")]
                    #[cfg(feature = "opengl")]
                    if let Some(opengl) = self.opengl.as_mut() {
                        return opengl.set_class_colors(colors);
                    }
                    Err(Error::ForcedBackendUnavailable("opengl".into()))
                }
            };
        }

        // skip G2D as it doesn't support rendering to image

        #[cfg(target_os = "linux")]
        #[cfg(feature = "opengl")]
        if let Some(opengl) = self.opengl.as_mut() {
            log::trace!("image started with opengl in {:?}", start.elapsed());
            match opengl.set_class_colors(colors) {
                Ok(_) => {
                    log::trace!("colors set with opengl in {:?}", start.elapsed());
                    return Ok(());
                }
                Err(e) => {
                    log::trace!("colors didn't set with opengl: {e:?}")
                }
            }
        }
        log::trace!("image started with cpu in {:?}", start.elapsed());
        if let Some(cpu) = self.cpu.as_mut() {
            match cpu.set_class_colors(colors) {
                Ok(_) => {
                    log::trace!("colors set with cpu in {:?}", start.elapsed());
                    return Ok(());
                }
                Err(e) => {
                    log::trace!("colors didn't set with cpu: {e:?}");
                    return Err(e);
                }
            }
        }
        Err(Error::NoConverter)
    }
}
2093
2094// ---------------------------------------------------------------------------
2095// Image loading / saving helpers
2096// ---------------------------------------------------------------------------
2097
2098/// Read EXIF orientation from raw EXIF bytes and return (Rotation, Flip).
2099fn read_exif_orientation(exif_bytes: &[u8]) -> (Rotation, Flip) {
2100    let exifreader = exif::Reader::new();
2101    let Ok(exif_) = exifreader.read_raw(exif_bytes.to_vec()) else {
2102        return (Rotation::None, Flip::None);
2103    };
2104    let Some(orientation) = exif_.get_field(exif::Tag::Orientation, exif::In::PRIMARY) else {
2105        return (Rotation::None, Flip::None);
2106    };
2107    match orientation.value.get_uint(0) {
2108        Some(1) => (Rotation::None, Flip::None),
2109        Some(2) => (Rotation::None, Flip::Horizontal),
2110        Some(3) => (Rotation::Rotate180, Flip::None),
2111        Some(4) => (Rotation::Rotate180, Flip::Horizontal),
2112        Some(5) => (Rotation::Clockwise90, Flip::Horizontal),
2113        Some(6) => (Rotation::Clockwise90, Flip::None),
2114        Some(7) => (Rotation::CounterClockwise90, Flip::Horizontal),
2115        Some(8) => (Rotation::CounterClockwise90, Flip::None),
2116        Some(v) => {
2117            log::warn!("broken orientation EXIF value: {v}");
2118            (Rotation::None, Flip::None)
2119        }
2120        None => (Rotation::None, Flip::None),
2121    }
2122}
2123
2124/// Map a [`PixelFormat`] to the zune-jpeg `ColorSpace` for decoding.
2125/// Returns `None` for formats that the JPEG decoder cannot output directly.
2126fn pixelfmt_to_colorspace(fmt: PixelFormat) -> Option<ColorSpace> {
2127    match fmt {
2128        PixelFormat::Rgb => Some(ColorSpace::RGB),
2129        PixelFormat::Rgba => Some(ColorSpace::RGBA),
2130        PixelFormat::Grey => Some(ColorSpace::Luma),
2131        _ => None,
2132    }
2133}
2134
2135/// Map a zune-jpeg `ColorSpace` to a [`PixelFormat`].
2136fn colorspace_to_pixelfmt(cs: ColorSpace) -> Option<PixelFormat> {
2137    match cs {
2138        ColorSpace::RGB => Some(PixelFormat::Rgb),
2139        ColorSpace::RGBA => Some(PixelFormat::Rgba),
2140        ColorSpace::Luma => Some(PixelFormat::Grey),
2141        _ => None,
2142    }
2143}
2144
/// Load a JPEG image from raw bytes and return a [`TensorDyn`].
///
/// `format` requests the output pixel format; `None` asks the decoder for RGB
/// and returns whatever format the decoder actually reports. `memory` selects
/// the backing memory of the returned tensor. Any EXIF orientation present in
/// the stream is applied with a final rotate/flip pass.
fn load_jpeg(
    image: &[u8],
    format: Option<PixelFormat>,
    memory: Option<TensorMemory>,
) -> Result<TensorDyn> {
    // Ask the decoder to emit the requested colorspace directly when possible.
    let colour = match format {
        Some(f) => pixelfmt_to_colorspace(f)
            .ok_or_else(|| Error::NotSupported(format!("Unsupported image format {f:?}")))?,
        None => ColorSpace::RGB,
    };
    let options = DecoderOptions::default().jpeg_set_out_colorspace(colour);
    let mut decoder = JpegDecoder::new_with_options(image, options);
    // Header pass only: yields dimensions, EXIF, and the effective output
    // colorspace without decoding pixel data yet.
    decoder.decode_headers()?;

    let image_info = decoder.info().ok_or(Error::Internal(
        "JPEG did not return decoded image info".to_string(),
    ))?;

    // Use the colorspace the decoder actually reports; if it differs from the
    // requested format a CPU conversion pass is added below.
    let converted_cs = decoder
        .get_output_colorspace()
        .ok_or(Error::Internal("No output colorspace".to_string()))?;

    let converted_fmt = colorspace_to_pixelfmt(converted_cs).ok_or(Error::NotSupported(
        "Unsupported JPEG decoder output".to_string(),
    ))?;

    let dest_fmt = format.unwrap_or(converted_fmt);

    // EXIF orientation; missing or unreadable EXIF means identity transform.
    let (rotation, flip) = decoder
        .exif()
        .map(|x| read_exif_orientation(x))
        .unwrap_or((Rotation::None, Flip::None));

    let w = image_info.width as usize;
    let h = image_info.height as usize;

    if (rotation, flip) == (Rotation::None, Flip::None) {
        // Fast path: no orientation transform — decode straight into the
        // final tensor, staging through a CPU-memory tensor only when the
        // decoder could not emit `dest_fmt` directly.
        let mut img = Tensor::<u8>::image(w, h, dest_fmt, memory)?;

        if converted_fmt != dest_fmt {
            let tmp = Tensor::<u8>::image(w, h, converted_fmt, Some(TensorMemory::Mem))?;
            decoder.decode_into(&mut tmp.map()?)?;
            CPUProcessor::convert_format_pf(&tmp, &mut img, converted_fmt, dest_fmt)?;
            return Ok(TensorDyn::from(img));
        }
        decoder.decode_into(&mut img.map()?)?;
        return Ok(TensorDyn::from(img));
    }

    // Orientation transform required: decode (and convert format if needed)
    // into a CPU-memory staging tensor, then rotate/flip into the requested
    // memory type.
    let mut tmp = Tensor::<u8>::image(w, h, dest_fmt, Some(TensorMemory::Mem))?;

    if converted_fmt != dest_fmt {
        let tmp2 = Tensor::<u8>::image(w, h, converted_fmt, Some(TensorMemory::Mem))?;
        decoder.decode_into(&mut tmp2.map()?)?;
        CPUProcessor::convert_format_pf(&tmp2, &mut tmp, converted_fmt, dest_fmt)?;
    } else {
        decoder.decode_into(&mut tmp.map()?)?;
    }

    rotate_flip_to_dyn(&tmp, dest_fmt, rotation, flip, memory)
}
2207
2208/// Load a PNG image from raw bytes and return a [`TensorDyn`].
2209fn load_png(
2210    image: &[u8],
2211    format: Option<PixelFormat>,
2212    memory: Option<TensorMemory>,
2213) -> Result<TensorDyn> {
2214    let fmt = format.unwrap_or(PixelFormat::Rgb);
2215    let alpha = match fmt {
2216        PixelFormat::Rgb => false,
2217        PixelFormat::Rgba => true,
2218        _ => {
2219            return Err(Error::NotImplemented(
2220                "Unsupported image format".to_string(),
2221            ));
2222        }
2223    };
2224
2225    let options = DecoderOptions::default()
2226        .png_set_add_alpha_channel(alpha)
2227        .png_set_decode_animated(false);
2228    let mut decoder = PngDecoder::new_with_options(image, options);
2229    decoder.decode_headers()?;
2230    let image_info = decoder.get_info().ok_or(Error::Internal(
2231        "PNG did not return decoded image info".to_string(),
2232    ))?;
2233
2234    let (rotation, flip) = image_info
2235        .exif
2236        .as_ref()
2237        .map(|x| read_exif_orientation(x))
2238        .unwrap_or((Rotation::None, Flip::None));
2239
2240    if (rotation, flip) == (Rotation::None, Flip::None) {
2241        let img = Tensor::<u8>::image(image_info.width, image_info.height, fmt, memory)?;
2242        decoder.decode_into(&mut img.map()?)?;
2243        return Ok(TensorDyn::from(img));
2244    }
2245
2246    let tmp = Tensor::<u8>::image(
2247        image_info.width,
2248        image_info.height,
2249        fmt,
2250        Some(TensorMemory::Mem),
2251    )?;
2252    decoder.decode_into(&mut tmp.map()?)?;
2253
2254    rotate_flip_to_dyn(&tmp, fmt, rotation, flip, memory)
2255}
2256
2257/// Load an image from raw bytes (JPEG or PNG) and return a [`TensorDyn`].
2258///
2259/// The optional `format` specifies the desired output pixel format (e.g.,
2260/// [`PixelFormat::Rgb`], [`PixelFormat::Rgba`]); if `None`, the native
2261/// format of the file is used (typically RGB for JPEG).
2262///
2263/// # Examples
2264/// ```rust
2265/// use edgefirst_image::load_image;
2266/// use edgefirst_tensor::PixelFormat;
2267/// # fn main() -> Result<(), edgefirst_image::Error> {
2268/// let jpeg = include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/../../testdata/zidane.jpg"));
2269/// let img = load_image(jpeg, Some(PixelFormat::Rgb), None)?;
2270/// assert_eq!(img.width(), Some(1280));
2271/// assert_eq!(img.height(), Some(720));
2272/// # Ok(())
2273/// # }
2274/// ```
2275pub fn load_image(
2276    image: &[u8],
2277    format: Option<PixelFormat>,
2278    memory: Option<TensorMemory>,
2279) -> Result<TensorDyn> {
2280    if let Ok(i) = load_jpeg(image, format, memory) {
2281        return Ok(i);
2282    }
2283    if let Ok(i) = load_png(image, format, memory) {
2284        return Ok(i);
2285    }
2286    Err(Error::NotSupported(
2287        "Could not decode as jpeg or png".to_string(),
2288    ))
2289}
2290
2291/// Save a [`TensorDyn`] image as a JPEG file.
2292///
2293/// Only packed RGB and RGBA formats are supported.
2294pub fn save_jpeg(tensor: &TensorDyn, path: impl AsRef<std::path::Path>, quality: u8) -> Result<()> {
2295    let t = tensor.as_u8().ok_or(Error::UnsupportedFormat(
2296        "save_jpeg requires u8 tensor".to_string(),
2297    ))?;
2298    let fmt = t.format().ok_or(Error::NotAnImage)?;
2299    if fmt.layout() != PixelLayout::Packed {
2300        return Err(Error::NotImplemented(
2301            "Saving planar images is not supported".to_string(),
2302        ));
2303    }
2304
2305    let colour = match fmt {
2306        PixelFormat::Rgb => jpeg_encoder::ColorType::Rgb,
2307        PixelFormat::Rgba => jpeg_encoder::ColorType::Rgba,
2308        _ => {
2309            return Err(Error::NotImplemented(
2310                "Unsupported image format for saving".to_string(),
2311            ));
2312        }
2313    };
2314
2315    let w = t.width().ok_or(Error::NotAnImage)?;
2316    let h = t.height().ok_or(Error::NotAnImage)?;
2317    let encoder = jpeg_encoder::Encoder::new_file(path, quality)?;
2318    let tensor_map = t.map()?;
2319
2320    encoder.encode(&tensor_map, w as u16, h as u16, colour)?;
2321
2322    Ok(())
2323}
2324
/// Scope timer that records its construction time and logs the elapsed
/// duration (at `trace` level) when dropped.
pub(crate) struct FunctionTimer<T: Display> {
    // Label included in the log line, typically a function name.
    name: T,
    // Instant captured at construction; elapsed time is measured on drop.
    start: std::time::Instant,
}
2329
2330impl<T: Display> FunctionTimer<T> {
2331    pub fn new(name: T) -> Self {
2332        Self {
2333            name,
2334            start: std::time::Instant::now(),
2335        }
2336    }
2337}
2338
2339impl<T: Display> Drop for FunctionTimer<T> {
2340    fn drop(&mut self) {
2341        log::trace!("{} elapsed: {:?}", self.name, self.start.elapsed())
2342    }
2343}
2344
/// Default palette of 20 RGBA colors with components normalized to
/// `0.0..=1.0` and a fixed alpha of 0.7.
const DEFAULT_COLORS: [[f32; 4]; 20] = [
    [0., 1., 0., 0.7],
    [1., 0.5568628, 0., 0.7],
    [0.25882353, 0.15294118, 0.13333333, 0.7],
    [0.8, 0.7647059, 0.78039216, 0.7],
    [0.3137255, 0.3137255, 0.3137255, 0.7],
    [0.1411765, 0.3098039, 0.1215686, 0.7],
    [1., 0.95686275, 0.5137255, 0.7],
    [0.3529412, 0.32156863, 0., 0.7],
    [0.4235294, 0.6235294, 0.6509804, 0.7],
    [0.5098039, 0.5098039, 0.7294118, 0.7],
    [0.00784314, 0.18823529, 0.29411765, 0.7],
    [0.0, 0.2706, 1.0, 0.7],
    [0.0, 0.0, 0.0, 0.7],
    [0.0, 0.5, 0.0, 0.7],
    [1.0, 0.0, 0.0, 0.7],
    [0.0, 0.0, 1.0, 0.7],
    [1.0, 0.5, 0.5, 0.7],
    [0.1333, 0.5451, 0.1333, 0.7],
    [0.1176, 0.4118, 0.8235, 0.7],
    [1., 1., 1., 0.7],
];
2367
2368const fn denorm<const M: usize, const N: usize>(a: [[f32; M]; N]) -> [[u8; M]; N] {
2369    let mut result = [[0; M]; N];
2370    let mut i = 0;
2371    while i < N {
2372        let mut j = 0;
2373        while j < M {
2374            result[i][j] = (a[i][j] * 255.0).round() as u8;
2375            j += 1;
2376        }
2377        i += 1;
2378    }
2379    result
2380}
2381
// Same palette as DEFAULT_COLORS with components denormalized to 0..=255.
const DEFAULT_COLORS_U8: [[u8; 4]; 20] = denorm(DEFAULT_COLORS);
2383
#[cfg(test)]
#[cfg_attr(coverage_nightly, coverage(off))]
mod alignment_tests {
    //! Tests for the GPU pitch-alignment helpers defined in this file:
    //! `align_width_for_gpu_pitch`, `checked_num_integer_lcm`, and
    //! `primary_plane_bpp`.
    use super::*;

    #[test]
    fn align_width_rgba8_common_widths() {
        // RGBA8 (bpp=4, lcm(64,4)=64, so width must round to multiple of 16 px).
        assert_eq!(align_width_for_gpu_pitch(640, 4), 640); // 2560 byte pitch — already aligned
        assert_eq!(align_width_for_gpu_pitch(1280, 4), 1280); // 5120
        assert_eq!(align_width_for_gpu_pitch(1920, 4), 1920); // 7680
        assert_eq!(align_width_for_gpu_pitch(3840, 4), 3840); // 15360
        // crowd.png case from the imx95 investigation:
        assert_eq!(align_width_for_gpu_pitch(3004, 4), 3008); // 12016 → 12032
        assert_eq!(align_width_for_gpu_pitch(3000, 4), 3008); // 12000 → 12032
        assert_eq!(align_width_for_gpu_pitch(17, 4), 32); // 68 → 128
        assert_eq!(align_width_for_gpu_pitch(1, 4), 16); // 4 → 64
    }

    #[test]
    fn align_width_rgb888_packed() {
        // RGB888 (bpp=3, lcm(64,3)=192, so width must round to multiple of 64 px).
        assert_eq!(align_width_for_gpu_pitch(64, 3), 64); // 192 byte pitch
        assert_eq!(align_width_for_gpu_pitch(640, 3), 640); // 1920
        assert_eq!(align_width_for_gpu_pitch(1, 3), 64); // 3 → 192
        assert_eq!(align_width_for_gpu_pitch(65, 3), 128); // 195 → 384
        // Verify the rounded width × bpp is a clean multiple of the LCM.
        for w in [3004usize, 1281, 100, 17] {
            let padded = align_width_for_gpu_pitch(w, 3);
            assert!(padded >= w);
            assert_eq!((padded * 3) % 64, 0);
            assert_eq!((padded * 3) % 3, 0);
        }
    }

    #[test]
    fn align_width_grey_u8() {
        // Grey (bpp=1, lcm(64,1)=64, so width must round to multiple of 64 px).
        assert_eq!(align_width_for_gpu_pitch(64, 1), 64);
        assert_eq!(align_width_for_gpu_pitch(640, 1), 640);
        assert_eq!(align_width_for_gpu_pitch(1, 1), 64);
        assert_eq!(align_width_for_gpu_pitch(65, 1), 128);
    }

    #[test]
    fn align_width_zero_inputs() {
        // Degenerate inputs must pass through unchanged rather than panic.
        assert_eq!(align_width_for_gpu_pitch(0, 4), 0);
        assert_eq!(align_width_for_gpu_pitch(640, 0), 640);
    }

    #[test]
    fn align_width_never_returns_smaller_than_input() {
        // Spot-check the "returned width >= input width" contract across a
        // range of values that would previously have hit `width * bpp`
        // overflow paths.
        for &bpp in &[1usize, 2, 3, 4, 8] {
            for &w in &[
                1usize,
                17,
                64,
                65,
                100,
                1280,
                1281,
                1920,
                3004,
                3072,
                3840,
                usize::MAX / 8,
                usize::MAX / 4,
                usize::MAX / 2,
                usize::MAX - 1,
                usize::MAX,
            ] {
                let aligned = align_width_for_gpu_pitch(w, bpp);
                assert!(
                    aligned >= w,
                    "align_width_for_gpu_pitch({w}, {bpp}) = {aligned} < {w}"
                );
            }
        }
    }

    #[test]
    fn align_width_overflow_returns_unaligned_not_smaller() {
        // For width values close to usize::MAX, padding up would wrap. The
        // function must return the original width rather than wrapping or
        // panicking. A pre-aligned width round-trips unchanged even at the
        // extreme.
        let aligned_extreme = usize::MAX - 15; // 16-pixel boundary for RGBA8
        assert_eq!(
            align_width_for_gpu_pitch(aligned_extreme, 4),
            aligned_extreme
        );
        // A misaligned extreme value cannot be rounded up — the function
        // returns the original.
        let misaligned_extreme = usize::MAX - 1;
        let result = align_width_for_gpu_pitch(misaligned_extreme, 4);
        assert!(
            result == misaligned_extreme || result >= misaligned_extreme,
            "extreme misaligned width must not be rounded down to {result}"
        );
    }

    #[test]
    fn checked_lcm_basic_and_overflow() {
        assert_eq!(checked_num_integer_lcm(64, 4), Some(64));
        assert_eq!(checked_num_integer_lcm(64, 3), Some(192));
        assert_eq!(checked_num_integer_lcm(64, 1), Some(64));
        assert_eq!(checked_num_integer_lcm(0, 4), Some(0));
        assert_eq!(checked_num_integer_lcm(64, 0), Some(0));
        // Coprime values whose product exceeds usize::MAX must return None.
        assert_eq!(
            checked_num_integer_lcm(usize::MAX, usize::MAX - 1),
            None,
            "coprime extreme values must overflow detect, not panic"
        );
    }

    #[test]
    fn primary_plane_bpp_known_formats() {
        // Packed formats use channels × elem_size.
        assert_eq!(primary_plane_bpp(PixelFormat::Rgba, 1), Some(4));
        assert_eq!(primary_plane_bpp(PixelFormat::Bgra, 1), Some(4));
        assert_eq!(primary_plane_bpp(PixelFormat::Rgb, 1), Some(3));
        assert_eq!(primary_plane_bpp(PixelFormat::Grey, 1), Some(1));
        // Semi-planar (NV12) reports the luma plane's bpp.
        assert_eq!(primary_plane_bpp(PixelFormat::Nv12, 1), Some(1));
    }
}
2514
2515#[cfg(test)]
2516#[cfg_attr(coverage_nightly, coverage(off))]
2517mod image_tests {
2518    use super::*;
2519    use crate::{CPUProcessor, Rotation};
2520    #[cfg(target_os = "linux")]
2521    use edgefirst_tensor::is_dma_available;
2522    use edgefirst_tensor::{TensorMapTrait, TensorMemory, TensorTrait};
2523    use image::buffer::ConvertBuffer;
2524
2525    /// Test helper: call `ImageProcessorTrait::convert()` on two `TensorDyn`s
2526    /// by going through the `TensorDyn` API.
2527    ///
2528    /// Returns the `(src_image, dst_image)` reconstructed from the TensorDyn
2529    /// round-trip so the caller can feed them to `compare_images` etc.
2530    fn convert_img(
2531        proc: &mut dyn ImageProcessorTrait,
2532        src: TensorDyn,
2533        dst: TensorDyn,
2534        rotation: Rotation,
2535        flip: Flip,
2536        crop: Crop,
2537    ) -> (Result<()>, TensorDyn, TensorDyn) {
2538        let src_fourcc = src.format().unwrap();
2539        let dst_fourcc = dst.format().unwrap();
2540        let src_dyn = src;
2541        let mut dst_dyn = dst;
2542        let result = proc.convert(&src_dyn, &mut dst_dyn, rotation, flip, crop);
2543        let src_back = {
2544            let mut __t = src_dyn.into_u8().unwrap();
2545            __t.set_format(src_fourcc).unwrap();
2546            TensorDyn::from(__t)
2547        };
2548        let dst_back = {
2549            let mut __t = dst_dyn.into_u8().unwrap();
2550            __t.set_format(dst_fourcc).unwrap();
2551            TensorDyn::from(__t)
2552        };
2553        (result, src_back, dst_back)
2554    }
2555
    // Runs once when the test binary loads: install an env_logger that
    // defaults to `info` so test output includes the crate's log lines.
    #[ctor::ctor]
    fn init() {
        env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
    }
2560
    /// Expands to the name of the enclosing function as a `&'static str`,
    /// by taking `type_name` of a nested item and trimming the trailing
    /// `::f` plus the leading module path.
    macro_rules! function {
        () => {{
            fn f() {}
            fn type_name_of<T>(_: T) -> &'static str {
                std::any::type_name::<T>()
            }
            let name = type_name_of(f);

            // `name` ends in "::f": drop that suffix, then cut everything
            // up to the last path separator to keep just the function name.
            match &name[..name.len() - 3].rfind(':') {
                Some(pos) => &name[pos + 1..name.len() - 3],
                None => &name[..name.len() - 3],
            }
        }};
    }
2576
    // Each step below reuses the previous `crop` value, so the rect not
    // being replaced keeps its setting from the step before.
    #[test]
    fn test_invalid_crop() {
        // 100x100 images; crop rectangles are validated against these bounds.
        let src = TensorDyn::image(100, 100, PixelFormat::Rgb, DType::U8, None).unwrap();
        let dst = TensorDyn::image(100, 100, PixelFormat::Rgb, DType::U8, None).unwrap();

        // Both rects exceed the 100x100 bounds.
        let crop = Crop::new()
            .with_src_rect(Some(Rect::new(50, 50, 60, 60)))
            .with_dst_rect(Some(Rect::new(0, 0, 150, 150)));

        let result = crop.check_crop_dyn(&src, &dst);
        assert!(matches!(
            result,
            Err(Error::CropInvalid(e)) if e.starts_with("Dest and Src crop invalid")
        ));

        // Fix only the src rect; the oversized dst rect remains invalid.
        let crop = crop.with_src_rect(Some(Rect::new(0, 0, 10, 10)));
        let result = crop.check_crop_dyn(&src, &dst);
        assert!(matches!(
            result,
            Err(Error::CropInvalid(e)) if e.starts_with("Dest crop invalid")
        ));

        // Fix the dst rect and break the src rect again.
        let crop = crop
            .with_src_rect(Some(Rect::new(50, 50, 60, 60)))
            .with_dst_rect(Some(Rect::new(0, 0, 50, 50)));
        let result = crop.check_crop_dyn(&src, &dst);
        assert!(matches!(
            result,
            Err(Error::CropInvalid(e)) if e.starts_with("Src crop invalid")
        ));

        // Both rects now fit within the image bounds.
        let crop = crop.with_src_rect(Some(Rect::new(50, 50, 50, 50)));

        let result = crop.check_crop_dyn(&src, &dst);
        assert!(result.is_ok());
    }
2613
2614    #[test]
2615    fn test_invalid_tensor_format() -> Result<(), Error> {
2616        // 4D tensor cannot be set to a 3-channel pixel format
2617        let mut tensor = Tensor::<u8>::new(&[720, 1280, 4, 1], None, None)?;
2618        let result = tensor.set_format(PixelFormat::Rgb);
2619        assert!(result.is_err(), "4D tensor should reject set_format");
2620
2621        // Tensor with wrong channel count for the format
2622        let mut tensor = Tensor::<u8>::new(&[720, 1280, 4], None, None)?;
2623        let result = tensor.set_format(PixelFormat::Rgb);
2624        assert!(result.is_err(), "4-channel tensor should reject RGB format");
2625
2626        Ok(())
2627    }
2628
2629    #[test]
2630    fn test_invalid_image_file() -> Result<(), Error> {
2631        let result = crate::load_image(&[123; 5000], None, None);
2632        assert!(matches!(
2633            result,
2634            Err(Error::NotSupported(e)) if e == "Could not decode as jpeg or png"));
2635
2636        Ok(())
2637    }
2638
2639    #[test]
2640    fn test_invalid_jpeg_format() -> Result<(), Error> {
2641        let result = crate::load_image(&[123; 5000], Some(PixelFormat::Yuyv), None);
2642        assert!(matches!(
2643            result,
2644            Err(Error::NotSupported(e)) if e == "Could not decode as jpeg or png"));
2645
2646        Ok(())
2647    }
2648
    // End-to-end round trip: load JPEG → resize with the CPU backend →
    // save as JPEG → reload and verify the size and the saved file's
    // native RGB format.
    #[test]
    fn test_load_resize_save() {
        let file = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../testdata/zidane.jpg"
        ));
        let img = crate::load_image(file, Some(PixelFormat::Rgba), None).unwrap();
        assert_eq!(img.width(), Some(1280));
        assert_eq!(img.height(), Some(720));

        let dst = TensorDyn::image(640, 360, PixelFormat::Rgba, DType::U8, None).unwrap();
        let mut converter = CPUProcessor::new();
        let (result, _img, dst) = convert_img(
            &mut converter,
            img,
            dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();
        assert_eq!(dst.width(), Some(640));
        assert_eq!(dst.height(), Some(360));

        crate::save_jpeg(&dst, "zidane_resized.jpg", 80).unwrap();

        // Reload what was just written; loading with format=None yields the
        // decoder's native RGB output.
        let file = std::fs::read("zidane_resized.jpg").unwrap();
        let img = crate::load_image(&file, None, None).unwrap();
        assert_eq!(img.width(), Some(640));
        assert_eq!(img.height(), Some(360));
        assert_eq!(img.format().unwrap(), PixelFormat::Rgb);
    }
2681
    // Build a planar-RGB TensorDyn from raw bytes and check it matches the
    // RGBA reference capture of the same frame.
    #[test]
    fn test_from_tensor_planar() -> Result<(), Error> {
        let mut tensor = Tensor::new(&[3, 720, 1280], None, None)?;
        tensor.map()?.copy_from_slice(include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../testdata/camera720p.8bps"
        )));
        let planar = {
            tensor
                .set_format(PixelFormat::PlanarRgb)
                .map_err(|e| crate::Error::Internal(e.to_string()))?;
            TensorDyn::from(tensor)
        };

        let rbga = load_bytes_to_tensor(
            1280,
            720,
            PixelFormat::Rgba,
            None,
            include_bytes!(concat!(
                env!("CARGO_MANIFEST_DIR"),
                "/../../testdata/camera720p.rgba"
            )),
        )?;
        // Both images are converted to RGB before comparing; require 98%
        // similarity.
        compare_images_convert_to_rgb(&planar, &rbga, 0.98, function!());

        Ok(())
    }
2710
2711    #[test]
2712    fn test_from_tensor_invalid_format() {
2713        // PixelFormat::from_fourcc_str returns None for unknown FourCC codes.
2714        // Since there's no "TEST" pixel format, this validates graceful handling.
2715        assert!(PixelFormat::from_fourcc(u32::from_le_bytes(*b"TEST")).is_none());
2716    }
2717
    // save_jpeg only supports packed layouts; saving a planar RGB image
    // must fail, which this test surfaces via expect() + should_panic.
    #[test]
    #[should_panic(expected = "Failed to save planar RGB image")]
    fn test_save_planar() {
        let planar_img = load_bytes_to_tensor(
            1280,
            720,
            PixelFormat::PlanarRgb,
            None,
            include_bytes!(concat!(
                env!("CARGO_MANIFEST_DIR"),
                "/../../testdata/camera720p.8bps"
            )),
        )
        .unwrap();

        let save_path = "/tmp/planar_rgb.jpg";
        crate::save_jpeg(&planar_img, save_path, 90).expect("Failed to save planar RGB image");
    }
2736
    // save_jpeg only encodes RGB/RGBA; saving a YUYV image must fail,
    // surfaced via expect() + should_panic.
    #[test]
    #[should_panic(expected = "Failed to save YUYV image")]
    fn test_save_yuyv() {
        let planar_img = load_bytes_to_tensor(
            1280,
            720,
            PixelFormat::Yuyv,
            None,
            include_bytes!(concat!(
                env!("CARGO_MANIFEST_DIR"),
                "/../../testdata/camera720p.yuyv"
            )),
        )
        .unwrap();

        let save_path = "/tmp/yuyv.jpg";
        crate::save_jpeg(&planar_img, save_path, 90).expect("Failed to save YUYV image");
    }
2755
2756    #[test]
2757    fn test_rotation_angle() {
2758        assert_eq!(Rotation::from_degrees_clockwise(0), Rotation::None);
2759        assert_eq!(Rotation::from_degrees_clockwise(90), Rotation::Clockwise90);
2760        assert_eq!(Rotation::from_degrees_clockwise(180), Rotation::Rotate180);
2761        assert_eq!(
2762            Rotation::from_degrees_clockwise(270),
2763            Rotation::CounterClockwise90
2764        );
2765        assert_eq!(Rotation::from_degrees_clockwise(360), Rotation::None);
2766        assert_eq!(Rotation::from_degrees_clockwise(450), Rotation::Clockwise90);
2767        assert_eq!(Rotation::from_degrees_clockwise(540), Rotation::Rotate180);
2768        assert_eq!(
2769            Rotation::from_degrees_clockwise(630),
2770            Rotation::CounterClockwise90
2771        );
2772    }
2773
    // from_degrees_clockwise panics on angles that are not multiples of 90.
    #[test]
    #[should_panic(expected = "rotation angle is not a multiple of 90")]
    fn test_rotation_angle_panic() {
        Rotation::from_degrees_clockwise(361);
    }
2779
    // Verify the EDGEFIRST_DISABLE_* environment variables suppress each
    // backend individually, and that disabling all of them leaves no
    // converter at all. Each variable is saved and restored so parallel
    // tests are not perturbed.
    #[test]
    fn test_disable_env_var() -> Result<(), Error> {
        // EDGEFIRST_FORCE_BACKEND takes precedence over EDGEFIRST_DISABLE_*,
        // so clear it for the duration of this test to avoid races with
        // test_force_backend_cpu running in parallel.
        let saved_force = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
        unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") };

        // Disabling G2D must leave the processor without a G2D backend.
        #[cfg(target_os = "linux")]
        {
            let original = std::env::var("EDGEFIRST_DISABLE_G2D").ok();
            unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", "1") };
            let converter = ImageProcessor::new()?;
            match original {
                Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", s) },
                None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_G2D") },
            }
            assert!(converter.g2d.is_none());
        }

        // Disabling OpenGL must leave the processor without a GL backend.
        #[cfg(target_os = "linux")]
        #[cfg(feature = "opengl")]
        {
            let original = std::env::var("EDGEFIRST_DISABLE_GL").ok();
            unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", "1") };
            let converter = ImageProcessor::new()?;
            match original {
                Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", s) },
                None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_GL") },
            }
            assert!(converter.opengl.is_none());
        }

        // Disabling the CPU fallback must leave the processor without it.
        let original = std::env::var("EDGEFIRST_DISABLE_CPU").ok();
        unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", "1") };
        let converter = ImageProcessor::new()?;
        match original {
            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", s) },
            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_CPU") },
        }
        assert!(converter.cpu.is_none());

        // With every backend disabled, conversion must fail with NoConverter.
        let original_cpu = std::env::var("EDGEFIRST_DISABLE_CPU").ok();
        unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", "1") };
        let original_gl = std::env::var("EDGEFIRST_DISABLE_GL").ok();
        unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", "1") };
        let original_g2d = std::env::var("EDGEFIRST_DISABLE_G2D").ok();
        unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", "1") };
        let mut converter = ImageProcessor::new()?;

        let src = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None)?;
        let dst = TensorDyn::image(640, 360, PixelFormat::Rgba, DType::U8, None)?;
        let (result, _src, _dst) = convert_img(
            &mut converter,
            src,
            dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        assert!(matches!(result, Err(Error::NoConverter)));

        // Restore all saved environment variables.
        match original_cpu {
            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", s) },
            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_CPU") },
        }
        match original_gl {
            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", s) },
            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_GL") },
        }
        match original_g2d {
            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", s) },
            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_G2D") },
        }
        match saved_force {
            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
        }

        Ok(())
    }
2861
    // NV12-to-NV12 conversion is not supported by any backend; the
    // processor must report NotSupported rather than silently no-op.
    #[test]
    fn test_unsupported_conversion() {
        let src = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
        let dst = TensorDyn::image(640, 360, PixelFormat::Nv12, DType::U8, None).unwrap();
        let mut converter = ImageProcessor::new().unwrap();
        let (result, _src, _dst) = convert_img(
            &mut converter,
            src,
            dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        log::debug!("result: {:?}", result);
        assert!(matches!(
            result,
            Err(Error::NotSupported(e)) if e.starts_with("Conversion from NV12 to NV12")
        ));
    }
2881
    // A grayscale JPEG and an RGB-encoded JPEG of the same gray image must
    // decode to (nearly) identical RGBA outputs.
    #[test]
    fn test_load_grey() {
        let grey_img = crate::load_image(
            include_bytes!(concat!(
                env!("CARGO_MANIFEST_DIR"),
                "/../../testdata/grey.jpg"
            )),
            Some(PixelFormat::Rgba),
            None,
        )
        .unwrap();

        let grey_but_rgb_img = crate::load_image(
            include_bytes!(concat!(
                env!("CARGO_MANIFEST_DIR"),
                "/../../testdata/grey-rgb.jpg"
            )),
            Some(PixelFormat::Rgba),
            None,
        )
        .unwrap();

        // Require 99% similarity between the two decodes.
        compare_images(&grey_img, &grey_but_rgb_img, 0.99, function!());
    }
2906
2907    #[test]
2908    fn test_new_nv12() {
2909        let nv12 = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
2910        assert_eq!(nv12.height(), Some(720));
2911        assert_eq!(nv12.width(), Some(1280));
2912        assert_eq!(nv12.format().unwrap(), PixelFormat::Nv12);
2913        // PixelFormat::Nv12.channels() returns 1 (luma plane channel count)
2914        assert_eq!(nv12.format().unwrap().channels(), 1);
2915        assert!(nv12.format().is_some_and(
2916            |f| f.layout() == PixelLayout::Planar || f.layout() == PixelLayout::SemiPlanar
2917        ))
2918    }
2919
    // Convert with the auto-selected backend and with the CPU backend, then
    // require the two outputs to match within 98% similarity.
    #[test]
    #[cfg(target_os = "linux")]
    fn test_new_image_converter() {
        let dst_width = 640;
        let dst_height = 360;
        let file = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../testdata/zidane.jpg"
        ))
        .to_vec();
        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();

        let mut converter = ImageProcessor::new().unwrap();
        let converter_dst = converter
            .create_image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None)
            .unwrap();
        let (result, src, converter_dst) = convert_img(
            &mut converter,
            src,
            converter_dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        // CPU reference conversion of the same source.
        let cpu_dst =
            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
        let mut cpu_converter = CPUProcessor::new();
        let (result, _src, cpu_dst) = convert_img(
            &mut cpu_converter,
            src,
            cpu_dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        compare_images(&converter_dst, &cpu_dst, 0.98, function!());
    }
2961
    // create_image must support I8 destinations, and conversion into an I8
    // image must succeed.
    #[test]
    #[cfg(target_os = "linux")]
    fn test_create_image_dtype_i8() {
        let mut converter = ImageProcessor::new().unwrap();

        // I8 image should allocate successfully via create_image
        let dst = converter
            .create_image(320, 240, PixelFormat::Rgb, DType::I8, None)
            .unwrap();
        assert_eq!(dst.dtype(), DType::I8);
        assert!(dst.width() == Some(320));
        assert!(dst.height() == Some(240));
        assert_eq!(dst.format(), Some(PixelFormat::Rgb));

        // U8 for comparison
        let dst_u8 = converter
            .create_image(320, 240, PixelFormat::Rgb, DType::U8, None)
            .unwrap();
        assert_eq!(dst_u8.dtype(), DType::U8);

        // Convert into I8 dst should succeed
        let file = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../testdata/zidane.jpg"
        ))
        .to_vec();
        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
        let mut dst_i8 = converter
            .create_image(320, 240, PixelFormat::Rgb, DType::I8, None)
            .unwrap();
        converter
            .convert(
                &src,
                &mut dst_i8,
                Rotation::None,
                Flip::None,
                Crop::no_crop(),
            )
            .unwrap();
    }
3002
3003    #[test]
3004    #[cfg(target_os = "linux")]
3005    fn test_create_image_nv12_dma_non_aligned_width() {
3006        // Regression for C2: create_image must not apply stride padding to
3007        // non-packed formats. NV12 is semi-planar (PixelLayout::SemiPlanar),
3008        // so the try_dma path should fall through to the plain
3009        // TensorDyn::image allocation for any width, regardless of the
3010        // 64-byte GPU pitch alignment.
3011        let converter = ImageProcessor::new().unwrap();
3012
3013        // 100 is intentionally not a multiple of 64 (the Mali pitch
3014        // alignment) to prove that non-packed layouts do not take the
3015        // stride-padded branch.
3016        let result = converter.create_image(
3017            100,
3018            64,
3019            PixelFormat::Nv12,
3020            DType::U8,
3021            Some(TensorMemory::Dma),
3022        );
3023
3024        match result {
3025            Ok(img) => {
3026                assert_eq!(img.width(), Some(100));
3027                assert_eq!(img.height(), Some(64));
3028                assert_eq!(img.format(), Some(PixelFormat::Nv12));
3029                // Non-packed formats must never carry a row_stride override.
3030                assert!(
3031                    img.row_stride().is_none(),
3032                    "NV12 must not be stride-padded by create_image",
3033                );
3034            }
3035            Err(e) => {
3036                // Accept skip on hosts without a dma-heap, but never the
3037                // "NotImplemented" we used to return for non-packed layouts.
3038                let msg = format!("{e}");
3039                assert!(
3040                    !msg.contains("image_with_stride"),
3041                    "NV12 should not hit the stride-padded path: {msg}",
3042                );
3043            }
3044        }
3045    }
3046
3047    #[test]
3048    #[ignore] // Hangs on desktop platforms where DMA-buf is unavailable and PBO
3049              // fallback triggers a GPU driver hang during SHM→texture upload (e.g.,
3050              // NVIDIA without /dev/dma_heap permissions). Works on embedded targets.
3051    fn test_crop_skip() {
3052        let file = include_bytes!(concat!(
3053            env!("CARGO_MANIFEST_DIR"),
3054            "/../../testdata/zidane.jpg"
3055        ))
3056        .to_vec();
3057        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
3058
3059        let mut converter = ImageProcessor::new().unwrap();
3060        let converter_dst = converter
3061            .create_image(1280, 720, PixelFormat::Rgba, DType::U8, None)
3062            .unwrap();
3063        let crop = Crop::new()
3064            .with_src_rect(Some(Rect::new(0, 0, 640, 640)))
3065            .with_dst_rect(Some(Rect::new(0, 0, 640, 640)));
3066        let (result, src, converter_dst) = convert_img(
3067            &mut converter,
3068            src,
3069            converter_dst,
3070            Rotation::None,
3071            Flip::None,
3072            crop,
3073        );
3074        result.unwrap();
3075
3076        let cpu_dst = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
3077        let mut cpu_converter = CPUProcessor::new();
3078        let (result, _src, cpu_dst) = convert_img(
3079            &mut cpu_converter,
3080            src,
3081            cpu_dst,
3082            Rotation::None,
3083            Flip::None,
3084            crop,
3085        );
3086        result.unwrap();
3087
3088        compare_images(&converter_dst, &cpu_dst, 0.99999, function!());
3089    }
3090
3091    #[test]
3092    fn test_invalid_pixel_format() {
3093        // PixelFormat::from_fourcc returns None for unknown formats,
3094        // so TensorDyn::image cannot be called with an invalid format.
3095        assert!(PixelFormat::from_fourcc(u32::from_le_bytes(*b"TEST")).is_none());
3096    }
3097
3098    // Helper function to check if G2D library is available (Linux/i.MX8 only)
3099    #[cfg(target_os = "linux")]
3100    static G2D_AVAILABLE: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
3101
3102    #[cfg(target_os = "linux")]
3103    fn is_g2d_available() -> bool {
3104        *G2D_AVAILABLE.get_or_init(|| G2DProcessor::new().is_ok())
3105    }
3106
3107    #[cfg(target_os = "linux")]
3108    #[cfg(feature = "opengl")]
3109    static GL_AVAILABLE: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
3110
3111    #[cfg(target_os = "linux")]
3112    #[cfg(feature = "opengl")]
3113    // Helper function to check if OpenGL is available
3114    fn is_opengl_available() -> bool {
3115        #[cfg(all(target_os = "linux", feature = "opengl"))]
3116        {
3117            *GL_AVAILABLE.get_or_init(|| GLProcessorThreaded::new(None).is_ok())
3118        }
3119
3120        #[cfg(not(all(target_os = "linux", feature = "opengl")))]
3121        {
3122            false
3123        }
3124    }
3125
3126    #[test]
3127    fn test_load_jpeg_with_exif() {
3128        let file = include_bytes!(concat!(
3129            env!("CARGO_MANIFEST_DIR"),
3130            "/../../testdata/zidane_rotated_exif.jpg"
3131        ))
3132        .to_vec();
3133        let loaded = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
3134
3135        assert_eq!(loaded.height(), Some(1280));
3136        assert_eq!(loaded.width(), Some(720));
3137
3138        let file = include_bytes!(concat!(
3139            env!("CARGO_MANIFEST_DIR"),
3140            "/../../testdata/zidane.jpg"
3141        ))
3142        .to_vec();
3143        let cpu_src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
3144
3145        let (dst_width, dst_height) = (cpu_src.height().unwrap(), cpu_src.width().unwrap());
3146
3147        let cpu_dst =
3148            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3149        let mut cpu_converter = CPUProcessor::new();
3150
3151        let (result, _cpu_src, cpu_dst) = convert_img(
3152            &mut cpu_converter,
3153            cpu_src,
3154            cpu_dst,
3155            Rotation::Clockwise90,
3156            Flip::None,
3157            Crop::no_crop(),
3158        );
3159        result.unwrap();
3160
3161        compare_images(&loaded, &cpu_dst, 0.98, function!());
3162    }
3163
3164    #[test]
3165    fn test_load_png_with_exif() {
3166        let file = include_bytes!(concat!(
3167            env!("CARGO_MANIFEST_DIR"),
3168            "/../../testdata/zidane_rotated_exif_180.png"
3169        ))
3170        .to_vec();
3171        let loaded = crate::load_png(&file, Some(PixelFormat::Rgba), None).unwrap();
3172
3173        assert_eq!(loaded.height(), Some(720));
3174        assert_eq!(loaded.width(), Some(1280));
3175
3176        let file = include_bytes!(concat!(
3177            env!("CARGO_MANIFEST_DIR"),
3178            "/../../testdata/zidane.jpg"
3179        ))
3180        .to_vec();
3181        let cpu_src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
3182
3183        let cpu_dst = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
3184        let mut cpu_converter = CPUProcessor::new();
3185
3186        let (result, _cpu_src, cpu_dst) = convert_img(
3187            &mut cpu_converter,
3188            cpu_src,
3189            cpu_dst,
3190            Rotation::Rotate180,
3191            Flip::None,
3192            Crop::no_crop(),
3193        );
3194        result.unwrap();
3195
3196        compare_images(&loaded, &cpu_dst, 0.98, function!());
3197    }
3198
    #[test]
    #[cfg(target_os = "linux")]
    // Plain resize through the G2D hardware path must match the CPU reference
    // within 98% similarity. The test skips (rather than fails) when the G2D
    // library or DMA-heap allocation is unavailable on this host.
    fn test_g2d_resize() {
        if !is_g2d_available() {
            eprintln!("SKIPPED: test_g2d_resize - G2D library (libg2d.so.2) not available");
            return;
        }
        if !is_dma_available() {
            eprintln!(
                "SKIPPED: test_g2d_resize - DMA memory allocation not available (permission denied or no DMA-BUF support)"
            );
            return;
        }

        let dst_width = 640;
        let dst_height = 360;
        let file = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../testdata/zidane.jpg"
        ))
        .to_vec();
        // Source and destination are both DMA-backed for the G2D conversion.
        let src =
            crate::load_image(&file, Some(PixelFormat::Rgba), Some(TensorMemory::Dma)).unwrap();

        let g2d_dst = TensorDyn::image(
            dst_width,
            dst_height,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Dma),
        )
        .unwrap();
        let mut g2d_converter = G2DProcessor::new().unwrap();
        // convert_img hands the source tensor back so it can be reused below
        // for the CPU reference conversion.
        let (result, src, g2d_dst) = convert_img(
            &mut g2d_converter,
            src,
            g2d_dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        // CPU reference of the same resize into a plain (non-DMA) buffer.
        let cpu_dst =
            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
        let mut cpu_converter = CPUProcessor::new();
        let (result, _src, cpu_dst) = convert_img(
            &mut cpu_converter,
            src,
            cpu_dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
    }
3257
    #[test]
    #[cfg(target_os = "linux")]
    #[cfg(feature = "opengl")]
    // Resize through the threaded OpenGL processor must match the CPU
    // reference. Also invoked concurrently by test_opengl_10_threads, so its
    // signature must stay a plain zero-argument fn.
    fn test_opengl_resize() {
        if !is_opengl_available() {
            eprintln!("SKIPPED: {} - OpenGL not available", function!());
            return;
        }

        let dst_width = 640;
        let dst_height = 360;
        let file = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../testdata/zidane.jpg"
        ))
        .to_vec();
        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();

        // CPU reference computed once up front.
        let cpu_dst =
            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
        let mut cpu_converter = CPUProcessor::new();
        let (result, src, cpu_dst) = convert_img(
            &mut cpu_converter,
            src,
            cpu_dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        let mut src = src;
        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();

        // The GL conversion runs five times on the same converter instance
        // and every iteration must still match the CPU reference —
        // NOTE(review): presumably guarding against state carried over
        // between conversions; confirm the intent.
        for _ in 0..5 {
            let gl_dst =
                TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None)
                    .unwrap();
            // convert_img returns ownership of the source so the next
            // iteration can reuse it.
            let (result, src_back, gl_dst) = convert_img(
                &mut gl_converter,
                src,
                gl_dst,
                Rotation::None,
                Flip::None,
                Crop::no_crop(),
            );
            result.unwrap();
            src = src_back;

            compare_images(&gl_dst, &cpu_dst, 0.98, function!());
        }
    }
3310
3311    #[test]
3312    #[cfg(target_os = "linux")]
3313    #[cfg(feature = "opengl")]
3314    fn test_opengl_10_threads() {
3315        if !is_opengl_available() {
3316            eprintln!("SKIPPED: {} - OpenGL not available", function!());
3317            return;
3318        }
3319
3320        let handles: Vec<_> = (0..10)
3321            .map(|i| {
3322                std::thread::Builder::new()
3323                    .name(format!("Thread {i}"))
3324                    .spawn(test_opengl_resize)
3325                    .unwrap()
3326            })
3327            .collect();
3328        handles.into_iter().for_each(|h| {
3329            if let Err(e) = h.join() {
3330                std::panic::resume_unwind(e)
3331            }
3332        });
3333    }
3334
3335    #[test]
3336    #[cfg(target_os = "linux")]
3337    #[cfg(feature = "opengl")]
3338    fn test_opengl_grey() {
3339        if !is_opengl_available() {
3340            eprintln!("SKIPPED: {} - OpenGL not available", function!());
3341            return;
3342        }
3343
3344        let img = crate::load_image(
3345            include_bytes!(concat!(
3346                env!("CARGO_MANIFEST_DIR"),
3347                "/../../testdata/grey.jpg"
3348            )),
3349            Some(PixelFormat::Grey),
3350            None,
3351        )
3352        .unwrap();
3353
3354        let gl_dst = TensorDyn::image(640, 640, PixelFormat::Grey, DType::U8, None).unwrap();
3355        let cpu_dst = TensorDyn::image(640, 640, PixelFormat::Grey, DType::U8, None).unwrap();
3356
3357        let mut converter = CPUProcessor::new();
3358
3359        let (result, img, cpu_dst) = convert_img(
3360            &mut converter,
3361            img,
3362            cpu_dst,
3363            Rotation::None,
3364            Flip::None,
3365            Crop::no_crop(),
3366        );
3367        result.unwrap();
3368
3369        let mut gl = GLProcessorThreaded::new(None).unwrap();
3370        let (result, _img, gl_dst) = convert_img(
3371            &mut gl,
3372            img,
3373            gl_dst,
3374            Rotation::None,
3375            Flip::None,
3376            Crop::no_crop(),
3377        );
3378        result.unwrap();
3379
3380        compare_images(&gl_dst, &cpu_dst, 0.98, function!());
3381    }
3382
    #[test]
    #[cfg(target_os = "linux")]
    // A source-rect crop (top 640x360 of the input) scaled into a 640x640
    // destination must produce the same output from G2D as from the CPU
    // reference. Skips when G2D or DMA allocation is unavailable.
    fn test_g2d_src_crop() {
        if !is_g2d_available() {
            eprintln!("SKIPPED: test_g2d_src_crop - G2D library (libg2d.so.2) not available");
            return;
        }
        if !is_dma_available() {
            eprintln!(
                "SKIPPED: test_g2d_src_crop - DMA memory allocation not available (permission denied or no DMA-BUF support)"
            );
            return;
        }

        let dst_width = 640;
        let dst_height = 640;
        let file = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../testdata/zidane.jpg"
        ))
        .to_vec();
        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();

        // CPU reference with the same crop; convert_img returns the source
        // so it can be reused for the G2D conversion below.
        let cpu_dst =
            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
        let mut cpu_converter = CPUProcessor::new();
        let crop = Crop {
            src_rect: Some(Rect {
                left: 0,
                top: 0,
                width: 640,
                height: 360,
            }),
            dst_rect: None,
            dst_color: None,
        };
        let (result, src, cpu_dst) = convert_img(
            &mut cpu_converter,
            src,
            cpu_dst,
            Rotation::None,
            Flip::None,
            crop,
        );
        result.unwrap();

        let g2d_dst =
            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
        let mut g2d_converter = G2DProcessor::new().unwrap();
        let (result, _src, g2d_dst) = convert_img(
            &mut g2d_converter,
            src,
            g2d_dst,
            Rotation::None,
            Flip::None,
            crop,
        );
        result.unwrap();

        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
    }
3444
    #[test]
    #[cfg(target_os = "linux")]
    // A destination-rect crop (full input scaled into a 512x288 rect at
    // (100,100) of a 640x640 canvas) must produce the same output from G2D as
    // from the CPU reference. Skips when G2D or DMA allocation is unavailable.
    fn test_g2d_dst_crop() {
        if !is_g2d_available() {
            eprintln!("SKIPPED: test_g2d_dst_crop - G2D library (libg2d.so.2) not available");
            return;
        }
        if !is_dma_available() {
            eprintln!(
                "SKIPPED: test_g2d_dst_crop - DMA memory allocation not available (permission denied or no DMA-BUF support)"
            );
            return;
        }

        let dst_width = 640;
        let dst_height = 640;
        let file = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../testdata/zidane.jpg"
        ))
        .to_vec();
        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();

        // CPU reference first; convert_img hands the source back for the G2D
        // run below.
        let cpu_dst =
            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
        let mut cpu_converter = CPUProcessor::new();
        let crop = Crop {
            src_rect: None,
            dst_rect: Some(Rect::new(100, 100, 512, 288)),
            dst_color: None,
        };
        let (result, src, cpu_dst) = convert_img(
            &mut cpu_converter,
            src,
            cpu_dst,
            Rotation::None,
            Flip::None,
            crop,
        );
        result.unwrap();

        let g2d_dst =
            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
        let mut g2d_converter = G2DProcessor::new().unwrap();
        let (result, _src, g2d_dst) = convert_img(
            &mut g2d_converter,
            src,
            g2d_dst,
            Rotation::None,
            Flip::None,
            crop,
        );
        result.unwrap();

        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
    }
3501
3502    #[test]
3503    #[cfg(target_os = "linux")]
3504    fn test_g2d_all_rgba() {
3505        if !is_g2d_available() {
3506            eprintln!("SKIPPED: test_g2d_all_rgba - G2D library (libg2d.so.2) not available");
3507            return;
3508        }
3509        if !is_dma_available() {
3510            eprintln!(
3511                "SKIPPED: test_g2d_all_rgba - DMA memory allocation not available (permission denied or no DMA-BUF support)"
3512            );
3513            return;
3514        }
3515
3516        let dst_width = 640;
3517        let dst_height = 640;
3518        let file = include_bytes!(concat!(
3519            env!("CARGO_MANIFEST_DIR"),
3520            "/../../testdata/zidane.jpg"
3521        ))
3522        .to_vec();
3523        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
3524        let src_dyn = src;
3525
3526        let mut cpu_dst =
3527            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3528        let mut cpu_converter = CPUProcessor::new();
3529        let mut g2d_dst =
3530            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3531        let mut g2d_converter = G2DProcessor::new().unwrap();
3532
3533        let crop = Crop {
3534            src_rect: Some(Rect::new(50, 120, 1024, 576)),
3535            dst_rect: Some(Rect::new(100, 100, 512, 288)),
3536            dst_color: None,
3537        };
3538
3539        for rot in [
3540            Rotation::None,
3541            Rotation::Clockwise90,
3542            Rotation::Rotate180,
3543            Rotation::CounterClockwise90,
3544        ] {
3545            cpu_dst
3546                .as_u8()
3547                .unwrap()
3548                .map()
3549                .unwrap()
3550                .as_mut_slice()
3551                .fill(114);
3552            g2d_dst
3553                .as_u8()
3554                .unwrap()
3555                .map()
3556                .unwrap()
3557                .as_mut_slice()
3558                .fill(114);
3559            for flip in [Flip::None, Flip::Horizontal, Flip::Vertical] {
3560                let mut cpu_dst_dyn = cpu_dst;
3561                cpu_converter
3562                    .convert(&src_dyn, &mut cpu_dst_dyn, Rotation::None, Flip::None, crop)
3563                    .unwrap();
3564                cpu_dst = {
3565                    let mut __t = cpu_dst_dyn.into_u8().unwrap();
3566                    __t.set_format(PixelFormat::Rgba).unwrap();
3567                    TensorDyn::from(__t)
3568                };
3569
3570                let mut g2d_dst_dyn = g2d_dst;
3571                g2d_converter
3572                    .convert(&src_dyn, &mut g2d_dst_dyn, Rotation::None, Flip::None, crop)
3573                    .unwrap();
3574                g2d_dst = {
3575                    let mut __t = g2d_dst_dyn.into_u8().unwrap();
3576                    __t.set_format(PixelFormat::Rgba).unwrap();
3577                    TensorDyn::from(__t)
3578                };
3579
3580                compare_images(
3581                    &g2d_dst,
3582                    &cpu_dst,
3583                    0.98,
3584                    &format!("{} {:?} {:?}", function!(), rot, flip),
3585                );
3586            }
3587        }
3588    }
3589
    #[test]
    #[cfg(target_os = "linux")]
    #[cfg(feature = "opengl")]
    // A source-rect crop (the bottom-right 960x540 region) scaled into a
    // 640x360 destination must produce the same output from the GL path as
    // from the CPU reference.
    fn test_opengl_src_crop() {
        if !is_opengl_available() {
            eprintln!("SKIPPED: {} - OpenGL not available", function!());
            return;
        }

        let dst_width = 640;
        let dst_height = 360;
        let file = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../testdata/zidane.jpg"
        ))
        .to_vec();
        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
        let crop = Crop {
            src_rect: Some(Rect {
                left: 320,
                top: 180,
                width: 1280 - 320,
                height: 720 - 180,
            }),
            dst_rect: None,
            dst_color: None,
        };

        // CPU reference first; convert_img hands the source back for the GL
        // run below.
        let cpu_dst =
            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
        let mut cpu_converter = CPUProcessor::new();
        let (result, src, cpu_dst) = convert_img(
            &mut cpu_converter,
            src,
            cpu_dst,
            Rotation::None,
            Flip::None,
            crop,
        );
        result.unwrap();

        let gl_dst =
            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
        let (result, _src, gl_dst) = convert_img(
            &mut gl_converter,
            src,
            gl_dst,
            Rotation::None,
            Flip::None,
            crop,
        );
        result.unwrap();

        compare_images(&gl_dst, &cpu_dst, 0.98, function!());
    }
3646
    #[test]
    #[cfg(target_os = "linux")]
    #[cfg(feature = "opengl")]
    // A destination-rect crop (full input scaled into a 512x288 rect at
    // (100,100) of a 640x640 canvas) must produce the same output from the
    // GL path as from the CPU reference.
    fn test_opengl_dst_crop() {
        if !is_opengl_available() {
            eprintln!("SKIPPED: {} - OpenGL not available", function!());
            return;
        }

        let dst_width = 640;
        let dst_height = 640;
        let file = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../testdata/zidane.jpg"
        ))
        .to_vec();
        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();

        // CPU reference first; convert_img hands the source back for the GL
        // run below.
        let cpu_dst =
            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
        let mut cpu_converter = CPUProcessor::new();
        let crop = Crop {
            src_rect: None,
            dst_rect: Some(Rect::new(100, 100, 512, 288)),
            dst_color: None,
        };
        let (result, src, cpu_dst) = convert_img(
            &mut cpu_converter,
            src,
            cpu_dst,
            Rotation::None,
            Flip::None,
            crop,
        );
        result.unwrap();

        let gl_dst =
            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
        let (result, _src, gl_dst) = convert_img(
            &mut gl_converter,
            src,
            gl_dst,
            Rotation::None,
            Flip::None,
            crop,
        );
        result.unwrap();

        compare_images(&gl_dst, &cpu_dst, 0.98, function!());
    }
3698
3699    #[test]
3700    #[cfg(target_os = "linux")]
3701    #[cfg(feature = "opengl")]
3702    fn test_opengl_all_rgba() {
3703        if !is_opengl_available() {
3704            eprintln!("SKIPPED: {} - OpenGL not available", function!());
3705            return;
3706        }
3707
3708        let dst_width = 640;
3709        let dst_height = 640;
3710        let file = include_bytes!(concat!(
3711            env!("CARGO_MANIFEST_DIR"),
3712            "/../../testdata/zidane.jpg"
3713        ))
3714        .to_vec();
3715
3716        let mut cpu_converter = CPUProcessor::new();
3717
3718        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
3719
3720        let mut mem = vec![None, Some(TensorMemory::Mem), Some(TensorMemory::Shm)];
3721        if is_dma_available() {
3722            mem.push(Some(TensorMemory::Dma));
3723        }
3724        let crop = Crop {
3725            src_rect: Some(Rect::new(50, 120, 1024, 576)),
3726            dst_rect: Some(Rect::new(100, 100, 512, 288)),
3727            dst_color: None,
3728        };
3729        for m in mem {
3730            let src = crate::load_image(&file, Some(PixelFormat::Rgba), m).unwrap();
3731            let src_dyn = src;
3732
3733            for rot in [
3734                Rotation::None,
3735                Rotation::Clockwise90,
3736                Rotation::Rotate180,
3737                Rotation::CounterClockwise90,
3738            ] {
3739                for flip in [Flip::None, Flip::Horizontal, Flip::Vertical] {
3740                    let cpu_dst =
3741                        TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, m)
3742                            .unwrap();
3743                    let gl_dst =
3744                        TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, m)
3745                            .unwrap();
3746                    cpu_dst
3747                        .as_u8()
3748                        .unwrap()
3749                        .map()
3750                        .unwrap()
3751                        .as_mut_slice()
3752                        .fill(114);
3753                    gl_dst
3754                        .as_u8()
3755                        .unwrap()
3756                        .map()
3757                        .unwrap()
3758                        .as_mut_slice()
3759                        .fill(114);
3760
3761                    let mut cpu_dst_dyn = cpu_dst;
3762                    cpu_converter
3763                        .convert(&src_dyn, &mut cpu_dst_dyn, Rotation::None, Flip::None, crop)
3764                        .unwrap();
3765                    let cpu_dst = {
3766                        let mut __t = cpu_dst_dyn.into_u8().unwrap();
3767                        __t.set_format(PixelFormat::Rgba).unwrap();
3768                        TensorDyn::from(__t)
3769                    };
3770
3771                    let mut gl_dst_dyn = gl_dst;
3772                    gl_converter
3773                        .convert(&src_dyn, &mut gl_dst_dyn, Rotation::None, Flip::None, crop)
3774                        .map_err(|e| {
3775                            log::error!("error mem {m:?} rot {rot:?} error: {e:?}");
3776                            e
3777                        })
3778                        .unwrap();
3779                    let gl_dst = {
3780                        let mut __t = gl_dst_dyn.into_u8().unwrap();
3781                        __t.set_format(PixelFormat::Rgba).unwrap();
3782                        TensorDyn::from(__t)
3783                    };
3784
3785                    compare_images(
3786                        &gl_dst,
3787                        &cpu_dst,
3788                        0.98,
3789                        &format!("{} {:?} {:?}", function!(), rot, flip),
3790                    );
3791                }
3792            }
3793        }
3794    }
3795
3796    #[test]
3797    #[cfg(target_os = "linux")]
3798    fn test_cpu_rotate() {
3799        for rot in [
3800            Rotation::Clockwise90,
3801            Rotation::Rotate180,
3802            Rotation::CounterClockwise90,
3803        ] {
3804            test_cpu_rotate_(rot);
3805        }
3806    }
3807
    #[cfg(target_os = "linux")]
    // Applies `rot` four times, ping-ponging between two buffers, and checks
    // that the image ends up back at the original. Four applications of any
    // of the supported rotations compose to the identity. Note: this only
    // proves the rotation round-trips; it does not verify that each
    // individual rotation turned in the right direction.
    fn test_cpu_rotate_(rot: Rotation) {
        let file = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../testdata/zidane.jpg"
        ))
        .to_vec();

        // Load the image twice: one copy stays untouched as the comparison
        // baseline, the other is rotated through the round trip.
        let unchanged_src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();

        // A 90° rotation swaps the destination dimensions; 180° keeps them.
        let (dst_width, dst_height) = match rot {
            Rotation::None | Rotation::Rotate180 => (src.width().unwrap(), src.height().unwrap()),
            Rotation::Clockwise90 | Rotation::CounterClockwise90 => {
                (src.height().unwrap(), src.width().unwrap())
            }
        };

        let cpu_dst =
            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
        let mut cpu_converter = CPUProcessor::new();

        // Rotation 1: src -> cpu_dst.
        let (result, src, cpu_dst) = convert_img(
            &mut cpu_converter,
            src,
            cpu_dst,
            rot,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        // Rotation 2: cpu_dst -> src (buffers swap roles each step, which is
        // why the returned tuple order alternates below).
        let (result, cpu_dst, src) = convert_img(
            &mut cpu_converter,
            cpu_dst,
            src,
            rot,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        // Rotation 3: src -> cpu_dst.
        let (result, src, cpu_dst) = convert_img(
            &mut cpu_converter,
            src,
            cpu_dst,
            rot,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        // Rotation 4: cpu_dst -> src; `src` now holds the 4x-rotated image.
        let (result, _cpu_dst, src) = convert_img(
            &mut cpu_converter,
            cpu_dst,
            src,
            rot,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        compare_images(&src, &unchanged_src, 0.98, function!());
    }
3877
3878    #[test]
3879    #[cfg(target_os = "linux")]
3880    #[cfg(feature = "opengl")]
3881    fn test_opengl_rotate() {
3882        if !is_opengl_available() {
3883            eprintln!("SKIPPED: {} - OpenGL not available", function!());
3884            return;
3885        }
3886
3887        let size = (1280, 720);
3888        let mut mem = vec![None, Some(TensorMemory::Shm), Some(TensorMemory::Mem)];
3889
3890        if is_dma_available() {
3891            mem.push(Some(TensorMemory::Dma));
3892        }
3893        for m in mem {
3894            for rot in [
3895                Rotation::Clockwise90,
3896                Rotation::Rotate180,
3897                Rotation::CounterClockwise90,
3898            ] {
3899                test_opengl_rotate_(size, rot, m);
3900            }
3901        }
3902    }
3903
3904    #[cfg(target_os = "linux")]
3905    #[cfg(feature = "opengl")]
3906    fn test_opengl_rotate_(
3907        size: (usize, usize),
3908        rot: Rotation,
3909        tensor_memory: Option<TensorMemory>,
3910    ) {
3911        let (dst_width, dst_height) = match rot {
3912            Rotation::None | Rotation::Rotate180 => size,
3913            Rotation::Clockwise90 | Rotation::CounterClockwise90 => (size.1, size.0),
3914        };
3915
3916        let file = include_bytes!(concat!(
3917            env!("CARGO_MANIFEST_DIR"),
3918            "/../../testdata/zidane.jpg"
3919        ))
3920        .to_vec();
3921        let src = crate::load_image(&file, Some(PixelFormat::Rgba), tensor_memory).unwrap();
3922
3923        let cpu_dst =
3924            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3925        let mut cpu_converter = CPUProcessor::new();
3926
3927        let (result, mut src, cpu_dst) = convert_img(
3928            &mut cpu_converter,
3929            src,
3930            cpu_dst,
3931            rot,
3932            Flip::None,
3933            Crop::no_crop(),
3934        );
3935        result.unwrap();
3936
3937        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
3938
3939        for _ in 0..5 {
3940            let gl_dst = TensorDyn::image(
3941                dst_width,
3942                dst_height,
3943                PixelFormat::Rgba,
3944                DType::U8,
3945                tensor_memory,
3946            )
3947            .unwrap();
3948            let (result, src_back, gl_dst) = convert_img(
3949                &mut gl_converter,
3950                src,
3951                gl_dst,
3952                rot,
3953                Flip::None,
3954                Crop::no_crop(),
3955            );
3956            result.unwrap();
3957            src = src_back;
3958            compare_images(&gl_dst, &cpu_dst, 0.98, function!());
3959        }
3960    }
3961
3962    #[test]
3963    #[cfg(target_os = "linux")]
3964    fn test_g2d_rotate() {
3965        if !is_g2d_available() {
3966            eprintln!("SKIPPED: test_g2d_rotate - G2D library (libg2d.so.2) not available");
3967            return;
3968        }
3969        if !is_dma_available() {
3970            eprintln!(
3971                "SKIPPED: test_g2d_rotate - DMA memory allocation not available (permission denied or no DMA-BUF support)"
3972            );
3973            return;
3974        }
3975
3976        let size = (1280, 720);
3977        for rot in [
3978            Rotation::Clockwise90,
3979            Rotation::Rotate180,
3980            Rotation::CounterClockwise90,
3981        ] {
3982            test_g2d_rotate_(size, rot);
3983        }
3984    }
3985
3986    #[cfg(target_os = "linux")]
3987    fn test_g2d_rotate_(size: (usize, usize), rot: Rotation) {
3988        let (dst_width, dst_height) = match rot {
3989            Rotation::None | Rotation::Rotate180 => size,
3990            Rotation::Clockwise90 | Rotation::CounterClockwise90 => (size.1, size.0),
3991        };
3992
3993        let file = include_bytes!(concat!(
3994            env!("CARGO_MANIFEST_DIR"),
3995            "/../../testdata/zidane.jpg"
3996        ))
3997        .to_vec();
3998        let src =
3999            crate::load_image(&file, Some(PixelFormat::Rgba), Some(TensorMemory::Dma)).unwrap();
4000
4001        let cpu_dst =
4002            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4003        let mut cpu_converter = CPUProcessor::new();
4004
4005        let (result, src, cpu_dst) = convert_img(
4006            &mut cpu_converter,
4007            src,
4008            cpu_dst,
4009            rot,
4010            Flip::None,
4011            Crop::no_crop(),
4012        );
4013        result.unwrap();
4014
4015        let g2d_dst = TensorDyn::image(
4016            dst_width,
4017            dst_height,
4018            PixelFormat::Rgba,
4019            DType::U8,
4020            Some(TensorMemory::Dma),
4021        )
4022        .unwrap();
4023        let mut g2d_converter = G2DProcessor::new().unwrap();
4024
4025        let (result, _src, g2d_dst) = convert_img(
4026            &mut g2d_converter,
4027            src,
4028            g2d_dst,
4029            rot,
4030            Flip::None,
4031            Crop::no_crop(),
4032        );
4033        result.unwrap();
4034
4035        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
4036    }
4037
4038    #[test]
4039    fn test_rgba_to_yuyv_resize_cpu() {
4040        let src = load_bytes_to_tensor(
4041            1280,
4042            720,
4043            PixelFormat::Rgba,
4044            None,
4045            include_bytes!(concat!(
4046                env!("CARGO_MANIFEST_DIR"),
4047                "/../../testdata/camera720p.rgba"
4048            )),
4049        )
4050        .unwrap();
4051
4052        let (dst_width, dst_height) = (640, 360);
4053
4054        let dst =
4055            TensorDyn::image(dst_width, dst_height, PixelFormat::Yuyv, DType::U8, None).unwrap();
4056
4057        let dst_through_yuyv =
4058            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4059        let dst_direct =
4060            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4061
4062        let mut cpu_converter = CPUProcessor::new();
4063
4064        let (result, src, dst) = convert_img(
4065            &mut cpu_converter,
4066            src,
4067            dst,
4068            Rotation::None,
4069            Flip::None,
4070            Crop::no_crop(),
4071        );
4072        result.unwrap();
4073
4074        let (result, _dst, dst_through_yuyv) = convert_img(
4075            &mut cpu_converter,
4076            dst,
4077            dst_through_yuyv,
4078            Rotation::None,
4079            Flip::None,
4080            Crop::no_crop(),
4081        );
4082        result.unwrap();
4083
4084        let (result, _src, dst_direct) = convert_img(
4085            &mut cpu_converter,
4086            src,
4087            dst_direct,
4088            Rotation::None,
4089            Flip::None,
4090            Crop::no_crop(),
4091        );
4092        result.unwrap();
4093
4094        compare_images(&dst_through_yuyv, &dst_direct, 0.98, function!());
4095    }
4096
4097    #[test]
4098    #[cfg(target_os = "linux")]
4099    #[cfg(feature = "opengl")]
4100    #[ignore = "opengl doesn't support rendering to PixelFormat::Yuyv texture"]
4101    fn test_rgba_to_yuyv_resize_opengl() {
4102        if !is_opengl_available() {
4103            eprintln!("SKIPPED: {} - OpenGL not available", function!());
4104            return;
4105        }
4106
4107        if !is_dma_available() {
4108            eprintln!(
4109                "SKIPPED: {} - DMA memory allocation not available (permission denied or no DMA-BUF support)",
4110                function!()
4111            );
4112            return;
4113        }
4114
4115        let src = load_bytes_to_tensor(
4116            1280,
4117            720,
4118            PixelFormat::Rgba,
4119            None,
4120            include_bytes!(concat!(
4121                env!("CARGO_MANIFEST_DIR"),
4122                "/../../testdata/camera720p.rgba"
4123            )),
4124        )
4125        .unwrap();
4126
4127        let (dst_width, dst_height) = (640, 360);
4128
4129        let dst = TensorDyn::image(
4130            dst_width,
4131            dst_height,
4132            PixelFormat::Yuyv,
4133            DType::U8,
4134            Some(TensorMemory::Dma),
4135        )
4136        .unwrap();
4137
4138        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
4139
4140        let (result, src, dst) = convert_img(
4141            &mut gl_converter,
4142            src,
4143            dst,
4144            Rotation::None,
4145            Flip::None,
4146            Crop::new()
4147                .with_dst_rect(Some(Rect::new(100, 100, 100, 100)))
4148                .with_dst_color(Some([255, 255, 255, 255])),
4149        );
4150        result.unwrap();
4151
4152        std::fs::write(
4153            "rgba_to_yuyv_opengl.yuyv",
4154            dst.as_u8().unwrap().map().unwrap().as_slice(),
4155        )
4156        .unwrap();
4157        let cpu_dst = TensorDyn::image(
4158            dst_width,
4159            dst_height,
4160            PixelFormat::Yuyv,
4161            DType::U8,
4162            Some(TensorMemory::Dma),
4163        )
4164        .unwrap();
4165        let (result, _src, cpu_dst) = convert_img(
4166            &mut CPUProcessor::new(),
4167            src,
4168            cpu_dst,
4169            Rotation::None,
4170            Flip::None,
4171            Crop::no_crop(),
4172        );
4173        result.unwrap();
4174
4175        compare_images_convert_to_rgb(&dst, &cpu_dst, 0.98, function!());
4176    }
4177
    #[test]
    #[cfg(target_os = "linux")]
    fn test_rgba_to_yuyv_resize_g2d() {
        // Checks that G2D and CPU agree when converting RGBA -> YUYV while
        // blitting into a small dst_rect of a pre-filled destination image.
        //
        // NOTE(review): despite "resize" in the name, the destination image
        // is the same 1280x720 as the source; the only scaling comes from the
        // 2x2 dst_rect below. Confirm whether a smaller destination (e.g.
        // 640x360) was intended.
        if !is_g2d_available() {
            eprintln!(
                "SKIPPED: test_rgba_to_yuyv_resize_g2d - G2D library (libg2d.so.2) not available"
            );
            return;
        }
        if !is_dma_available() {
            eprintln!(
                "SKIPPED: test_rgba_to_yuyv_resize_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
            );
            return;
        }

        // DMA-backed 720p RGBA source frame.
        let src = load_bytes_to_tensor(
            1280,
            720,
            PixelFormat::Rgba,
            Some(TensorMemory::Dma),
            include_bytes!(concat!(
                env!("CARGO_MANIFEST_DIR"),
                "/../../testdata/camera720p.rgba"
            )),
        )
        .unwrap();

        let (dst_width, dst_height) = (1280, 720);

        // CPU reference destination (DMA-backed, same size as the G2D output).
        let cpu_dst = TensorDyn::image(
            dst_width,
            dst_height,
            PixelFormat::Yuyv,
            DType::U8,
            Some(TensorMemory::Dma),
        )
        .unwrap();

        let g2d_dst = TensorDyn::image(
            dst_width,
            dst_height,
            PixelFormat::Yuyv,
            DType::U8,
            Some(TensorMemory::Dma),
        )
        .unwrap();

        let mut g2d_converter = G2DProcessor::new().unwrap();
        // Whole source scaled into a 2x2 region at (100, 100); no source crop
        // and no fill color for the area outside dst_rect.
        let crop = Crop {
            src_rect: None,
            dst_rect: Some(Rect::new(100, 100, 2, 2)),
            dst_color: None,
        };

        // Pre-fill the destination with 128 so the untouched area outside
        // dst_rect is identical between the two paths.
        g2d_dst
            .as_u8()
            .unwrap()
            .map()
            .unwrap()
            .as_mut_slice()
            .fill(128);
        let (result, src, g2d_dst) = convert_img(
            &mut g2d_converter,
            src,
            g2d_dst,
            Rotation::None,
            Flip::None,
            crop,
        );
        result.unwrap();

        // Same pre-fill and conversion on the CPU path.
        let cpu_dst_img = cpu_dst;
        cpu_dst_img
            .as_u8()
            .unwrap()
            .map()
            .unwrap()
            .as_mut_slice()
            .fill(128);
        let (result, _src, cpu_dst) = convert_img(
            &mut CPUProcessor::new(),
            src,
            cpu_dst_img,
            Rotation::None,
            Flip::None,
            crop,
        );
        result.unwrap();

        // YUYV outputs are compared after conversion to RGB.
        compare_images_convert_to_rgb(&cpu_dst, &g2d_dst, 0.98, function!());
    }
4270
4271    #[test]
4272    fn test_yuyv_to_rgba_cpu() {
4273        let file = include_bytes!(concat!(
4274            env!("CARGO_MANIFEST_DIR"),
4275            "/../../testdata/camera720p.yuyv"
4276        ))
4277        .to_vec();
4278        let src = TensorDyn::image(1280, 720, PixelFormat::Yuyv, DType::U8, None).unwrap();
4279        src.as_u8()
4280            .unwrap()
4281            .map()
4282            .unwrap()
4283            .as_mut_slice()
4284            .copy_from_slice(&file);
4285
4286        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
4287        let mut cpu_converter = CPUProcessor::new();
4288
4289        let (result, _src, dst) = convert_img(
4290            &mut cpu_converter,
4291            src,
4292            dst,
4293            Rotation::None,
4294            Flip::None,
4295            Crop::no_crop(),
4296        );
4297        result.unwrap();
4298
4299        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
4300        target_image
4301            .as_u8()
4302            .unwrap()
4303            .map()
4304            .unwrap()
4305            .as_mut_slice()
4306            .copy_from_slice(include_bytes!(concat!(
4307                env!("CARGO_MANIFEST_DIR"),
4308                "/../../testdata/camera720p.rgba"
4309            )));
4310
4311        compare_images(&dst, &target_image, 0.98, function!());
4312    }
4313
4314    #[test]
4315    fn test_yuyv_to_rgb_cpu() {
4316        let file = include_bytes!(concat!(
4317            env!("CARGO_MANIFEST_DIR"),
4318            "/../../testdata/camera720p.yuyv"
4319        ))
4320        .to_vec();
4321        let src = TensorDyn::image(1280, 720, PixelFormat::Yuyv, DType::U8, None).unwrap();
4322        src.as_u8()
4323            .unwrap()
4324            .map()
4325            .unwrap()
4326            .as_mut_slice()
4327            .copy_from_slice(&file);
4328
4329        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
4330        let mut cpu_converter = CPUProcessor::new();
4331
4332        let (result, _src, dst) = convert_img(
4333            &mut cpu_converter,
4334            src,
4335            dst,
4336            Rotation::None,
4337            Flip::None,
4338            Crop::no_crop(),
4339        );
4340        result.unwrap();
4341
4342        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
4343        target_image
4344            .as_u8()
4345            .unwrap()
4346            .map()
4347            .unwrap()
4348            .as_mut_slice()
4349            .as_chunks_mut::<3>()
4350            .0
4351            .iter_mut()
4352            .zip(
4353                include_bytes!(concat!(
4354                    env!("CARGO_MANIFEST_DIR"),
4355                    "/../../testdata/camera720p.rgba"
4356                ))
4357                .as_chunks::<4>()
4358                .0,
4359            )
4360            .for_each(|(dst, src)| *dst = [src[0], src[1], src[2]]);
4361
4362        compare_images(&dst, &target_image, 0.98, function!());
4363    }
4364
4365    #[test]
4366    #[cfg(target_os = "linux")]
4367    fn test_yuyv_to_rgba_g2d() {
4368        if !is_g2d_available() {
4369            eprintln!("SKIPPED: test_yuyv_to_rgba_g2d - G2D library (libg2d.so.2) not available");
4370            return;
4371        }
4372        if !is_dma_available() {
4373            eprintln!(
4374                "SKIPPED: test_yuyv_to_rgba_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4375            );
4376            return;
4377        }
4378
4379        let src = load_bytes_to_tensor(
4380            1280,
4381            720,
4382            PixelFormat::Yuyv,
4383            None,
4384            include_bytes!(concat!(
4385                env!("CARGO_MANIFEST_DIR"),
4386                "/../../testdata/camera720p.yuyv"
4387            )),
4388        )
4389        .unwrap();
4390
4391        let dst = TensorDyn::image(
4392            1280,
4393            720,
4394            PixelFormat::Rgba,
4395            DType::U8,
4396            Some(TensorMemory::Dma),
4397        )
4398        .unwrap();
4399        let mut g2d_converter = G2DProcessor::new().unwrap();
4400
4401        let (result, _src, dst) = convert_img(
4402            &mut g2d_converter,
4403            src,
4404            dst,
4405            Rotation::None,
4406            Flip::None,
4407            Crop::no_crop(),
4408        );
4409        result.unwrap();
4410
4411        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
4412        target_image
4413            .as_u8()
4414            .unwrap()
4415            .map()
4416            .unwrap()
4417            .as_mut_slice()
4418            .copy_from_slice(include_bytes!(concat!(
4419                env!("CARGO_MANIFEST_DIR"),
4420                "/../../testdata/camera720p.rgba"
4421            )));
4422
4423        compare_images(&dst, &target_image, 0.98, function!());
4424    }
4425
4426    #[test]
4427    #[cfg(target_os = "linux")]
4428    #[cfg(feature = "opengl")]
4429    fn test_yuyv_to_rgba_opengl() {
4430        if !is_opengl_available() {
4431            eprintln!("SKIPPED: {} - OpenGL not available", function!());
4432            return;
4433        }
4434        if !is_dma_available() {
4435            eprintln!(
4436                "SKIPPED: {} - DMA memory allocation not available (permission denied or no DMA-BUF support)",
4437                function!()
4438            );
4439            return;
4440        }
4441
4442        let src = load_bytes_to_tensor(
4443            1280,
4444            720,
4445            PixelFormat::Yuyv,
4446            Some(TensorMemory::Dma),
4447            include_bytes!(concat!(
4448                env!("CARGO_MANIFEST_DIR"),
4449                "/../../testdata/camera720p.yuyv"
4450            )),
4451        )
4452        .unwrap();
4453
4454        let dst = TensorDyn::image(
4455            1280,
4456            720,
4457            PixelFormat::Rgba,
4458            DType::U8,
4459            Some(TensorMemory::Dma),
4460        )
4461        .unwrap();
4462        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
4463
4464        let (result, _src, dst) = convert_img(
4465            &mut gl_converter,
4466            src,
4467            dst,
4468            Rotation::None,
4469            Flip::None,
4470            Crop::no_crop(),
4471        );
4472        result.unwrap();
4473
4474        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
4475        target_image
4476            .as_u8()
4477            .unwrap()
4478            .map()
4479            .unwrap()
4480            .as_mut_slice()
4481            .copy_from_slice(include_bytes!(concat!(
4482                env!("CARGO_MANIFEST_DIR"),
4483                "/../../testdata/camera720p.rgba"
4484            )));
4485
4486        compare_images(&dst, &target_image, 0.98, function!());
4487    }
4488
4489    #[test]
4490    #[cfg(target_os = "linux")]
4491    fn test_yuyv_to_rgb_g2d() {
4492        if !is_g2d_available() {
4493            eprintln!("SKIPPED: test_yuyv_to_rgb_g2d - G2D library (libg2d.so.2) not available");
4494            return;
4495        }
4496        if !is_dma_available() {
4497            eprintln!(
4498                "SKIPPED: test_yuyv_to_rgb_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4499            );
4500            return;
4501        }
4502
4503        let src = load_bytes_to_tensor(
4504            1280,
4505            720,
4506            PixelFormat::Yuyv,
4507            None,
4508            include_bytes!(concat!(
4509                env!("CARGO_MANIFEST_DIR"),
4510                "/../../testdata/camera720p.yuyv"
4511            )),
4512        )
4513        .unwrap();
4514
4515        let g2d_dst = TensorDyn::image(
4516            1280,
4517            720,
4518            PixelFormat::Rgb,
4519            DType::U8,
4520            Some(TensorMemory::Dma),
4521        )
4522        .unwrap();
4523        let mut g2d_converter = G2DProcessor::new().unwrap();
4524
4525        let (result, src, g2d_dst) = convert_img(
4526            &mut g2d_converter,
4527            src,
4528            g2d_dst,
4529            Rotation::None,
4530            Flip::None,
4531            Crop::no_crop(),
4532        );
4533        result.unwrap();
4534
4535        let cpu_dst = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
4536        let mut cpu_converter: CPUProcessor = CPUProcessor::new();
4537
4538        let (result, _src, cpu_dst) = convert_img(
4539            &mut cpu_converter,
4540            src,
4541            cpu_dst,
4542            Rotation::None,
4543            Flip::None,
4544            Crop::no_crop(),
4545        );
4546        result.unwrap();
4547
4548        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
4549    }
4550
4551    #[test]
4552    #[cfg(target_os = "linux")]
4553    fn test_yuyv_to_yuyv_resize_g2d() {
4554        if !is_g2d_available() {
4555            eprintln!(
4556                "SKIPPED: test_yuyv_to_yuyv_resize_g2d - G2D library (libg2d.so.2) not available"
4557            );
4558            return;
4559        }
4560        if !is_dma_available() {
4561            eprintln!(
4562                "SKIPPED: test_yuyv_to_yuyv_resize_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4563            );
4564            return;
4565        }
4566
4567        let src = load_bytes_to_tensor(
4568            1280,
4569            720,
4570            PixelFormat::Yuyv,
4571            None,
4572            include_bytes!(concat!(
4573                env!("CARGO_MANIFEST_DIR"),
4574                "/../../testdata/camera720p.yuyv"
4575            )),
4576        )
4577        .unwrap();
4578
4579        let g2d_dst = TensorDyn::image(
4580            600,
4581            400,
4582            PixelFormat::Yuyv,
4583            DType::U8,
4584            Some(TensorMemory::Dma),
4585        )
4586        .unwrap();
4587        let mut g2d_converter = G2DProcessor::new().unwrap();
4588
4589        let (result, src, g2d_dst) = convert_img(
4590            &mut g2d_converter,
4591            src,
4592            g2d_dst,
4593            Rotation::None,
4594            Flip::None,
4595            Crop::no_crop(),
4596        );
4597        result.unwrap();
4598
4599        let cpu_dst = TensorDyn::image(600, 400, PixelFormat::Yuyv, DType::U8, None).unwrap();
4600        let mut cpu_converter: CPUProcessor = CPUProcessor::new();
4601
4602        let (result, _src, cpu_dst) = convert_img(
4603            &mut cpu_converter,
4604            src,
4605            cpu_dst,
4606            Rotation::None,
4607            Flip::None,
4608            Crop::no_crop(),
4609        );
4610        result.unwrap();
4611
4612        // TODO: compare PixelFormat::Yuyv and PixelFormat::Yuyv images without having to convert them to PixelFormat::Rgb
4613        compare_images_convert_to_rgb(&g2d_dst, &cpu_dst, 0.98, function!());
4614    }
4615
4616    #[test]
4617    fn test_yuyv_to_rgba_resize_cpu() {
4618        let src = load_bytes_to_tensor(
4619            1280,
4620            720,
4621            PixelFormat::Yuyv,
4622            None,
4623            include_bytes!(concat!(
4624                env!("CARGO_MANIFEST_DIR"),
4625                "/../../testdata/camera720p.yuyv"
4626            )),
4627        )
4628        .unwrap();
4629
4630        let (dst_width, dst_height) = (960, 540);
4631
4632        let dst =
4633            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4634        let mut cpu_converter = CPUProcessor::new();
4635
4636        let (result, _src, dst) = convert_img(
4637            &mut cpu_converter,
4638            src,
4639            dst,
4640            Rotation::None,
4641            Flip::None,
4642            Crop::no_crop(),
4643        );
4644        result.unwrap();
4645
4646        let dst_target =
4647            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4648        let src_target = load_bytes_to_tensor(
4649            1280,
4650            720,
4651            PixelFormat::Rgba,
4652            None,
4653            include_bytes!(concat!(
4654                env!("CARGO_MANIFEST_DIR"),
4655                "/../../testdata/camera720p.rgba"
4656            )),
4657        )
4658        .unwrap();
4659        let (result, _src_target, dst_target) = convert_img(
4660            &mut cpu_converter,
4661            src_target,
4662            dst_target,
4663            Rotation::None,
4664            Flip::None,
4665            Crop::no_crop(),
4666        );
4667        result.unwrap();
4668
4669        compare_images(&dst, &dst_target, 0.98, function!());
4670    }
4671
4672    #[test]
4673    #[cfg(target_os = "linux")]
4674    fn test_yuyv_to_rgba_crop_flip_g2d() {
4675        if !is_g2d_available() {
4676            eprintln!(
4677                "SKIPPED: test_yuyv_to_rgba_crop_flip_g2d - G2D library (libg2d.so.2) not available"
4678            );
4679            return;
4680        }
4681        if !is_dma_available() {
4682            eprintln!(
4683                "SKIPPED: test_yuyv_to_rgba_crop_flip_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4684            );
4685            return;
4686        }
4687
4688        let src = load_bytes_to_tensor(
4689            1280,
4690            720,
4691            PixelFormat::Yuyv,
4692            Some(TensorMemory::Dma),
4693            include_bytes!(concat!(
4694                env!("CARGO_MANIFEST_DIR"),
4695                "/../../testdata/camera720p.yuyv"
4696            )),
4697        )
4698        .unwrap();
4699
4700        let (dst_width, dst_height) = (640, 640);
4701
4702        let dst_g2d = TensorDyn::image(
4703            dst_width,
4704            dst_height,
4705            PixelFormat::Rgba,
4706            DType::U8,
4707            Some(TensorMemory::Dma),
4708        )
4709        .unwrap();
4710        let mut g2d_converter = G2DProcessor::new().unwrap();
4711        let crop = Crop {
4712            src_rect: Some(Rect {
4713                left: 20,
4714                top: 15,
4715                width: 400,
4716                height: 300,
4717            }),
4718            dst_rect: None,
4719            dst_color: None,
4720        };
4721
4722        let (result, src, dst_g2d) = convert_img(
4723            &mut g2d_converter,
4724            src,
4725            dst_g2d,
4726            Rotation::None,
4727            Flip::Horizontal,
4728            crop,
4729        );
4730        result.unwrap();
4731
4732        let dst_cpu = TensorDyn::image(
4733            dst_width,
4734            dst_height,
4735            PixelFormat::Rgba,
4736            DType::U8,
4737            Some(TensorMemory::Dma),
4738        )
4739        .unwrap();
4740        let mut cpu_converter = CPUProcessor::new();
4741
4742        let (result, _src, dst_cpu) = convert_img(
4743            &mut cpu_converter,
4744            src,
4745            dst_cpu,
4746            Rotation::None,
4747            Flip::Horizontal,
4748            crop,
4749        );
4750        result.unwrap();
4751        compare_images(&dst_g2d, &dst_cpu, 0.98, function!());
4752    }
4753
4754    #[test]
4755    #[cfg(target_os = "linux")]
4756    #[cfg(feature = "opengl")]
4757    fn test_yuyv_to_rgba_crop_flip_opengl() {
4758        if !is_opengl_available() {
4759            eprintln!("SKIPPED: {} - OpenGL not available", function!());
4760            return;
4761        }
4762
4763        if !is_dma_available() {
4764            eprintln!(
4765                "SKIPPED: {} - DMA memory allocation not available (permission denied or no DMA-BUF support)",
4766                function!()
4767            );
4768            return;
4769        }
4770
4771        let src = load_bytes_to_tensor(
4772            1280,
4773            720,
4774            PixelFormat::Yuyv,
4775            Some(TensorMemory::Dma),
4776            include_bytes!(concat!(
4777                env!("CARGO_MANIFEST_DIR"),
4778                "/../../testdata/camera720p.yuyv"
4779            )),
4780        )
4781        .unwrap();
4782
4783        let (dst_width, dst_height) = (640, 640);
4784
4785        let dst_gl = TensorDyn::image(
4786            dst_width,
4787            dst_height,
4788            PixelFormat::Rgba,
4789            DType::U8,
4790            Some(TensorMemory::Dma),
4791        )
4792        .unwrap();
4793        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
4794        let crop = Crop {
4795            src_rect: Some(Rect {
4796                left: 20,
4797                top: 15,
4798                width: 400,
4799                height: 300,
4800            }),
4801            dst_rect: None,
4802            dst_color: None,
4803        };
4804
4805        let (result, src, dst_gl) = convert_img(
4806            &mut gl_converter,
4807            src,
4808            dst_gl,
4809            Rotation::None,
4810            Flip::Horizontal,
4811            crop,
4812        );
4813        result.unwrap();
4814
4815        let dst_cpu = TensorDyn::image(
4816            dst_width,
4817            dst_height,
4818            PixelFormat::Rgba,
4819            DType::U8,
4820            Some(TensorMemory::Dma),
4821        )
4822        .unwrap();
4823        let mut cpu_converter = CPUProcessor::new();
4824
4825        let (result, _src, dst_cpu) = convert_img(
4826            &mut cpu_converter,
4827            src,
4828            dst_cpu,
4829            Rotation::None,
4830            Flip::Horizontal,
4831            crop,
4832        );
4833        result.unwrap();
4834        compare_images(&dst_gl, &dst_cpu, 0.98, function!());
4835    }
4836
4837    #[test]
4838    fn test_vyuy_to_rgba_cpu() {
4839        let file = include_bytes!(concat!(
4840            env!("CARGO_MANIFEST_DIR"),
4841            "/../../testdata/camera720p.vyuy"
4842        ))
4843        .to_vec();
4844        let src = TensorDyn::image(1280, 720, PixelFormat::Vyuy, DType::U8, None).unwrap();
4845        src.as_u8()
4846            .unwrap()
4847            .map()
4848            .unwrap()
4849            .as_mut_slice()
4850            .copy_from_slice(&file);
4851
4852        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
4853        let mut cpu_converter = CPUProcessor::new();
4854
4855        let (result, _src, dst) = convert_img(
4856            &mut cpu_converter,
4857            src,
4858            dst,
4859            Rotation::None,
4860            Flip::None,
4861            Crop::no_crop(),
4862        );
4863        result.unwrap();
4864
4865        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
4866        target_image
4867            .as_u8()
4868            .unwrap()
4869            .map()
4870            .unwrap()
4871            .as_mut_slice()
4872            .copy_from_slice(include_bytes!(concat!(
4873                env!("CARGO_MANIFEST_DIR"),
4874                "/../../testdata/camera720p.rgba"
4875            )));
4876
4877        compare_images(&dst, &target_image, 0.98, function!());
4878    }
4879
4880    #[test]
4881    fn test_vyuy_to_rgb_cpu() {
4882        let file = include_bytes!(concat!(
4883            env!("CARGO_MANIFEST_DIR"),
4884            "/../../testdata/camera720p.vyuy"
4885        ))
4886        .to_vec();
4887        let src = TensorDyn::image(1280, 720, PixelFormat::Vyuy, DType::U8, None).unwrap();
4888        src.as_u8()
4889            .unwrap()
4890            .map()
4891            .unwrap()
4892            .as_mut_slice()
4893            .copy_from_slice(&file);
4894
4895        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
4896        let mut cpu_converter = CPUProcessor::new();
4897
4898        let (result, _src, dst) = convert_img(
4899            &mut cpu_converter,
4900            src,
4901            dst,
4902            Rotation::None,
4903            Flip::None,
4904            Crop::no_crop(),
4905        );
4906        result.unwrap();
4907
4908        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
4909        target_image
4910            .as_u8()
4911            .unwrap()
4912            .map()
4913            .unwrap()
4914            .as_mut_slice()
4915            .as_chunks_mut::<3>()
4916            .0
4917            .iter_mut()
4918            .zip(
4919                include_bytes!(concat!(
4920                    env!("CARGO_MANIFEST_DIR"),
4921                    "/../../testdata/camera720p.rgba"
4922                ))
4923                .as_chunks::<4>()
4924                .0,
4925            )
4926            .for_each(|(dst, src)| *dst = [src[0], src[1], src[2]]);
4927
4928        compare_images(&dst, &target_image, 0.98, function!());
4929    }
4930
4931    #[test]
4932    #[cfg(target_os = "linux")]
4933    #[ignore = "G2D does not support VYUY; re-enable when hardware support is added"]
4934    fn test_vyuy_to_rgba_g2d() {
4935        if !is_g2d_available() {
4936            eprintln!("SKIPPED: test_vyuy_to_rgba_g2d - G2D library (libg2d.so.2) not available");
4937            return;
4938        }
4939        if !is_dma_available() {
4940            eprintln!(
4941                "SKIPPED: test_vyuy_to_rgba_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4942            );
4943            return;
4944        }
4945
4946        let src = load_bytes_to_tensor(
4947            1280,
4948            720,
4949            PixelFormat::Vyuy,
4950            None,
4951            include_bytes!(concat!(
4952                env!("CARGO_MANIFEST_DIR"),
4953                "/../../testdata/camera720p.vyuy"
4954            )),
4955        )
4956        .unwrap();
4957
4958        let dst = TensorDyn::image(
4959            1280,
4960            720,
4961            PixelFormat::Rgba,
4962            DType::U8,
4963            Some(TensorMemory::Dma),
4964        )
4965        .unwrap();
4966        let mut g2d_converter = G2DProcessor::new().unwrap();
4967
4968        let (result, _src, dst) = convert_img(
4969            &mut g2d_converter,
4970            src,
4971            dst,
4972            Rotation::None,
4973            Flip::None,
4974            Crop::no_crop(),
4975        );
4976        match result {
4977            Err(Error::G2D(_)) => {
4978                eprintln!("SKIPPED: test_vyuy_to_rgba_g2d - G2D does not support PixelFormat::Vyuy format");
4979                return;
4980            }
4981            r => r.unwrap(),
4982        }
4983
4984        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
4985        target_image
4986            .as_u8()
4987            .unwrap()
4988            .map()
4989            .unwrap()
4990            .as_mut_slice()
4991            .copy_from_slice(include_bytes!(concat!(
4992                env!("CARGO_MANIFEST_DIR"),
4993                "/../../testdata/camera720p.rgba"
4994            )));
4995
4996        compare_images(&dst, &target_image, 0.98, function!());
4997    }
4998
4999    #[test]
5000    #[cfg(target_os = "linux")]
5001    #[ignore = "G2D does not support VYUY; re-enable when hardware support is added"]
5002    fn test_vyuy_to_rgb_g2d() {
5003        if !is_g2d_available() {
5004            eprintln!("SKIPPED: test_vyuy_to_rgb_g2d - G2D library (libg2d.so.2) not available");
5005            return;
5006        }
5007        if !is_dma_available() {
5008            eprintln!(
5009                "SKIPPED: test_vyuy_to_rgb_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
5010            );
5011            return;
5012        }
5013
5014        let src = load_bytes_to_tensor(
5015            1280,
5016            720,
5017            PixelFormat::Vyuy,
5018            None,
5019            include_bytes!(concat!(
5020                env!("CARGO_MANIFEST_DIR"),
5021                "/../../testdata/camera720p.vyuy"
5022            )),
5023        )
5024        .unwrap();
5025
5026        let g2d_dst = TensorDyn::image(
5027            1280,
5028            720,
5029            PixelFormat::Rgb,
5030            DType::U8,
5031            Some(TensorMemory::Dma),
5032        )
5033        .unwrap();
5034        let mut g2d_converter = G2DProcessor::new().unwrap();
5035
5036        let (result, src, g2d_dst) = convert_img(
5037            &mut g2d_converter,
5038            src,
5039            g2d_dst,
5040            Rotation::None,
5041            Flip::None,
5042            Crop::no_crop(),
5043        );
5044        match result {
5045            Err(Error::G2D(_)) => {
5046                eprintln!(
5047                    "SKIPPED: test_vyuy_to_rgb_g2d - G2D does not support PixelFormat::Vyuy format"
5048                );
5049                return;
5050            }
5051            r => r.unwrap(),
5052        }
5053
5054        let cpu_dst = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
5055        let mut cpu_converter: CPUProcessor = CPUProcessor::new();
5056
5057        let (result, _src, cpu_dst) = convert_img(
5058            &mut cpu_converter,
5059            src,
5060            cpu_dst,
5061            Rotation::None,
5062            Flip::None,
5063            Crop::no_crop(),
5064        );
5065        result.unwrap();
5066
5067        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
5068    }
5069
5070    #[test]
5071    #[cfg(target_os = "linux")]
5072    #[cfg(feature = "opengl")]
5073    fn test_vyuy_to_rgba_opengl() {
5074        if !is_opengl_available() {
5075            eprintln!("SKIPPED: {} - OpenGL not available", function!());
5076            return;
5077        }
5078        if !is_dma_available() {
5079            eprintln!(
5080                "SKIPPED: {} - DMA memory allocation not available (permission denied or no DMA-BUF support)",
5081                function!()
5082            );
5083            return;
5084        }
5085
5086        let src = load_bytes_to_tensor(
5087            1280,
5088            720,
5089            PixelFormat::Vyuy,
5090            Some(TensorMemory::Dma),
5091            include_bytes!(concat!(
5092                env!("CARGO_MANIFEST_DIR"),
5093                "/../../testdata/camera720p.vyuy"
5094            )),
5095        )
5096        .unwrap();
5097
5098        let dst = TensorDyn::image(
5099            1280,
5100            720,
5101            PixelFormat::Rgba,
5102            DType::U8,
5103            Some(TensorMemory::Dma),
5104        )
5105        .unwrap();
5106        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
5107
5108        let (result, _src, dst) = convert_img(
5109            &mut gl_converter,
5110            src,
5111            dst,
5112            Rotation::None,
5113            Flip::None,
5114            Crop::no_crop(),
5115        );
5116        match result {
5117            Err(Error::NotSupported(_)) => {
5118                eprintln!(
5119                    "SKIPPED: {} - OpenGL does not support PixelFormat::Vyuy DMA format",
5120                    function!()
5121                );
5122                return;
5123            }
5124            r => r.unwrap(),
5125        }
5126
5127        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
5128        target_image
5129            .as_u8()
5130            .unwrap()
5131            .map()
5132            .unwrap()
5133            .as_mut_slice()
5134            .copy_from_slice(include_bytes!(concat!(
5135                env!("CARGO_MANIFEST_DIR"),
5136                "/../../testdata/camera720p.rgba"
5137            )));
5138
5139        compare_images(&dst, &target_image, 0.98, function!());
5140    }
5141
5142    #[test]
5143    fn test_nv12_to_rgba_cpu() {
5144        let file = include_bytes!(concat!(
5145            env!("CARGO_MANIFEST_DIR"),
5146            "/../../testdata/zidane.nv12"
5147        ))
5148        .to_vec();
5149        let src = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
5150        src.as_u8().unwrap().map().unwrap().as_mut_slice()[0..(1280 * 720 * 3 / 2)]
5151            .copy_from_slice(&file);
5152
5153        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
5154        let mut cpu_converter = CPUProcessor::new();
5155
5156        let (result, _src, dst) = convert_img(
5157            &mut cpu_converter,
5158            src,
5159            dst,
5160            Rotation::None,
5161            Flip::None,
5162            Crop::no_crop(),
5163        );
5164        result.unwrap();
5165
5166        let target_image = crate::load_image(
5167            include_bytes!(concat!(
5168                env!("CARGO_MANIFEST_DIR"),
5169                "/../../testdata/zidane.jpg"
5170            )),
5171            Some(PixelFormat::Rgba),
5172            None,
5173        )
5174        .unwrap();
5175
5176        compare_images(&dst, &target_image, 0.98, function!());
5177    }
5178
5179    #[test]
5180    fn test_nv12_to_rgb_cpu() {
5181        let file = include_bytes!(concat!(
5182            env!("CARGO_MANIFEST_DIR"),
5183            "/../../testdata/zidane.nv12"
5184        ))
5185        .to_vec();
5186        let src = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
5187        src.as_u8().unwrap().map().unwrap().as_mut_slice()[0..(1280 * 720 * 3 / 2)]
5188            .copy_from_slice(&file);
5189
5190        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
5191        let mut cpu_converter = CPUProcessor::new();
5192
5193        let (result, _src, dst) = convert_img(
5194            &mut cpu_converter,
5195            src,
5196            dst,
5197            Rotation::None,
5198            Flip::None,
5199            Crop::no_crop(),
5200        );
5201        result.unwrap();
5202
5203        let target_image = crate::load_image(
5204            include_bytes!(concat!(
5205                env!("CARGO_MANIFEST_DIR"),
5206                "/../../testdata/zidane.jpg"
5207            )),
5208            Some(PixelFormat::Rgb),
5209            None,
5210        )
5211        .unwrap();
5212
5213        compare_images(&dst, &target_image, 0.98, function!());
5214    }
5215
5216    #[test]
5217    fn test_nv12_to_grey_cpu() {
5218        let file = include_bytes!(concat!(
5219            env!("CARGO_MANIFEST_DIR"),
5220            "/../../testdata/zidane.nv12"
5221        ))
5222        .to_vec();
5223        let src = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
5224        src.as_u8().unwrap().map().unwrap().as_mut_slice()[0..(1280 * 720 * 3 / 2)]
5225            .copy_from_slice(&file);
5226
5227        let dst = TensorDyn::image(1280, 720, PixelFormat::Grey, DType::U8, None).unwrap();
5228        let mut cpu_converter = CPUProcessor::new();
5229
5230        let (result, _src, dst) = convert_img(
5231            &mut cpu_converter,
5232            src,
5233            dst,
5234            Rotation::None,
5235            Flip::None,
5236            Crop::no_crop(),
5237        );
5238        result.unwrap();
5239
5240        let target_image = crate::load_image(
5241            include_bytes!(concat!(
5242                env!("CARGO_MANIFEST_DIR"),
5243                "/../../testdata/zidane.jpg"
5244            )),
5245            Some(PixelFormat::Grey),
5246            None,
5247        )
5248        .unwrap();
5249
5250        compare_images(&dst, &target_image, 0.98, function!());
5251    }
5252
5253    #[test]
5254    fn test_nv12_to_yuyv_cpu() {
5255        let file = include_bytes!(concat!(
5256            env!("CARGO_MANIFEST_DIR"),
5257            "/../../testdata/zidane.nv12"
5258        ))
5259        .to_vec();
5260        let src = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
5261        src.as_u8().unwrap().map().unwrap().as_mut_slice()[0..(1280 * 720 * 3 / 2)]
5262            .copy_from_slice(&file);
5263
5264        let dst = TensorDyn::image(1280, 720, PixelFormat::Yuyv, DType::U8, None).unwrap();
5265        let mut cpu_converter = CPUProcessor::new();
5266
5267        let (result, _src, dst) = convert_img(
5268            &mut cpu_converter,
5269            src,
5270            dst,
5271            Rotation::None,
5272            Flip::None,
5273            Crop::no_crop(),
5274        );
5275        result.unwrap();
5276
5277        let target_image = crate::load_image(
5278            include_bytes!(concat!(
5279                env!("CARGO_MANIFEST_DIR"),
5280                "/../../testdata/zidane.jpg"
5281            )),
5282            Some(PixelFormat::Rgb),
5283            None,
5284        )
5285        .unwrap();
5286
5287        compare_images_convert_to_rgb(&dst, &target_image, 0.98, function!());
5288    }
5289
    #[test]
    fn test_cpu_resize_planar_rgb() {
        // Letterbox a 4x4 RGBA source into a 5x5 planar-RGB destination: the
        // content is placed in a 4x4 rect at (1, 1), so destination row 0 and
        // column 0 hold the pad color 114.
        let src = TensorDyn::image(4, 4, PixelFormat::Rgba, DType::U8, None).unwrap();
        // 4x4 interleaved RGBA source, one pixel per group of four bytes.
        #[rustfmt::skip]
        let src_image = [
                    255, 0, 0, 255,     0, 255, 0, 255,     0, 0, 255, 255,     255, 255, 0, 255,
                    255, 0, 0, 0,       0, 0, 0, 255,       255,  0, 255, 0,    255, 0, 255, 255,
                    0, 0, 255, 0,       0, 255, 255, 255,   255, 255, 0, 0,     0, 0, 0, 255,
                    255, 0, 0, 0,       0, 0, 0, 255,       255,  0, 255, 0,    255, 0, 255, 255,
        ];
        src.as_u8()
            .unwrap()
            .map()
            .unwrap()
            .as_mut_slice()
            .copy_from_slice(&src_image);

        let cpu_dst = TensorDyn::image(5, 5, PixelFormat::PlanarRgb, DType::U8, None).unwrap();
        let mut cpu_converter = CPUProcessor::new();

        let (result, _src, cpu_dst) = convert_img(
            &mut cpu_converter,
            src,
            cpu_dst,
            Rotation::None,
            Flip::None,
            // 4x4 content rect at (1, 1); everything outside is filled with
            // the [114, 114, 114, 255] pad color (alpha unused for PlanarRgb).
            Crop::new()
                .with_dst_rect(Some(Rect {
                    left: 1,
                    top: 1,
                    width: 4,
                    height: 4,
                }))
                .with_dst_color(Some([114, 114, 114, 255])),
        );
        result.unwrap();

        // Expected planar output: 3 planes of 5x5 bytes (R, then G, then B);
        // each source line below is one full plane written as five 5-byte
        // rows.  The first row and first byte of every row are the 114 pad.
        #[rustfmt::skip]
        let expected_dst = [
            114, 114, 114, 114, 114,    114, 255, 0, 0, 255,    114, 255, 0, 255, 255,      114, 0, 0, 255, 0,        114, 255, 0, 255, 255,
            114, 114, 114, 114, 114,    114, 0, 255, 0, 255,    114, 0, 0, 0, 0,            114, 0, 255, 255, 0,      114, 0, 0, 0, 0,
            114, 114, 114, 114, 114,    114, 0, 0, 255, 0,      114, 0, 0, 255, 255,        114, 255, 255, 0, 0,      114, 0, 0, 255, 255,
        ];

        assert_eq!(
            cpu_dst.as_u8().unwrap().map().unwrap().as_slice(),
            &expected_dst
        );
    }
5339
    #[test]
    fn test_cpu_resize_planar_rgba() {
        // Same letterbox placement as test_cpu_resize_planar_rgb, but into a
        // planar-RGBA destination so the alpha plane is produced as well.
        let src = TensorDyn::image(4, 4, PixelFormat::Rgba, DType::U8, None).unwrap();
        // 4x4 interleaved RGBA source, one pixel per group of four bytes.
        #[rustfmt::skip]
        let src_image = [
                    255, 0, 0, 255,     0, 255, 0, 255,     0, 0, 255, 255,     255, 255, 0, 255,
                    255, 0, 0, 0,       0, 0, 0, 255,       255,  0, 255, 0,    255, 0, 255, 255,
                    0, 0, 255, 0,       0, 255, 255, 255,   255, 255, 0, 0,     0, 0, 0, 255,
                    255, 0, 0, 0,       0, 0, 0, 255,       255,  0, 255, 0,    255, 0, 255, 255,
        ];
        src.as_u8()
            .unwrap()
            .map()
            .unwrap()
            .as_mut_slice()
            .copy_from_slice(&src_image);

        let cpu_dst = TensorDyn::image(5, 5, PixelFormat::PlanarRgba, DType::U8, None).unwrap();
        let mut cpu_converter = CPUProcessor::new();

        let (result, _src, cpu_dst) = convert_img(
            &mut cpu_converter,
            src,
            cpu_dst,
            Rotation::None,
            Flip::None,
            // 4x4 content rect at (1, 1); everything outside is filled with
            // the [114, 114, 114, 255] pad color (pad alpha is 255).
            Crop::new()
                .with_dst_rect(Some(Rect {
                    left: 1,
                    top: 1,
                    width: 4,
                    height: 4,
                }))
                .with_dst_color(Some([114, 114, 114, 255])),
        );
        result.unwrap();

        // Expected planar output: 4 planes of 5x5 bytes (R, G, B, then A);
        // each source line below is one full plane written as five 5-byte
        // rows.  Pad cells are 114 in the color planes and 255 in alpha.
        #[rustfmt::skip]
        let expected_dst = [
            114, 114, 114, 114, 114,    114, 255, 0, 0, 255,        114, 255, 0, 255, 255,      114, 0, 0, 255, 0,        114, 255, 0, 255, 255,
            114, 114, 114, 114, 114,    114, 0, 255, 0, 255,        114, 0, 0, 0, 0,            114, 0, 255, 255, 0,      114, 0, 0, 0, 0,
            114, 114, 114, 114, 114,    114, 0, 0, 255, 0,          114, 0, 0, 255, 255,        114, 255, 255, 0, 0,      114, 0, 0, 255, 255,
            255, 255, 255, 255, 255,    255, 255, 255, 255, 255,    255, 0, 255, 0, 255,        255, 0, 255, 0, 255,      255, 0, 255, 0, 255,
        ];

        assert_eq!(
            cpu_dst.as_u8().unwrap().map().unwrap().as_slice(),
            &expected_dst
        );
    }
5390
    #[test]
    #[cfg(target_os = "linux")]
    #[cfg(feature = "opengl")]
    fn test_opengl_resize_planar_rgb() {
        // Compare OpenGL and CPU letterbox resizes into planar RGB: both
        // backends resize the same JPEG into a 640x640 planar-RGB tensor with
        // a 440x440 content rect at (102, 102) and 114-grey padding.
        if !is_opengl_available() {
            eprintln!("SKIPPED: {} - OpenGL not available", function!());
            return;
        }

        if !is_dma_available() {
            eprintln!(
                "SKIPPED: {} - DMA memory allocation not available (permission denied or no DMA-BUF support)",
                function!()
            );
            return;
        }

        let dst_width = 640;
        let dst_height = 640;
        let file = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../testdata/test_image.jpg"
        ))
        .to_vec();
        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();

        let cpu_dst = TensorDyn::image(
            dst_width,
            dst_height,
            PixelFormat::PlanarRgb,
            DType::U8,
            None,
        )
        .unwrap();
        let mut cpu_converter = CPUProcessor::new();
        // First pass: full-frame CPU resize with no crop.
        // NOTE(review): the letterbox pass below rewrites the destination
        // (content rect plus pad fill), so this first conversion appears
        // redundant — confirm whether it is intentional.
        let (result, src, cpu_dst) = convert_img(
            &mut cpu_converter,
            src,
            cpu_dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();
        // Letterbox: 440x440 content at (102, 102), grey (114) fill.  The
        // same Crop is reused for the OpenGL pass below.
        let crop_letterbox = Crop::new()
            .with_dst_rect(Some(Rect {
                left: 102,
                top: 102,
                width: 440,
                height: 440,
            }))
            .with_dst_color(Some([114, 114, 114, 114]));
        let (result, src, cpu_dst) = convert_img(
            &mut cpu_converter,
            src,
            cpu_dst,
            Rotation::None,
            Flip::None,
            crop_letterbox,
        );
        result.unwrap();

        let gl_dst = TensorDyn::image(
            dst_width,
            dst_height,
            PixelFormat::PlanarRgb,
            DType::U8,
            None,
        )
        .unwrap();
        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();

        // Same letterbox conversion through the OpenGL backend.
        let (result, _src, gl_dst) = convert_img(
            &mut gl_converter,
            src,
            gl_dst,
            Rotation::None,
            Flip::None,
            crop_letterbox,
        );
        result.unwrap();
        compare_images(&gl_dst, &cpu_dst, 0.98, function!());
    }
5474
5475    #[test]
5476    fn test_cpu_resize_nv16() {
5477        let file = include_bytes!(concat!(
5478            env!("CARGO_MANIFEST_DIR"),
5479            "/../../testdata/zidane.jpg"
5480        ))
5481        .to_vec();
5482        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
5483
5484        let cpu_nv16_dst = TensorDyn::image(640, 640, PixelFormat::Nv16, DType::U8, None).unwrap();
5485        let cpu_rgb_dst = TensorDyn::image(640, 640, PixelFormat::Rgb, DType::U8, None).unwrap();
5486        let mut cpu_converter = CPUProcessor::new();
5487        let crop = Crop::new()
5488            .with_dst_rect(Some(Rect {
5489                left: 20,
5490                top: 140,
5491                width: 600,
5492                height: 360,
5493            }))
5494            .with_dst_color(Some([255, 128, 0, 255]));
5495
5496        let (result, src, cpu_nv16_dst) = convert_img(
5497            &mut cpu_converter,
5498            src,
5499            cpu_nv16_dst,
5500            Rotation::None,
5501            Flip::None,
5502            crop,
5503        );
5504        result.unwrap();
5505
5506        let (result, _src, cpu_rgb_dst) = convert_img(
5507            &mut cpu_converter,
5508            src,
5509            cpu_rgb_dst,
5510            Rotation::None,
5511            Flip::None,
5512            crop,
5513        );
5514        result.unwrap();
5515        compare_images_convert_to_rgb(&cpu_nv16_dst, &cpu_rgb_dst, 0.99, function!());
5516    }
5517
5518    fn load_bytes_to_tensor(
5519        width: usize,
5520        height: usize,
5521        format: PixelFormat,
5522        memory: Option<TensorMemory>,
5523        bytes: &[u8],
5524    ) -> Result<TensorDyn, Error> {
5525        let src = TensorDyn::image(width, height, format, DType::U8, memory)?;
5526        src.as_u8()
5527            .unwrap()
5528            .map()?
5529            .as_mut_slice()
5530            .copy_from_slice(bytes);
5531        Ok(src)
5532    }
5533
5534    fn compare_images(img1: &TensorDyn, img2: &TensorDyn, threshold: f64, name: &str) {
5535        assert_eq!(img1.height(), img2.height(), "Heights differ");
5536        assert_eq!(img1.width(), img2.width(), "Widths differ");
5537        assert_eq!(
5538            img1.format().unwrap(),
5539            img2.format().unwrap(),
5540            "PixelFormat differ"
5541        );
5542        assert!(
5543            matches!(
5544                img1.format().unwrap(),
5545                PixelFormat::Rgb | PixelFormat::Rgba | PixelFormat::Grey | PixelFormat::PlanarRgb
5546            ),
5547            "format must be Rgb or Rgba for comparison"
5548        );
5549
5550        let image1 = match img1.format().unwrap() {
5551            PixelFormat::Rgb => image::RgbImage::from_vec(
5552                img1.width().unwrap() as u32,
5553                img1.height().unwrap() as u32,
5554                img1.as_u8().unwrap().map().unwrap().to_vec(),
5555            )
5556            .unwrap(),
5557            PixelFormat::Rgba => image::RgbaImage::from_vec(
5558                img1.width().unwrap() as u32,
5559                img1.height().unwrap() as u32,
5560                img1.as_u8().unwrap().map().unwrap().to_vec(),
5561            )
5562            .unwrap()
5563            .convert(),
5564            PixelFormat::Grey => image::GrayImage::from_vec(
5565                img1.width().unwrap() as u32,
5566                img1.height().unwrap() as u32,
5567                img1.as_u8().unwrap().map().unwrap().to_vec(),
5568            )
5569            .unwrap()
5570            .convert(),
5571            PixelFormat::PlanarRgb => image::GrayImage::from_vec(
5572                img1.width().unwrap() as u32,
5573                (img1.height().unwrap() * 3) as u32,
5574                img1.as_u8().unwrap().map().unwrap().to_vec(),
5575            )
5576            .unwrap()
5577            .convert(),
5578            _ => return,
5579        };
5580
5581        let image2 = match img2.format().unwrap() {
5582            PixelFormat::Rgb => image::RgbImage::from_vec(
5583                img2.width().unwrap() as u32,
5584                img2.height().unwrap() as u32,
5585                img2.as_u8().unwrap().map().unwrap().to_vec(),
5586            )
5587            .unwrap(),
5588            PixelFormat::Rgba => image::RgbaImage::from_vec(
5589                img2.width().unwrap() as u32,
5590                img2.height().unwrap() as u32,
5591                img2.as_u8().unwrap().map().unwrap().to_vec(),
5592            )
5593            .unwrap()
5594            .convert(),
5595            PixelFormat::Grey => image::GrayImage::from_vec(
5596                img2.width().unwrap() as u32,
5597                img2.height().unwrap() as u32,
5598                img2.as_u8().unwrap().map().unwrap().to_vec(),
5599            )
5600            .unwrap()
5601            .convert(),
5602            PixelFormat::PlanarRgb => image::GrayImage::from_vec(
5603                img2.width().unwrap() as u32,
5604                (img2.height().unwrap() * 3) as u32,
5605                img2.as_u8().unwrap().map().unwrap().to_vec(),
5606            )
5607            .unwrap()
5608            .convert(),
5609            _ => return,
5610        };
5611
5612        let similarity = image_compare::rgb_similarity_structure(
5613            &image_compare::Algorithm::RootMeanSquared,
5614            &image1,
5615            &image2,
5616        )
5617        .expect("Image Comparison failed");
5618        if similarity.score < threshold {
5619            // image1.save(format!("{name}_1.png"));
5620            // image2.save(format!("{name}_2.png"));
5621            similarity
5622                .image
5623                .to_color_map()
5624                .save(format!("{name}.png"))
5625                .unwrap();
5626            panic!(
5627                "{name}: converted image and target image have similarity score too low: {} < {}",
5628                similarity.score, threshold
5629            )
5630        }
5631    }
5632
    /// Compare two images of arbitrary pixel format by first converting both
    /// to RGB with the CPU processor, then scoring RMS structural similarity
    /// against `threshold`.  If either conversion fails, falls back to a raw
    /// byte comparison treating each buffer as a single-channel image.
    /// Panics (after saving a difference map to `{name}.png` in the main
    /// path) when the score is below `threshold`.
    fn compare_images_convert_to_rgb(
        img1: &TensorDyn,
        img2: &TensorDyn,
        threshold: f64,
        name: &str,
    ) {
        assert_eq!(img1.height(), img2.height(), "Heights differ");
        assert_eq!(img1.width(), img2.width(), "Widths differ");

        // RGB scratch tensors in plain memory; both use img1's geometry,
        // which the asserts above guarantee matches img2.
        let mut img_rgb1 = TensorDyn::image(
            img1.width().unwrap(),
            img1.height().unwrap(),
            PixelFormat::Rgb,
            DType::U8,
            Some(TensorMemory::Mem),
        )
        .unwrap();
        let mut img_rgb2 = TensorDyn::image(
            img1.width().unwrap(),
            img1.height().unwrap(),
            PixelFormat::Rgb,
            DType::U8,
            Some(TensorMemory::Mem),
        )
        .unwrap();
        // CPU converter used only to normalize both inputs to RGB.
        let mut __cv = CPUProcessor::default();
        let r1 = __cv.convert(
            img1,
            &mut img_rgb1,
            crate::Rotation::None,
            crate::Flip::None,
            crate::Crop::default(),
        );
        let r2 = __cv.convert(
            img2,
            &mut img_rgb2,
            crate::Rotation::None,
            crate::Flip::None,
            crate::Crop::default(),
        );
        if r1.is_err() || r2.is_err() {
            // Fallback: compare raw bytes as greyscale strip
            // Height is derived from the buffer length, so formats with
            // different bytes-per-pixel produce strips of different heights.
            let w = img1.width().unwrap() as u32;
            let data1 = img1.as_u8().unwrap().map().unwrap().to_vec();
            let data2 = img2.as_u8().unwrap().map().unwrap().to_vec();
            let h1 = (data1.len() as u32) / w;
            let h2 = (data2.len() as u32) / w;
            let g1 = image::GrayImage::from_vec(w, h1, data1).unwrap();
            let g2 = image::GrayImage::from_vec(w, h2, data2).unwrap();
            let similarity = image_compare::gray_similarity_structure(
                &image_compare::Algorithm::RootMeanSquared,
                &g1,
                &g2,
            )
            .expect("Image Comparison failed");
            if similarity.score < threshold {
                panic!(
                    "{name}: converted image and target image have similarity score too low: {} < {}",
                    similarity.score, threshold
                )
            }
            return;
        }

        // Main path: both conversions succeeded, compare the RGB tensors.
        let image1 = image::RgbImage::from_vec(
            img_rgb1.width().unwrap() as u32,
            img_rgb1.height().unwrap() as u32,
            img_rgb1.as_u8().unwrap().map().unwrap().to_vec(),
        )
        .unwrap();

        let image2 = image::RgbImage::from_vec(
            img_rgb2.width().unwrap() as u32,
            img_rgb2.height().unwrap() as u32,
            img_rgb2.as_u8().unwrap().map().unwrap().to_vec(),
        )
        .unwrap();

        let similarity = image_compare::rgb_similarity_structure(
            &image_compare::Algorithm::RootMeanSquared,
            &image1,
            &image2,
        )
        .expect("Image Comparison failed");
        if similarity.score < threshold {
            // image1.save(format!("{name}_1.png"));
            // image2.save(format!("{name}_2.png"));
            // Save the difference heat map for offline inspection.
            similarity
                .image
                .to_color_map()
                .save(format!("{name}.png"))
                .unwrap();
            panic!(
                "{name}: converted image and target image have similarity score too low: {} < {}",
                similarity.score, threshold
            )
        }
    }
5731
5732    // =========================================================================
5733    // PixelFormat::Nv12 Format Tests
5734    // =========================================================================
5735
5736    #[test]
5737    fn test_nv12_image_creation() {
5738        let width = 640;
5739        let height = 480;
5740        let img = TensorDyn::image(width, height, PixelFormat::Nv12, DType::U8, None).unwrap();
5741
5742        assert_eq!(img.width(), Some(width));
5743        assert_eq!(img.height(), Some(height));
5744        assert_eq!(img.format().unwrap(), PixelFormat::Nv12);
5745        // PixelFormat::Nv12 uses shape [H*3/2, W] to store Y plane + UV plane
5746        assert_eq!(img.as_u8().unwrap().shape(), &[height * 3 / 2, width]);
5747    }
5748
5749    #[test]
5750    fn test_nv12_channels() {
5751        let img = TensorDyn::image(640, 480, PixelFormat::Nv12, DType::U8, None).unwrap();
5752        // PixelFormat::Nv12.channels() returns 1 (luma plane)
5753        assert_eq!(img.format().unwrap().channels(), 1);
5754    }
5755
5756    // =========================================================================
5757    // Tensor Format Metadata Tests
5758    // =========================================================================
5759
5760    #[test]
5761    fn test_tensor_set_format_planar() {
5762        let mut tensor = Tensor::<u8>::new(&[3, 480, 640], None, None).unwrap();
5763        tensor.set_format(PixelFormat::PlanarRgb).unwrap();
5764        assert_eq!(tensor.format(), Some(PixelFormat::PlanarRgb));
5765        assert_eq!(tensor.width(), Some(640));
5766        assert_eq!(tensor.height(), Some(480));
5767    }
5768
5769    #[test]
5770    fn test_tensor_set_format_interleaved() {
5771        let mut tensor = Tensor::<u8>::new(&[480, 640, 4], None, None).unwrap();
5772        tensor.set_format(PixelFormat::Rgba).unwrap();
5773        assert_eq!(tensor.format(), Some(PixelFormat::Rgba));
5774        assert_eq!(tensor.width(), Some(640));
5775        assert_eq!(tensor.height(), Some(480));
5776    }
5777
5778    #[test]
5779    fn test_tensordyn_image_rgb() {
5780        let img = TensorDyn::image(640, 480, PixelFormat::Rgb, DType::U8, None).unwrap();
5781        assert_eq!(img.width(), Some(640));
5782        assert_eq!(img.height(), Some(480));
5783        assert_eq!(img.format(), Some(PixelFormat::Rgb));
5784    }
5785
5786    #[test]
5787    fn test_tensordyn_image_planar_rgb() {
5788        let img = TensorDyn::image(640, 480, PixelFormat::PlanarRgb, DType::U8, None).unwrap();
5789        assert_eq!(img.width(), Some(640));
5790        assert_eq!(img.height(), Some(480));
5791        assert_eq!(img.format(), Some(PixelFormat::PlanarRgb));
5792    }
5793
5794    #[test]
5795    fn test_rgb_int8_format() {
5796        // Int8 variant: same PixelFormat::Rgb but with DType::I8
5797        let img = TensorDyn::image(
5798            1280,
5799            720,
5800            PixelFormat::Rgb,
5801            DType::I8,
5802            Some(TensorMemory::Mem),
5803        )
5804        .unwrap();
5805        assert_eq!(img.width(), Some(1280));
5806        assert_eq!(img.height(), Some(720));
5807        assert_eq!(img.format(), Some(PixelFormat::Rgb));
5808        assert_eq!(img.dtype(), DType::I8);
5809    }
5810
5811    #[test]
5812    fn test_planar_rgb_int8_format() {
5813        let img = TensorDyn::image(
5814            1280,
5815            720,
5816            PixelFormat::PlanarRgb,
5817            DType::I8,
5818            Some(TensorMemory::Mem),
5819        )
5820        .unwrap();
5821        assert_eq!(img.width(), Some(1280));
5822        assert_eq!(img.height(), Some(720));
5823        assert_eq!(img.format(), Some(PixelFormat::PlanarRgb));
5824        assert_eq!(img.dtype(), DType::I8);
5825    }
5826
5827    #[test]
5828    fn test_rgb_from_tensor() {
5829        let mut tensor = Tensor::<u8>::new(&[720, 1280, 3], None, None).unwrap();
5830        tensor.set_format(PixelFormat::Rgb).unwrap();
5831        let img = TensorDyn::from(tensor);
5832        assert_eq!(img.width(), Some(1280));
5833        assert_eq!(img.height(), Some(720));
5834        assert_eq!(img.format(), Some(PixelFormat::Rgb));
5835    }
5836
5837    #[test]
5838    fn test_planar_rgb_from_tensor() {
5839        let mut tensor = Tensor::<u8>::new(&[3, 720, 1280], None, None).unwrap();
5840        tensor.set_format(PixelFormat::PlanarRgb).unwrap();
5841        let img = TensorDyn::from(tensor);
5842        assert_eq!(img.width(), Some(1280));
5843        assert_eq!(img.height(), Some(720));
5844        assert_eq!(img.format(), Some(PixelFormat::PlanarRgb));
5845    }
5846
5847    #[test]
5848    fn test_dtype_determines_int8() {
5849        // DType::I8 indicates int8 data
5850        let u8_img = TensorDyn::image(64, 64, PixelFormat::Rgb, DType::U8, None).unwrap();
5851        let i8_img = TensorDyn::image(64, 64, PixelFormat::Rgb, DType::I8, None).unwrap();
5852        assert_eq!(u8_img.dtype(), DType::U8);
5853        assert_eq!(i8_img.dtype(), DType::I8);
5854    }
5855
5856    #[test]
5857    fn test_pixel_layout_packed_vs_planar() {
5858        // Packed vs planar layout classification
5859        assert_eq!(PixelFormat::Rgb.layout(), PixelLayout::Packed);
5860        assert_eq!(PixelFormat::Rgba.layout(), PixelLayout::Packed);
5861        assert_eq!(PixelFormat::PlanarRgb.layout(), PixelLayout::Planar);
5862        assert_eq!(PixelFormat::Nv12.layout(), PixelLayout::SemiPlanar);
5863    }
5864
    /// Integration test that exercises the PBO-to-PBO convert path.
    /// Uses ImageProcessor::create_image() to allocate PBO-backed tensors,
    /// then converts between them. Skipped when GL is unavailable or the
    /// backend is not PBO (e.g. DMA-buf systems).
    #[cfg(target_os = "linux")]
    #[cfg(feature = "opengl")]
    #[test]
    fn test_convert_pbo_to_pbo() {
        let mut converter = ImageProcessor::new().unwrap();

        // Skip if GL is not available or backend is not PBO
        let is_pbo = converter
            .opengl
            .as_ref()
            .is_some_and(|gl| gl.transfer_backend() == opengl_headless::TransferBackend::Pbo);
        if !is_pbo {
            eprintln!("Skipping test_convert_pbo_to_pbo: backend is not PBO");
            return;
        }

        // Downscale 640x480 → 320x240 so the convert does real resampling.
        let src_w = 640;
        let src_h = 480;
        let dst_w = 320;
        let dst_h = 240;

        // Create PBO-backed source image
        let pbo_src = converter
            .create_image(src_w, src_h, PixelFormat::Rgba, DType::U8, None)
            .unwrap();
        assert_eq!(
            pbo_src.as_u8().unwrap().memory(),
            TensorMemory::Pbo,
            "create_image should produce a PBO tensor"
        );

        // Fill source PBO with test pattern: load JPEG then convert Mem→PBO
        let file = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../testdata/zidane.jpg"
        ))
        .to_vec();
        let jpeg_src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();

        // Resize JPEG into a Mem temp of the right size, then copy into PBO
        let mem_src = TensorDyn::image(
            src_w,
            src_h,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Mem),
        )
        .unwrap();
        // convert_img takes the tensors by value and hands them back, so the
        // resized Mem source is rebound from the returned tuple.
        let (result, _jpeg_src, mem_src) = convert_img(
            &mut CPUProcessor::new(),
            jpeg_src,
            mem_src,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        // Copy pixel data into the PBO source by mapping it
        // (scoped so both maps are released before the GL convert below).
        {
            let src_data = mem_src.as_u8().unwrap().map().unwrap();
            let mut pbo_map = pbo_src.as_u8().unwrap().map().unwrap();
            pbo_map.copy_from_slice(&src_data);
        }

        // Create PBO-backed destination image
        let pbo_dst = converter
            .create_image(dst_w, dst_h, PixelFormat::Rgba, DType::U8, None)
            .unwrap();
        assert_eq!(pbo_dst.as_u8().unwrap().memory(), TensorMemory::Pbo);

        // Convert PBO→PBO (this exercises convert_pbo_to_pbo)
        let mut pbo_dst = pbo_dst;
        let result = converter.convert(
            &pbo_src,
            &mut pbo_dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        // Verify: compare with CPU-only conversion of the same input
        let cpu_dst = TensorDyn::image(
            dst_w,
            dst_h,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Mem),
        )
        .unwrap();
        let (result, _mem_src, cpu_dst) = convert_img(
            &mut CPUProcessor::new(),
            mem_src,
            cpu_dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        // Re-attach the RGBA format after into_u8() so compare_images can
        // interpret the destination buffer as an image again.
        let pbo_dst_img = {
            let mut __t = pbo_dst.into_u8().unwrap();
            __t.set_format(PixelFormat::Rgba).unwrap();
            TensorDyn::from(__t)
        };
        // 0.95 RMS similarity: tolerates small filtering differences between
        // the GL and CPU scalers while still catching gross corruption.
        compare_images(&pbo_dst_img, &cpu_dst, 0.95, function!());
        log::info!("test_convert_pbo_to_pbo: PASS — PBO-to-PBO convert matches CPU reference");
    }
5978
5979    #[test]
5980    fn test_image_bgra() {
5981        let img = TensorDyn::image(
5982            640,
5983            480,
5984            PixelFormat::Bgra,
5985            DType::U8,
5986            Some(edgefirst_tensor::TensorMemory::Mem),
5987        )
5988        .unwrap();
5989        assert_eq!(img.width(), Some(640));
5990        assert_eq!(img.height(), Some(480));
5991        assert_eq!(img.format().unwrap().channels(), 4);
5992        assert_eq!(img.format().unwrap(), PixelFormat::Bgra);
5993    }
5994
5995    // ========================================================================
5996    // Tests for EDGEFIRST_FORCE_BACKEND env var
5997    // ========================================================================
5998
5999    #[test]
6000    fn test_force_backend_cpu() {
6001        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
6002        unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", "cpu") };
6003        let result = ImageProcessor::new();
6004        match original {
6005            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
6006            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6007        }
6008        let converter = result.unwrap();
6009        assert!(converter.cpu.is_some());
6010        assert_eq!(converter.forced_backend, Some(ForcedBackend::Cpu));
6011    }
6012
6013    #[test]
6014    fn test_force_backend_invalid() {
6015        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
6016        unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", "invalid") };
6017        let result = ImageProcessor::new();
6018        match original {
6019            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
6020            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6021        }
6022        assert!(
6023            matches!(&result, Err(Error::ForcedBackendUnavailable(s)) if s.contains("unknown")),
6024            "invalid backend value should return ForcedBackendUnavailable error: {result:?}"
6025        );
6026    }
6027
6028    #[test]
6029    fn test_force_backend_unset() {
6030        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
6031        unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") };
6032        let result = ImageProcessor::new();
6033        match original {
6034            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
6035            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6036        }
6037        let converter = result.unwrap();
6038        assert!(converter.forced_backend.is_none());
6039    }
6040
6041    // ========================================================================
6042    // Tests for hybrid mask path error handling
6043    // ========================================================================
6044
6045    #[test]
6046    fn test_draw_proto_masks_no_cpu_returns_error() {
6047        // Disable CPU backend to trigger the error path
6048        let original_cpu = std::env::var("EDGEFIRST_DISABLE_CPU").ok();
6049        unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", "1") };
6050        let original_gl = std::env::var("EDGEFIRST_DISABLE_GL").ok();
6051        unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", "1") };
6052        let original_g2d = std::env::var("EDGEFIRST_DISABLE_G2D").ok();
6053        unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", "1") };
6054
6055        let result = ImageProcessor::new();
6056
6057        match original_cpu {
6058            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", s) },
6059            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_CPU") },
6060        }
6061        match original_gl {
6062            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", s) },
6063            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_GL") },
6064        }
6065        match original_g2d {
6066            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", s) },
6067            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_G2D") },
6068        }
6069
6070        let mut converter = result.unwrap();
6071        assert!(converter.cpu.is_none(), "CPU should be disabled");
6072
6073        let dst = TensorDyn::image(
6074            640,
6075            480,
6076            PixelFormat::Rgba,
6077            DType::U8,
6078            Some(TensorMemory::Mem),
6079        )
6080        .unwrap();
6081        let mut dst_dyn = dst;
6082        let det = [DetectBox {
6083            bbox: edgefirst_decoder::BoundingBox {
6084                xmin: 0.1,
6085                ymin: 0.1,
6086                xmax: 0.5,
6087                ymax: 0.5,
6088            },
6089            score: 0.9,
6090            label: 0,
6091        }];
6092        let proto_data = ProtoData {
6093            mask_coefficients: vec![vec![0.5; 4]],
6094            protos: edgefirst_decoder::ProtoTensor::Float(ndarray::Array3::<f32>::zeros((8, 8, 4))),
6095        };
6096        let result =
6097            converter.draw_proto_masks(&mut dst_dyn, &det, &proto_data, Default::default());
6098        assert!(
6099            matches!(&result, Err(Error::Internal(s)) if s.contains("CPU backend")),
6100            "draw_proto_masks without CPU should return Internal error: {result:?}"
6101        );
6102    }
6103
6104    #[test]
6105    fn test_draw_proto_masks_cpu_fallback_works() {
6106        // Force CPU-only backend to ensure the CPU fallback path executes
6107        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
6108        unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", "cpu") };
6109        let result = ImageProcessor::new();
6110        match original {
6111            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
6112            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6113        }
6114
6115        let mut converter = result.unwrap();
6116        assert!(converter.cpu.is_some());
6117
6118        let dst = TensorDyn::image(
6119            64,
6120            64,
6121            PixelFormat::Rgba,
6122            DType::U8,
6123            Some(TensorMemory::Mem),
6124        )
6125        .unwrap();
6126        let mut dst_dyn = dst;
6127        let det = [DetectBox {
6128            bbox: edgefirst_decoder::BoundingBox {
6129                xmin: 0.1,
6130                ymin: 0.1,
6131                xmax: 0.5,
6132                ymax: 0.5,
6133            },
6134            score: 0.9,
6135            label: 0,
6136        }];
6137        let proto_data = ProtoData {
6138            mask_coefficients: vec![vec![0.5; 4]],
6139            protos: edgefirst_decoder::ProtoTensor::Float(ndarray::Array3::<f32>::zeros((8, 8, 4))),
6140        };
6141        let result =
6142            converter.draw_proto_masks(&mut dst_dyn, &det, &proto_data, Default::default());
6143        assert!(result.is_ok(), "CPU fallback path should work: {result:?}");
6144    }
6145
6146    // ============================================================
    // draw_decoded_masks / draw_proto_masks — 4-scenario pixel-
    // verified tests. Each test exercises one backend against the
    // full output-contract matrix:
6150    //
6151    //   | detections | background | expected dst             |
6152    //   |------------|------------|--------------------------|
6153    //   | empty      | none       | fully cleared (0x00)     |
6154    //   | empty      | set        | fully equal to bg        |
6155    //   | set        | none       | cleared outside box +    |
6156    //   |            |            | mask-coloured inside     |
6157    //   | set        | set        | bg outside box + mask    |
6158    //   |            |            | blended inside           |
6159    //
6160    // Every test pre-fills dst with a non-zero "dirty" pattern so
6161    // that any silent `return Ok(())` leaks the pattern into the
6162    // asserted output and fails loudly.
6163    // ============================================================
6164
6165    /// Run `body` with `EDGEFIRST_FORCE_BACKEND` temporarily set (or
6166    /// removed), restoring the prior value afterward. Tests are mutated
6167    /// env-serialized via the process-wide `FORCE_BACKEND_MUTEX`.
6168    fn with_force_backend<R>(value: Option<&str>, body: impl FnOnce() -> R) -> R {
6169        use std::sync::{Mutex, MutexGuard, OnceLock};
6170        static LOCK: OnceLock<Mutex<()>> = OnceLock::new();
6171        let _guard: MutexGuard<()> = LOCK
6172            .get_or_init(|| Mutex::new(()))
6173            .lock()
6174            .unwrap_or_else(|e| e.into_inner());
6175        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
6176        match value {
6177            Some(v) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", v) },
6178            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6179        }
6180        let r = body();
6181        match original {
6182            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
6183            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6184        }
6185        r
6186    }
6187
6188    /// Allocate an RGBA image tensor and pre-fill every byte with a
6189    /// distinctive non-zero pattern. Any test that relies on the old
6190    /// "dst is already cleared" assumption will see this pattern leak
6191    /// through to the output and fail.
6192    fn make_dirty_dst(w: usize, h: usize, mem: Option<TensorMemory>) -> TensorDyn {
6193        let dst = TensorDyn::image(w, h, PixelFormat::Rgba, DType::U8, mem).unwrap();
6194        {
6195            use edgefirst_tensor::TensorMapTrait;
6196            let u8t = dst.as_u8().unwrap();
6197            let mut map = u8t.map().unwrap();
6198            for (i, b) in map.as_mut_slice().iter_mut().enumerate() {
6199                *b = 0xA0u8.wrapping_add((i as u8) & 0x3F);
6200            }
6201        }
6202        dst
6203    }
6204
6205    /// Allocate an RGBA background filled with a constant colour.
6206    fn make_bg(w: usize, h: usize, mem: Option<TensorMemory>, rgba: [u8; 4]) -> TensorDyn {
6207        let bg = TensorDyn::image(w, h, PixelFormat::Rgba, DType::U8, mem).unwrap();
6208        {
6209            use edgefirst_tensor::TensorMapTrait;
6210            let u8t = bg.as_u8().unwrap();
6211            let mut map = u8t.map().unwrap();
6212            for chunk in map.as_mut_slice().chunks_exact_mut(4) {
6213                chunk.copy_from_slice(&rgba);
6214            }
6215        }
6216        bg
6217    }
6218
6219    fn pixel_at(dst: &TensorDyn, x: usize, y: usize) -> [u8; 4] {
6220        use edgefirst_tensor::TensorMapTrait;
6221        let w = dst.width().unwrap();
6222        let off = (y * w + x) * 4;
6223        let u8t = dst.as_u8().unwrap();
6224        let map = u8t.map().unwrap();
6225        let s = map.as_slice();
6226        [s[off], s[off + 1], s[off + 2], s[off + 3]]
6227    }
6228
6229    fn assert_every_pixel_eq(dst: &TensorDyn, expected: [u8; 4], case: &str) {
6230        use edgefirst_tensor::TensorMapTrait;
6231        let u8t = dst.as_u8().unwrap();
6232        let map = u8t.map().unwrap();
6233        for (i, chunk) in map.as_slice().chunks_exact(4).enumerate() {
6234            assert_eq!(
6235                chunk, &expected,
6236                "{case}: pixel idx {i} = {chunk:?}, expected {expected:?}"
6237            );
6238        }
6239    }
6240
6241    /// Scenario 1: empty detections, empty segmentation, no background
6242    /// → dst must be fully cleared to 0x00000000.
6243    fn scenario_empty_no_bg(processor: &mut ImageProcessor, case: &str) {
6244        let mut dst = make_dirty_dst(64, 64, None);
6245        processor
6246            .draw_decoded_masks(&mut dst, &[], &[], MaskOverlay::default())
6247            .unwrap_or_else(|e| panic!("{case}/decoded_masks empty+no-bg failed: {e:?}"));
6248        assert_every_pixel_eq(&dst, [0, 0, 0, 0], &format!("{case}/decoded"));
6249
6250        let mut dst = make_dirty_dst(64, 64, None);
6251        let proto = ProtoData {
6252            mask_coefficients: vec![],
6253            protos: edgefirst_decoder::ProtoTensor::Float(ndarray::Array3::<f32>::zeros((8, 8, 4))),
6254        };
6255        processor
6256            .draw_proto_masks(&mut dst, &[], &proto, MaskOverlay::default())
6257            .unwrap_or_else(|e| panic!("{case}/proto_masks empty+no-bg failed: {e:?}"));
6258        assert_every_pixel_eq(&dst, [0, 0, 0, 0], &format!("{case}/proto"));
6259    }
6260
6261    /// Scenario 2: empty detections, empty segmentation, background set
6262    /// → dst must be fully equal to bg.
6263    fn scenario_empty_with_bg(processor: &mut ImageProcessor, case: &str) {
6264        let bg_color = [42, 99, 200, 255];
6265        let bg = make_bg(64, 64, None, bg_color);
6266        let overlay = MaskOverlay::new().with_background(&bg);
6267
6268        let mut dst = make_dirty_dst(64, 64, None);
6269        processor
6270            .draw_decoded_masks(&mut dst, &[], &[], overlay)
6271            .unwrap_or_else(|e| panic!("{case}/decoded_masks empty+bg failed: {e:?}"));
6272        assert_every_pixel_eq(&dst, bg_color, &format!("{case}/decoded bg blit"));
6273
6274        let mut dst = make_dirty_dst(64, 64, None);
6275        let proto = ProtoData {
6276            mask_coefficients: vec![],
6277            protos: edgefirst_decoder::ProtoTensor::Float(ndarray::Array3::<f32>::zeros((8, 8, 4))),
6278        };
6279        processor
6280            .draw_proto_masks(&mut dst, &[], &proto, overlay)
6281            .unwrap_or_else(|e| panic!("{case}/proto_masks empty+bg failed: {e:?}"));
6282        assert_every_pixel_eq(&dst, bg_color, &format!("{case}/proto bg blit"));
6283    }
6284
6285    /// Scenario 3: one detection with a fully-opaque segmentation fill,
6286    /// no background → outside the box dst must be 0x00, inside it must
6287    /// be a non-zero mask colour (the render_segmentation output).
6288    fn scenario_detect_no_bg(processor: &mut ImageProcessor, case: &str) {
6289        use edgefirst_decoder::Segmentation;
6290        use ndarray::Array3;
6291        processor
6292            .set_class_colors(&[[200, 80, 40, 255]])
6293            .expect("set_class_colors");
6294
6295        let detect = DetectBox {
6296            bbox: [0.25, 0.25, 0.75, 0.75].into(),
6297            score: 0.99,
6298            label: 0,
6299        };
6300        let seg_arr = Array3::from_shape_fn((4, 4, 1), |_| 255u8);
6301        let seg = Segmentation {
6302            segmentation: seg_arr,
6303            xmin: 0.25,
6304            ymin: 0.25,
6305            xmax: 0.75,
6306            ymax: 0.75,
6307        };
6308
6309        let mut dst = make_dirty_dst(64, 64, None);
6310        processor
6311            .draw_decoded_masks(&mut dst, &[detect], &[seg], MaskOverlay::default())
6312            .unwrap_or_else(|e| panic!("{case}/decoded_masks detect+no-bg failed: {e:?}"));
6313
6314        // Outside the bbox (corner): must be cleared black.
6315        let corner = pixel_at(&dst, 2, 2);
6316        assert_eq!(
6317            corner,
6318            [0, 0, 0, 0],
6319            "{case}/decoded: corner (2,2) leaked dirty pattern: {corner:?}"
6320        );
6321        // Inside the bbox (center): the mask colour must be visible.
6322        // Any non-zero pixel is acceptable — exact rendering varies
6323        // between backends (GL smoothstep, CPU nearest).
6324        let center = pixel_at(&dst, 32, 32);
6325        assert!(
6326            center != [0, 0, 0, 0],
6327            "{case}/decoded: center (32,32) was not coloured: {center:?}"
6328        );
6329    }
6330
6331    /// Scenario 4: detection + background. Outside the box must match
6332    /// bg; inside the box must NOT match bg (mask blended on top).
6333    fn scenario_detect_with_bg(processor: &mut ImageProcessor, case: &str) {
6334        use edgefirst_decoder::Segmentation;
6335        use ndarray::Array3;
6336        processor
6337            .set_class_colors(&[[200, 80, 40, 255]])
6338            .expect("set_class_colors");
6339        let bg_color = [10, 20, 30, 255];
6340        let bg = make_bg(64, 64, None, bg_color);
6341
6342        let detect = DetectBox {
6343            bbox: [0.25, 0.25, 0.75, 0.75].into(),
6344            score: 0.99,
6345            label: 0,
6346        };
6347        let seg_arr = Array3::from_shape_fn((4, 4, 1), |_| 255u8);
6348        let seg = Segmentation {
6349            segmentation: seg_arr,
6350            xmin: 0.25,
6351            ymin: 0.25,
6352            xmax: 0.75,
6353            ymax: 0.75,
6354        };
6355
6356        let overlay = MaskOverlay::new().with_background(&bg);
6357        let mut dst = make_dirty_dst(64, 64, None);
6358        processor
6359            .draw_decoded_masks(&mut dst, &[detect], &[seg], overlay)
6360            .unwrap_or_else(|e| panic!("{case}/decoded_masks detect+bg failed: {e:?}"));
6361
6362        // Outside the bbox (corner): bg colour.
6363        let corner = pixel_at(&dst, 2, 2);
6364        assert_eq!(
6365            corner, bg_color,
6366            "{case}/decoded: corner (2,2) should show bg {bg_color:?} got {corner:?}"
6367        );
6368        // Inside the bbox (center): mask blended on bg, must differ from
6369        // pure bg (alpha-blend with mask colour produces a distinct shade).
6370        let center = pixel_at(&dst, 32, 32);
6371        assert!(
6372            center != bg_color,
6373            "{case}/decoded: center (32,32) should differ from bg {bg_color:?}, got {center:?}"
6374        );
6375    }
6376
6377    /// Run all 4 scenarios against the processor. Skip gracefully if
6378    /// construction fails (backend unavailable on this host).
6379    fn run_all_scenarios(
6380        force_backend: Option<&'static str>,
6381        case: &'static str,
6382        require_dma_for_bg: bool,
6383    ) {
6384        if require_dma_for_bg && !edgefirst_tensor::is_dma_available() {
6385            eprintln!("SKIPPED: {case} — DMA not available on this host");
6386            return;
6387        }
6388        let processor_result = with_force_backend(force_backend, ImageProcessor::new);
6389        let mut processor = match processor_result {
6390            Ok(p) => p,
6391            Err(e) => {
6392                eprintln!("SKIPPED: {case} — backend init failed: {e:?}");
6393                return;
6394            }
6395        };
6396        scenario_empty_no_bg(&mut processor, case);
6397        scenario_empty_with_bg(&mut processor, case);
6398        scenario_detect_no_bg(&mut processor, case);
6399        scenario_detect_with_bg(&mut processor, case);
6400    }
6401
6402    #[test]
6403    fn test_draw_masks_4_scenarios_cpu() {
6404        run_all_scenarios(Some("cpu"), "cpu", false);
6405    }
6406
6407    #[test]
6408    fn test_draw_masks_4_scenarios_auto() {
6409        run_all_scenarios(None, "auto", false);
6410    }
6411
6412    #[cfg(target_os = "linux")]
6413    #[cfg(feature = "opengl")]
6414    #[test]
6415    fn test_draw_masks_4_scenarios_opengl() {
6416        run_all_scenarios(Some("opengl"), "opengl", false);
6417    }
6418
    /// G2D forced backend: exercises the zero-detection empty-frame
    /// paths via `g2d_clear` and `g2d_blit`. A detect-present call must
    /// return `NotImplemented` since G2D has no rasterizer for boxes /
    /// masks. Note: only the no-background detect case (scenario 3) is
    /// exercised below; the detect+bg case (scenario 4) is not.
    #[cfg(target_os = "linux")]
    #[test]
    fn test_draw_masks_zero_detection_g2d_forced() {
        if !edgefirst_tensor::is_dma_available() {
            eprintln!("SKIPPED: g2d forced — DMA not available on this host");
            return;
        }
        let processor_result = with_force_backend(Some("g2d"), ImageProcessor::new);
        let mut processor = match processor_result {
            Ok(p) => p,
            Err(e) => {
                eprintln!("SKIPPED: g2d forced — init failed: {e:?}");
                return;
            }
        };

        // Case 1: empty + no bg. G2D requires DMA-backed dst.
        let mut dst = TensorDyn::image(
            64,
            64,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Dma),
        )
        .unwrap();
        // Dirty the destination so a silent no-op would be detected.
        {
            use edgefirst_tensor::TensorMapTrait;
            let u8t = dst.as_u8_mut().unwrap();
            let mut map = u8t.map().unwrap();
            map.as_mut_slice().fill(0xBB);
        }
        processor
            .draw_decoded_masks(&mut dst, &[], &[], MaskOverlay::default())
            .expect("g2d empty+no-bg");
        assert_every_pixel_eq(&dst, [0, 0, 0, 0], "g2d/case1 cleared");

        // Case 2: empty + bg. Both surfaces DMA-backed for g2d_blit.
        let bg_color = [7, 11, 13, 255];
        let bg = {
            let t = TensorDyn::image(
                64,
                64,
                PixelFormat::Rgba,
                DType::U8,
                Some(TensorMemory::Dma),
            )
            .unwrap();
            // Fill the background with a constant, recognizable colour.
            {
                use edgefirst_tensor::TensorMapTrait;
                let u8t = t.as_u8().unwrap();
                let mut map = u8t.map().unwrap();
                for chunk in map.as_mut_slice().chunks_exact_mut(4) {
                    chunk.copy_from_slice(&bg_color);
                }
            }
            t
        };
        let mut dst = TensorDyn::image(
            64,
            64,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Dma),
        )
        .unwrap();
        // Dirty this destination too, with a different byte pattern.
        {
            use edgefirst_tensor::TensorMapTrait;
            let u8t = dst.as_u8_mut().unwrap();
            let mut map = u8t.map().unwrap();
            map.as_mut_slice().fill(0x55);
        }
        processor
            .draw_decoded_masks(&mut dst, &[], &[], MaskOverlay::new().with_background(&bg))
            .expect("g2d empty+bg");
        assert_every_pixel_eq(&dst, bg_color, "g2d/case2 bg blit");

        // Case 3: detect present (no bg) — must return NotImplemented.
        // NOTE(review): the 4-scenario table's case 4 (detect + bg) is not
        // exercised here; only this no-background variant is checked.
        let detect = DetectBox {
            bbox: [0.25, 0.25, 0.75, 0.75].into(),
            score: 0.9,
            label: 0,
        };
        let mut dst = TensorDyn::image(
            64,
            64,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Dma),
        )
        .unwrap();
        let err = processor
            .draw_decoded_masks(&mut dst, &[detect], &[], MaskOverlay::default())
            .expect_err("g2d must reject detect-present draw_decoded_masks");
        assert!(
            matches!(err, Error::NotImplemented(_)),
            "g2d case3 wrong error: {err:?}"
        );
    }
6521
6522    #[test]
6523    fn test_set_format_then_cpu_convert() {
6524        // Force CPU backend (save/restore to avoid leaking into other tests)
6525        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
6526        unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", "cpu") };
6527        let mut processor = ImageProcessor::new().unwrap();
6528        match original {
6529            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
6530            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6531        }
6532
6533        // Load a source image
6534        let image = include_bytes!(concat!(
6535            env!("CARGO_MANIFEST_DIR"),
6536            "/../../testdata/zidane.jpg"
6537        ));
6538        let src = load_image(image, Some(PixelFormat::Rgba), None).unwrap();
6539
6540        // Create a raw tensor, then attach format — simulating the from_fd workflow
6541        let mut dst =
6542            TensorDyn::new(&[640, 640, 3], DType::U8, Some(TensorMemory::Mem), None).unwrap();
6543        dst.set_format(PixelFormat::Rgb).unwrap();
6544
6545        // Convert should work with the set_format-annotated tensor
6546        processor
6547            .convert(&src, &mut dst, Rotation::None, Flip::None, Crop::default())
6548            .unwrap();
6549
6550        // Verify format survived conversion
6551        assert_eq!(dst.format(), Some(PixelFormat::Rgb));
6552        assert_eq!(dst.width(), Some(640));
6553        assert_eq!(dst.height(), Some(640));
6554    }
6555
6556    /// Verify that creating multiple ImageProcessors on the same thread and
6557    /// performing a resize on each does not deadlock or error.
6558    ///
6559    /// Uses automatic memory allocation (DMA → PBO → Mem fallback) so that
6560    /// hardware backends (OpenGL, G2D) are exercised on capable targets.
6561    #[test]
6562    fn test_multiple_image_processors_same_thread() {
6563        let mut processors: Vec<ImageProcessor> = (0..4)
6564            .map(|_| ImageProcessor::new().expect("ImageProcessor::new() failed"))
6565            .collect();
6566
6567        for proc in &mut processors {
6568            let src = proc
6569                .create_image(128, 128, PixelFormat::Rgb, DType::U8, None)
6570                .expect("create src failed");
6571            let mut dst = proc
6572                .create_image(64, 64, PixelFormat::Rgb, DType::U8, None)
6573                .expect("create dst failed");
6574            proc.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::default())
6575                .expect("convert failed");
6576            assert_eq!(dst.width(), Some(64));
6577            assert_eq!(dst.height(), Some(64));
6578        }
6579    }
6580
6581    /// Verify that creating ImageProcessors on separate threads and performing
6582    /// a resize on each does not deadlock or error.
6583    ///
6584    /// Uses automatic memory allocation (DMA → PBO → Mem fallback) so that
6585    /// hardware backends (OpenGL, G2D) are exercised on capable targets.
6586    /// A 60-second timeout prevents CI from hanging on deadlock regressions.
6587    #[test]
6588    fn test_multiple_image_processors_separate_threads() {
6589        use std::sync::mpsc;
6590        use std::time::Duration;
6591
6592        const TIMEOUT: Duration = Duration::from_secs(60);
6593
6594        let (tx, rx) = mpsc::channel::<()>();
6595
6596        std::thread::spawn(move || {
6597            let handles: Vec<_> = (0..4)
6598                .map(|i| {
6599                    std::thread::spawn(move || {
6600                        let mut proc = ImageProcessor::new().unwrap_or_else(|e| {
6601                            panic!("ImageProcessor::new() failed on thread {i}: {e}")
6602                        });
6603                        let src = proc
6604                            .create_image(128, 128, PixelFormat::Rgb, DType::U8, None)
6605                            .unwrap_or_else(|e| panic!("create src failed on thread {i}: {e}"));
6606                        let mut dst = proc
6607                            .create_image(64, 64, PixelFormat::Rgb, DType::U8, None)
6608                            .unwrap_or_else(|e| panic!("create dst failed on thread {i}: {e}"));
6609                        proc.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::default())
6610                            .unwrap_or_else(|e| panic!("convert failed on thread {i}: {e}"));
6611                        assert_eq!(dst.width(), Some(64));
6612                        assert_eq!(dst.height(), Some(64));
6613                    })
6614                })
6615                .collect();
6616
6617            for (i, h) in handles.into_iter().enumerate() {
6618                h.join()
6619                    .unwrap_or_else(|e| panic!("thread {i} panicked: {e:?}"));
6620            }
6621
6622            let _ = tx.send(());
6623        });
6624
6625        rx.recv_timeout(TIMEOUT).unwrap_or_else(|_| {
6626            panic!("test_multiple_image_processors_separate_threads timed out after {TIMEOUT:?}")
6627        });
6628    }
6629
6630    /// Verify that 4 fully-initialized ImageProcessors on separate threads can
6631    /// all operate concurrently without deadlocking each other.
6632    ///
6633    /// All processors are created first, then a barrier synchronizes them so
6634    /// they all start converting at the same instant — maximizing contention.
6635    /// A 60-second timeout prevents CI from hanging on deadlock regressions.
6636    #[test]
6637    fn test_image_processors_concurrent_operations() {
6638        use std::sync::{mpsc, Arc, Barrier};
6639        use std::time::Duration;
6640
6641        const N: usize = 4;
6642        const ROUNDS: usize = 10;
6643        const TIMEOUT: Duration = Duration::from_secs(60);
6644
6645        let (tx, rx) = mpsc::channel::<()>();
6646
6647        std::thread::spawn(move || {
6648            let barrier = Arc::new(Barrier::new(N));
6649
6650            let handles: Vec<_> = (0..N)
6651                .map(|i| {
6652                    let barrier = Arc::clone(&barrier);
6653                    std::thread::spawn(move || {
6654                        let mut proc = ImageProcessor::new().unwrap_or_else(|e| {
6655                            panic!("ImageProcessor::new() failed on thread {i}: {e}")
6656                        });
6657
6658                        // All threads wait here until every processor is initialized.
6659                        barrier.wait();
6660
6661                        // Now all 4 hammer the GPU concurrently.
6662                        for round in 0..ROUNDS {
6663                            let src = proc
6664                                .create_image(128, 128, PixelFormat::Rgb, DType::U8, None)
6665                                .unwrap_or_else(|e| {
6666                                    panic!("create src failed on thread {i} round {round}: {e}")
6667                                });
6668                            let mut dst = proc
6669                                .create_image(64, 64, PixelFormat::Rgb, DType::U8, None)
6670                                .unwrap_or_else(|e| {
6671                                    panic!("create dst failed on thread {i} round {round}: {e}")
6672                                });
6673                            proc.convert(
6674                                &src,
6675                                &mut dst,
6676                                Rotation::None,
6677                                Flip::None,
6678                                Crop::default(),
6679                            )
6680                            .unwrap_or_else(|e| {
6681                                panic!("convert failed on thread {i} round {round}: {e}")
6682                            });
6683                            assert_eq!(dst.width(), Some(64));
6684                            assert_eq!(dst.height(), Some(64));
6685                        }
6686                    })
6687                })
6688                .collect();
6689
6690            for (i, h) in handles.into_iter().enumerate() {
6691                h.join()
6692                    .unwrap_or_else(|e| panic!("thread {i} panicked: {e:?}"));
6693            }
6694
6695            let _ = tx.send(());
6696        });
6697
6698        rx.recv_timeout(TIMEOUT).unwrap_or_else(|_| {
6699            panic!("test_image_processors_concurrent_operations timed out after {TIMEOUT:?}")
6700        });
6701    }
6702}