// edgefirst_image — lib.rs
// SPDX-FileCopyrightText: Copyright 2025 Au-Zone Technologies
// SPDX-License-Identifier: Apache-2.0

/*!

## EdgeFirst HAL - Image Converter

The `edgefirst_image` crate is part of the EdgeFirst Hardware Abstraction
Layer (HAL) and provides functionality for converting images between
different formats and sizes.  The crate is designed to work with hardware
acceleration when available, but also provides a CPU-based fallback for
environments where hardware acceleration is not present or not suitable.

The main features of the `edgefirst_image` crate include:
- Support for various image formats, including YUYV, RGB, RGBA, and GREY.
- Support for source crop, destination crop, rotation, and flipping.
- Image conversion using hardware acceleration (G2D, OpenGL) when available.
- CPU-based image conversion as a fallback option.

The crate uses [`TensorDyn`] from `edgefirst_tensor` to represent images,
with [`PixelFormat`] metadata describing the pixel layout. The
[`ImageProcessor`] struct manages the conversion process, selecting
the appropriate conversion method based on the available hardware.

## Examples

```rust
# use edgefirst_image::{ImageProcessor, Rotation, Flip, Crop, ImageProcessorTrait, load_image};
# use edgefirst_tensor::{PixelFormat, DType, TensorDyn};
# fn main() -> Result<(), edgefirst_image::Error> {
let image = include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/../../testdata/zidane.jpg"));
let src = load_image(image, Some(PixelFormat::Rgba), None)?;
let mut converter = ImageProcessor::new()?;
let mut dst = converter.create_image(640, 480, PixelFormat::Rgb, DType::U8, None)?;
converter.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::default())?;
# Ok(())
# }
```

## Environment Variables
The behavior of the `edgefirst_image::ImageProcessor` struct can be influenced by the
following environment variables:
- `EDGEFIRST_FORCE_BACKEND`: When set to `cpu`, `g2d`, or `opengl` (case-insensitive),
  only that single backend is initialized and no fallback chain is used. If the
  forced backend fails to initialize, an error is returned immediately. This is
  useful for benchmarking individual backends in isolation. When this variable is
  set, the `EDGEFIRST_DISABLE_*` variables are ignored.
- `EDGEFIRST_DISABLE_GL`: If set to `1`, disables the use of OpenGL for image
  conversion, forcing the use of CPU or other available hardware methods.
- `EDGEFIRST_DISABLE_G2D`: If set to `1`, disables the use of G2D for image
  conversion, forcing the use of CPU or other available hardware methods.
- `EDGEFIRST_DISABLE_CPU`: If set to `1`, disables the use of CPU for image
  conversion, forcing the use of hardware acceleration methods. If no hardware
  acceleration methods are available, an error will be returned when attempting
  to create an `ImageProcessor`.

Additionally the TensorMemory used by default allocations can be controlled using the
`EDGEFIRST_TENSOR_FORCE_MEM` environment variable. If set to `1`, default tensor memory
uses system memory. This disables the use of specialized memory regions for tensors
and hardware acceleration, but can improve the performance of the CPU-based converter.
*/
62#![cfg_attr(coverage_nightly, feature(coverage_attribute))]
63
/// Pitch alignment requirement for DMA-BUF tensors that may be imported as
/// EGLImages by the GL backend. Mali Valhall (i.MX 95 / G310) rejects
/// `eglCreateImageKHR` with `EGL_BAD_ALLOC` for any DMA-BUF whose row pitch
/// is not a multiple of 64 bytes; Vivante GC7000UL (i.MX 8MP) accepts any
/// pitch so the constant is harmless on that path. 64 is the smallest
/// alignment that satisfies every embedded ARM GPU we ship to.
///
/// Applied automatically inside [`ImageProcessor::create_image`] when the
/// allocation lands on `TensorMemory::Dma`. External callers that allocate
/// their own DMA-BUF tensors (e.g. GStreamer plugins, video pipelines) can
/// use [`align_width_for_gpu_pitch`] to compute a width whose resulting row
/// stride satisfies this requirement.
pub const GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES: usize = 64;
77
78/// Round `width` (in pixels) up so the resulting row stride
79/// `width * bpp` is a multiple of [`GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES`]
80/// AND a multiple of `bpp` (so the rounded width is an integer pixel count).
81///
82/// `bpp` must be the per-pixel byte count for the image's primary plane
83/// (e.g. 4 for RGBA8/BGRA8, 3 for RGB888, 1 for Grey/NV12-luma).
84///
85/// External callers — GStreamer plugins, video pipelines, anyone wrapping a
86/// foreign DMA-BUF — should call this when sizing the destination so that
87/// `eglCreateImageKHR` doesn't reject the import on Mali. Pre-aligned widths
88/// (640, 1280, 1920, 3008, 3840 …) round-trip unchanged; misaligned widths
89/// are bumped up to the next valid value.
90///
91/// # Overflow behaviour
92///
93/// All arithmetic is checked. If the alignment computation or the rounded
94/// width would overflow `usize`, the function logs a warning and returns the
95/// original `width` unchanged rather than wrapping or producing a smaller
96/// value. Callers can rely on the returned width being **at least** the
97/// requested width.
98///
99/// `bpp == 0` and `width == 0` short-circuit to return the input unchanged.
100///
101/// # Examples
102///
103/// ```
104/// use edgefirst_image::align_width_for_gpu_pitch;
105///
106/// // RGBA8 (bpp=4): width must round to a multiple of 16 pixels (64-byte stride).
107/// assert_eq!(align_width_for_gpu_pitch(1920, 4), 1920); // already aligned
108/// assert_eq!(align_width_for_gpu_pitch(3004, 4), 3008); // crowd.png case: +4 px
109/// assert_eq!(align_width_for_gpu_pitch(1281, 4), 1296); // +15 px
110///
111/// // RGB888 (bpp=3): width must round to a multiple of 64 pixels (192-byte stride).
112/// assert_eq!(align_width_for_gpu_pitch(640, 3), 640);
113/// assert_eq!(align_width_for_gpu_pitch(641, 3), 704);
114/// ```
115pub fn align_width_for_gpu_pitch(width: usize, bpp: usize) -> usize {
116    if bpp == 0 || width == 0 {
117        return width;
118    }
119
120    // The minimum aligned stride must be a common multiple of both the
121    // GPU's pitch alignment and the per-pixel byte count. Using the LCM
122    // guarantees the rounded stride is an integer multiple of `bpp`, so
123    // converting back to a pixel count is exact.
124    //
125    // Compute the alignment in pixels (`width_alignment`) so we never need
126    // to multiply `width * bpp`, which is the only operation that could
127    // realistically overflow for large caller-supplied widths.
128    let Some(lcm_alignment) = checked_num_integer_lcm(GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES, bpp)
129    else {
130        log::warn!(
131            "align_width_for_gpu_pitch: lcm({GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES}, {bpp}) \
132             overflows usize, returning unaligned width {width}"
133        );
134        return width;
135    };
136    if lcm_alignment == 0 {
137        return width;
138    }
139
140    debug_assert_eq!(lcm_alignment % bpp, 0);
141    let width_alignment = lcm_alignment / bpp;
142    if width_alignment == 0 {
143        return width;
144    }
145
146    let remainder = width % width_alignment;
147    if remainder == 0 {
148        return width;
149    }
150
151    let pad = width_alignment - remainder;
152    match width.checked_add(pad) {
153        Some(aligned) => aligned,
154        None => {
155            log::warn!(
156                "align_width_for_gpu_pitch: width {width} + pad {pad} overflows usize, \
157                 returning unaligned (caller should use a smaller width or pre-aligned size)"
158            );
159            width
160        }
161    }
162}
163
164/// Round `min_pitch_bytes` up to the next multiple of
165/// [`GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES`]. Returns `None` if the rounded
166/// value would overflow `usize`. Returns `Some(0)` for input 0.
167///
168/// Used internally by [`ImageProcessor::create_image`] to compute the
169/// padded row stride for DMA-backed image allocations. External callers
170/// that need pixel-counted alignment (instead of raw byte pitch) should
171/// use [`align_width_for_gpu_pitch`] instead.
172#[cfg(target_os = "linux")]
173pub(crate) fn align_pitch_bytes_to_gpu_alignment(min_pitch_bytes: usize) -> Option<usize> {
174    let alignment = GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES;
175    if min_pitch_bytes == 0 {
176        return Some(0);
177    }
178    let remainder = min_pitch_bytes % alignment;
179    if remainder == 0 {
180        return Some(min_pitch_bytes);
181    }
182    min_pitch_bytes.checked_add(alignment - remainder)
183}
184
185/// Overflow-safe least common multiple. Returns `None` when `(a / gcd) * b`
186/// would wrap.
187fn checked_num_integer_lcm(a: usize, b: usize) -> Option<usize> {
188    if a == 0 || b == 0 {
189        return Some(0);
190    }
191    let g = num_integer_gcd(a, b);
192    // a / g is exact (g divides a by definition) and at most a, so this
193    // division never panics. Only the subsequent multiply can overflow.
194    (a / g).checked_mul(b)
195}
196
/// Greatest common divisor via the iterative Euclidean algorithm.
/// `gcd(x, 0) == x`, so a zero argument returns the other value.
fn num_integer_gcd(a: usize, b: usize) -> usize {
    let (mut x, mut y) = (a, b);
    while y != 0 {
        let remainder = x % y;
        x = y;
        y = remainder;
    }
    x
}
204
205/// Bytes-per-pixel for the primary plane of `format` at element size `elem`.
206/// Returns `None` for formats that don't have a single packed BPP (semi-planar
207/// chroma is handled separately, returning the luma-plane bpp).
208///
209/// External callers can use this together with [`align_width_for_gpu_pitch`]
210/// to size their own DMA-BUFs without having to remember per-format BPPs:
211///
212/// ```
213/// use edgefirst_image::{align_width_for_gpu_pitch, primary_plane_bpp};
214/// use edgefirst_tensor::PixelFormat;
215///
216/// let bpp = primary_plane_bpp(PixelFormat::Rgba, 1).unwrap();
217/// let aligned = align_width_for_gpu_pitch(3004, bpp);
218/// assert_eq!(aligned, 3008);
219/// ```
220pub fn primary_plane_bpp(format: PixelFormat, elem: usize) -> Option<usize> {
221    use edgefirst_tensor::PixelLayout;
222    match format.layout() {
223        PixelLayout::Packed => Some(format.channels() * elem),
224        PixelLayout::Planar => Some(elem),
225        // For NV12/NV16 the luma plane is single-channel so the pitch
226        // matches `elem`; the chroma plane uses the same pitch in bytes
227        // (UV is half-width but two interleaved channels = same pitch).
228        PixelLayout::SemiPlanar => Some(elem),
229        // `PixelLayout` is non-exhaustive — fall through unaligned for
230        // any future variant we don't yet recognise.
231        _ => None,
232    }
233}
234
235use edgefirst_decoder::{DetectBox, ProtoData, Segmentation};
236use edgefirst_tensor::{
237    DType, PixelFormat, PixelLayout, Tensor, TensorDyn, TensorMemory, TensorTrait as _,
238};
239use enum_dispatch::enum_dispatch;
240use std::{fmt::Display, time::Instant};
241use zune_jpeg::{
242    zune_core::{colorspace::ColorSpace, options::DecoderOptions},
243    JpegDecoder,
244};
245use zune_png::PngDecoder;
246
247pub use cpu::CPUProcessor;
248pub use error::{Error, Result};
249#[cfg(target_os = "linux")]
250pub use g2d::G2DProcessor;
251#[cfg(target_os = "linux")]
252#[cfg(feature = "opengl")]
253pub use opengl_headless::GLProcessorThreaded;
254#[cfg(target_os = "linux")]
255#[cfg(feature = "opengl")]
256pub use opengl_headless::Int8InterpolationMode;
257#[cfg(target_os = "linux")]
258#[cfg(feature = "opengl")]
259pub use opengl_headless::{probe_egl_displays, EglDisplayInfo, EglDisplayKind};
260
261mod cpu;
262mod error;
263mod g2d;
264#[path = "gl/mod.rs"]
265mod opengl_headless;
266
267// Use `edgefirst_tensor::PixelFormat` variants (Rgb, Rgba, Grey, etc.) and
268// `TensorDyn` / `Tensor<u8>` with `.format()` metadata instead.
269
270/// Flips the image data, then rotates it. Returns a new `TensorDyn`.
271fn rotate_flip_to_dyn(
272    src: &Tensor<u8>,
273    src_fmt: PixelFormat,
274    rotation: Rotation,
275    flip: Flip,
276    memory: Option<TensorMemory>,
277) -> Result<TensorDyn, Error> {
278    let src_w = src.width().unwrap();
279    let src_h = src.height().unwrap();
280    let channels = src_fmt.channels();
281
282    let (dst_w, dst_h) = match rotation {
283        Rotation::None | Rotation::Rotate180 => (src_w, src_h),
284        Rotation::Clockwise90 | Rotation::CounterClockwise90 => (src_h, src_w),
285    };
286
287    let dst = Tensor::<u8>::image(dst_w, dst_h, src_fmt, memory)?;
288    let src_map = src.map()?;
289    let mut dst_map = dst.map()?;
290
291    CPUProcessor::flip_rotate_ndarray_pf(
292        &src_map,
293        &mut dst_map,
294        dst_w,
295        dst_h,
296        channels,
297        rotation,
298        flip,
299    )?;
300    drop(dst_map);
301    drop(src_map);
302
303    Ok(TensorDyn::from(dst))
304}
305
/// Clockwise rotation applied during conversion, in quarter turns. The
/// discriminant values (0..=3) count 90° steps clockwise.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Rotation {
    None = 0,
    Clockwise90 = 1,
    Rotate180 = 2,
    CounterClockwise90 = 3,
}

impl Rotation {
    /// Creates a `Rotation` from a clockwise angle in degrees. The angle
    /// must be a multiple of 90.
    ///
    /// # Panics
    /// Panics if the angle is not a multiple of 90.
    ///
    /// # Examples
    /// ```rust
    /// # use edgefirst_image::Rotation;
    /// let rotation = Rotation::from_degrees_clockwise(270);
    /// assert_eq!(rotation, Rotation::CounterClockwise90);
    /// ```
    pub fn from_degrees_clockwise(angle: usize) -> Rotation {
        // `%` equals `rem_euclid` for unsigned integers, so this reduces the
        // angle into [0, 360) before dispatching.
        match angle % 360 {
            0 => Rotation::None,
            90 => Rotation::Clockwise90,
            180 => Rotation::Rotate180,
            270 => Rotation::CounterClockwise90,
            _ => panic!("rotation angle is not a multiple of 90"),
        }
    }
}
336
/// Mirror transform applied during conversion. Per the
/// [`ImageProcessorTrait::convert`] contract, flipping happens after
/// cropping and before rotation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Flip {
    /// No mirroring.
    None = 0,
    /// Vertical flip.
    Vertical = 1,
    /// Horizontal flip.
    Horizontal = 2,
}
343
/// Controls how the color palette index is chosen for each detected object.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum ColorMode {
    /// Color is chosen by object class label (`det.label`). Default.
    ///
    /// Preserves backward compatibility and is correct for semantic
    /// segmentation where colors carry class meaning.
    #[default]
    Class,
    /// Color is chosen by instance order (loop index, zero-based).
    ///
    /// Each detected object gets a unique color regardless of class,
    /// useful for instance segmentation.
    Instance,
    /// Color is chosen by track ID (future use; currently behaves like
    /// [`Instance`](Self::Instance)).
    Track,
}

impl ColorMode {
    /// Palette index for a detection, given its loop position `idx` and its
    /// class `label`: class-keyed coloring uses the label, everything else
    /// uses the instance position.
    #[inline]
    pub fn index(self, idx: usize, label: usize) -> usize {
        match self {
            Self::Class => label,
            Self::Instance | Self::Track => idx,
        }
    }
}
373
/// Options for mask overlay rendering.
///
/// Controls how segmentation masks are composited onto the destination image:
/// - `background`: when set, the background image is drawn first and masks
///   are composited over it (result written to `dst`). When `None`, `dst` is
///   cleared to `0x00000000` (fully transparent) before masks are drawn.
///   **`dst` is always fully overwritten — its prior contents are never
///   preserved.** Callers who used to pre-load an image into `dst` before
///   calling `draw_decoded_masks` / `draw_proto_masks` must now supply that
///   image via `background` instead (behaviour changed in v0.16.4).
/// - `opacity`: scales the alpha of rendered mask colors. `1.0` (default)
///   preserves the class color's alpha unchanged; `0.5` makes masks
///   semi-transparent.
/// - `color_mode`: controls whether colors are assigned by class label,
///   instance index, or track ID. Defaults to [`ColorMode::Class`].
#[derive(Debug, Clone, Copy)]
pub struct MaskOverlay<'a> {
    /// Compositing source image. Must have the same dimensions and pixel
    /// format as `dst`. When `Some`, the output is `background + masks`.
    /// When `None`, `dst` is cleared to `0x00000000` before masks are drawn.
    pub background: Option<&'a TensorDyn>,
    /// Alpha multiplier for rendered mask colors; `1.0` preserves the class
    /// color's alpha unchanged (see [`with_opacity`](Self::with_opacity)).
    pub opacity: f32,
    /// Normalized letterbox region `[xmin, ymin, xmax, ymax]` in model-input
    /// space that contains actual image content (the rest is padding).
    ///
    /// When set, bounding boxes and mask coordinates from the decoder (which
    /// are in model-input normalized space) are mapped back to the original
    /// image coordinate space before rendering.
    ///
    /// Use [`with_letterbox_crop`](Self::with_letterbox_crop) to compute this
    /// from the [`Crop`] that was used in the model input [`convert`](crate::ImageProcessorTrait::convert) call.
    pub letterbox: Option<[f32; 4]>,
    /// Palette-index selection strategy; see [`ColorMode`].
    pub color_mode: ColorMode,
}
408
409impl Default for MaskOverlay<'_> {
410    fn default() -> Self {
411        Self {
412            background: None,
413            opacity: 1.0,
414            letterbox: None,
415            color_mode: ColorMode::Class,
416        }
417    }
418}
419
420impl<'a> MaskOverlay<'a> {
421    pub fn new() -> Self {
422        Self::default()
423    }
424
425    /// Set the compositing source image.
426    ///
427    /// `bg` must have the same dimensions and pixel format as the `dst` passed
428    /// to [`draw_decoded_masks`](crate::ImageProcessorTrait::draw_decoded_masks) /
429    /// [`draw_proto_masks`](crate::ImageProcessorTrait::draw_proto_masks).
430    /// The output will be `bg + masks`. Without a background, `dst` is cleared
431    /// to `0x00000000`.
432    pub fn with_background(mut self, bg: &'a TensorDyn) -> Self {
433        self.background = Some(bg);
434        self
435    }
436
437    pub fn with_opacity(mut self, opacity: f32) -> Self {
438        self.opacity = opacity.clamp(0.0, 1.0);
439        self
440    }
441
442    pub fn with_color_mode(mut self, mode: ColorMode) -> Self {
443        self.color_mode = mode;
444        self
445    }
446
447    /// Set the letterbox transform from the [`Crop`] used when preparing the
448    /// model input, so that bounding boxes and masks are correctly mapped back
449    /// to the original image coordinate space during rendering.
450    ///
451    /// Pass the same `crop` that was given to
452    /// [`convert`](crate::ImageProcessorTrait::convert) along with the model
453    /// input dimensions (`model_w` × `model_h`).
454    ///
455    /// Has no effect when `crop.dst_rect` is `None` (no letterbox applied).
456    pub fn with_letterbox_crop(mut self, crop: &Crop, model_w: usize, model_h: usize) -> Self {
457        if let Some(r) = crop.dst_rect {
458            self.letterbox = Some([
459                r.left as f32 / model_w as f32,
460                r.top as f32 / model_h as f32,
461                (r.left + r.width) as f32 / model_w as f32,
462                (r.top + r.height) as f32 / model_h as f32,
463            ]);
464        }
465        self
466    }
467}
468
469/// Apply the inverse letterbox transform to a bounding box.
470///
471/// `letterbox` is `[lx0, ly0, lx1, ly1]` — the normalized region of the model
472/// input that contains actual image content (output of
473/// [`MaskOverlay::with_letterbox_crop`]).
474///
475/// Converts model-input-normalized coords to output-image-normalized coords,
476/// clamped to `[0.0, 1.0]`. Also canonicalises the bbox (ensures xmin ≤ xmax).
477#[inline]
478fn unletter_bbox(bbox: DetectBox, lb: [f32; 4]) -> DetectBox {
479    let b = bbox.bbox.to_canonical();
480    let [lx0, ly0, lx1, ly1] = lb;
481    let inv_w = if lx1 > lx0 { 1.0 / (lx1 - lx0) } else { 1.0 };
482    let inv_h = if ly1 > ly0 { 1.0 / (ly1 - ly0) } else { 1.0 };
483    DetectBox {
484        bbox: edgefirst_decoder::BoundingBox {
485            xmin: ((b.xmin - lx0) * inv_w).clamp(0.0, 1.0),
486            ymin: ((b.ymin - ly0) * inv_h).clamp(0.0, 1.0),
487            xmax: ((b.xmax - lx0) * inv_w).clamp(0.0, 1.0),
488            ymax: ((b.ymax - ly0) * inv_h).clamp(0.0, 1.0),
489        },
490        ..bbox
491    }
492}
493
494#[derive(Debug, Clone, Copy, PartialEq, Eq)]
495pub struct Crop {
496    pub src_rect: Option<Rect>,
497    pub dst_rect: Option<Rect>,
498    pub dst_color: Option<[u8; 4]>,
499}
500
501impl Default for Crop {
502    fn default() -> Self {
503        Crop::new()
504    }
505}
506impl Crop {
507    // Creates a new Crop with default values (no cropping).
508    pub fn new() -> Self {
509        Crop {
510            src_rect: None,
511            dst_rect: None,
512            dst_color: None,
513        }
514    }
515
516    // Sets the source rectangle for cropping.
517    pub fn with_src_rect(mut self, src_rect: Option<Rect>) -> Self {
518        self.src_rect = src_rect;
519        self
520    }
521
522    // Sets the destination rectangle for cropping.
523    pub fn with_dst_rect(mut self, dst_rect: Option<Rect>) -> Self {
524        self.dst_rect = dst_rect;
525        self
526    }
527
528    // Sets the destination color for areas outside the cropped region.
529    pub fn with_dst_color(mut self, dst_color: Option<[u8; 4]>) -> Self {
530        self.dst_color = dst_color;
531        self
532    }
533
534    // Creates a new Crop with no cropping.
535    pub fn no_crop() -> Self {
536        Crop::new()
537    }
538
539    /// Validate crop rectangles against explicit dimensions.
540    pub(crate) fn check_crop_dims(
541        &self,
542        src_w: usize,
543        src_h: usize,
544        dst_w: usize,
545        dst_h: usize,
546    ) -> Result<(), Error> {
547        let src_ok = self
548            .src_rect
549            .is_none_or(|r| r.left + r.width <= src_w && r.top + r.height <= src_h);
550        let dst_ok = self
551            .dst_rect
552            .is_none_or(|r| r.left + r.width <= dst_w && r.top + r.height <= dst_h);
553        match (src_ok, dst_ok) {
554            (true, true) => Ok(()),
555            (true, false) => Err(Error::CropInvalid(format!(
556                "Dest crop invalid: {:?}",
557                self.dst_rect
558            ))),
559            (false, true) => Err(Error::CropInvalid(format!(
560                "Src crop invalid: {:?}",
561                self.src_rect
562            ))),
563            (false, false) => Err(Error::CropInvalid(format!(
564                "Dest and Src crop invalid: {:?} {:?}",
565                self.dst_rect, self.src_rect
566            ))),
567        }
568    }
569
570    /// Validate crop rectangles against TensorDyn source and destination.
571    pub fn check_crop_dyn(
572        &self,
573        src: &edgefirst_tensor::TensorDyn,
574        dst: &edgefirst_tensor::TensorDyn,
575    ) -> Result<(), Error> {
576        self.check_crop_dims(
577            src.width().unwrap_or(0),
578            src.height().unwrap_or(0),
579            dst.width().unwrap_or(0),
580            dst.height().unwrap_or(0),
581        )
582    }
583}
584
585#[derive(Debug, Clone, Copy, PartialEq, Eq)]
586pub struct Rect {
587    pub left: usize,
588    pub top: usize,
589    pub width: usize,
590    pub height: usize,
591}
592
593impl Rect {
594    // Creates a new Rect with the specified left, top, width, and height.
595    pub fn new(left: usize, top: usize, width: usize, height: usize) -> Self {
596        Self {
597            left,
598            top,
599            width,
600            height,
601        }
602    }
603
604    // Checks if the rectangle is valid for the given TensorDyn image.
605    pub fn check_rect_dyn(&self, image: &TensorDyn) -> bool {
606        let w = image.width().unwrap_or(0);
607        let h = image.height().unwrap_or(0);
608        self.left + self.width <= w && self.top + self.height <= h
609    }
610}
611
#[enum_dispatch(ImageProcessor)]
pub trait ImageProcessorTrait {
    /// Converts the source image to the destination image format and size. The
    /// image is cropped first, then flipped, then rotated.
    ///
    /// # Arguments
    ///
    /// * `src` - The source image to convert from.
    /// * `dst` - The destination image to be converted to.
    /// * `rotation` - The rotation to apply to the destination image.
    /// * `flip` - Flips the image
    /// * `crop` - An optional rectangle specifying the area to crop from the
    ///   source image
    ///
    /// # Returns
    ///
    /// A `Result` indicating success or failure of the conversion.
    fn convert(
        &mut self,
        src: &TensorDyn,
        dst: &mut TensorDyn,
        rotation: Rotation,
        flip: Flip,
        crop: Crop,
    ) -> Result<()>;

    /// Draw pre-decoded detection boxes and segmentation masks onto `dst`.
    ///
    /// Supports two segmentation modes based on the mask channel count:
    /// - **Instance segmentation** (`C=1`): one `Segmentation` per detection,
    ///   `segmentation` and `detect` are zipped.
    /// - **Semantic segmentation** (`C>1`): a single `Segmentation` covering
    ///   all classes; only the first element is used.
    ///
    /// # Format requirements
    ///
    /// - CPU backend: `dst` must be `RGBA` or `RGB`.
    /// - OpenGL backend: `dst` must be `RGBA`, `BGRA`, or `RGB`.
    /// - G2D backend: only produces the base frame (empty detections);
    ///   returns `NotImplemented` when any detection or segmentation is
    ///   supplied.
    ///
    /// # Output contract
    ///
    /// This function always fully writes `dst` — it never relies on the
    /// caller having pre-cleared the destination. The four cases are:
    ///
    /// | detections | background | output                              |
    /// |------------|------------|-------------------------------------|
    /// | none       | none       | dst cleared to `0x00000000`         |
    /// | none       | set        | dst ← background                    |
    /// | set        | none       | masks drawn over cleared dst        |
    /// | set        | set        | masks drawn over background         |
    ///
    /// Each backend implements this with its native primitives: G2D uses
    /// `g2d_clear` / `g2d_blit`, OpenGL uses `glClear` / DMA-BUF GPU blit
    /// plus the mask program, and CPU uses direct buffer fill / memcpy as
    /// the terminal fallback. CPU-memcpy of DMA buffers is avoided on the
    /// accelerated paths.
    ///
    /// An empty `segmentation` slice is valid — only bounding boxes are drawn.
    ///
    /// `overlay` controls compositing: `background` is the compositing source
    /// (must match `dst` in size and format); `opacity` scales mask alpha.
    ///
    /// # Buffer aliasing
    ///
    /// `dst` and `overlay.background` must reference **distinct underlying
    /// buffers**. An aliased pair returns [`Error::AliasedBuffers`] without
    /// dispatching to any backend — the GL path would otherwise read and
    /// write the same texture in a single draw, which is undefined behaviour
    /// on most drivers. Aliasing is detected via
    /// [`TensorDyn::aliases`](edgefirst_tensor::TensorDyn::aliases), which
    /// catches both shared-allocation clones and separate imports over the
    /// same dmabuf fd.
    ///
    /// # Migration from v0.16.3 and earlier
    ///
    /// Prior to v0.16.4 the call silently preserved `dst`'s contents on empty
    /// detections. That invariant no longer holds — `dst` is always fully
    /// written. Callers who pre-loaded an image into `dst` before calling this
    /// function must now pass that image via `overlay.background` instead.
    fn draw_decoded_masks(
        &mut self,
        dst: &mut TensorDyn,
        detect: &[DetectBox],
        segmentation: &[Segmentation],
        overlay: MaskOverlay<'_>,
    ) -> Result<()>;

    /// Draw masks from proto data onto image (fused decode+draw).
    ///
    /// For YOLO segmentation models, this avoids materializing intermediate
    /// `Array3<u8>` masks. The `ProtoData` contains mask coefficients and the
    /// prototype tensor; the renderer computes `mask_coeff @ protos` directly
    /// at the output resolution using bilinear sampling.
    ///
    /// `detect` and `proto_data.mask_coefficients` must have the same length
    /// (enforced by zip — excess entries are silently ignored). An empty
    /// `detect` slice is valid and produces the base frame — cleared or
    /// background-blitted — via the selected backend's native primitive.
    ///
    /// # Format requirements and output contract
    ///
    /// Same as [`draw_decoded_masks`](Self::draw_decoded_masks), including
    /// the "always fully writes dst" guarantee across all four
    /// detection/background combinations.
    ///
    /// `overlay` controls compositing — see [`draw_decoded_masks`](Self::draw_decoded_masks).
    fn draw_proto_masks(
        &mut self,
        dst: &mut TensorDyn,
        detect: &[DetectBox],
        proto_data: &ProtoData,
        overlay: MaskOverlay<'_>,
    ) -> Result<()>;

    /// Sets the colors used for rendering segmentation masks. Up to 20 colors
    /// can be set.
    fn set_class_colors(&mut self, colors: &[[u8; 4]]) -> Result<()>;
}
733
/// Configuration for [`ImageProcessor`] construction.
///
/// Use with [`ImageProcessor::with_config`] to override the default EGL
/// display auto-detection and backend selection. The default configuration
/// preserves the existing auto-detection behaviour.
#[derive(Debug, Clone, Default)]
pub struct ImageProcessorConfig {
    /// Force OpenGL to use this EGL display type instead of auto-detecting.
    ///
    /// When `None`, the processor probes displays in priority order: GBM,
    /// PlatformDevice, Default. Use [`probe_egl_displays`] to discover
    /// which displays are available on the current system.
    ///
    /// Ignored when `EDGEFIRST_DISABLE_GL=1` is set.
    #[cfg(target_os = "linux")]
    #[cfg(feature = "opengl")]
    pub egl_display: Option<EglDisplayKind>,

    /// Preferred compute backend.
    ///
    /// When set to a specific backend (not [`ComputeBackend::Auto`]), the
    /// processor initializes that backend with no fallback — returns an error if the conversion is not supported.
    /// This takes precedence over `EDGEFIRST_FORCE_BACKEND` and the
    /// `EDGEFIRST_DISABLE_*` environment variables.
    ///
    /// - [`ComputeBackend::OpenGl`]: init OpenGL + CPU, skip G2D
    /// - [`ComputeBackend::G2d`]: init G2D + CPU, skip OpenGL
    /// - [`ComputeBackend::Cpu`]: init CPU only
    /// - [`ComputeBackend::Auto`]: existing env-var-driven selection
    pub backend: ComputeBackend,
}
765
/// Compute backend selection for [`ImageProcessor`].
///
/// Use with [`ImageProcessorConfig::backend`] to select which backend the
/// processor should prefer. When a specific backend is selected, the
/// processor initializes that backend plus CPU as a fallback. When `Auto`
/// is used, the existing environment-variable-driven selection applies.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum ComputeBackend {
    /// Auto-detect based on available hardware and environment variables.
    #[default]
    Auto,
    /// CPU-only processing (no hardware acceleration).
    Cpu,
    /// Prefer G2D hardware blitter (+ CPU fallback).
    G2d,
    /// Prefer OpenGL ES (+ CPU fallback).
    OpenGl,
}
784
/// Backend forced via the `EDGEFIRST_FORCE_BACKEND` environment variable.
///
/// When set, the [`ImageProcessor`] only initializes and dispatches to the
/// selected backend — no fallback chain is used. Note that
/// [`ImageProcessorConfig::backend`] does *not* produce this: config-driven
/// selection keeps the CPU fallback and leaves
/// `ImageProcessor::forced_backend` as `None`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ForcedBackend {
    /// CPU only; no hardware backends are initialized.
    Cpu,
    /// G2D only; construction fails if G2D cannot be initialized.
    G2d,
    /// OpenGL ES only; construction fails if GL cannot be initialized.
    OpenGl,
}
796
/// Image converter that uses available hardware acceleration or CPU as a
/// fallback.
///
/// Construct via [`ImageProcessor::new`] or [`ImageProcessor::with_config`];
/// which backend fields are populated depends on the platform, enabled
/// features, and the `EDGEFIRST_*` environment variables.
#[derive(Debug)]
pub struct ImageProcessor {
    /// CPU-based image converter as a fallback. This is `None` when the
    /// EDGEFIRST_DISABLE_CPU environment variable is set, or when
    /// EDGEFIRST_FORCE_BACKEND selects a non-CPU backend (forced g2d/opengl
    /// construction skips the CPU processor entirely).
    pub cpu: Option<CPUProcessor>,

    #[cfg(target_os = "linux")]
    /// G2D-based image converter for Linux systems. This is only available if
    /// the EDGEFIRST_DISABLE_G2D environment variable is not set and libg2d.so
    /// is available.
    pub g2d: Option<G2DProcessor>,
    #[cfg(target_os = "linux")]
    #[cfg(feature = "opengl")]
    /// OpenGL-based image converter for Linux systems. This is only available
    /// if the EDGEFIRST_DISABLE_GL environment variable is not set and OpenGL
    /// ES is available.
    pub opengl: Option<GLProcessorThreaded>,

    /// When set, only the specified backend is used — no fallback chain.
    /// Only the EDGEFIRST_FORCE_BACKEND env-var path populates this;
    /// config-driven backend selection leaves it `None`.
    pub(crate) forced_backend: Option<ForcedBackend>,
}
820
// SAFETY: NOTE(review) — these impls assert that every contained backend
// handle (CPUProcessor, G2DProcessor, GLProcessorThreaded) may be moved to
// and shared between threads. That property is not provable from this file
// alone; confirm each backend's thread-safety (in particular any raw
// GL/EGL or libg2d handles) before relying on concurrent use.
unsafe impl Send for ImageProcessor {}
unsafe impl Sync for ImageProcessor {}
823
824impl ImageProcessor {
825    /// Creates a new `ImageProcessor` instance, initializing available
826    /// hardware converters based on the system capabilities and environment
827    /// variables.
828    ///
829    /// # Examples
830    /// ```rust
831    /// # use edgefirst_image::{ImageProcessor, Rotation, Flip, Crop, ImageProcessorTrait, load_image};
832    /// # use edgefirst_tensor::{PixelFormat, DType, TensorDyn};
833    /// # fn main() -> Result<(), edgefirst_image::Error> {
834    /// let image = include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/../../testdata/zidane.jpg"));
835    /// let src = load_image(image, Some(PixelFormat::Rgba), None)?;
836    /// let mut converter = ImageProcessor::new()?;
837    /// let mut dst = converter.create_image(640, 480, PixelFormat::Rgb, DType::U8, None)?;
838    /// converter.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::default())?;
839    /// # Ok(())
840    /// # }
841    /// ```
842    pub fn new() -> Result<Self> {
843        Self::with_config(ImageProcessorConfig::default())
844    }
845
    /// Creates a new `ImageProcessor` with the given configuration.
    ///
    /// When [`ImageProcessorConfig::backend`] is set to a specific backend,
    /// environment variables are ignored and the processor initializes the
    /// requested backend plus CPU as a fallback.
    ///
    /// When `Auto`, the existing `EDGEFIRST_FORCE_BACKEND` and
    /// `EDGEFIRST_DISABLE_*` environment variables apply.
    ///
    /// # Errors
    ///
    /// Returns [`Error::ForcedBackendUnavailable`] when
    /// `EDGEFIRST_FORCE_BACKEND` names an unknown backend, or when the
    /// forced backend fails to initialize or is unavailable on this
    /// platform/feature set.
    #[allow(unused_variables)]
    pub fn with_config(config: ImageProcessorConfig) -> Result<Self> {
        // ── Config-driven backend selection ──────────────────────────
        // When the caller explicitly requests a backend via the config,
        // skip all environment variable logic. All of these paths leave
        // `forced_backend` as `None`, so the convert-time fallback chain
        // (hardware first, then CPU) stays active.
        match config.backend {
            ComputeBackend::Cpu => {
                log::info!("ComputeBackend::Cpu — CPU only");
                return Ok(Self {
                    cpu: Some(CPUProcessor::new()),
                    #[cfg(target_os = "linux")]
                    g2d: None,
                    #[cfg(target_os = "linux")]
                    #[cfg(feature = "opengl")]
                    opengl: None,
                    forced_backend: None,
                });
            }
            ComputeBackend::G2d => {
                log::info!("ComputeBackend::G2d — G2D + CPU fallback");
                #[cfg(target_os = "linux")]
                {
                    // A G2D init failure is not fatal here: log a warning
                    // and fall back to CPU-only processing.
                    let g2d = match G2DProcessor::new() {
                        Ok(g) => Some(g),
                        Err(e) => {
                            log::warn!("G2D requested but failed to initialize: {e:?}");
                            None
                        }
                    };
                    return Ok(Self {
                        cpu: Some(CPUProcessor::new()),
                        g2d,
                        #[cfg(feature = "opengl")]
                        opengl: None,
                        forced_backend: None,
                    });
                }
                #[cfg(not(target_os = "linux"))]
                {
                    log::warn!("G2D requested but not available on this platform, using CPU");
                    return Ok(Self {
                        cpu: Some(CPUProcessor::new()),
                        forced_backend: None,
                    });
                }
            }
            ComputeBackend::OpenGl => {
                log::info!("ComputeBackend::OpenGl — OpenGL + CPU fallback");
                #[cfg(target_os = "linux")]
                {
                    // Without the "opengl" feature this binding is compiled
                    // out and the result is effectively CPU-only.
                    #[cfg(feature = "opengl")]
                    let opengl = match GLProcessorThreaded::new(config.egl_display) {
                        Ok(gl) => Some(gl),
                        Err(e) => {
                            log::warn!("OpenGL requested but failed to initialize: {e:?}");
                            None
                        }
                    };
                    return Ok(Self {
                        cpu: Some(CPUProcessor::new()),
                        g2d: None,
                        #[cfg(feature = "opengl")]
                        opengl,
                        forced_backend: None,
                    });
                }
                #[cfg(not(target_os = "linux"))]
                {
                    log::warn!("OpenGL requested but not available on this platform, using CPU");
                    return Ok(Self {
                        cpu: Some(CPUProcessor::new()),
                        forced_backend: None,
                    });
                }
            }
            ComputeBackend::Auto => { /* fall through to env-var logic below */ }
        }

        // ── EDGEFIRST_FORCE_BACKEND ──────────────────────────────────
        // When set, only the requested backend is initialised and no
        // fallback chain is used. Accepted values (case-insensitive):
        //   "cpu", "g2d", "opengl"
        // NOTE(review): the value is lowercased but not trimmed, so
        // surrounding whitespace (e.g. "g2d ") is rejected as unknown.
        if let Ok(val) = std::env::var("EDGEFIRST_FORCE_BACKEND") {
            let val_lower = val.to_lowercase();
            let forced = match val_lower.as_str() {
                "cpu" => ForcedBackend::Cpu,
                "g2d" => ForcedBackend::G2d,
                "opengl" => ForcedBackend::OpenGl,
                other => {
                    return Err(Error::ForcedBackendUnavailable(format!(
                        "unknown EDGEFIRST_FORCE_BACKEND value: {other:?} (expected cpu, g2d, or opengl)"
                    )));
                }
            };

            log::info!("EDGEFIRST_FORCE_BACKEND={val} — only initializing {val_lower} backend");

            // Unlike the config-driven paths above, forced hardware
            // backends skip the CPU processor entirely and fail hard if
            // initialization is impossible.
            return match forced {
                ForcedBackend::Cpu => Ok(Self {
                    cpu: Some(CPUProcessor::new()),
                    #[cfg(target_os = "linux")]
                    g2d: None,
                    #[cfg(target_os = "linux")]
                    #[cfg(feature = "opengl")]
                    opengl: None,
                    forced_backend: Some(ForcedBackend::Cpu),
                }),
                ForcedBackend::G2d => {
                    #[cfg(target_os = "linux")]
                    {
                        let g2d = G2DProcessor::new().map_err(|e| {
                            Error::ForcedBackendUnavailable(format!(
                                "g2d forced but failed to initialize: {e:?}"
                            ))
                        })?;
                        Ok(Self {
                            cpu: None,
                            g2d: Some(g2d),
                            #[cfg(feature = "opengl")]
                            opengl: None,
                            forced_backend: Some(ForcedBackend::G2d),
                        })
                    }
                    #[cfg(not(target_os = "linux"))]
                    {
                        Err(Error::ForcedBackendUnavailable(
                            "g2d backend is only available on Linux".into(),
                        ))
                    }
                }
                ForcedBackend::OpenGl => {
                    #[cfg(target_os = "linux")]
                    #[cfg(feature = "opengl")]
                    {
                        let opengl = GLProcessorThreaded::new(config.egl_display).map_err(|e| {
                            Error::ForcedBackendUnavailable(format!(
                                "opengl forced but failed to initialize: {e:?}"
                            ))
                        })?;
                        Ok(Self {
                            cpu: None,
                            g2d: None,
                            opengl: Some(opengl),
                            forced_backend: Some(ForcedBackend::OpenGl),
                        })
                    }
                    #[cfg(not(all(target_os = "linux", feature = "opengl")))]
                    {
                        Err(Error::ForcedBackendUnavailable(
                            "opengl backend requires Linux with the 'opengl' feature enabled"
                                .into(),
                        ))
                    }
                }
            };
        }

        // ── Existing DISABLE logic (unchanged) ──────────────────────
        // Each EDGEFIRST_DISABLE_* variable disables its backend when set
        // to anything other than "0" or "false" (case-insensitive).
        #[cfg(target_os = "linux")]
        let g2d = if std::env::var("EDGEFIRST_DISABLE_G2D")
            .map(|x| x != "0" && x.to_lowercase() != "false")
            .unwrap_or(false)
        {
            log::debug!("EDGEFIRST_DISABLE_G2D is set");
            None
        } else {
            match G2DProcessor::new() {
                Ok(g2d_converter) => Some(g2d_converter),
                Err(err) => {
                    log::warn!("Failed to initialize G2D converter: {err:?}");
                    None
                }
            }
        };

        #[cfg(target_os = "linux")]
        #[cfg(feature = "opengl")]
        let opengl = if std::env::var("EDGEFIRST_DISABLE_GL")
            .map(|x| x != "0" && x.to_lowercase() != "false")
            .unwrap_or(false)
        {
            log::debug!("EDGEFIRST_DISABLE_GL is set");
            None
        } else {
            match GLProcessorThreaded::new(config.egl_display) {
                Ok(gl_converter) => Some(gl_converter),
                Err(err) => {
                    log::warn!("Failed to initialize GL converter: {err:?}");
                    None
                }
            }
        };

        let cpu = if std::env::var("EDGEFIRST_DISABLE_CPU")
            .map(|x| x != "0" && x.to_lowercase() != "false")
            .unwrap_or(false)
        {
            log::debug!("EDGEFIRST_DISABLE_CPU is set");
            None
        } else {
            Some(CPUProcessor::new())
        };
        Ok(Self {
            cpu,
            #[cfg(target_os = "linux")]
            g2d,
            #[cfg(target_os = "linux")]
            #[cfg(feature = "opengl")]
            opengl,
            forced_backend: None,
        })
    }
1066
1067    /// Sets the interpolation mode for int8 proto textures on the OpenGL
1068    /// backend. No-op if OpenGL is not available.
1069    #[cfg(target_os = "linux")]
1070    #[cfg(feature = "opengl")]
1071    pub fn set_int8_interpolation_mode(&mut self, mode: Int8InterpolationMode) -> Result<()> {
1072        if let Some(ref mut gl) = self.opengl {
1073            gl.set_int8_interpolation_mode(mode)?;
1074        }
1075        Ok(())
1076    }
1077
1078    /// Create a [`TensorDyn`] image with the best available memory backend.
1079    ///
1080    /// Priority: DMA-buf → PBO (byte-sized types: u8, i8) → system memory.
1081    ///
1082    /// Use this method instead of [`TensorDyn::image()`] when the tensor will
1083    /// be used with [`ImageProcessor::convert()`]. It selects the optimal
1084    /// memory backing (including PBO for GPU zero-copy) which direct
1085    /// allocation cannot achieve.
1086    ///
1087    /// This method is on [`ImageProcessor`] rather than [`ImageProcessorTrait`]
1088    /// because optimal allocation requires knowledge of the active compute
1089    /// backends (e.g. the GL context handle for PBO allocation). Individual
1090    /// backend implementations ([`CPUProcessor`], etc.) do not have this
1091    /// cross-backend visibility.
1092    ///
1093    /// # Arguments
1094    ///
1095    /// * `width` - Image width in pixels
1096    /// * `height` - Image height in pixels
1097    /// * `format` - Pixel format
1098    /// * `dtype` - Element data type (e.g. `DType::U8`, `DType::I8`)
1099    /// * `memory` - Optional memory type override; when `None`, the best
1100    ///   available backend is selected automatically.
1101    ///
1102    /// # Returns
1103    ///
1104    /// A [`TensorDyn`] backed by the highest-performance memory type
1105    /// available on this system.
1106    ///
1107    /// # Pitch alignment for DMA-backed allocations
1108    ///
1109    /// DMA-BUF imports into the GL backend (Mali Valhall on i.MX 95
1110    /// specifically) require every row pitch to be a multiple of
1111    /// [`GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES`] (currently 64). When this
1112    /// method lands on `TensorMemory::Dma`, the underlying allocation is
1113    /// silently padded so the row stride satisfies that requirement.
1114    ///
1115    /// **The user-requested `width` is preserved** — `tensor.width()`
1116    /// returns the same value you passed in. The padding is carried by
1117    /// [`TensorDyn::row_stride`] / `effective_row_stride()`, which the
1118    /// GL backend reads when importing the buffer as an EGLImage.
1119    /// Callers that compute byte offsets from the tensor must use the
1120    /// stride, not `width × bytes_per_pixel`; the CPU mapping spans the
1121    /// full `stride × height` bytes.
1122    ///
1123    /// Pre-aligned widths (640, 1280, 1920, 3008, 3840 …) allocate
1124    /// exactly `width × bpp × height` bytes with no padding. PBO and
1125    /// Mem fallbacks never pad — they don't go through EGLImage import.
1126    ///
1127    /// See also [`align_width_for_gpu_pitch`] for an advisory helper
1128    /// that external callers (GStreamer plugins, video pipelines) can
1129    /// use to size their own DMA-BUFs for GL compatibility.
1130    ///
1131    /// # Errors
1132    ///
1133    /// Returns an error if all allocation strategies fail.
1134    pub fn create_image(
1135        &self,
1136        width: usize,
1137        height: usize,
1138        format: PixelFormat,
1139        dtype: DType,
1140        memory: Option<TensorMemory>,
1141    ) -> Result<TensorDyn> {
1142        // Compute the GPU-aligned row stride in bytes for this image.
1143        // `None` means either the format has no defined primary-plane bpp
1144        // (unknown future layout) or the stride calculation would overflow
1145        // — in both cases we fall back to the natural layout via the plain
1146        // `TensorDyn::image` constructor, and the slow-path warning inside
1147        // `draw_*_masks` will fire if the subsequent GL import fails.
1148        //
1149        // DMA allocation is Linux-only (see `TensorMemory::Dma` cfg gate),
1150        // so both the stride computation and the helper closure are gated
1151        // accordingly — the callers below are already Linux-only.
1152        #[cfg(target_os = "linux")]
1153        let dma_stride_bytes: Option<usize> = primary_plane_bpp(format, dtype.size())
1154            .and_then(|bpp| width.checked_mul(bpp))
1155            .and_then(align_pitch_bytes_to_gpu_alignment);
1156
1157        // Helper: allocate a DMA image, using the padded-stride constructor
1158        // when the computed stride exceeds the natural pitch, otherwise the
1159        // plain constructor (byte-identical result in the common case).
1160        #[cfg(target_os = "linux")]
1161        let try_dma = || -> Result<TensorDyn> {
1162            // Stride padding is only meaningful for packed pixel layouts
1163            // (RGBA8, BGRA8, RGB888, Grey) — the formats the GL backend
1164            // renders into. Semi-planar (NV12, NV16) and planar (PlanarRgb,
1165            // PlanarRgba) tensors go through `TensorDyn::image(...)` with
1166            // their natural layout; they're imported from camera capture
1167            // via `from_fd` far more often than allocated here, and
1168            // `Tensor::image_with_stride` explicitly rejects them.
1169            let packed = format.layout() == edgefirst_tensor::PixelLayout::Packed;
1170            match dma_stride_bytes {
1171                Some(stride)
1172                    if packed
1173                        && primary_plane_bpp(format, dtype.size())
1174                            .and_then(|bpp| width.checked_mul(bpp))
1175                            .is_some_and(|natural| stride > natural) =>
1176                {
1177                    log::debug!(
1178                        "create_image: padding row stride for {format:?} {width}x{height} \
1179                         from natural pitch to {stride} bytes for GPU alignment"
1180                    );
1181                    Ok(TensorDyn::image_with_stride(
1182                        width,
1183                        height,
1184                        format,
1185                        dtype,
1186                        stride,
1187                        Some(edgefirst_tensor::TensorMemory::Dma),
1188                    )?)
1189                }
1190                _ => Ok(TensorDyn::image(
1191                    width,
1192                    height,
1193                    format,
1194                    dtype,
1195                    Some(edgefirst_tensor::TensorMemory::Dma),
1196                )?),
1197            }
1198        };
1199
1200        // If an explicit memory type is requested, honour it directly.
1201        // On Linux, `TensorMemory::Dma` gets the padded-stride treatment;
1202        // other memory types take the user-requested width verbatim.
1203        match memory {
1204            #[cfg(target_os = "linux")]
1205            Some(TensorMemory::Dma) => {
1206                return try_dma();
1207            }
1208            Some(mem) => {
1209                return Ok(TensorDyn::image(width, height, format, dtype, Some(mem))?);
1210            }
1211            None => {}
1212        }
1213
1214        // Try DMA first on Linux — skip only when GL has explicitly selected PBO
1215        // as the preferred transfer path (PBO is better than DMA in that case).
1216        #[cfg(target_os = "linux")]
1217        {
1218            #[cfg(feature = "opengl")]
1219            let gl_uses_pbo = self
1220                .opengl
1221                .as_ref()
1222                .is_some_and(|gl| gl.transfer_backend() == opengl_headless::TransferBackend::Pbo);
1223            #[cfg(not(feature = "opengl"))]
1224            let gl_uses_pbo = false;
1225
1226            if !gl_uses_pbo {
1227                if let Ok(img) = try_dma() {
1228                    return Ok(img);
1229                }
1230            }
1231        }
1232
1233        // Try PBO (if GL available).
1234        // PBO buffers are u8-sized; the int8 shader emulates i8 output via
1235        // XOR 0x80 on the same underlying buffer, so both U8 and I8 work.
1236        #[cfg(target_os = "linux")]
1237        #[cfg(feature = "opengl")]
1238        if dtype.size() == 1 {
1239            if let Some(gl) = &self.opengl {
1240                match gl.create_pbo_image(width, height, format) {
1241                    Ok(t) => {
1242                        if dtype == DType::I8 {
1243                            // SAFETY: Tensor<u8> and Tensor<i8> are layout-
1244                            // identical (same element size, no T-dependent
1245                            // drop glue). The int8 shader applies XOR 0x80
1246                            // on the same PBO buffer. Same rationale as
1247                            // gl::processor::tensor_i8_as_u8_mut.
1248                            // Invariant: PBO tensors never have chroma
1249                            // (create_pbo_image → Tensor::wrap sets it None).
1250                            debug_assert!(
1251                                t.chroma().is_none(),
1252                                "PBO i8 transmute requires chroma == None"
1253                            );
1254                            let t_i8: Tensor<i8> = unsafe { std::mem::transmute(t) };
1255                            return Ok(TensorDyn::from(t_i8));
1256                        }
1257                        return Ok(TensorDyn::from(t));
1258                    }
1259                    Err(e) => log::debug!("PBO image creation failed, falling back to Mem: {e:?}"),
1260                }
1261            }
1262        }
1263
1264        // Fallback to Mem
1265        Ok(TensorDyn::image(
1266            width,
1267            height,
1268            format,
1269            dtype,
1270            Some(edgefirst_tensor::TensorMemory::Mem),
1271        )?)
1272    }
1273
    /// Import an external DMA-BUF image.
    ///
    /// Each [`PlaneDescriptor`] owns an already-duped fd; this method
    /// consumes the descriptors and takes ownership of those fds (whether
    /// the call succeeds or fails).
    ///
    /// The caller must ensure the DMA-BUF allocation is large enough for the
    /// specified width, height, format, and any stride/offset on the plane
    /// descriptors. No buffer-size validation is performed; an undersized
    /// buffer may cause GPU faults or EGL import failure.
    ///
    /// # Arguments
    ///
    /// * `image` - Plane descriptor for the primary (or only) plane
    /// * `chroma` - Optional plane descriptor for the UV chroma plane
    ///   (required for multiplane NV12)
    /// * `width` - Image width in pixels
    /// * `height` - Image height in pixels
    /// * `format` - Pixel format of the buffer
    /// * `dtype` - Element data type (e.g. `DType::U8`)
    ///
    /// # Returns
    ///
    /// A `TensorDyn` configured as an image.
    ///
    /// # Errors
    ///
    /// * [`Error::NotSupported`] if `chroma` is `Some` for a non-semi-planar
    ///   format, if `chroma` is `Some` with a dtype other than U8/I8,
    ///   or multiplane NV16 (not yet supported), or the fd is not
    ///   DMA-backed
    /// * [`Error::InvalidShape`] if NV12 height is odd, or if a chroma
    ///   stride smaller than `width` is supplied
    ///
    /// # Platform
    ///
    /// Linux only.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use edgefirst_tensor::PlaneDescriptor;
    ///
    /// // Single-plane RGBA
    /// let pd = PlaneDescriptor::new(fd.as_fd())?;
    /// let src = proc.import_image(pd, None, 1920, 1080, PixelFormat::Rgba, DType::U8)?;
    ///
    /// // Multi-plane NV12 with stride
    /// let y_pd = PlaneDescriptor::new(y_fd.as_fd())?.with_stride(2048);
    /// let uv_pd = PlaneDescriptor::new(uv_fd.as_fd())?.with_stride(2048);
    /// let src = proc.import_image(y_pd, Some(uv_pd), 1920, 1080,
    ///                             PixelFormat::Nv12, DType::U8)?;
    /// ```
    #[cfg(target_os = "linux")]
    pub fn import_image(
        &self,
        image: edgefirst_tensor::PlaneDescriptor,
        chroma: Option<edgefirst_tensor::PlaneDescriptor>,
        width: usize,
        height: usize,
        format: PixelFormat,
        dtype: DType,
    ) -> Result<TensorDyn> {
        use edgefirst_tensor::{Tensor, TensorMemory};

        // Capture stride/offset from descriptors before consuming them
        let image_stride = image.stride();
        let image_offset = image.offset();
        let chroma_stride = chroma.as_ref().and_then(|c| c.stride());
        let chroma_offset = chroma.as_ref().and_then(|c| c.offset());

        if let Some(chroma_pd) = chroma {
            // ── Multiplane path ──────────────────────────────────────
            // Multiplane tensors are backed by Tensor<u8> (or transmuted to
            // Tensor<i8>). Reject other dtypes to avoid silently returning a
            // tensor with the wrong element type.
            if dtype != DType::U8 && dtype != DType::I8 {
                return Err(Error::NotSupported(format!(
                    "multiplane import only supports U8/I8, got {dtype:?}"
                )));
            }
            if format.layout() != PixelLayout::SemiPlanar {
                return Err(Error::NotSupported(format!(
                    "import_image with chroma requires a semi-planar format, got {format:?}"
                )));
            }

            // Chroma plane height depends on the subsampling of the format.
            let chroma_h = match format {
                PixelFormat::Nv12 => {
                    if !height.is_multiple_of(2) {
                        return Err(Error::InvalidShape(format!(
                            "NV12 requires even height, got {height}"
                        )));
                    }
                    height / 2
                }
                // NV16 multiplane will be supported in a future release;
                // the GL backend currently only handles NV12 plane1 attributes.
                PixelFormat::Nv16 => {
                    return Err(Error::NotSupported(
                        "multiplane NV16 is not yet supported; use contiguous NV16 instead".into(),
                    ))
                }
                _ => {
                    return Err(Error::NotSupported(format!(
                        "unsupported semi-planar format: {format:?}"
                    )))
                }
            };

            let luma = Tensor::<u8>::from_fd(image.into_fd(), &[height, width], Some("luma"))?;
            if luma.memory() != TensorMemory::Dma {
                return Err(Error::NotSupported(format!(
                    "luma fd must be DMA-backed, got {:?}",
                    luma.memory()
                )));
            }

            let chroma_tensor =
                Tensor::<u8>::from_fd(chroma_pd.into_fd(), &[chroma_h, width], Some("chroma"))?;
            if chroma_tensor.memory() != TensorMemory::Dma {
                return Err(Error::NotSupported(format!(
                    "chroma fd must be DMA-backed, got {:?}",
                    chroma_tensor.memory()
                )));
            }

            // from_planes creates the combined tensor with format set,
            // preserving luma's row_stride (currently None since luma was raw).
            let mut tensor = Tensor::<u8>::from_planes(luma, chroma_tensor, format)?;

            // Apply stride/offset to the combined tensor (luma plane)
            if let Some(s) = image_stride {
                tensor.set_row_stride(s)?;
            }
            if let Some(o) = image_offset {
                tensor.set_plane_offset(o);
            }

            // Apply stride/offset to the chroma sub-tensor.
            // The chroma tensor is a raw 2D [chroma_h, width] tensor without
            // format metadata, so we validate stride manually rather than
            // using set_row_stride (which requires format).
            if let Some(chroma_ref) = tensor.chroma_mut() {
                if let Some(s) = chroma_stride {
                    if s < width {
                        return Err(Error::InvalidShape(format!(
                            "chroma stride {s} < minimum {width} for {format:?}"
                        )));
                    }
                    chroma_ref.set_row_stride_unchecked(s);
                }
                if let Some(o) = chroma_offset {
                    chroma_ref.set_plane_offset(o);
                }
            }

            if dtype == DType::I8 {
                // SAFETY: Tensor<u8> and Tensor<i8> have identical layout because
                // the struct contains only type-erased storage (OwnedFd, shape, name),
                // no inline T values. This assertion catches layout drift at compile time.
                const {
                    assert!(std::mem::size_of::<Tensor<u8>>() == std::mem::size_of::<Tensor<i8>>());
                    assert!(
                        std::mem::align_of::<Tensor<u8>>() == std::mem::align_of::<Tensor<i8>>()
                    );
                }
                let tensor_i8: Tensor<i8> = unsafe { std::mem::transmute(tensor) };
                return Ok(TensorDyn::from(tensor_i8));
            }
            Ok(TensorDyn::from(tensor))
        } else {
            // ── Single-plane path ────────────────────────────────────
            // The fd covers all planes contiguously, so the tensor shape
            // folds any chroma rows into a single [total_h, width] view.
            let shape = match format.layout() {
                PixelLayout::Packed => vec![height, width, format.channels()],
                PixelLayout::Planar => vec![format.channels(), height, width],
                PixelLayout::SemiPlanar => {
                    let total_h = match format {
                        PixelFormat::Nv12 => {
                            if !height.is_multiple_of(2) {
                                return Err(Error::InvalidShape(format!(
                                    "NV12 requires even height, got {height}"
                                )));
                            }
                            height * 3 / 2
                        }
                        PixelFormat::Nv16 => height * 2,
                        _ => {
                            return Err(Error::InvalidShape(format!(
                                "unknown semi-planar height multiplier for {format:?}"
                            )))
                        }
                    };
                    vec![total_h, width]
                }
                _ => {
                    return Err(Error::NotSupported(format!(
                        "unsupported pixel layout for import_image: {:?}",
                        format.layout()
                    )));
                }
            };
            let tensor = TensorDyn::from_fd(image.into_fd(), &shape, dtype, None)?;
            if tensor.memory() != TensorMemory::Dma {
                return Err(Error::NotSupported(format!(
                    "import_image requires DMA-backed fd, got {:?}",
                    tensor.memory()
                )));
            }
            let mut tensor = tensor.with_format(format)?;
            if let Some(s) = image_stride {
                tensor.set_row_stride(s)?;
            }
            if let Some(o) = image_offset {
                tensor.set_plane_offset(o);
            }
            Ok(tensor)
        }
    }
1491
1492    /// Decode model outputs and draw segmentation masks onto `dst`.
1493    ///
1494    /// This is the primary mask rendering API. The processor decodes via the
1495    /// provided [`Decoder`], selects the optimal rendering path (hybrid
1496    /// CPU+GL or fused GPU), and composites masks onto `dst`.
1497    ///
1498    /// Returns the detected bounding boxes.
1499    pub fn draw_masks(
1500        &mut self,
1501        decoder: &edgefirst_decoder::Decoder,
1502        outputs: &[&TensorDyn],
1503        dst: &mut TensorDyn,
1504        overlay: MaskOverlay<'_>,
1505    ) -> Result<Vec<DetectBox>> {
1506        let mut output_boxes = Vec::with_capacity(100);
1507
1508        // Try proto path first (fused rendering without materializing masks)
1509        let proto_result = decoder
1510            .decode_proto(outputs, &mut output_boxes)
1511            .map_err(|e| Error::Internal(format!("decode_proto: {e:#?}")))?;
1512
1513        if let Some(proto_data) = proto_result {
1514            self.draw_proto_masks(dst, &output_boxes, &proto_data, overlay)?;
1515        } else {
1516            // Detection-only or unsupported model: full decode + render
1517            let mut output_masks = Vec::with_capacity(100);
1518            decoder
1519                .decode(outputs, &mut output_boxes, &mut output_masks)
1520                .map_err(|e| Error::Internal(format!("decode: {e:#?}")))?;
1521            self.draw_decoded_masks(dst, &output_boxes, &output_masks, overlay)?;
1522        }
1523        Ok(output_boxes)
1524    }
1525
1526    /// Decode tracked model outputs and draw segmentation masks onto `dst`.
1527    ///
1528    /// Like [`draw_masks`](Self::draw_masks) but integrates a tracker for
1529    /// maintaining object identities across frames. The tracker runs after
1530    /// NMS but before mask extraction.
1531    ///
1532    /// Returns detected boxes and track info.
1533    #[cfg(feature = "tracker")]
1534    pub fn draw_masks_tracked<TR: edgefirst_tracker::Tracker<DetectBox>>(
1535        &mut self,
1536        decoder: &edgefirst_decoder::Decoder,
1537        tracker: &mut TR,
1538        timestamp: u64,
1539        outputs: &[&TensorDyn],
1540        dst: &mut TensorDyn,
1541        overlay: MaskOverlay<'_>,
1542    ) -> Result<(Vec<DetectBox>, Vec<edgefirst_tracker::TrackInfo>)> {
1543        let mut output_boxes = Vec::with_capacity(100);
1544        let mut output_tracks = Vec::new();
1545
1546        let proto_result = decoder
1547            .decode_proto_tracked(
1548                tracker,
1549                timestamp,
1550                outputs,
1551                &mut output_boxes,
1552                &mut output_tracks,
1553            )
1554            .map_err(|e| Error::Internal(format!("decode_proto_tracked: {e:#?}")))?;
1555
1556        if let Some(proto_data) = proto_result {
1557            self.draw_proto_masks(dst, &output_boxes, &proto_data, overlay)?;
1558        } else {
1559            // Note: decode_proto_tracked returns None for detection-only/ModelPack
1560            // models WITHOUT calling the tracker. The else branch below is the
1561            // first (and only) tracker call for those model types.
1562            let mut output_masks = Vec::with_capacity(100);
1563            decoder
1564                .decode_tracked(
1565                    tracker,
1566                    timestamp,
1567                    outputs,
1568                    &mut output_boxes,
1569                    &mut output_masks,
1570                    &mut output_tracks,
1571                )
1572                .map_err(|e| Error::Internal(format!("decode_tracked: {e:#?}")))?;
1573            self.draw_decoded_masks(dst, &output_boxes, &output_masks, overlay)?;
1574        }
1575        Ok((output_boxes, output_tracks))
1576    }
1577
1578    /// Materialize per-instance segmentation masks from raw prototype data.
1579    ///
1580    /// Computes `mask_coeff @ protos` with sigmoid activation for each detection,
1581    /// producing compact masks at prototype resolution (e.g., 160×160 crops).
1582    /// Mask values are continuous sigmoid confidence outputs quantized to u8
1583    /// (0 = background, 255 = full confidence), NOT binary thresholded.
1584    ///
1585    /// The returned [`Vec<Segmentation>`] can be:
1586    /// - Inspected or exported for analytics, IoU computation, etc.
1587    /// - Passed directly to [`ImageProcessorTrait::draw_decoded_masks`] for
1588    ///   GPU-interpolated rendering.
1589    ///
1590    /// # Performance Note
1591    ///
1592    /// Calling `materialize_masks` + `draw_decoded_masks` separately prevents
1593    /// the HAL from using its internal fused optimization path. For render-only
1594    /// use cases, prefer [`ImageProcessorTrait::draw_proto_masks`] which selects
1595    /// the fastest path automatically (currently 1.6×–27× faster on tested
1596    /// platforms). Use this method when you need access to the intermediate masks.
1597    ///
1598    /// # Errors
1599    ///
1600    /// Returns [`Error::NoConverter`] if the CPU backend is not available.
1601    pub fn materialize_masks(
1602        &self,
1603        detect: &[DetectBox],
1604        proto_data: &ProtoData,
1605        letterbox: Option<[f32; 4]>,
1606    ) -> Result<Vec<Segmentation>> {
1607        let cpu = self.cpu.as_ref().ok_or(Error::NoConverter)?;
1608        cpu.materialize_segmentations(detect, proto_data, letterbox)
1609    }
1610}
1611
impl ImageProcessorTrait for ImageProcessor {
    /// Converts the source image to the destination image format and size. The
    /// image is cropped first, then flipped, then rotated
    ///
    /// Prefer hardware accelerators when available, falling back to CPU if
    /// necessary.
    ///
    /// Dispatch: when `self.forced_backend` is set, only that backend is
    /// tried and its error (or `ForcedBackendUnavailable`) is returned with
    /// no fallback. Otherwise the auto chain is OpenGL → G2D → CPU; each
    /// backend's decline is only traced, and the final CPU error (if any)
    /// is propagated.
    fn convert(
        &mut self,
        src: &TensorDyn,
        dst: &mut TensorDyn,
        rotation: Rotation,
        flip: Flip,
        crop: Crop,
    ) -> Result<()> {
        let start = Instant::now();
        let src_fmt = src.format();
        let dst_fmt = dst.format();
        log::trace!(
            "convert: {src_fmt:?}({:?}/{:?}) → {dst_fmt:?}({:?}/{:?}), \
             rotation={rotation:?}, flip={flip:?}, backend={:?}",
            src.dtype(),
            src.memory(),
            dst.dtype(),
            dst.memory(),
            self.forced_backend,
        );

        // ── Forced backend: no fallback chain ────────────────────────
        if let Some(forced) = self.forced_backend {
            return match forced {
                ForcedBackend::Cpu => {
                    if let Some(cpu) = self.cpu.as_mut() {
                        let r = cpu.convert(src, dst, rotation, flip, crop);
                        log::trace!(
                            "convert: forced=cpu result={} ({:?})",
                            if r.is_ok() { "ok" } else { "err" },
                            start.elapsed()
                        );
                        return r;
                    }
                    // Reached only when no CPU backend was constructed.
                    Err(Error::ForcedBackendUnavailable("cpu".into()))
                }
                ForcedBackend::G2d => {
                    // The G2D probe is compiled only on Linux; on other
                    // targets this arm falls through to the error below.
                    #[cfg(target_os = "linux")]
                    if let Some(g2d) = self.g2d.as_mut() {
                        let r = g2d.convert(src, dst, rotation, flip, crop);
                        log::trace!(
                            "convert: forced=g2d result={} ({:?})",
                            if r.is_ok() { "ok" } else { "err" },
                            start.elapsed()
                        );
                        return r;
                    }
                    Err(Error::ForcedBackendUnavailable("g2d".into()))
                }
                ForcedBackend::OpenGl => {
                    // OpenGL requires both Linux and the "opengl" feature.
                    #[cfg(target_os = "linux")]
                    #[cfg(feature = "opengl")]
                    if let Some(opengl) = self.opengl.as_mut() {
                        let r = opengl.convert(src, dst, rotation, flip, crop);
                        log::trace!(
                            "convert: forced=opengl result={} ({:?})",
                            if r.is_ok() { "ok" } else { "err" },
                            start.elapsed()
                        );
                        return r;
                    }
                    Err(Error::ForcedBackendUnavailable("opengl".into()))
                }
            };
        }

        // ── Auto fallback chain: OpenGL → G2D → CPU ──────────────────
        #[cfg(target_os = "linux")]
        #[cfg(feature = "opengl")]
        if let Some(opengl) = self.opengl.as_mut() {
            match opengl.convert(src, dst, rotation, flip, crop) {
                Ok(_) => {
                    log::trace!(
                        "convert: auto selected=opengl for {src_fmt:?}→{dst_fmt:?} ({:?})",
                        start.elapsed()
                    );
                    return Ok(());
                }
                Err(e) => {
                    // A decline is not fatal here — try the next backend.
                    log::trace!("convert: auto opengl declined {src_fmt:?}→{dst_fmt:?}: {e}");
                }
            }
        }

        #[cfg(target_os = "linux")]
        if let Some(g2d) = self.g2d.as_mut() {
            match g2d.convert(src, dst, rotation, flip, crop) {
                Ok(_) => {
                    log::trace!(
                        "convert: auto selected=g2d for {src_fmt:?}→{dst_fmt:?} ({:?})",
                        start.elapsed()
                    );
                    return Ok(());
                }
                Err(e) => {
                    log::trace!("convert: auto g2d declined {src_fmt:?}→{dst_fmt:?}: {e}");
                }
            }
        }

        if let Some(cpu) = self.cpu.as_mut() {
            match cpu.convert(src, dst, rotation, flip, crop) {
                Ok(_) => {
                    log::trace!(
                        "convert: auto selected=cpu for {src_fmt:?}→{dst_fmt:?} ({:?})",
                        start.elapsed()
                    );
                    return Ok(());
                }
                Err(e) => {
                    // CPU is the last resort — its error is the real answer.
                    log::trace!("convert: auto cpu failed {src_fmt:?}→{dst_fmt:?}: {e}");
                    return Err(e);
                }
            }
        }
        // No backend was constructed at all.
        Err(Error::NoConverter)
    }

    /// Render pre-materialized segmentation masks onto `dst`.
    ///
    /// Rejects a `background` overlay that aliases `dst`, un-letterboxes
    /// boxes (and, for 1:1 instance segmentation, the segmentation bboxes)
    /// when `overlay.letterbox` is set, then dispatches: forced backend if
    /// configured, otherwise G2D for empty frames, then OpenGL, then CPU.
    fn draw_decoded_masks(
        &mut self,
        dst: &mut TensorDyn,
        detect: &[DetectBox],
        segmentation: &[Segmentation],
        overlay: MaskOverlay<'_>,
    ) -> Result<()> {
        let start = Instant::now();

        if let Some(bg) = overlay.background {
            if bg.aliases(dst) {
                return Err(Error::AliasedBuffers(
                    "background must not reference the same buffer as dst".to_string(),
                ));
            }
        }

        // Un-letterbox detect boxes and segmentation bboxes for rendering when
        // a letterbox was applied to prepare the model input.
        let lb_boxes: Vec<DetectBox>;
        let lb_segs: Vec<Segmentation>;
        let (detect, segmentation) = if let Some(lb) = overlay.letterbox {
            lb_boxes = detect.iter().map(|&d| unletter_bbox(d, lb)).collect();
            // Keep segmentation bboxes in sync with the transformed detect boxes
            // when we have a 1:1 correspondence (instance segmentation).
            lb_segs = if segmentation.len() == lb_boxes.len() {
                segmentation
                    .iter()
                    .zip(lb_boxes.iter())
                    .map(|(s, d)| Segmentation {
                        xmin: d.bbox.xmin,
                        ymin: d.bbox.ymin,
                        xmax: d.bbox.xmax,
                        ymax: d.bbox.ymax,
                        segmentation: s.segmentation.clone(),
                    })
                    .collect()
            } else {
                // Counts differ (e.g. semantic masks) — leave bboxes alone.
                segmentation.to_vec()
            };
            (lb_boxes.as_slice(), lb_segs.as_slice())
        } else {
            (detect, segmentation)
        };
        // Only the Linux auto-dispatch below consults this flag.
        #[cfg(target_os = "linux")]
        let is_empty_frame = detect.is_empty() && segmentation.is_empty();

        // ── Forced backend: no fallback chain ────────────────────────
        if let Some(forced) = self.forced_backend {
            return match forced {
                ForcedBackend::Cpu => {
                    if let Some(cpu) = self.cpu.as_mut() {
                        return cpu.draw_decoded_masks(dst, detect, segmentation, overlay);
                    }
                    Err(Error::ForcedBackendUnavailable("cpu".into()))
                }
                ForcedBackend::G2d => {
                    // G2D can only produce empty frames (clear / bg blit).
                    // For populated frames it has no rasterizer — fail loudly.
                    #[cfg(target_os = "linux")]
                    if let Some(g2d) = self.g2d.as_mut() {
                        return g2d.draw_decoded_masks(dst, detect, segmentation, overlay);
                    }
                    Err(Error::ForcedBackendUnavailable("g2d".into()))
                }
                ForcedBackend::OpenGl => {
                    // GL handles background natively via GPU blit, and now
                    // actively clears when there is no background.
                    #[cfg(target_os = "linux")]
                    #[cfg(feature = "opengl")]
                    if let Some(opengl) = self.opengl.as_mut() {
                        return opengl.draw_decoded_masks(dst, detect, segmentation, overlay);
                    }
                    Err(Error::ForcedBackendUnavailable("opengl".into()))
                }
            };
        }

        // ── Auto dispatch ──────────────────────────────────────────
        // Empty frames prefer G2D when available — a single g2d_clear or
        // g2d_blit is the cheapest HW path to produce the correct output
        // and avoids spinning up the GL pipeline every zero-detection
        // frame in a triple-buffered display loop.
        #[cfg(target_os = "linux")]
        if is_empty_frame {
            if let Some(g2d) = self.g2d.as_mut() {
                match g2d.draw_decoded_masks(dst, detect, segmentation, overlay) {
                    Ok(_) => {
                        log::trace!(
                            "draw_decoded_masks empty frame via g2d in {:?}",
                            start.elapsed()
                        );
                        return Ok(());
                    }
                    Err(e) => log::trace!("g2d empty-frame path unavailable: {e:?}"),
                }
            }
        }

        // Populated frames (or G2D unavailable): GL first, CPU fallback.
        // Both backends now own their own base-layer handling (bg blit
        // or clear), so we hand the overlay through untouched.
        #[cfg(target_os = "linux")]
        #[cfg(feature = "opengl")]
        if let Some(opengl) = self.opengl.as_mut() {
            log::trace!(
                "draw_decoded_masks started with opengl in {:?}",
                start.elapsed()
            );
            match opengl.draw_decoded_masks(dst, detect, segmentation, overlay) {
                Ok(_) => {
                    log::trace!("draw_decoded_masks with opengl in {:?}", start.elapsed());
                    return Ok(());
                }
                Err(e) => {
                    log::trace!("draw_decoded_masks didn't work with opengl: {e:?}")
                }
            }
        }

        log::trace!(
            "draw_decoded_masks started with cpu in {:?}",
            start.elapsed()
        );
        if let Some(cpu) = self.cpu.as_mut() {
            match cpu.draw_decoded_masks(dst, detect, segmentation, overlay) {
                Ok(_) => {
                    log::trace!("draw_decoded_masks with cpu in {:?}", start.elapsed());
                    return Ok(());
                }
                Err(e) => {
                    log::trace!("draw_decoded_masks didn't work with cpu: {e:?}");
                    return Err(e);
                }
            }
        }
        Err(Error::NoConverter)
    }

    /// Render segmentation masks directly from prototype data onto `dst`.
    ///
    /// Same backend dispatch rules as `draw_decoded_masks`. Note that the
    /// original model-space `detect` boxes are kept for mask materialization
    /// while the un-letterboxed `render_detect` boxes are used for drawing.
    fn draw_proto_masks(
        &mut self,
        dst: &mut TensorDyn,
        detect: &[DetectBox],
        proto_data: &ProtoData,
        overlay: MaskOverlay<'_>,
    ) -> Result<()> {
        let start = Instant::now();

        if let Some(bg) = overlay.background {
            if bg.aliases(dst) {
                return Err(Error::AliasedBuffers(
                    "background must not reference the same buffer as dst".to_string(),
                ));
            }
        }

        // Un-letterbox detect boxes for rendering when a letterbox was applied
        // to prepare the model input.  The original `detect` coords are still
        // passed to `materialize_segmentations` (which needs model-space coords
        // to correctly crop the proto tensor) alongside `overlay.letterbox` so
        // it can emit `Segmentation` structs in output-image space.
        let lb_boxes: Vec<DetectBox>;
        let render_detect = if let Some(lb) = overlay.letterbox {
            lb_boxes = detect.iter().map(|&d| unletter_bbox(d, lb)).collect();
            lb_boxes.as_slice()
        } else {
            detect
        };
        #[cfg(target_os = "linux")]
        let is_empty_frame = detect.is_empty();

        // ── Forced backend: no fallback chain ────────────────────────
        if let Some(forced) = self.forced_backend {
            return match forced {
                ForcedBackend::Cpu => {
                    if let Some(cpu) = self.cpu.as_mut() {
                        return cpu.draw_proto_masks(dst, render_detect, proto_data, overlay);
                    }
                    Err(Error::ForcedBackendUnavailable("cpu".into()))
                }
                ForcedBackend::G2d => {
                    #[cfg(target_os = "linux")]
                    if let Some(g2d) = self.g2d.as_mut() {
                        return g2d.draw_proto_masks(dst, render_detect, proto_data, overlay);
                    }
                    Err(Error::ForcedBackendUnavailable("g2d".into()))
                }
                ForcedBackend::OpenGl => {
                    #[cfg(target_os = "linux")]
                    #[cfg(feature = "opengl")]
                    if let Some(opengl) = self.opengl.as_mut() {
                        return opengl.draw_proto_masks(dst, render_detect, proto_data, overlay);
                    }
                    Err(Error::ForcedBackendUnavailable("opengl".into()))
                }
            };
        }

        // ── Auto dispatch ──────────────────────────────────────────
        // Empty frames: prefer G2D — cheapest HW path (clear or bg blit).
        #[cfg(target_os = "linux")]
        if is_empty_frame {
            if let Some(g2d) = self.g2d.as_mut() {
                match g2d.draw_proto_masks(dst, render_detect, proto_data, overlay) {
                    Ok(_) => {
                        log::trace!(
                            "draw_proto_masks empty frame via g2d in {:?}",
                            start.elapsed()
                        );
                        return Ok(());
                    }
                    Err(e) => log::trace!("g2d empty-frame path unavailable: {e:?}"),
                }
            }
        }

        // Hybrid path: CPU materialize + GL overlay (benchmarked faster than
        // full-GPU draw_proto_masks on all tested platforms: 27× on imx8mp,
        // 4× on imx95, 2.5× on rpi5, 1.6× on x86).
        // GL owns its own bg-blit / glClear — we pass the overlay through.
        #[cfg(target_os = "linux")]
        #[cfg(feature = "opengl")]
        if let Some(opengl) = self.opengl.as_mut() {
            // Borrows are disjoint: `opengl` is &mut, `cpu` is & — OK.
            let Some(cpu) = self.cpu.as_ref() else {
                return Err(Error::Internal(
                    "draw_proto_masks requires CPU backend for hybrid path".into(),
                ));
            };
            log::trace!(
                "draw_proto_masks started with hybrid (cpu+opengl) in {:?}",
                start.elapsed()
            );
            let segmentation =
                cpu.materialize_segmentations(detect, proto_data, overlay.letterbox)?;
            match opengl.draw_decoded_masks(dst, render_detect, &segmentation, overlay) {
                Ok(_) => {
                    log::trace!(
                        "draw_proto_masks with hybrid (cpu+opengl) in {:?}",
                        start.elapsed()
                    );
                    return Ok(());
                }
                Err(e) => {
                    log::trace!("draw_proto_masks hybrid path failed, falling back to cpu: {e:?}");
                }
            }
        }

        let Some(cpu) = self.cpu.as_mut() else {
            return Err(Error::Internal(
                "draw_proto_masks requires CPU backend for fallback path".into(),
            ));
        };
        log::trace!("draw_proto_masks started with cpu in {:?}", start.elapsed());
        cpu.draw_proto_masks(dst, render_detect, proto_data, overlay)
    }

    /// Set the per-class mask colors used by subsequent draw calls.
    ///
    /// Forced backend is honored exclusively (G2D rejects this outright);
    /// otherwise the colors are applied to OpenGL when present, falling back
    /// to CPU. NOTE(review): in the auto path only ONE backend receives the
    /// colors (first success wins) — confirm this matches draw dispatch,
    /// which may later render with the other backend.
    fn set_class_colors(&mut self, colors: &[[u8; 4]]) -> Result<()> {
        let start = Instant::now();

        // ── Forced backend: no fallback chain ────────────────────────
        if let Some(forced) = self.forced_backend {
            return match forced {
                ForcedBackend::Cpu => {
                    if let Some(cpu) = self.cpu.as_mut() {
                        return cpu.set_class_colors(colors);
                    }
                    Err(Error::ForcedBackendUnavailable("cpu".into()))
                }
                ForcedBackend::G2d => Err(Error::NotSupported(
                    "g2d does not support set_class_colors".into(),
                )),
                ForcedBackend::OpenGl => {
                    #[cfg(target_os = "linux")]
                    #[cfg(feature = "opengl")]
                    if let Some(opengl) = self.opengl.as_mut() {
                        return opengl.set_class_colors(colors);
                    }
                    Err(Error::ForcedBackendUnavailable("opengl".into()))
                }
            };
        }

        // skip G2D as it doesn't support rendering to image

        #[cfg(target_os = "linux")]
        #[cfg(feature = "opengl")]
        if let Some(opengl) = self.opengl.as_mut() {
            log::trace!("image started with opengl in {:?}", start.elapsed());
            match opengl.set_class_colors(colors) {
                Ok(_) => {
                    log::trace!("colors set with opengl in {:?}", start.elapsed());
                    return Ok(());
                }
                Err(e) => {
                    log::trace!("colors didn't set with opengl: {e:?}")
                }
            }
        }
        log::trace!("image started with cpu in {:?}", start.elapsed());
        if let Some(cpu) = self.cpu.as_mut() {
            match cpu.set_class_colors(colors) {
                Ok(_) => {
                    log::trace!("colors set with cpu in {:?}", start.elapsed());
                    return Ok(());
                }
                Err(e) => {
                    log::trace!("colors didn't set with cpu: {e:?}");
                    return Err(e);
                }
            }
        }
        Err(Error::NoConverter)
    }
}
2051
2052// ---------------------------------------------------------------------------
2053// Image loading / saving helpers
2054// ---------------------------------------------------------------------------
2055
2056/// Read EXIF orientation from raw EXIF bytes and return (Rotation, Flip).
2057fn read_exif_orientation(exif_bytes: &[u8]) -> (Rotation, Flip) {
2058    let exifreader = exif::Reader::new();
2059    let Ok(exif_) = exifreader.read_raw(exif_bytes.to_vec()) else {
2060        return (Rotation::None, Flip::None);
2061    };
2062    let Some(orientation) = exif_.get_field(exif::Tag::Orientation, exif::In::PRIMARY) else {
2063        return (Rotation::None, Flip::None);
2064    };
2065    match orientation.value.get_uint(0) {
2066        Some(1) => (Rotation::None, Flip::None),
2067        Some(2) => (Rotation::None, Flip::Horizontal),
2068        Some(3) => (Rotation::Rotate180, Flip::None),
2069        Some(4) => (Rotation::Rotate180, Flip::Horizontal),
2070        Some(5) => (Rotation::Clockwise90, Flip::Horizontal),
2071        Some(6) => (Rotation::Clockwise90, Flip::None),
2072        Some(7) => (Rotation::CounterClockwise90, Flip::Horizontal),
2073        Some(8) => (Rotation::CounterClockwise90, Flip::None),
2074        Some(v) => {
2075            log::warn!("broken orientation EXIF value: {v}");
2076            (Rotation::None, Flip::None)
2077        }
2078        None => (Rotation::None, Flip::None),
2079    }
2080}
2081
2082/// Map a [`PixelFormat`] to the zune-jpeg `ColorSpace` for decoding.
2083/// Returns `None` for formats that the JPEG decoder cannot output directly.
2084fn pixelfmt_to_colorspace(fmt: PixelFormat) -> Option<ColorSpace> {
2085    match fmt {
2086        PixelFormat::Rgb => Some(ColorSpace::RGB),
2087        PixelFormat::Rgba => Some(ColorSpace::RGBA),
2088        PixelFormat::Grey => Some(ColorSpace::Luma),
2089        _ => None,
2090    }
2091}
2092
2093/// Map a zune-jpeg `ColorSpace` to a [`PixelFormat`].
2094fn colorspace_to_pixelfmt(cs: ColorSpace) -> Option<PixelFormat> {
2095    match cs {
2096        ColorSpace::RGB => Some(PixelFormat::Rgb),
2097        ColorSpace::RGBA => Some(PixelFormat::Rgba),
2098        ColorSpace::Luma => Some(PixelFormat::Grey),
2099        _ => None,
2100    }
2101}
2102
2103/// Load a JPEG image from raw bytes and return a [`TensorDyn`].
2104fn load_jpeg(
2105    image: &[u8],
2106    format: Option<PixelFormat>,
2107    memory: Option<TensorMemory>,
2108) -> Result<TensorDyn> {
2109    let colour = match format {
2110        Some(f) => pixelfmt_to_colorspace(f)
2111            .ok_or_else(|| Error::NotSupported(format!("Unsupported image format {f:?}")))?,
2112        None => ColorSpace::RGB,
2113    };
2114    let options = DecoderOptions::default().jpeg_set_out_colorspace(colour);
2115    let mut decoder = JpegDecoder::new_with_options(image, options);
2116    decoder.decode_headers()?;
2117
2118    let image_info = decoder.info().ok_or(Error::Internal(
2119        "JPEG did not return decoded image info".to_string(),
2120    ))?;
2121
2122    let converted_cs = decoder
2123        .get_output_colorspace()
2124        .ok_or(Error::Internal("No output colorspace".to_string()))?;
2125
2126    let converted_fmt = colorspace_to_pixelfmt(converted_cs).ok_or(Error::NotSupported(
2127        "Unsupported JPEG decoder output".to_string(),
2128    ))?;
2129
2130    let dest_fmt = format.unwrap_or(converted_fmt);
2131
2132    let (rotation, flip) = decoder
2133        .exif()
2134        .map(|x| read_exif_orientation(x))
2135        .unwrap_or((Rotation::None, Flip::None));
2136
2137    let w = image_info.width as usize;
2138    let h = image_info.height as usize;
2139
2140    if (rotation, flip) == (Rotation::None, Flip::None) {
2141        let mut img = Tensor::<u8>::image(w, h, dest_fmt, memory)?;
2142
2143        if converted_fmt != dest_fmt {
2144            let tmp = Tensor::<u8>::image(w, h, converted_fmt, Some(TensorMemory::Mem))?;
2145            decoder.decode_into(&mut tmp.map()?)?;
2146            CPUProcessor::convert_format_pf(&tmp, &mut img, converted_fmt, dest_fmt)?;
2147            return Ok(TensorDyn::from(img));
2148        }
2149        decoder.decode_into(&mut img.map()?)?;
2150        return Ok(TensorDyn::from(img));
2151    }
2152
2153    let mut tmp = Tensor::<u8>::image(w, h, dest_fmt, Some(TensorMemory::Mem))?;
2154
2155    if converted_fmt != dest_fmt {
2156        let tmp2 = Tensor::<u8>::image(w, h, converted_fmt, Some(TensorMemory::Mem))?;
2157        decoder.decode_into(&mut tmp2.map()?)?;
2158        CPUProcessor::convert_format_pf(&tmp2, &mut tmp, converted_fmt, dest_fmt)?;
2159    } else {
2160        decoder.decode_into(&mut tmp.map()?)?;
2161    }
2162
2163    rotate_flip_to_dyn(&tmp, dest_fmt, rotation, flip, memory)
2164}
2165
2166/// Load a PNG image from raw bytes and return a [`TensorDyn`].
2167fn load_png(
2168    image: &[u8],
2169    format: Option<PixelFormat>,
2170    memory: Option<TensorMemory>,
2171) -> Result<TensorDyn> {
2172    let fmt = format.unwrap_or(PixelFormat::Rgb);
2173    let alpha = match fmt {
2174        PixelFormat::Rgb => false,
2175        PixelFormat::Rgba => true,
2176        _ => {
2177            return Err(Error::NotImplemented(
2178                "Unsupported image format".to_string(),
2179            ));
2180        }
2181    };
2182
2183    let options = DecoderOptions::default()
2184        .png_set_add_alpha_channel(alpha)
2185        .png_set_decode_animated(false);
2186    let mut decoder = PngDecoder::new_with_options(image, options);
2187    decoder.decode_headers()?;
2188    let image_info = decoder.get_info().ok_or(Error::Internal(
2189        "PNG did not return decoded image info".to_string(),
2190    ))?;
2191
2192    let (rotation, flip) = image_info
2193        .exif
2194        .as_ref()
2195        .map(|x| read_exif_orientation(x))
2196        .unwrap_or((Rotation::None, Flip::None));
2197
2198    if (rotation, flip) == (Rotation::None, Flip::None) {
2199        let img = Tensor::<u8>::image(image_info.width, image_info.height, fmt, memory)?;
2200        decoder.decode_into(&mut img.map()?)?;
2201        return Ok(TensorDyn::from(img));
2202    }
2203
2204    let tmp = Tensor::<u8>::image(
2205        image_info.width,
2206        image_info.height,
2207        fmt,
2208        Some(TensorMemory::Mem),
2209    )?;
2210    decoder.decode_into(&mut tmp.map()?)?;
2211
2212    rotate_flip_to_dyn(&tmp, fmt, rotation, flip, memory)
2213}
2214
2215/// Load an image from raw bytes (JPEG or PNG) and return a [`TensorDyn`].
2216///
2217/// The optional `format` specifies the desired output pixel format (e.g.,
2218/// [`PixelFormat::Rgb`], [`PixelFormat::Rgba`]); if `None`, the native
2219/// format of the file is used (typically RGB for JPEG).
2220///
2221/// # Examples
2222/// ```rust
2223/// use edgefirst_image::load_image;
2224/// use edgefirst_tensor::PixelFormat;
2225/// # fn main() -> Result<(), edgefirst_image::Error> {
2226/// let jpeg = include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/../../testdata/zidane.jpg"));
2227/// let img = load_image(jpeg, Some(PixelFormat::Rgb), None)?;
2228/// assert_eq!(img.width(), Some(1280));
2229/// assert_eq!(img.height(), Some(720));
2230/// # Ok(())
2231/// # }
2232/// ```
2233pub fn load_image(
2234    image: &[u8],
2235    format: Option<PixelFormat>,
2236    memory: Option<TensorMemory>,
2237) -> Result<TensorDyn> {
2238    if let Ok(i) = load_jpeg(image, format, memory) {
2239        return Ok(i);
2240    }
2241    if let Ok(i) = load_png(image, format, memory) {
2242        return Ok(i);
2243    }
2244    Err(Error::NotSupported(
2245        "Could not decode as jpeg or png".to_string(),
2246    ))
2247}
2248
2249/// Save a [`TensorDyn`] image as a JPEG file.
2250///
2251/// Only packed RGB and RGBA formats are supported.
2252pub fn save_jpeg(tensor: &TensorDyn, path: impl AsRef<std::path::Path>, quality: u8) -> Result<()> {
2253    let t = tensor.as_u8().ok_or(Error::UnsupportedFormat(
2254        "save_jpeg requires u8 tensor".to_string(),
2255    ))?;
2256    let fmt = t.format().ok_or(Error::NotAnImage)?;
2257    if fmt.layout() != PixelLayout::Packed {
2258        return Err(Error::NotImplemented(
2259            "Saving planar images is not supported".to_string(),
2260        ));
2261    }
2262
2263    let colour = match fmt {
2264        PixelFormat::Rgb => jpeg_encoder::ColorType::Rgb,
2265        PixelFormat::Rgba => jpeg_encoder::ColorType::Rgba,
2266        _ => {
2267            return Err(Error::NotImplemented(
2268                "Unsupported image format for saving".to_string(),
2269            ));
2270        }
2271    };
2272
2273    let w = t.width().ok_or(Error::NotAnImage)?;
2274    let h = t.height().ok_or(Error::NotAnImage)?;
2275    let encoder = jpeg_encoder::Encoder::new_file(path, quality)?;
2276    let tensor_map = t.map()?;
2277
2278    encoder.encode(&tensor_map, w as u16, h as u16, colour)?;
2279
2280    Ok(())
2281}
2282
2283pub(crate) struct FunctionTimer<T: Display> {
2284    name: T,
2285    start: std::time::Instant,
2286}
2287
2288impl<T: Display> FunctionTimer<T> {
2289    pub fn new(name: T) -> Self {
2290        Self {
2291            name,
2292            start: std::time::Instant::now(),
2293        }
2294    }
2295}
2296
2297impl<T: Display> Drop for FunctionTimer<T> {
2298    fn drop(&mut self) {
2299        log::trace!("{} elapsed: {:?}", self.name, self.start.elapsed())
2300    }
2301}
2302
/// Default overlay palette: 20 RGBA colors with components normalized to
/// `0.0..=1.0` and a fixed alpha of `0.7`. Converted to bytes at compile
/// time by `denorm` below.
const DEFAULT_COLORS: [[f32; 4]; 20] = [
    [0., 1., 0., 0.7],
    [1., 0.5568628, 0., 0.7],
    [0.25882353, 0.15294118, 0.13333333, 0.7],
    [0.8, 0.7647059, 0.78039216, 0.7],
    [0.3137255, 0.3137255, 0.3137255, 0.7],
    [0.1411765, 0.3098039, 0.1215686, 0.7],
    [1., 0.95686275, 0.5137255, 0.7],
    [0.3529412, 0.32156863, 0., 0.7],
    [0.4235294, 0.6235294, 0.6509804, 0.7],
    [0.5098039, 0.5098039, 0.7294118, 0.7],
    [0.00784314, 0.18823529, 0.29411765, 0.7],
    [0.0, 0.2706, 1.0, 0.7],
    [0.0, 0.0, 0.0, 0.7],
    [0.0, 0.5, 0.0, 0.7],
    [1.0, 0.0, 0.0, 0.7],
    [0.0, 0.0, 1.0, 0.7],
    [1.0, 0.5, 0.5, 0.7],
    [0.1333, 0.5451, 0.1333, 0.7],
    [0.1176, 0.4118, 0.8235, 0.7],
    [1., 1., 1., 0.7],
];
2325
2326const fn denorm<const M: usize, const N: usize>(a: [[f32; M]; N]) -> [[u8; M]; N] {
2327    let mut result = [[0; M]; N];
2328    let mut i = 0;
2329    while i < N {
2330        let mut j = 0;
2331        while j < M {
2332            result[i][j] = (a[i][j] * 255.0).round() as u8;
2333            j += 1;
2334        }
2335        i += 1;
2336    }
2337    result
2338}
2339
/// Byte-valued (`0..=255`) version of `DEFAULT_COLORS`, computed at
/// compile time.
const DEFAULT_COLORS_U8: [[u8; 4]; 20] = denorm(DEFAULT_COLORS);
2341
#[cfg(test)]
#[cfg_attr(coverage_nightly, coverage(off))]
mod alignment_tests {
    use super::*;

    #[test]
    fn align_width_rgba8_common_widths() {
        // RGBA8 (bpp=4, lcm(64,4)=64, so width must round to multiple of 16 px).
        assert_eq!(align_width_for_gpu_pitch(640, 4), 640); // 2560 byte pitch — already aligned
        assert_eq!(align_width_for_gpu_pitch(1280, 4), 1280); // 5120
        assert_eq!(align_width_for_gpu_pitch(1920, 4), 1920); // 7680
        assert_eq!(align_width_for_gpu_pitch(3840, 4), 3840); // 15360
        // crowd.png case from the imx95 investigation:
        assert_eq!(align_width_for_gpu_pitch(3004, 4), 3008); // 12016 → 12032
        assert_eq!(align_width_for_gpu_pitch(3000, 4), 3008); // 12000 → 12032
        assert_eq!(align_width_for_gpu_pitch(17, 4), 32); // 68 → 128
        assert_eq!(align_width_for_gpu_pitch(1, 4), 16); // 4 → 64
    }

    #[test]
    fn align_width_rgb888_packed() {
        // RGB888 (bpp=3, lcm(64,3)=192, so width must round to multiple of 64 px).
        assert_eq!(align_width_for_gpu_pitch(64, 3), 64); // 192 byte pitch
        assert_eq!(align_width_for_gpu_pitch(640, 3), 640); // 1920
        assert_eq!(align_width_for_gpu_pitch(1, 3), 64); // 3 → 192
        assert_eq!(align_width_for_gpu_pitch(65, 3), 128); // 195 → 384
        // Verify the rounded width × bpp is a clean multiple of lcm(64, 3)
        // = 192. A multiple of 192 is necessarily a multiple of both 64 and
        // 3, so a single modulo check covers both factors (the previous
        // `% 3 == 0` assertion on `padded * 3` was a tautology).
        for w in [3004usize, 1281, 100, 17] {
            let padded = align_width_for_gpu_pitch(w, 3);
            assert!(padded >= w);
            assert_eq!((padded * 3) % 192, 0);
        }
    }

    #[test]
    fn align_width_grey_u8() {
        // Grey (bpp=1, lcm(64,1)=64, so width must round to multiple of 64 px).
        assert_eq!(align_width_for_gpu_pitch(64, 1), 64);
        assert_eq!(align_width_for_gpu_pitch(640, 1), 640);
        assert_eq!(align_width_for_gpu_pitch(1, 1), 64);
        assert_eq!(align_width_for_gpu_pitch(65, 1), 128);
    }

    #[test]
    fn align_width_zero_inputs() {
        // Degenerate inputs must pass through unchanged rather than panic.
        assert_eq!(align_width_for_gpu_pitch(0, 4), 0);
        assert_eq!(align_width_for_gpu_pitch(640, 0), 640);
    }

    #[test]
    fn align_width_never_returns_smaller_than_input() {
        // Spot-check the "returned width >= input width" contract across a
        // range of values that would previously have hit `width * bpp`
        // overflow paths.
        for &bpp in &[1usize, 2, 3, 4, 8] {
            for &w in &[
                1usize,
                17,
                64,
                65,
                100,
                1280,
                1281,
                1920,
                3004,
                3072,
                3840,
                usize::MAX / 8,
                usize::MAX / 4,
                usize::MAX / 2,
                usize::MAX - 1,
                usize::MAX,
            ] {
                let aligned = align_width_for_gpu_pitch(w, bpp);
                assert!(
                    aligned >= w,
                    "align_width_for_gpu_pitch({w}, {bpp}) = {aligned} < {w}"
                );
            }
        }
    }

    #[test]
    fn align_width_overflow_returns_unaligned_not_smaller() {
        // For width values close to usize::MAX, padding up would wrap. The
        // function must return the original width rather than wrapping or
        // panicking. A pre-aligned width round-trips unchanged even at the
        // extreme.
        let aligned_extreme = usize::MAX - 15; // 16-pixel boundary for RGBA8
        assert_eq!(
            align_width_for_gpu_pitch(aligned_extreme, 4),
            aligned_extreme
        );
        // A misaligned extreme value cannot be rounded up — the function
        // returns the original. (`>=` alone expresses the contract; the
        // old `result == x ||` disjunct was redundant.)
        let misaligned_extreme = usize::MAX - 1;
        let result = align_width_for_gpu_pitch(misaligned_extreme, 4);
        assert!(
            result >= misaligned_extreme,
            "extreme misaligned width must not be rounded down to {result}"
        );
    }

    #[test]
    fn checked_lcm_basic_and_overflow() {
        // Basic LCM identities, including the zero cases.
        assert_eq!(checked_num_integer_lcm(64, 4), Some(64));
        assert_eq!(checked_num_integer_lcm(64, 3), Some(192));
        assert_eq!(checked_num_integer_lcm(64, 1), Some(64));
        assert_eq!(checked_num_integer_lcm(0, 4), Some(0));
        assert_eq!(checked_num_integer_lcm(64, 0), Some(0));
        // Coprime values whose product exceeds usize::MAX must return None.
        assert_eq!(
            checked_num_integer_lcm(usize::MAX, usize::MAX - 1),
            None,
            "coprime extreme values must overflow detect, not panic"
        );
    }

    #[test]
    fn primary_plane_bpp_known_formats() {
        // Packed formats use channels × elem_size.
        assert_eq!(primary_plane_bpp(PixelFormat::Rgba, 1), Some(4));
        assert_eq!(primary_plane_bpp(PixelFormat::Bgra, 1), Some(4));
        assert_eq!(primary_plane_bpp(PixelFormat::Rgb, 1), Some(3));
        assert_eq!(primary_plane_bpp(PixelFormat::Grey, 1), Some(1));
        // Semi-planar (NV12) reports the luma plane's bpp.
        assert_eq!(primary_plane_bpp(PixelFormat::Nv12, 1), Some(1));
    }
}
2472
2473#[cfg(test)]
2474#[cfg_attr(coverage_nightly, coverage(off))]
2475mod image_tests {
2476    use super::*;
2477    use crate::{CPUProcessor, Rotation};
2478    #[cfg(target_os = "linux")]
2479    use edgefirst_tensor::is_dma_available;
2480    use edgefirst_tensor::{TensorMapTrait, TensorMemory, TensorTrait};
2481    use image::buffer::ConvertBuffer;
2482
2483    /// Test helper: call `ImageProcessorTrait::convert()` on two `TensorDyn`s
2484    /// by going through the `TensorDyn` API.
2485    ///
2486    /// Returns the `(src_image, dst_image)` reconstructed from the TensorDyn
2487    /// round-trip so the caller can feed them to `compare_images` etc.
2488    fn convert_img(
2489        proc: &mut dyn ImageProcessorTrait,
2490        src: TensorDyn,
2491        dst: TensorDyn,
2492        rotation: Rotation,
2493        flip: Flip,
2494        crop: Crop,
2495    ) -> (Result<()>, TensorDyn, TensorDyn) {
2496        let src_fourcc = src.format().unwrap();
2497        let dst_fourcc = dst.format().unwrap();
2498        let src_dyn = src;
2499        let mut dst_dyn = dst;
2500        let result = proc.convert(&src_dyn, &mut dst_dyn, rotation, flip, crop);
2501        let src_back = {
2502            let mut __t = src_dyn.into_u8().unwrap();
2503            __t.set_format(src_fourcc).unwrap();
2504            TensorDyn::from(__t)
2505        };
2506        let dst_back = {
2507            let mut __t = dst_dyn.into_u8().unwrap();
2508            __t.set_format(dst_fourcc).unwrap();
2509            TensorDyn::from(__t)
2510        };
2511        (result, src_back, dst_back)
2512    }
2513
    // Runs once before the test harness starts (via `ctor`); defaults the
    // log filter to "info" so test logging is visible without extra setup.
    #[ctor::ctor]
    fn init() {
        env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
    }
2518
    /// Expands to the name of the enclosing function as a `&'static str`.
    ///
    /// Works by taking `type_name` of a nested item `fn f`, which yields the
    /// full module path ending in `::f`; the trailing `::f` (3 chars) is cut
    /// off and everything up to the last `:` is discarded.
    macro_rules! function {
        () => {{
            fn f() {}
            fn type_name_of<T>(_: T) -> &'static str {
                std::any::type_name::<T>()
            }
            let name = type_name_of(f);

            // Find and cut the rest of the path
            match &name[..name.len() - 3].rfind(':') {
                Some(pos) => &name[pos + 1..name.len() - 3],
                None => &name[..name.len() - 3],
            }
        }};
    }
2534
    #[test]
    fn test_invalid_crop() {
        // Exercise Crop::check_crop_dyn() against 100x100 RGB src/dst
        // tensors, stepping through each error variant.
        let src = TensorDyn::image(100, 100, PixelFormat::Rgb, DType::U8, None).unwrap();
        let dst = TensorDyn::image(100, 100, PixelFormat::Rgb, DType::U8, None).unwrap();

        // Both rects out of bounds.
        let crop = Crop::new()
            .with_src_rect(Some(Rect::new(50, 50, 60, 60)))
            .with_dst_rect(Some(Rect::new(0, 0, 150, 150)));

        let result = crop.check_crop_dyn(&src, &dst);
        assert!(matches!(
            result,
            Err(Error::CropInvalid(e)) if e.starts_with("Dest and Src crop invalid")
        ));

        // Fix the src rect; only the dst rect remains invalid.
        let crop = crop.with_src_rect(Some(Rect::new(0, 0, 10, 10)));
        let result = crop.check_crop_dyn(&src, &dst);
        assert!(matches!(
            result,
            Err(Error::CropInvalid(e)) if e.starts_with("Dest crop invalid")
        ));

        // Fix the dst rect; the oversized src rect is now the only problem.
        let crop = crop
            .with_src_rect(Some(Rect::new(50, 50, 60, 60)))
            .with_dst_rect(Some(Rect::new(0, 0, 50, 50)));
        let result = crop.check_crop_dyn(&src, &dst);
        assert!(matches!(
            result,
            Err(Error::CropInvalid(e)) if e.starts_with("Src crop invalid")
        ));

        // A rect that fits exactly is accepted.
        let crop = crop.with_src_rect(Some(Rect::new(50, 50, 50, 50)));

        let result = crop.check_crop_dyn(&src, &dst);
        assert!(result.is_ok());
    }
2571
    #[test]
    fn test_invalid_tensor_format() -> Result<(), Error> {
        // set_format() must validate both tensor rank and channel count.

        // 4D tensor cannot be set to a 3-channel pixel format
        let mut tensor = Tensor::<u8>::new(&[720, 1280, 4, 1], None, None)?;
        let result = tensor.set_format(PixelFormat::Rgb);
        assert!(result.is_err(), "4D tensor should reject set_format");

        // Tensor with wrong channel count for the format
        let mut tensor = Tensor::<u8>::new(&[720, 1280, 4], None, None)?;
        let result = tensor.set_format(PixelFormat::Rgb);
        assert!(result.is_err(), "4-channel tensor should reject RGB format");

        Ok(())
    }
2586
2587    #[test]
2588    fn test_invalid_image_file() -> Result<(), Error> {
2589        let result = crate::load_image(&[123; 5000], None, None);
2590        assert!(matches!(
2591            result,
2592            Err(Error::NotSupported(e)) if e == "Could not decode as jpeg or png"));
2593
2594        Ok(())
2595    }
2596
    #[test]
    fn test_invalid_jpeg_format() -> Result<(), Error> {
        // Even with an explicit (unsupported) output format requested,
        // undecodable bytes surface as the generic "could not decode" error.
        let result = crate::load_image(&[123; 5000], Some(PixelFormat::Yuyv), None);
        assert!(matches!(
            result,
            Err(Error::NotSupported(e)) if e == "Could not decode as jpeg or png"));

        Ok(())
    }
2606
    #[test]
    fn test_load_resize_save() {
        // Round-trip: load JPEG -> CPU resize 1280x720 -> 640x360 -> save ->
        // reload and verify geometry and native format.
        let file = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../testdata/zidane.jpg"
        ));
        let img = crate::load_image(file, Some(PixelFormat::Rgba), None).unwrap();
        assert_eq!(img.width(), Some(1280));
        assert_eq!(img.height(), Some(720));

        let dst = TensorDyn::image(640, 360, PixelFormat::Rgba, DType::U8, None).unwrap();
        let mut converter = CPUProcessor::new();
        let (result, _img, dst) = convert_img(
            &mut converter,
            img,
            dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();
        assert_eq!(dst.width(), Some(640));
        assert_eq!(dst.height(), Some(360));

        crate::save_jpeg(&dst, "zidane_resized.jpg", 80).unwrap();

        // Reloading without an explicit format yields the JPEG-native RGB.
        let file = std::fs::read("zidane_resized.jpg").unwrap();
        let img = crate::load_image(&file, None, None).unwrap();
        assert_eq!(img.width(), Some(640));
        assert_eq!(img.height(), Some(360));
        assert_eq!(img.format().unwrap(), PixelFormat::Rgb);
    }
2639
2640    #[test]
2641    fn test_from_tensor_planar() -> Result<(), Error> {
2642        let mut tensor = Tensor::new(&[3, 720, 1280], None, None)?;
2643        tensor.map()?.copy_from_slice(include_bytes!(concat!(
2644            env!("CARGO_MANIFEST_DIR"),
2645            "/../../testdata/camera720p.8bps"
2646        )));
2647        let planar = {
2648            tensor
2649                .set_format(PixelFormat::PlanarRgb)
2650                .map_err(|e| crate::Error::Internal(e.to_string()))?;
2651            TensorDyn::from(tensor)
2652        };
2653
2654        let rbga = load_bytes_to_tensor(
2655            1280,
2656            720,
2657            PixelFormat::Rgba,
2658            None,
2659            include_bytes!(concat!(
2660                env!("CARGO_MANIFEST_DIR"),
2661                "/../../testdata/camera720p.rgba"
2662            )),
2663        )?;
2664        compare_images_convert_to_rgb(&planar, &rbga, 0.98, function!());
2665
2666        Ok(())
2667    }
2668
    #[test]
    fn test_from_tensor_invalid_format() {
        // PixelFormat::from_fourcc_str returns None for unknown FourCC codes.
        // Since there's no "TEST" pixel format, this validates graceful handling.
        // NOTE(review): test_invalid_pixel_format below performs the same check.
        assert!(PixelFormat::from_fourcc(u32::from_le_bytes(*b"TEST")).is_none());
    }
2675
    #[test]
    #[should_panic(expected = "Failed to save planar RGB image")]
    fn test_save_planar() {
        // save_jpeg() rejects non-packed layouts; the `.expect()` below
        // converts that error into the expected panic.
        let planar_img = load_bytes_to_tensor(
            1280,
            720,
            PixelFormat::PlanarRgb,
            None,
            include_bytes!(concat!(
                env!("CARGO_MANIFEST_DIR"),
                "/../../testdata/camera720p.8bps"
            )),
        )
        .unwrap();

        let save_path = "/tmp/planar_rgb.jpg";
        crate::save_jpeg(&planar_img, save_path, 90).expect("Failed to save planar RGB image");
    }
2694
2695    #[test]
2696    #[should_panic(expected = "Failed to save YUYV image")]
2697    fn test_save_yuyv() {
2698        let planar_img = load_bytes_to_tensor(
2699            1280,
2700            720,
2701            PixelFormat::Yuyv,
2702            None,
2703            include_bytes!(concat!(
2704                env!("CARGO_MANIFEST_DIR"),
2705                "/../../testdata/camera720p.yuyv"
2706            )),
2707        )
2708        .unwrap();
2709
2710        let save_path = "/tmp/yuyv.jpg";
2711        crate::save_jpeg(&planar_img, save_path, 90).expect("Failed to save YUYV image");
2712    }
2713
2714    #[test]
2715    fn test_rotation_angle() {
2716        assert_eq!(Rotation::from_degrees_clockwise(0), Rotation::None);
2717        assert_eq!(Rotation::from_degrees_clockwise(90), Rotation::Clockwise90);
2718        assert_eq!(Rotation::from_degrees_clockwise(180), Rotation::Rotate180);
2719        assert_eq!(
2720            Rotation::from_degrees_clockwise(270),
2721            Rotation::CounterClockwise90
2722        );
2723        assert_eq!(Rotation::from_degrees_clockwise(360), Rotation::None);
2724        assert_eq!(Rotation::from_degrees_clockwise(450), Rotation::Clockwise90);
2725        assert_eq!(Rotation::from_degrees_clockwise(540), Rotation::Rotate180);
2726        assert_eq!(
2727            Rotation::from_degrees_clockwise(630),
2728            Rotation::CounterClockwise90
2729        );
2730    }
2731
    #[test]
    #[should_panic(expected = "rotation angle is not a multiple of 90")]
    fn test_rotation_angle_panic() {
        // Only multiples of 90 degrees are representable as a Rotation.
        Rotation::from_degrees_clockwise(361);
    }
2737
    #[test]
    fn test_disable_env_var() -> Result<(), Error> {
        // EDGEFIRST_FORCE_BACKEND takes precedence over EDGEFIRST_DISABLE_*,
        // so clear it for the duration of this test to avoid races with
        // test_force_backend_cpu running in parallel.
        let saved_force = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
        unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") };

        // Disabling G2D must leave the processor without a G2D backend.
        #[cfg(target_os = "linux")]
        {
            let original = std::env::var("EDGEFIRST_DISABLE_G2D").ok();
            unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", "1") };
            let converter = ImageProcessor::new()?;
            match original {
                Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", s) },
                None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_G2D") },
            }
            assert!(converter.g2d.is_none());
        }

        // Same for the OpenGL backend when it is compiled in.
        #[cfg(target_os = "linux")]
        #[cfg(feature = "opengl")]
        {
            let original = std::env::var("EDGEFIRST_DISABLE_GL").ok();
            unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", "1") };
            let converter = ImageProcessor::new()?;
            match original {
                Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", s) },
                None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_GL") },
            }
            assert!(converter.opengl.is_none());
        }

        // And for the CPU fallback.
        let original = std::env::var("EDGEFIRST_DISABLE_CPU").ok();
        unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", "1") };
        let converter = ImageProcessor::new()?;
        match original {
            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", s) },
            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_CPU") },
        }
        assert!(converter.cpu.is_none());

        // With every backend disabled, convert() must fail with NoConverter.
        let original_cpu = std::env::var("EDGEFIRST_DISABLE_CPU").ok();
        unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", "1") };
        let original_gl = std::env::var("EDGEFIRST_DISABLE_GL").ok();
        unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", "1") };
        let original_g2d = std::env::var("EDGEFIRST_DISABLE_G2D").ok();
        unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", "1") };
        let mut converter = ImageProcessor::new()?;

        let src = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None)?;
        let dst = TensorDyn::image(640, 360, PixelFormat::Rgba, DType::U8, None)?;
        let (result, _src, _dst) = convert_img(
            &mut converter,
            src,
            dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        assert!(matches!(result, Err(Error::NoConverter)));

        // Restore every environment variable this test touched.
        match original_cpu {
            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", s) },
            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_CPU") },
        }
        match original_gl {
            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", s) },
            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_GL") },
        }
        match original_g2d {
            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", s) },
            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_G2D") },
        }
        match saved_force {
            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
        }

        Ok(())
    }
2819
    #[test]
    fn test_unsupported_conversion() {
        // NV12 -> NV12 is not handled by any backend; the processor must
        // report NotSupported rather than silently no-op.
        let src = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
        let dst = TensorDyn::image(640, 360, PixelFormat::Nv12, DType::U8, None).unwrap();
        let mut converter = ImageProcessor::new().unwrap();
        let (result, _src, _dst) = convert_img(
            &mut converter,
            src,
            dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        log::debug!("result: {:?}", result);
        assert!(matches!(
            result,
            Err(Error::NotSupported(e)) if e.starts_with("Conversion from NV12 to NV12")
        ));
    }
2839
    #[test]
    fn test_load_grey() {
        // A greyscale JPEG and the same picture stored as an RGB JPEG should
        // decode to near-identical RGBA images (>= 99% similarity).
        let grey_img = crate::load_image(
            include_bytes!(concat!(
                env!("CARGO_MANIFEST_DIR"),
                "/../../testdata/grey.jpg"
            )),
            Some(PixelFormat::Rgba),
            None,
        )
        .unwrap();

        let grey_but_rgb_img = crate::load_image(
            include_bytes!(concat!(
                env!("CARGO_MANIFEST_DIR"),
                "/../../testdata/grey-rgb.jpg"
            )),
            Some(PixelFormat::Rgba),
            None,
        )
        .unwrap();

        compare_images(&grey_img, &grey_but_rgb_img, 0.99, function!());
    }
2864
    #[test]
    fn test_new_nv12() {
        // An NV12 tensor reports the geometry of its luma plane.
        let nv12 = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
        assert_eq!(nv12.height(), Some(720));
        assert_eq!(nv12.width(), Some(1280));
        assert_eq!(nv12.format().unwrap(), PixelFormat::Nv12);
        // PixelFormat::Nv12.channels() returns 1 (luma plane channel count)
        assert_eq!(nv12.format().unwrap().channels(), 1);
        assert!(nv12.format().is_some_and(
            |f| f.layout() == PixelLayout::Planar || f.layout() == PixelLayout::SemiPlanar
        ))
    }
2877
    #[test]
    #[cfg(target_os = "linux")]
    fn test_new_image_converter() {
        // Resize with the auto-selected backend, then repeat with the CPU
        // backend and require the two results to be at least 98% similar.
        let dst_width = 640;
        let dst_height = 360;
        let file = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../testdata/zidane.jpg"
        ))
        .to_vec();
        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();

        let mut converter = ImageProcessor::new().unwrap();
        let converter_dst = converter
            .create_image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None)
            .unwrap();
        let (result, src, converter_dst) = convert_img(
            &mut converter,
            src,
            converter_dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        // CPU reference conversion of the same source.
        let cpu_dst =
            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
        let mut cpu_converter = CPUProcessor::new();
        let (result, _src, cpu_dst) = convert_img(
            &mut cpu_converter,
            src,
            cpu_dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        compare_images(&converter_dst, &cpu_dst, 0.98, function!());
    }
2919
    #[test]
    #[cfg(target_os = "linux")]
    fn test_create_image_dtype_i8() {
        // create_image must honor DType::I8 and conversions into an I8
        // destination must succeed.
        let mut converter = ImageProcessor::new().unwrap();

        // I8 image should allocate successfully via create_image
        let dst = converter
            .create_image(320, 240, PixelFormat::Rgb, DType::I8, None)
            .unwrap();
        assert_eq!(dst.dtype(), DType::I8);
        assert!(dst.width() == Some(320));
        assert!(dst.height() == Some(240));
        assert_eq!(dst.format(), Some(PixelFormat::Rgb));

        // U8 for comparison
        let dst_u8 = converter
            .create_image(320, 240, PixelFormat::Rgb, DType::U8, None)
            .unwrap();
        assert_eq!(dst_u8.dtype(), DType::U8);

        // Convert into I8 dst should succeed
        let file = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../testdata/zidane.jpg"
        ))
        .to_vec();
        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
        let mut dst_i8 = converter
            .create_image(320, 240, PixelFormat::Rgb, DType::I8, None)
            .unwrap();
        converter
            .convert(
                &src,
                &mut dst_i8,
                Rotation::None,
                Flip::None,
                Crop::no_crop(),
            )
            .unwrap();
    }
2960
    #[test]
    #[cfg(target_os = "linux")]
    fn test_create_image_nv12_dma_non_aligned_width() {
        // Regression for C2: create_image must not apply stride padding to
        // non-packed formats. NV12 is semi-planar (PixelLayout::SemiPlanar),
        // so the try_dma path should fall through to the plain
        // TensorDyn::image allocation for any width, regardless of the
        // 64-byte GPU pitch alignment.
        let converter = ImageProcessor::new().unwrap();

        // 100 is intentionally not a multiple of 64 (the Mali pitch
        // alignment) to prove that non-packed layouts do not take the
        // stride-padded branch.
        let result = converter.create_image(
            100,
            64,
            PixelFormat::Nv12,
            DType::U8,
            Some(TensorMemory::Dma),
        );

        // DMA allocation may legitimately fail on hosts without a dma-heap;
        // both arms below verify the contract for their respective outcome.
        match result {
            Ok(img) => {
                assert_eq!(img.width(), Some(100));
                assert_eq!(img.height(), Some(64));
                assert_eq!(img.format(), Some(PixelFormat::Nv12));
                // Non-packed formats must never carry a row_stride override.
                assert!(
                    img.row_stride().is_none(),
                    "NV12 must not be stride-padded by create_image",
                );
            }
            Err(e) => {
                // Accept skip on hosts without a dma-heap, but never the
                // "NotImplemented" we used to return for non-packed layouts.
                let msg = format!("{e}");
                assert!(
                    !msg.contains("image_with_stride"),
                    "NV12 should not hit the stride-padded path: {msg}",
                );
            }
        }
    }
3004
    #[test]
    #[ignore] // Hangs on desktop platforms where DMA-buf is unavailable and PBO
              // fallback triggers a GPU driver hang during SHM→texture upload (e.g.,
              // NVIDIA without /dev/dma_heap permissions). Works on embedded targets.
    fn test_crop_skip() {
        // 1:1 copy of a 640x640 region src→dst: hardware and CPU backends
        // must agree almost exactly (0.99999 similarity).
        let file = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../testdata/zidane.jpg"
        ))
        .to_vec();
        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();

        let mut converter = ImageProcessor::new().unwrap();
        let converter_dst = converter
            .create_image(1280, 720, PixelFormat::Rgba, DType::U8, None)
            .unwrap();
        let crop = Crop::new()
            .with_src_rect(Some(Rect::new(0, 0, 640, 640)))
            .with_dst_rect(Some(Rect::new(0, 0, 640, 640)));
        let (result, src, converter_dst) = convert_img(
            &mut converter,
            src,
            converter_dst,
            Rotation::None,
            Flip::None,
            crop,
        );
        result.unwrap();

        // CPU reference of the same crop.
        let cpu_dst = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
        let mut cpu_converter = CPUProcessor::new();
        let (result, _src, cpu_dst) = convert_img(
            &mut cpu_converter,
            src,
            cpu_dst,
            Rotation::None,
            Flip::None,
            crop,
        );
        result.unwrap();

        compare_images(&converter_dst, &cpu_dst, 0.99999, function!());
    }
3048
3049    #[test]
3050    fn test_invalid_pixel_format() {
3051        // PixelFormat::from_fourcc returns None for unknown formats,
3052        // so TensorDyn::image cannot be called with an invalid format.
3053        assert!(PixelFormat::from_fourcc(u32::from_le_bytes(*b"TEST")).is_none());
3054    }
3055
    // Caches the result of probing for the G2D hardware blitter
    // (libg2d.so.2, Linux/i.MX8 only) so the probe runs at most once per
    // test binary.
    #[cfg(target_os = "linux")]
    static G2D_AVAILABLE: std::sync::OnceLock<bool> = std::sync::OnceLock::new();

    // Helper function to check if the G2D library is available. Repeated
    // calls return the cached probe result.
    #[cfg(target_os = "linux")]
    fn is_g2d_available() -> bool {
        *G2D_AVAILABLE.get_or_init(|| G2DProcessor::new().is_ok())
    }
3064
3065    #[cfg(target_os = "linux")]
3066    #[cfg(feature = "opengl")]
3067    static GL_AVAILABLE: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
3068
3069    #[cfg(target_os = "linux")]
3070    #[cfg(feature = "opengl")]
3071    // Helper function to check if OpenGL is available
3072    fn is_opengl_available() -> bool {
3073        #[cfg(all(target_os = "linux", feature = "opengl"))]
3074        {
3075            *GL_AVAILABLE.get_or_init(|| GLProcessorThreaded::new(None).is_ok())
3076        }
3077
3078        #[cfg(not(all(target_os = "linux", feature = "opengl")))]
3079        {
3080            false
3081        }
3082    }
3083
3084    #[test]
3085    fn test_load_jpeg_with_exif() {
3086        let file = include_bytes!(concat!(
3087            env!("CARGO_MANIFEST_DIR"),
3088            "/../../testdata/zidane_rotated_exif.jpg"
3089        ))
3090        .to_vec();
3091        let loaded = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
3092
3093        assert_eq!(loaded.height(), Some(1280));
3094        assert_eq!(loaded.width(), Some(720));
3095
3096        let file = include_bytes!(concat!(
3097            env!("CARGO_MANIFEST_DIR"),
3098            "/../../testdata/zidane.jpg"
3099        ))
3100        .to_vec();
3101        let cpu_src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
3102
3103        let (dst_width, dst_height) = (cpu_src.height().unwrap(), cpu_src.width().unwrap());
3104
3105        let cpu_dst =
3106            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3107        let mut cpu_converter = CPUProcessor::new();
3108
3109        let (result, _cpu_src, cpu_dst) = convert_img(
3110            &mut cpu_converter,
3111            cpu_src,
3112            cpu_dst,
3113            Rotation::Clockwise90,
3114            Flip::None,
3115            Crop::no_crop(),
3116        );
3117        result.unwrap();
3118
3119        compare_images(&loaded, &cpu_dst, 0.98, function!());
3120    }
3121
3122    #[test]
3123    fn test_load_png_with_exif() {
3124        let file = include_bytes!(concat!(
3125            env!("CARGO_MANIFEST_DIR"),
3126            "/../../testdata/zidane_rotated_exif_180.png"
3127        ))
3128        .to_vec();
3129        let loaded = crate::load_png(&file, Some(PixelFormat::Rgba), None).unwrap();
3130
3131        assert_eq!(loaded.height(), Some(720));
3132        assert_eq!(loaded.width(), Some(1280));
3133
3134        let file = include_bytes!(concat!(
3135            env!("CARGO_MANIFEST_DIR"),
3136            "/../../testdata/zidane.jpg"
3137        ))
3138        .to_vec();
3139        let cpu_src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
3140
3141        let cpu_dst = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
3142        let mut cpu_converter = CPUProcessor::new();
3143
3144        let (result, _cpu_src, cpu_dst) = convert_img(
3145            &mut cpu_converter,
3146            cpu_src,
3147            cpu_dst,
3148            Rotation::Rotate180,
3149            Flip::None,
3150            Crop::no_crop(),
3151        );
3152        result.unwrap();
3153
3154        compare_images(&loaded, &cpu_dst, 0.98, function!());
3155    }
3156
3157    #[test]
3158    #[cfg(target_os = "linux")]
3159    fn test_g2d_resize() {
3160        if !is_g2d_available() {
3161            eprintln!("SKIPPED: test_g2d_resize - G2D library (libg2d.so.2) not available");
3162            return;
3163        }
3164        if !is_dma_available() {
3165            eprintln!(
3166                "SKIPPED: test_g2d_resize - DMA memory allocation not available (permission denied or no DMA-BUF support)"
3167            );
3168            return;
3169        }
3170
3171        let dst_width = 640;
3172        let dst_height = 360;
3173        let file = include_bytes!(concat!(
3174            env!("CARGO_MANIFEST_DIR"),
3175            "/../../testdata/zidane.jpg"
3176        ))
3177        .to_vec();
3178        let src =
3179            crate::load_image(&file, Some(PixelFormat::Rgba), Some(TensorMemory::Dma)).unwrap();
3180
3181        let g2d_dst = TensorDyn::image(
3182            dst_width,
3183            dst_height,
3184            PixelFormat::Rgba,
3185            DType::U8,
3186            Some(TensorMemory::Dma),
3187        )
3188        .unwrap();
3189        let mut g2d_converter = G2DProcessor::new().unwrap();
3190        let (result, src, g2d_dst) = convert_img(
3191            &mut g2d_converter,
3192            src,
3193            g2d_dst,
3194            Rotation::None,
3195            Flip::None,
3196            Crop::no_crop(),
3197        );
3198        result.unwrap();
3199
3200        let cpu_dst =
3201            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3202        let mut cpu_converter = CPUProcessor::new();
3203        let (result, _src, cpu_dst) = convert_img(
3204            &mut cpu_converter,
3205            src,
3206            cpu_dst,
3207            Rotation::None,
3208            Flip::None,
3209            Crop::no_crop(),
3210        );
3211        result.unwrap();
3212
3213        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
3214    }
3215
3216    #[test]
3217    #[cfg(target_os = "linux")]
3218    #[cfg(feature = "opengl")]
3219    fn test_opengl_resize() {
3220        if !is_opengl_available() {
3221            eprintln!("SKIPPED: {} - OpenGL not available", function!());
3222            return;
3223        }
3224
3225        let dst_width = 640;
3226        let dst_height = 360;
3227        let file = include_bytes!(concat!(
3228            env!("CARGO_MANIFEST_DIR"),
3229            "/../../testdata/zidane.jpg"
3230        ))
3231        .to_vec();
3232        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
3233
3234        let cpu_dst =
3235            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3236        let mut cpu_converter = CPUProcessor::new();
3237        let (result, src, cpu_dst) = convert_img(
3238            &mut cpu_converter,
3239            src,
3240            cpu_dst,
3241            Rotation::None,
3242            Flip::None,
3243            Crop::no_crop(),
3244        );
3245        result.unwrap();
3246
3247        let mut src = src;
3248        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
3249
3250        for _ in 0..5 {
3251            let gl_dst =
3252                TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None)
3253                    .unwrap();
3254            let (result, src_back, gl_dst) = convert_img(
3255                &mut gl_converter,
3256                src,
3257                gl_dst,
3258                Rotation::None,
3259                Flip::None,
3260                Crop::no_crop(),
3261            );
3262            result.unwrap();
3263            src = src_back;
3264
3265            compare_images(&gl_dst, &cpu_dst, 0.98, function!());
3266        }
3267    }
3268
3269    #[test]
3270    #[cfg(target_os = "linux")]
3271    #[cfg(feature = "opengl")]
3272    fn test_opengl_10_threads() {
3273        if !is_opengl_available() {
3274            eprintln!("SKIPPED: {} - OpenGL not available", function!());
3275            return;
3276        }
3277
3278        let handles: Vec<_> = (0..10)
3279            .map(|i| {
3280                std::thread::Builder::new()
3281                    .name(format!("Thread {i}"))
3282                    .spawn(test_opengl_resize)
3283                    .unwrap()
3284            })
3285            .collect();
3286        handles.into_iter().for_each(|h| {
3287            if let Err(e) = h.join() {
3288                std::panic::resume_unwind(e)
3289            }
3290        });
3291    }
3292
3293    #[test]
3294    #[cfg(target_os = "linux")]
3295    #[cfg(feature = "opengl")]
3296    fn test_opengl_grey() {
3297        if !is_opengl_available() {
3298            eprintln!("SKIPPED: {} - OpenGL not available", function!());
3299            return;
3300        }
3301
3302        let img = crate::load_image(
3303            include_bytes!(concat!(
3304                env!("CARGO_MANIFEST_DIR"),
3305                "/../../testdata/grey.jpg"
3306            )),
3307            Some(PixelFormat::Grey),
3308            None,
3309        )
3310        .unwrap();
3311
3312        let gl_dst = TensorDyn::image(640, 640, PixelFormat::Grey, DType::U8, None).unwrap();
3313        let cpu_dst = TensorDyn::image(640, 640, PixelFormat::Grey, DType::U8, None).unwrap();
3314
3315        let mut converter = CPUProcessor::new();
3316
3317        let (result, img, cpu_dst) = convert_img(
3318            &mut converter,
3319            img,
3320            cpu_dst,
3321            Rotation::None,
3322            Flip::None,
3323            Crop::no_crop(),
3324        );
3325        result.unwrap();
3326
3327        let mut gl = GLProcessorThreaded::new(None).unwrap();
3328        let (result, _img, gl_dst) = convert_img(
3329            &mut gl,
3330            img,
3331            gl_dst,
3332            Rotation::None,
3333            Flip::None,
3334            Crop::no_crop(),
3335        );
3336        result.unwrap();
3337
3338        compare_images(&gl_dst, &cpu_dst, 0.98, function!());
3339    }
3340
3341    #[test]
3342    #[cfg(target_os = "linux")]
3343    fn test_g2d_src_crop() {
3344        if !is_g2d_available() {
3345            eprintln!("SKIPPED: test_g2d_src_crop - G2D library (libg2d.so.2) not available");
3346            return;
3347        }
3348        if !is_dma_available() {
3349            eprintln!(
3350                "SKIPPED: test_g2d_src_crop - DMA memory allocation not available (permission denied or no DMA-BUF support)"
3351            );
3352            return;
3353        }
3354
3355        let dst_width = 640;
3356        let dst_height = 640;
3357        let file = include_bytes!(concat!(
3358            env!("CARGO_MANIFEST_DIR"),
3359            "/../../testdata/zidane.jpg"
3360        ))
3361        .to_vec();
3362        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
3363
3364        let cpu_dst =
3365            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3366        let mut cpu_converter = CPUProcessor::new();
3367        let crop = Crop {
3368            src_rect: Some(Rect {
3369                left: 0,
3370                top: 0,
3371                width: 640,
3372                height: 360,
3373            }),
3374            dst_rect: None,
3375            dst_color: None,
3376        };
3377        let (result, src, cpu_dst) = convert_img(
3378            &mut cpu_converter,
3379            src,
3380            cpu_dst,
3381            Rotation::None,
3382            Flip::None,
3383            crop,
3384        );
3385        result.unwrap();
3386
3387        let g2d_dst =
3388            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3389        let mut g2d_converter = G2DProcessor::new().unwrap();
3390        let (result, _src, g2d_dst) = convert_img(
3391            &mut g2d_converter,
3392            src,
3393            g2d_dst,
3394            Rotation::None,
3395            Flip::None,
3396            crop,
3397        );
3398        result.unwrap();
3399
3400        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
3401    }
3402
3403    #[test]
3404    #[cfg(target_os = "linux")]
3405    fn test_g2d_dst_crop() {
3406        if !is_g2d_available() {
3407            eprintln!("SKIPPED: test_g2d_dst_crop - G2D library (libg2d.so.2) not available");
3408            return;
3409        }
3410        if !is_dma_available() {
3411            eprintln!(
3412                "SKIPPED: test_g2d_dst_crop - DMA memory allocation not available (permission denied or no DMA-BUF support)"
3413            );
3414            return;
3415        }
3416
3417        let dst_width = 640;
3418        let dst_height = 640;
3419        let file = include_bytes!(concat!(
3420            env!("CARGO_MANIFEST_DIR"),
3421            "/../../testdata/zidane.jpg"
3422        ))
3423        .to_vec();
3424        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
3425
3426        let cpu_dst =
3427            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3428        let mut cpu_converter = CPUProcessor::new();
3429        let crop = Crop {
3430            src_rect: None,
3431            dst_rect: Some(Rect::new(100, 100, 512, 288)),
3432            dst_color: None,
3433        };
3434        let (result, src, cpu_dst) = convert_img(
3435            &mut cpu_converter,
3436            src,
3437            cpu_dst,
3438            Rotation::None,
3439            Flip::None,
3440            crop,
3441        );
3442        result.unwrap();
3443
3444        let g2d_dst =
3445            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3446        let mut g2d_converter = G2DProcessor::new().unwrap();
3447        let (result, _src, g2d_dst) = convert_img(
3448            &mut g2d_converter,
3449            src,
3450            g2d_dst,
3451            Rotation::None,
3452            Flip::None,
3453            crop,
3454        );
3455        result.unwrap();
3456
3457        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
3458    }
3459
3460    #[test]
3461    #[cfg(target_os = "linux")]
3462    fn test_g2d_all_rgba() {
3463        if !is_g2d_available() {
3464            eprintln!("SKIPPED: test_g2d_all_rgba - G2D library (libg2d.so.2) not available");
3465            return;
3466        }
3467        if !is_dma_available() {
3468            eprintln!(
3469                "SKIPPED: test_g2d_all_rgba - DMA memory allocation not available (permission denied or no DMA-BUF support)"
3470            );
3471            return;
3472        }
3473
3474        let dst_width = 640;
3475        let dst_height = 640;
3476        let file = include_bytes!(concat!(
3477            env!("CARGO_MANIFEST_DIR"),
3478            "/../../testdata/zidane.jpg"
3479        ))
3480        .to_vec();
3481        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
3482        let src_dyn = src;
3483
3484        let mut cpu_dst =
3485            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3486        let mut cpu_converter = CPUProcessor::new();
3487        let mut g2d_dst =
3488            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3489        let mut g2d_converter = G2DProcessor::new().unwrap();
3490
3491        let crop = Crop {
3492            src_rect: Some(Rect::new(50, 120, 1024, 576)),
3493            dst_rect: Some(Rect::new(100, 100, 512, 288)),
3494            dst_color: None,
3495        };
3496
3497        for rot in [
3498            Rotation::None,
3499            Rotation::Clockwise90,
3500            Rotation::Rotate180,
3501            Rotation::CounterClockwise90,
3502        ] {
3503            cpu_dst
3504                .as_u8()
3505                .unwrap()
3506                .map()
3507                .unwrap()
3508                .as_mut_slice()
3509                .fill(114);
3510            g2d_dst
3511                .as_u8()
3512                .unwrap()
3513                .map()
3514                .unwrap()
3515                .as_mut_slice()
3516                .fill(114);
3517            for flip in [Flip::None, Flip::Horizontal, Flip::Vertical] {
3518                let mut cpu_dst_dyn = cpu_dst;
3519                cpu_converter
3520                    .convert(&src_dyn, &mut cpu_dst_dyn, Rotation::None, Flip::None, crop)
3521                    .unwrap();
3522                cpu_dst = {
3523                    let mut __t = cpu_dst_dyn.into_u8().unwrap();
3524                    __t.set_format(PixelFormat::Rgba).unwrap();
3525                    TensorDyn::from(__t)
3526                };
3527
3528                let mut g2d_dst_dyn = g2d_dst;
3529                g2d_converter
3530                    .convert(&src_dyn, &mut g2d_dst_dyn, Rotation::None, Flip::None, crop)
3531                    .unwrap();
3532                g2d_dst = {
3533                    let mut __t = g2d_dst_dyn.into_u8().unwrap();
3534                    __t.set_format(PixelFormat::Rgba).unwrap();
3535                    TensorDyn::from(__t)
3536                };
3537
3538                compare_images(
3539                    &g2d_dst,
3540                    &cpu_dst,
3541                    0.98,
3542                    &format!("{} {:?} {:?}", function!(), rot, flip),
3543                );
3544            }
3545        }
3546    }
3547
3548    #[test]
3549    #[cfg(target_os = "linux")]
3550    #[cfg(feature = "opengl")]
3551    fn test_opengl_src_crop() {
3552        if !is_opengl_available() {
3553            eprintln!("SKIPPED: {} - OpenGL not available", function!());
3554            return;
3555        }
3556
3557        let dst_width = 640;
3558        let dst_height = 360;
3559        let file = include_bytes!(concat!(
3560            env!("CARGO_MANIFEST_DIR"),
3561            "/../../testdata/zidane.jpg"
3562        ))
3563        .to_vec();
3564        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
3565        let crop = Crop {
3566            src_rect: Some(Rect {
3567                left: 320,
3568                top: 180,
3569                width: 1280 - 320,
3570                height: 720 - 180,
3571            }),
3572            dst_rect: None,
3573            dst_color: None,
3574        };
3575
3576        let cpu_dst =
3577            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3578        let mut cpu_converter = CPUProcessor::new();
3579        let (result, src, cpu_dst) = convert_img(
3580            &mut cpu_converter,
3581            src,
3582            cpu_dst,
3583            Rotation::None,
3584            Flip::None,
3585            crop,
3586        );
3587        result.unwrap();
3588
3589        let gl_dst =
3590            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3591        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
3592        let (result, _src, gl_dst) = convert_img(
3593            &mut gl_converter,
3594            src,
3595            gl_dst,
3596            Rotation::None,
3597            Flip::None,
3598            crop,
3599        );
3600        result.unwrap();
3601
3602        compare_images(&gl_dst, &cpu_dst, 0.98, function!());
3603    }
3604
3605    #[test]
3606    #[cfg(target_os = "linux")]
3607    #[cfg(feature = "opengl")]
3608    fn test_opengl_dst_crop() {
3609        if !is_opengl_available() {
3610            eprintln!("SKIPPED: {} - OpenGL not available", function!());
3611            return;
3612        }
3613
3614        let dst_width = 640;
3615        let dst_height = 640;
3616        let file = include_bytes!(concat!(
3617            env!("CARGO_MANIFEST_DIR"),
3618            "/../../testdata/zidane.jpg"
3619        ))
3620        .to_vec();
3621        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
3622
3623        let cpu_dst =
3624            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3625        let mut cpu_converter = CPUProcessor::new();
3626        let crop = Crop {
3627            src_rect: None,
3628            dst_rect: Some(Rect::new(100, 100, 512, 288)),
3629            dst_color: None,
3630        };
3631        let (result, src, cpu_dst) = convert_img(
3632            &mut cpu_converter,
3633            src,
3634            cpu_dst,
3635            Rotation::None,
3636            Flip::None,
3637            crop,
3638        );
3639        result.unwrap();
3640
3641        let gl_dst =
3642            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3643        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
3644        let (result, _src, gl_dst) = convert_img(
3645            &mut gl_converter,
3646            src,
3647            gl_dst,
3648            Rotation::None,
3649            Flip::None,
3650            crop,
3651        );
3652        result.unwrap();
3653
3654        compare_images(&gl_dst, &cpu_dst, 0.98, function!());
3655    }
3656
3657    #[test]
3658    #[cfg(target_os = "linux")]
3659    #[cfg(feature = "opengl")]
3660    fn test_opengl_all_rgba() {
3661        if !is_opengl_available() {
3662            eprintln!("SKIPPED: {} - OpenGL not available", function!());
3663            return;
3664        }
3665
3666        let dst_width = 640;
3667        let dst_height = 640;
3668        let file = include_bytes!(concat!(
3669            env!("CARGO_MANIFEST_DIR"),
3670            "/../../testdata/zidane.jpg"
3671        ))
3672        .to_vec();
3673
3674        let mut cpu_converter = CPUProcessor::new();
3675
3676        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
3677
3678        let mut mem = vec![None, Some(TensorMemory::Mem), Some(TensorMemory::Shm)];
3679        if is_dma_available() {
3680            mem.push(Some(TensorMemory::Dma));
3681        }
3682        let crop = Crop {
3683            src_rect: Some(Rect::new(50, 120, 1024, 576)),
3684            dst_rect: Some(Rect::new(100, 100, 512, 288)),
3685            dst_color: None,
3686        };
3687        for m in mem {
3688            let src = crate::load_image(&file, Some(PixelFormat::Rgba), m).unwrap();
3689            let src_dyn = src;
3690
3691            for rot in [
3692                Rotation::None,
3693                Rotation::Clockwise90,
3694                Rotation::Rotate180,
3695                Rotation::CounterClockwise90,
3696            ] {
3697                for flip in [Flip::None, Flip::Horizontal, Flip::Vertical] {
3698                    let cpu_dst =
3699                        TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, m)
3700                            .unwrap();
3701                    let gl_dst =
3702                        TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, m)
3703                            .unwrap();
3704                    cpu_dst
3705                        .as_u8()
3706                        .unwrap()
3707                        .map()
3708                        .unwrap()
3709                        .as_mut_slice()
3710                        .fill(114);
3711                    gl_dst
3712                        .as_u8()
3713                        .unwrap()
3714                        .map()
3715                        .unwrap()
3716                        .as_mut_slice()
3717                        .fill(114);
3718
3719                    let mut cpu_dst_dyn = cpu_dst;
3720                    cpu_converter
3721                        .convert(&src_dyn, &mut cpu_dst_dyn, Rotation::None, Flip::None, crop)
3722                        .unwrap();
3723                    let cpu_dst = {
3724                        let mut __t = cpu_dst_dyn.into_u8().unwrap();
3725                        __t.set_format(PixelFormat::Rgba).unwrap();
3726                        TensorDyn::from(__t)
3727                    };
3728
3729                    let mut gl_dst_dyn = gl_dst;
3730                    gl_converter
3731                        .convert(&src_dyn, &mut gl_dst_dyn, Rotation::None, Flip::None, crop)
3732                        .map_err(|e| {
3733                            log::error!("error mem {m:?} rot {rot:?} error: {e:?}");
3734                            e
3735                        })
3736                        .unwrap();
3737                    let gl_dst = {
3738                        let mut __t = gl_dst_dyn.into_u8().unwrap();
3739                        __t.set_format(PixelFormat::Rgba).unwrap();
3740                        TensorDyn::from(__t)
3741                    };
3742
3743                    compare_images(
3744                        &gl_dst,
3745                        &cpu_dst,
3746                        0.98,
3747                        &format!("{} {:?} {:?}", function!(), rot, flip),
3748                    );
3749                }
3750            }
3751        }
3752    }
3753
3754    #[test]
3755    #[cfg(target_os = "linux")]
3756    fn test_cpu_rotate() {
3757        for rot in [
3758            Rotation::Clockwise90,
3759            Rotation::Rotate180,
3760            Rotation::CounterClockwise90,
3761        ] {
3762            test_cpu_rotate_(rot);
3763        }
3764    }
3765
    #[cfg(target_os = "linux")]
    fn test_cpu_rotate_(rot: Rotation) {
        // Applies `rot` four times in a row, ping-ponging between two
        // buffers, and asserts the pixels return to the original image
        // (four applications of any supported rotation compose to the
        // identity). Currently doesn't check whether each individual
        // rotation actually rotated in the right direction.
        let file = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../testdata/zidane.jpg"
        ))
        .to_vec();

        // `unchanged_src` is a pristine copy kept for the final comparison.
        let unchanged_src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();

        // Quarter-turn rotations swap the destination's width and height.
        let (dst_width, dst_height) = match rot {
            Rotation::None | Rotation::Rotate180 => (src.width().unwrap(), src.height().unwrap()),
            Rotation::Clockwise90 | Rotation::CounterClockwise90 => {
                (src.height().unwrap(), src.width().unwrap())
            }
        };

        let cpu_dst =
            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
        let mut cpu_converter = CPUProcessor::new();

        // After rotating 4 times, the image should be the same as the
        // original. Note the alternating argument order below: each call
        // swaps source and destination so the two buffers ping-pong.

        let (result, src, cpu_dst) = convert_img(
            &mut cpu_converter,
            src,
            cpu_dst,
            rot,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        let (result, cpu_dst, src) = convert_img(
            &mut cpu_converter,
            cpu_dst,
            src,
            rot,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        let (result, src, cpu_dst) = convert_img(
            &mut cpu_converter,
            src,
            cpu_dst,
            rot,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        let (result, _cpu_dst, src) = convert_img(
            &mut cpu_converter,
            cpu_dst,
            src,
            rot,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        compare_images(&src, &unchanged_src, 0.98, function!());
    }
3835
3836    #[test]
3837    #[cfg(target_os = "linux")]
3838    #[cfg(feature = "opengl")]
3839    fn test_opengl_rotate() {
3840        if !is_opengl_available() {
3841            eprintln!("SKIPPED: {} - OpenGL not available", function!());
3842            return;
3843        }
3844
3845        let size = (1280, 720);
3846        let mut mem = vec![None, Some(TensorMemory::Shm), Some(TensorMemory::Mem)];
3847
3848        if is_dma_available() {
3849            mem.push(Some(TensorMemory::Dma));
3850        }
3851        for m in mem {
3852            for rot in [
3853                Rotation::Clockwise90,
3854                Rotation::Rotate180,
3855                Rotation::CounterClockwise90,
3856            ] {
3857                test_opengl_rotate_(size, rot, m);
3858            }
3859        }
3860    }
3861
    /// Rotates the reference JPEG with the OpenGL backend and compares each
    /// result against a CPU rendition of the same rotation.
    ///
    /// `size` is the (width, height) of the source, `rot` the rotation under
    /// test, and `tensor_memory` selects the allocator used for both the
    /// decoded source and the GL destination (e.g. DMA-BUF vs. heap).
    #[cfg(target_os = "linux")]
    #[cfg(feature = "opengl")]
    fn test_opengl_rotate_(
        size: (usize, usize),
        rot: Rotation,
        tensor_memory: Option<TensorMemory>,
    ) {
        // A quarter turn swaps the output dimensions; 180 degrees keeps them.
        let (dst_width, dst_height) = match rot {
            Rotation::None | Rotation::Rotate180 => size,
            Rotation::Clockwise90 | Rotation::CounterClockwise90 => (size.1, size.0),
        };

        let file = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../testdata/zidane.jpg"
        ))
        .to_vec();
        let src = crate::load_image(&file, Some(PixelFormat::Rgba), tensor_memory).unwrap();

        // CPU reference conversion. convert_img hands the tensors back so the
        // source can be reused for the GL passes below.
        let cpu_dst =
            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
        let mut cpu_converter = CPUProcessor::new();

        let (result, mut src, cpu_dst) = convert_img(
            &mut cpu_converter,
            src,
            cpu_dst,
            rot,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();

        // Repeat the GL conversion into a fresh destination each time.
        // NOTE(review): the repetition presumably guards against state carried
        // over between consecutive conversions on the GL thread — confirm.
        for _ in 0..5 {
            let gl_dst = TensorDyn::image(
                dst_width,
                dst_height,
                PixelFormat::Rgba,
                DType::U8,
                tensor_memory,
            )
            .unwrap();
            let (result, src_back, gl_dst) = convert_img(
                &mut gl_converter,
                src,
                gl_dst,
                rot,
                Flip::None,
                Crop::no_crop(),
            );
            result.unwrap();
            // Reclaim the source tensor for the next iteration.
            src = src_back;
            compare_images(&gl_dst, &cpu_dst, 0.98, function!());
        }
    }
3919
3920    #[test]
3921    #[cfg(target_os = "linux")]
3922    fn test_g2d_rotate() {
3923        if !is_g2d_available() {
3924            eprintln!("SKIPPED: test_g2d_rotate - G2D library (libg2d.so.2) not available");
3925            return;
3926        }
3927        if !is_dma_available() {
3928            eprintln!(
3929                "SKIPPED: test_g2d_rotate - DMA memory allocation not available (permission denied or no DMA-BUF support)"
3930            );
3931            return;
3932        }
3933
3934        let size = (1280, 720);
3935        for rot in [
3936            Rotation::Clockwise90,
3937            Rotation::Rotate180,
3938            Rotation::CounterClockwise90,
3939        ] {
3940            test_g2d_rotate_(size, rot);
3941        }
3942    }
3943
3944    #[cfg(target_os = "linux")]
3945    fn test_g2d_rotate_(size: (usize, usize), rot: Rotation) {
3946        let (dst_width, dst_height) = match rot {
3947            Rotation::None | Rotation::Rotate180 => size,
3948            Rotation::Clockwise90 | Rotation::CounterClockwise90 => (size.1, size.0),
3949        };
3950
3951        let file = include_bytes!(concat!(
3952            env!("CARGO_MANIFEST_DIR"),
3953            "/../../testdata/zidane.jpg"
3954        ))
3955        .to_vec();
3956        let src =
3957            crate::load_image(&file, Some(PixelFormat::Rgba), Some(TensorMemory::Dma)).unwrap();
3958
3959        let cpu_dst =
3960            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3961        let mut cpu_converter = CPUProcessor::new();
3962
3963        let (result, src, cpu_dst) = convert_img(
3964            &mut cpu_converter,
3965            src,
3966            cpu_dst,
3967            rot,
3968            Flip::None,
3969            Crop::no_crop(),
3970        );
3971        result.unwrap();
3972
3973        let g2d_dst = TensorDyn::image(
3974            dst_width,
3975            dst_height,
3976            PixelFormat::Rgba,
3977            DType::U8,
3978            Some(TensorMemory::Dma),
3979        )
3980        .unwrap();
3981        let mut g2d_converter = G2DProcessor::new().unwrap();
3982
3983        let (result, _src, g2d_dst) = convert_img(
3984            &mut g2d_converter,
3985            src,
3986            g2d_dst,
3987            rot,
3988            Flip::None,
3989            Crop::no_crop(),
3990        );
3991        result.unwrap();
3992
3993        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
3994    }
3995
3996    #[test]
3997    fn test_rgba_to_yuyv_resize_cpu() {
3998        let src = load_bytes_to_tensor(
3999            1280,
4000            720,
4001            PixelFormat::Rgba,
4002            None,
4003            include_bytes!(concat!(
4004                env!("CARGO_MANIFEST_DIR"),
4005                "/../../testdata/camera720p.rgba"
4006            )),
4007        )
4008        .unwrap();
4009
4010        let (dst_width, dst_height) = (640, 360);
4011
4012        let dst =
4013            TensorDyn::image(dst_width, dst_height, PixelFormat::Yuyv, DType::U8, None).unwrap();
4014
4015        let dst_through_yuyv =
4016            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4017        let dst_direct =
4018            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4019
4020        let mut cpu_converter = CPUProcessor::new();
4021
4022        let (result, src, dst) = convert_img(
4023            &mut cpu_converter,
4024            src,
4025            dst,
4026            Rotation::None,
4027            Flip::None,
4028            Crop::no_crop(),
4029        );
4030        result.unwrap();
4031
4032        let (result, _dst, dst_through_yuyv) = convert_img(
4033            &mut cpu_converter,
4034            dst,
4035            dst_through_yuyv,
4036            Rotation::None,
4037            Flip::None,
4038            Crop::no_crop(),
4039        );
4040        result.unwrap();
4041
4042        let (result, _src, dst_direct) = convert_img(
4043            &mut cpu_converter,
4044            src,
4045            dst_direct,
4046            Rotation::None,
4047            Flip::None,
4048            Crop::no_crop(),
4049        );
4050        result.unwrap();
4051
4052        compare_images(&dst_through_yuyv, &dst_direct, 0.98, function!());
4053    }
4054
4055    #[test]
4056    #[cfg(target_os = "linux")]
4057    #[cfg(feature = "opengl")]
4058    #[ignore = "opengl doesn't support rendering to PixelFormat::Yuyv texture"]
4059    fn test_rgba_to_yuyv_resize_opengl() {
4060        if !is_opengl_available() {
4061            eprintln!("SKIPPED: {} - OpenGL not available", function!());
4062            return;
4063        }
4064
4065        if !is_dma_available() {
4066            eprintln!(
4067                "SKIPPED: {} - DMA memory allocation not available (permission denied or no DMA-BUF support)",
4068                function!()
4069            );
4070            return;
4071        }
4072
4073        let src = load_bytes_to_tensor(
4074            1280,
4075            720,
4076            PixelFormat::Rgba,
4077            None,
4078            include_bytes!(concat!(
4079                env!("CARGO_MANIFEST_DIR"),
4080                "/../../testdata/camera720p.rgba"
4081            )),
4082        )
4083        .unwrap();
4084
4085        let (dst_width, dst_height) = (640, 360);
4086
4087        let dst = TensorDyn::image(
4088            dst_width,
4089            dst_height,
4090            PixelFormat::Yuyv,
4091            DType::U8,
4092            Some(TensorMemory::Dma),
4093        )
4094        .unwrap();
4095
4096        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
4097
4098        let (result, src, dst) = convert_img(
4099            &mut gl_converter,
4100            src,
4101            dst,
4102            Rotation::None,
4103            Flip::None,
4104            Crop::new()
4105                .with_dst_rect(Some(Rect::new(100, 100, 100, 100)))
4106                .with_dst_color(Some([255, 255, 255, 255])),
4107        );
4108        result.unwrap();
4109
4110        std::fs::write(
4111            "rgba_to_yuyv_opengl.yuyv",
4112            dst.as_u8().unwrap().map().unwrap().as_slice(),
4113        )
4114        .unwrap();
4115        let cpu_dst = TensorDyn::image(
4116            dst_width,
4117            dst_height,
4118            PixelFormat::Yuyv,
4119            DType::U8,
4120            Some(TensorMemory::Dma),
4121        )
4122        .unwrap();
4123        let (result, _src, cpu_dst) = convert_img(
4124            &mut CPUProcessor::new(),
4125            src,
4126            cpu_dst,
4127            Rotation::None,
4128            Flip::None,
4129            Crop::no_crop(),
4130        );
4131        result.unwrap();
4132
4133        compare_images_convert_to_rgb(&dst, &cpu_dst, 0.98, function!());
4134    }
4135
4136    #[test]
4137    #[cfg(target_os = "linux")]
4138    fn test_rgba_to_yuyv_resize_g2d() {
4139        if !is_g2d_available() {
4140            eprintln!(
4141                "SKIPPED: test_rgba_to_yuyv_resize_g2d - G2D library (libg2d.so.2) not available"
4142            );
4143            return;
4144        }
4145        if !is_dma_available() {
4146            eprintln!(
4147                "SKIPPED: test_rgba_to_yuyv_resize_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4148            );
4149            return;
4150        }
4151
4152        let src = load_bytes_to_tensor(
4153            1280,
4154            720,
4155            PixelFormat::Rgba,
4156            Some(TensorMemory::Dma),
4157            include_bytes!(concat!(
4158                env!("CARGO_MANIFEST_DIR"),
4159                "/../../testdata/camera720p.rgba"
4160            )),
4161        )
4162        .unwrap();
4163
4164        let (dst_width, dst_height) = (1280, 720);
4165
4166        let cpu_dst = TensorDyn::image(
4167            dst_width,
4168            dst_height,
4169            PixelFormat::Yuyv,
4170            DType::U8,
4171            Some(TensorMemory::Dma),
4172        )
4173        .unwrap();
4174
4175        let g2d_dst = TensorDyn::image(
4176            dst_width,
4177            dst_height,
4178            PixelFormat::Yuyv,
4179            DType::U8,
4180            Some(TensorMemory::Dma),
4181        )
4182        .unwrap();
4183
4184        let mut g2d_converter = G2DProcessor::new().unwrap();
4185        let crop = Crop {
4186            src_rect: None,
4187            dst_rect: Some(Rect::new(100, 100, 2, 2)),
4188            dst_color: None,
4189        };
4190
4191        g2d_dst
4192            .as_u8()
4193            .unwrap()
4194            .map()
4195            .unwrap()
4196            .as_mut_slice()
4197            .fill(128);
4198        let (result, src, g2d_dst) = convert_img(
4199            &mut g2d_converter,
4200            src,
4201            g2d_dst,
4202            Rotation::None,
4203            Flip::None,
4204            crop,
4205        );
4206        result.unwrap();
4207
4208        let cpu_dst_img = cpu_dst;
4209        cpu_dst_img
4210            .as_u8()
4211            .unwrap()
4212            .map()
4213            .unwrap()
4214            .as_mut_slice()
4215            .fill(128);
4216        let (result, _src, cpu_dst) = convert_img(
4217            &mut CPUProcessor::new(),
4218            src,
4219            cpu_dst_img,
4220            Rotation::None,
4221            Flip::None,
4222            crop,
4223        );
4224        result.unwrap();
4225
4226        compare_images_convert_to_rgb(&cpu_dst, &g2d_dst, 0.98, function!());
4227    }
4228
4229    #[test]
4230    fn test_yuyv_to_rgba_cpu() {
4231        let file = include_bytes!(concat!(
4232            env!("CARGO_MANIFEST_DIR"),
4233            "/../../testdata/camera720p.yuyv"
4234        ))
4235        .to_vec();
4236        let src = TensorDyn::image(1280, 720, PixelFormat::Yuyv, DType::U8, None).unwrap();
4237        src.as_u8()
4238            .unwrap()
4239            .map()
4240            .unwrap()
4241            .as_mut_slice()
4242            .copy_from_slice(&file);
4243
4244        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
4245        let mut cpu_converter = CPUProcessor::new();
4246
4247        let (result, _src, dst) = convert_img(
4248            &mut cpu_converter,
4249            src,
4250            dst,
4251            Rotation::None,
4252            Flip::None,
4253            Crop::no_crop(),
4254        );
4255        result.unwrap();
4256
4257        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
4258        target_image
4259            .as_u8()
4260            .unwrap()
4261            .map()
4262            .unwrap()
4263            .as_mut_slice()
4264            .copy_from_slice(include_bytes!(concat!(
4265                env!("CARGO_MANIFEST_DIR"),
4266                "/../../testdata/camera720p.rgba"
4267            )));
4268
4269        compare_images(&dst, &target_image, 0.98, function!());
4270    }
4271
4272    #[test]
4273    fn test_yuyv_to_rgb_cpu() {
4274        let file = include_bytes!(concat!(
4275            env!("CARGO_MANIFEST_DIR"),
4276            "/../../testdata/camera720p.yuyv"
4277        ))
4278        .to_vec();
4279        let src = TensorDyn::image(1280, 720, PixelFormat::Yuyv, DType::U8, None).unwrap();
4280        src.as_u8()
4281            .unwrap()
4282            .map()
4283            .unwrap()
4284            .as_mut_slice()
4285            .copy_from_slice(&file);
4286
4287        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
4288        let mut cpu_converter = CPUProcessor::new();
4289
4290        let (result, _src, dst) = convert_img(
4291            &mut cpu_converter,
4292            src,
4293            dst,
4294            Rotation::None,
4295            Flip::None,
4296            Crop::no_crop(),
4297        );
4298        result.unwrap();
4299
4300        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
4301        target_image
4302            .as_u8()
4303            .unwrap()
4304            .map()
4305            .unwrap()
4306            .as_mut_slice()
4307            .as_chunks_mut::<3>()
4308            .0
4309            .iter_mut()
4310            .zip(
4311                include_bytes!(concat!(
4312                    env!("CARGO_MANIFEST_DIR"),
4313                    "/../../testdata/camera720p.rgba"
4314                ))
4315                .as_chunks::<4>()
4316                .0,
4317            )
4318            .for_each(|(dst, src)| *dst = [src[0], src[1], src[2]]);
4319
4320        compare_images(&dst, &target_image, 0.98, function!());
4321    }
4322
4323    #[test]
4324    #[cfg(target_os = "linux")]
4325    fn test_yuyv_to_rgba_g2d() {
4326        if !is_g2d_available() {
4327            eprintln!("SKIPPED: test_yuyv_to_rgba_g2d - G2D library (libg2d.so.2) not available");
4328            return;
4329        }
4330        if !is_dma_available() {
4331            eprintln!(
4332                "SKIPPED: test_yuyv_to_rgba_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4333            );
4334            return;
4335        }
4336
4337        let src = load_bytes_to_tensor(
4338            1280,
4339            720,
4340            PixelFormat::Yuyv,
4341            None,
4342            include_bytes!(concat!(
4343                env!("CARGO_MANIFEST_DIR"),
4344                "/../../testdata/camera720p.yuyv"
4345            )),
4346        )
4347        .unwrap();
4348
4349        let dst = TensorDyn::image(
4350            1280,
4351            720,
4352            PixelFormat::Rgba,
4353            DType::U8,
4354            Some(TensorMemory::Dma),
4355        )
4356        .unwrap();
4357        let mut g2d_converter = G2DProcessor::new().unwrap();
4358
4359        let (result, _src, dst) = convert_img(
4360            &mut g2d_converter,
4361            src,
4362            dst,
4363            Rotation::None,
4364            Flip::None,
4365            Crop::no_crop(),
4366        );
4367        result.unwrap();
4368
4369        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
4370        target_image
4371            .as_u8()
4372            .unwrap()
4373            .map()
4374            .unwrap()
4375            .as_mut_slice()
4376            .copy_from_slice(include_bytes!(concat!(
4377                env!("CARGO_MANIFEST_DIR"),
4378                "/../../testdata/camera720p.rgba"
4379            )));
4380
4381        compare_images(&dst, &target_image, 0.98, function!());
4382    }
4383
4384    #[test]
4385    #[cfg(target_os = "linux")]
4386    #[cfg(feature = "opengl")]
4387    fn test_yuyv_to_rgba_opengl() {
4388        if !is_opengl_available() {
4389            eprintln!("SKIPPED: {} - OpenGL not available", function!());
4390            return;
4391        }
4392        if !is_dma_available() {
4393            eprintln!(
4394                "SKIPPED: {} - DMA memory allocation not available (permission denied or no DMA-BUF support)",
4395                function!()
4396            );
4397            return;
4398        }
4399
4400        let src = load_bytes_to_tensor(
4401            1280,
4402            720,
4403            PixelFormat::Yuyv,
4404            Some(TensorMemory::Dma),
4405            include_bytes!(concat!(
4406                env!("CARGO_MANIFEST_DIR"),
4407                "/../../testdata/camera720p.yuyv"
4408            )),
4409        )
4410        .unwrap();
4411
4412        let dst = TensorDyn::image(
4413            1280,
4414            720,
4415            PixelFormat::Rgba,
4416            DType::U8,
4417            Some(TensorMemory::Dma),
4418        )
4419        .unwrap();
4420        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
4421
4422        let (result, _src, dst) = convert_img(
4423            &mut gl_converter,
4424            src,
4425            dst,
4426            Rotation::None,
4427            Flip::None,
4428            Crop::no_crop(),
4429        );
4430        result.unwrap();
4431
4432        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
4433        target_image
4434            .as_u8()
4435            .unwrap()
4436            .map()
4437            .unwrap()
4438            .as_mut_slice()
4439            .copy_from_slice(include_bytes!(concat!(
4440                env!("CARGO_MANIFEST_DIR"),
4441                "/../../testdata/camera720p.rgba"
4442            )));
4443
4444        compare_images(&dst, &target_image, 0.98, function!());
4445    }
4446
4447    #[test]
4448    #[cfg(target_os = "linux")]
4449    fn test_yuyv_to_rgb_g2d() {
4450        if !is_g2d_available() {
4451            eprintln!("SKIPPED: test_yuyv_to_rgb_g2d - G2D library (libg2d.so.2) not available");
4452            return;
4453        }
4454        if !is_dma_available() {
4455            eprintln!(
4456                "SKIPPED: test_yuyv_to_rgb_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4457            );
4458            return;
4459        }
4460
4461        let src = load_bytes_to_tensor(
4462            1280,
4463            720,
4464            PixelFormat::Yuyv,
4465            None,
4466            include_bytes!(concat!(
4467                env!("CARGO_MANIFEST_DIR"),
4468                "/../../testdata/camera720p.yuyv"
4469            )),
4470        )
4471        .unwrap();
4472
4473        let g2d_dst = TensorDyn::image(
4474            1280,
4475            720,
4476            PixelFormat::Rgb,
4477            DType::U8,
4478            Some(TensorMemory::Dma),
4479        )
4480        .unwrap();
4481        let mut g2d_converter = G2DProcessor::new().unwrap();
4482
4483        let (result, src, g2d_dst) = convert_img(
4484            &mut g2d_converter,
4485            src,
4486            g2d_dst,
4487            Rotation::None,
4488            Flip::None,
4489            Crop::no_crop(),
4490        );
4491        result.unwrap();
4492
4493        let cpu_dst = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
4494        let mut cpu_converter: CPUProcessor = CPUProcessor::new();
4495
4496        let (result, _src, cpu_dst) = convert_img(
4497            &mut cpu_converter,
4498            src,
4499            cpu_dst,
4500            Rotation::None,
4501            Flip::None,
4502            Crop::no_crop(),
4503        );
4504        result.unwrap();
4505
4506        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
4507    }
4508
4509    #[test]
4510    #[cfg(target_os = "linux")]
4511    fn test_yuyv_to_yuyv_resize_g2d() {
4512        if !is_g2d_available() {
4513            eprintln!(
4514                "SKIPPED: test_yuyv_to_yuyv_resize_g2d - G2D library (libg2d.so.2) not available"
4515            );
4516            return;
4517        }
4518        if !is_dma_available() {
4519            eprintln!(
4520                "SKIPPED: test_yuyv_to_yuyv_resize_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4521            );
4522            return;
4523        }
4524
4525        let src = load_bytes_to_tensor(
4526            1280,
4527            720,
4528            PixelFormat::Yuyv,
4529            None,
4530            include_bytes!(concat!(
4531                env!("CARGO_MANIFEST_DIR"),
4532                "/../../testdata/camera720p.yuyv"
4533            )),
4534        )
4535        .unwrap();
4536
4537        let g2d_dst = TensorDyn::image(
4538            600,
4539            400,
4540            PixelFormat::Yuyv,
4541            DType::U8,
4542            Some(TensorMemory::Dma),
4543        )
4544        .unwrap();
4545        let mut g2d_converter = G2DProcessor::new().unwrap();
4546
4547        let (result, src, g2d_dst) = convert_img(
4548            &mut g2d_converter,
4549            src,
4550            g2d_dst,
4551            Rotation::None,
4552            Flip::None,
4553            Crop::no_crop(),
4554        );
4555        result.unwrap();
4556
4557        let cpu_dst = TensorDyn::image(600, 400, PixelFormat::Yuyv, DType::U8, None).unwrap();
4558        let mut cpu_converter: CPUProcessor = CPUProcessor::new();
4559
4560        let (result, _src, cpu_dst) = convert_img(
4561            &mut cpu_converter,
4562            src,
4563            cpu_dst,
4564            Rotation::None,
4565            Flip::None,
4566            Crop::no_crop(),
4567        );
4568        result.unwrap();
4569
4570        // TODO: compare PixelFormat::Yuyv and PixelFormat::Yuyv images without having to convert them to PixelFormat::Rgb
4571        compare_images_convert_to_rgb(&g2d_dst, &cpu_dst, 0.98, function!());
4572    }
4573
4574    #[test]
4575    fn test_yuyv_to_rgba_resize_cpu() {
4576        let src = load_bytes_to_tensor(
4577            1280,
4578            720,
4579            PixelFormat::Yuyv,
4580            None,
4581            include_bytes!(concat!(
4582                env!("CARGO_MANIFEST_DIR"),
4583                "/../../testdata/camera720p.yuyv"
4584            )),
4585        )
4586        .unwrap();
4587
4588        let (dst_width, dst_height) = (960, 540);
4589
4590        let dst =
4591            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4592        let mut cpu_converter = CPUProcessor::new();
4593
4594        let (result, _src, dst) = convert_img(
4595            &mut cpu_converter,
4596            src,
4597            dst,
4598            Rotation::None,
4599            Flip::None,
4600            Crop::no_crop(),
4601        );
4602        result.unwrap();
4603
4604        let dst_target =
4605            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4606        let src_target = load_bytes_to_tensor(
4607            1280,
4608            720,
4609            PixelFormat::Rgba,
4610            None,
4611            include_bytes!(concat!(
4612                env!("CARGO_MANIFEST_DIR"),
4613                "/../../testdata/camera720p.rgba"
4614            )),
4615        )
4616        .unwrap();
4617        let (result, _src_target, dst_target) = convert_img(
4618            &mut cpu_converter,
4619            src_target,
4620            dst_target,
4621            Rotation::None,
4622            Flip::None,
4623            Crop::no_crop(),
4624        );
4625        result.unwrap();
4626
4627        compare_images(&dst, &dst_target, 0.98, function!());
4628    }
4629
4630    #[test]
4631    #[cfg(target_os = "linux")]
4632    fn test_yuyv_to_rgba_crop_flip_g2d() {
4633        if !is_g2d_available() {
4634            eprintln!(
4635                "SKIPPED: test_yuyv_to_rgba_crop_flip_g2d - G2D library (libg2d.so.2) not available"
4636            );
4637            return;
4638        }
4639        if !is_dma_available() {
4640            eprintln!(
4641                "SKIPPED: test_yuyv_to_rgba_crop_flip_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4642            );
4643            return;
4644        }
4645
4646        let src = load_bytes_to_tensor(
4647            1280,
4648            720,
4649            PixelFormat::Yuyv,
4650            Some(TensorMemory::Dma),
4651            include_bytes!(concat!(
4652                env!("CARGO_MANIFEST_DIR"),
4653                "/../../testdata/camera720p.yuyv"
4654            )),
4655        )
4656        .unwrap();
4657
4658        let (dst_width, dst_height) = (640, 640);
4659
4660        let dst_g2d = TensorDyn::image(
4661            dst_width,
4662            dst_height,
4663            PixelFormat::Rgba,
4664            DType::U8,
4665            Some(TensorMemory::Dma),
4666        )
4667        .unwrap();
4668        let mut g2d_converter = G2DProcessor::new().unwrap();
4669        let crop = Crop {
4670            src_rect: Some(Rect {
4671                left: 20,
4672                top: 15,
4673                width: 400,
4674                height: 300,
4675            }),
4676            dst_rect: None,
4677            dst_color: None,
4678        };
4679
4680        let (result, src, dst_g2d) = convert_img(
4681            &mut g2d_converter,
4682            src,
4683            dst_g2d,
4684            Rotation::None,
4685            Flip::Horizontal,
4686            crop,
4687        );
4688        result.unwrap();
4689
4690        let dst_cpu = TensorDyn::image(
4691            dst_width,
4692            dst_height,
4693            PixelFormat::Rgba,
4694            DType::U8,
4695            Some(TensorMemory::Dma),
4696        )
4697        .unwrap();
4698        let mut cpu_converter = CPUProcessor::new();
4699
4700        let (result, _src, dst_cpu) = convert_img(
4701            &mut cpu_converter,
4702            src,
4703            dst_cpu,
4704            Rotation::None,
4705            Flip::Horizontal,
4706            crop,
4707        );
4708        result.unwrap();
4709        compare_images(&dst_g2d, &dst_cpu, 0.98, function!());
4710    }
4711
4712    #[test]
4713    #[cfg(target_os = "linux")]
4714    #[cfg(feature = "opengl")]
4715    fn test_yuyv_to_rgba_crop_flip_opengl() {
4716        if !is_opengl_available() {
4717            eprintln!("SKIPPED: {} - OpenGL not available", function!());
4718            return;
4719        }
4720
4721        if !is_dma_available() {
4722            eprintln!(
4723                "SKIPPED: {} - DMA memory allocation not available (permission denied or no DMA-BUF support)",
4724                function!()
4725            );
4726            return;
4727        }
4728
4729        let src = load_bytes_to_tensor(
4730            1280,
4731            720,
4732            PixelFormat::Yuyv,
4733            Some(TensorMemory::Dma),
4734            include_bytes!(concat!(
4735                env!("CARGO_MANIFEST_DIR"),
4736                "/../../testdata/camera720p.yuyv"
4737            )),
4738        )
4739        .unwrap();
4740
4741        let (dst_width, dst_height) = (640, 640);
4742
4743        let dst_gl = TensorDyn::image(
4744            dst_width,
4745            dst_height,
4746            PixelFormat::Rgba,
4747            DType::U8,
4748            Some(TensorMemory::Dma),
4749        )
4750        .unwrap();
4751        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
4752        let crop = Crop {
4753            src_rect: Some(Rect {
4754                left: 20,
4755                top: 15,
4756                width: 400,
4757                height: 300,
4758            }),
4759            dst_rect: None,
4760            dst_color: None,
4761        };
4762
4763        let (result, src, dst_gl) = convert_img(
4764            &mut gl_converter,
4765            src,
4766            dst_gl,
4767            Rotation::None,
4768            Flip::Horizontal,
4769            crop,
4770        );
4771        result.unwrap();
4772
4773        let dst_cpu = TensorDyn::image(
4774            dst_width,
4775            dst_height,
4776            PixelFormat::Rgba,
4777            DType::U8,
4778            Some(TensorMemory::Dma),
4779        )
4780        .unwrap();
4781        let mut cpu_converter = CPUProcessor::new();
4782
4783        let (result, _src, dst_cpu) = convert_img(
4784            &mut cpu_converter,
4785            src,
4786            dst_cpu,
4787            Rotation::None,
4788            Flip::Horizontal,
4789            crop,
4790        );
4791        result.unwrap();
4792        compare_images(&dst_gl, &dst_cpu, 0.98, function!());
4793    }
4794
4795    #[test]
4796    fn test_vyuy_to_rgba_cpu() {
4797        let file = include_bytes!(concat!(
4798            env!("CARGO_MANIFEST_DIR"),
4799            "/../../testdata/camera720p.vyuy"
4800        ))
4801        .to_vec();
4802        let src = TensorDyn::image(1280, 720, PixelFormat::Vyuy, DType::U8, None).unwrap();
4803        src.as_u8()
4804            .unwrap()
4805            .map()
4806            .unwrap()
4807            .as_mut_slice()
4808            .copy_from_slice(&file);
4809
4810        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
4811        let mut cpu_converter = CPUProcessor::new();
4812
4813        let (result, _src, dst) = convert_img(
4814            &mut cpu_converter,
4815            src,
4816            dst,
4817            Rotation::None,
4818            Flip::None,
4819            Crop::no_crop(),
4820        );
4821        result.unwrap();
4822
4823        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
4824        target_image
4825            .as_u8()
4826            .unwrap()
4827            .map()
4828            .unwrap()
4829            .as_mut_slice()
4830            .copy_from_slice(include_bytes!(concat!(
4831                env!("CARGO_MANIFEST_DIR"),
4832                "/../../testdata/camera720p.rgba"
4833            )));
4834
4835        compare_images(&dst, &target_image, 0.98, function!());
4836    }
4837
4838    #[test]
4839    fn test_vyuy_to_rgb_cpu() {
4840        let file = include_bytes!(concat!(
4841            env!("CARGO_MANIFEST_DIR"),
4842            "/../../testdata/camera720p.vyuy"
4843        ))
4844        .to_vec();
4845        let src = TensorDyn::image(1280, 720, PixelFormat::Vyuy, DType::U8, None).unwrap();
4846        src.as_u8()
4847            .unwrap()
4848            .map()
4849            .unwrap()
4850            .as_mut_slice()
4851            .copy_from_slice(&file);
4852
4853        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
4854        let mut cpu_converter = CPUProcessor::new();
4855
4856        let (result, _src, dst) = convert_img(
4857            &mut cpu_converter,
4858            src,
4859            dst,
4860            Rotation::None,
4861            Flip::None,
4862            Crop::no_crop(),
4863        );
4864        result.unwrap();
4865
4866        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
4867        target_image
4868            .as_u8()
4869            .unwrap()
4870            .map()
4871            .unwrap()
4872            .as_mut_slice()
4873            .as_chunks_mut::<3>()
4874            .0
4875            .iter_mut()
4876            .zip(
4877                include_bytes!(concat!(
4878                    env!("CARGO_MANIFEST_DIR"),
4879                    "/../../testdata/camera720p.rgba"
4880                ))
4881                .as_chunks::<4>()
4882                .0,
4883            )
4884            .for_each(|(dst, src)| *dst = [src[0], src[1], src[2]]);
4885
4886        compare_images(&dst, &target_image, 0.98, function!());
4887    }
4888
4889    #[test]
4890    #[cfg(target_os = "linux")]
4891    #[ignore = "G2D does not support VYUY; re-enable when hardware support is added"]
4892    fn test_vyuy_to_rgba_g2d() {
4893        if !is_g2d_available() {
4894            eprintln!("SKIPPED: test_vyuy_to_rgba_g2d - G2D library (libg2d.so.2) not available");
4895            return;
4896        }
4897        if !is_dma_available() {
4898            eprintln!(
4899                "SKIPPED: test_vyuy_to_rgba_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4900            );
4901            return;
4902        }
4903
4904        let src = load_bytes_to_tensor(
4905            1280,
4906            720,
4907            PixelFormat::Vyuy,
4908            None,
4909            include_bytes!(concat!(
4910                env!("CARGO_MANIFEST_DIR"),
4911                "/../../testdata/camera720p.vyuy"
4912            )),
4913        )
4914        .unwrap();
4915
4916        let dst = TensorDyn::image(
4917            1280,
4918            720,
4919            PixelFormat::Rgba,
4920            DType::U8,
4921            Some(TensorMemory::Dma),
4922        )
4923        .unwrap();
4924        let mut g2d_converter = G2DProcessor::new().unwrap();
4925
4926        let (result, _src, dst) = convert_img(
4927            &mut g2d_converter,
4928            src,
4929            dst,
4930            Rotation::None,
4931            Flip::None,
4932            Crop::no_crop(),
4933        );
4934        match result {
4935            Err(Error::G2D(_)) => {
4936                eprintln!("SKIPPED: test_vyuy_to_rgba_g2d - G2D does not support PixelFormat::Vyuy format");
4937                return;
4938            }
4939            r => r.unwrap(),
4940        }
4941
4942        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
4943        target_image
4944            .as_u8()
4945            .unwrap()
4946            .map()
4947            .unwrap()
4948            .as_mut_slice()
4949            .copy_from_slice(include_bytes!(concat!(
4950                env!("CARGO_MANIFEST_DIR"),
4951                "/../../testdata/camera720p.rgba"
4952            )));
4953
4954        compare_images(&dst, &target_image, 0.98, function!());
4955    }
4956
4957    #[test]
4958    #[cfg(target_os = "linux")]
4959    #[ignore = "G2D does not support VYUY; re-enable when hardware support is added"]
4960    fn test_vyuy_to_rgb_g2d() {
4961        if !is_g2d_available() {
4962            eprintln!("SKIPPED: test_vyuy_to_rgb_g2d - G2D library (libg2d.so.2) not available");
4963            return;
4964        }
4965        if !is_dma_available() {
4966            eprintln!(
4967                "SKIPPED: test_vyuy_to_rgb_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4968            );
4969            return;
4970        }
4971
4972        let src = load_bytes_to_tensor(
4973            1280,
4974            720,
4975            PixelFormat::Vyuy,
4976            None,
4977            include_bytes!(concat!(
4978                env!("CARGO_MANIFEST_DIR"),
4979                "/../../testdata/camera720p.vyuy"
4980            )),
4981        )
4982        .unwrap();
4983
4984        let g2d_dst = TensorDyn::image(
4985            1280,
4986            720,
4987            PixelFormat::Rgb,
4988            DType::U8,
4989            Some(TensorMemory::Dma),
4990        )
4991        .unwrap();
4992        let mut g2d_converter = G2DProcessor::new().unwrap();
4993
4994        let (result, src, g2d_dst) = convert_img(
4995            &mut g2d_converter,
4996            src,
4997            g2d_dst,
4998            Rotation::None,
4999            Flip::None,
5000            Crop::no_crop(),
5001        );
5002        match result {
5003            Err(Error::G2D(_)) => {
5004                eprintln!(
5005                    "SKIPPED: test_vyuy_to_rgb_g2d - G2D does not support PixelFormat::Vyuy format"
5006                );
5007                return;
5008            }
5009            r => r.unwrap(),
5010        }
5011
5012        let cpu_dst = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
5013        let mut cpu_converter: CPUProcessor = CPUProcessor::new();
5014
5015        let (result, _src, cpu_dst) = convert_img(
5016            &mut cpu_converter,
5017            src,
5018            cpu_dst,
5019            Rotation::None,
5020            Flip::None,
5021            Crop::no_crop(),
5022        );
5023        result.unwrap();
5024
5025        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
5026    }
5027
5028    #[test]
5029    #[cfg(target_os = "linux")]
5030    #[cfg(feature = "opengl")]
5031    fn test_vyuy_to_rgba_opengl() {
5032        if !is_opengl_available() {
5033            eprintln!("SKIPPED: {} - OpenGL not available", function!());
5034            return;
5035        }
5036        if !is_dma_available() {
5037            eprintln!(
5038                "SKIPPED: {} - DMA memory allocation not available (permission denied or no DMA-BUF support)",
5039                function!()
5040            );
5041            return;
5042        }
5043
5044        let src = load_bytes_to_tensor(
5045            1280,
5046            720,
5047            PixelFormat::Vyuy,
5048            Some(TensorMemory::Dma),
5049            include_bytes!(concat!(
5050                env!("CARGO_MANIFEST_DIR"),
5051                "/../../testdata/camera720p.vyuy"
5052            )),
5053        )
5054        .unwrap();
5055
5056        let dst = TensorDyn::image(
5057            1280,
5058            720,
5059            PixelFormat::Rgba,
5060            DType::U8,
5061            Some(TensorMemory::Dma),
5062        )
5063        .unwrap();
5064        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
5065
5066        let (result, _src, dst) = convert_img(
5067            &mut gl_converter,
5068            src,
5069            dst,
5070            Rotation::None,
5071            Flip::None,
5072            Crop::no_crop(),
5073        );
5074        match result {
5075            Err(Error::NotSupported(_)) => {
5076                eprintln!(
5077                    "SKIPPED: {} - OpenGL does not support PixelFormat::Vyuy DMA format",
5078                    function!()
5079                );
5080                return;
5081            }
5082            r => r.unwrap(),
5083        }
5084
5085        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
5086        target_image
5087            .as_u8()
5088            .unwrap()
5089            .map()
5090            .unwrap()
5091            .as_mut_slice()
5092            .copy_from_slice(include_bytes!(concat!(
5093                env!("CARGO_MANIFEST_DIR"),
5094                "/../../testdata/camera720p.rgba"
5095            )));
5096
5097        compare_images(&dst, &target_image, 0.98, function!());
5098    }
5099
5100    #[test]
5101    fn test_nv12_to_rgba_cpu() {
5102        let file = include_bytes!(concat!(
5103            env!("CARGO_MANIFEST_DIR"),
5104            "/../../testdata/zidane.nv12"
5105        ))
5106        .to_vec();
5107        let src = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
5108        src.as_u8().unwrap().map().unwrap().as_mut_slice()[0..(1280 * 720 * 3 / 2)]
5109            .copy_from_slice(&file);
5110
5111        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
5112        let mut cpu_converter = CPUProcessor::new();
5113
5114        let (result, _src, dst) = convert_img(
5115            &mut cpu_converter,
5116            src,
5117            dst,
5118            Rotation::None,
5119            Flip::None,
5120            Crop::no_crop(),
5121        );
5122        result.unwrap();
5123
5124        let target_image = crate::load_image(
5125            include_bytes!(concat!(
5126                env!("CARGO_MANIFEST_DIR"),
5127                "/../../testdata/zidane.jpg"
5128            )),
5129            Some(PixelFormat::Rgba),
5130            None,
5131        )
5132        .unwrap();
5133
5134        compare_images(&dst, &target_image, 0.98, function!());
5135    }
5136
5137    #[test]
5138    fn test_nv12_to_rgb_cpu() {
5139        let file = include_bytes!(concat!(
5140            env!("CARGO_MANIFEST_DIR"),
5141            "/../../testdata/zidane.nv12"
5142        ))
5143        .to_vec();
5144        let src = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
5145        src.as_u8().unwrap().map().unwrap().as_mut_slice()[0..(1280 * 720 * 3 / 2)]
5146            .copy_from_slice(&file);
5147
5148        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
5149        let mut cpu_converter = CPUProcessor::new();
5150
5151        let (result, _src, dst) = convert_img(
5152            &mut cpu_converter,
5153            src,
5154            dst,
5155            Rotation::None,
5156            Flip::None,
5157            Crop::no_crop(),
5158        );
5159        result.unwrap();
5160
5161        let target_image = crate::load_image(
5162            include_bytes!(concat!(
5163                env!("CARGO_MANIFEST_DIR"),
5164                "/../../testdata/zidane.jpg"
5165            )),
5166            Some(PixelFormat::Rgb),
5167            None,
5168        )
5169        .unwrap();
5170
5171        compare_images(&dst, &target_image, 0.98, function!());
5172    }
5173
5174    #[test]
5175    fn test_nv12_to_grey_cpu() {
5176        let file = include_bytes!(concat!(
5177            env!("CARGO_MANIFEST_DIR"),
5178            "/../../testdata/zidane.nv12"
5179        ))
5180        .to_vec();
5181        let src = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
5182        src.as_u8().unwrap().map().unwrap().as_mut_slice()[0..(1280 * 720 * 3 / 2)]
5183            .copy_from_slice(&file);
5184
5185        let dst = TensorDyn::image(1280, 720, PixelFormat::Grey, DType::U8, None).unwrap();
5186        let mut cpu_converter = CPUProcessor::new();
5187
5188        let (result, _src, dst) = convert_img(
5189            &mut cpu_converter,
5190            src,
5191            dst,
5192            Rotation::None,
5193            Flip::None,
5194            Crop::no_crop(),
5195        );
5196        result.unwrap();
5197
5198        let target_image = crate::load_image(
5199            include_bytes!(concat!(
5200                env!("CARGO_MANIFEST_DIR"),
5201                "/../../testdata/zidane.jpg"
5202            )),
5203            Some(PixelFormat::Grey),
5204            None,
5205        )
5206        .unwrap();
5207
5208        compare_images(&dst, &target_image, 0.98, function!());
5209    }
5210
5211    #[test]
5212    fn test_nv12_to_yuyv_cpu() {
5213        let file = include_bytes!(concat!(
5214            env!("CARGO_MANIFEST_DIR"),
5215            "/../../testdata/zidane.nv12"
5216        ))
5217        .to_vec();
5218        let src = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
5219        src.as_u8().unwrap().map().unwrap().as_mut_slice()[0..(1280 * 720 * 3 / 2)]
5220            .copy_from_slice(&file);
5221
5222        let dst = TensorDyn::image(1280, 720, PixelFormat::Yuyv, DType::U8, None).unwrap();
5223        let mut cpu_converter = CPUProcessor::new();
5224
5225        let (result, _src, dst) = convert_img(
5226            &mut cpu_converter,
5227            src,
5228            dst,
5229            Rotation::None,
5230            Flip::None,
5231            Crop::no_crop(),
5232        );
5233        result.unwrap();
5234
5235        let target_image = crate::load_image(
5236            include_bytes!(concat!(
5237                env!("CARGO_MANIFEST_DIR"),
5238                "/../../testdata/zidane.jpg"
5239            )),
5240            Some(PixelFormat::Rgb),
5241            None,
5242        )
5243        .unwrap();
5244
5245        compare_images_convert_to_rgb(&dst, &target_image, 0.98, function!());
5246    }
5247
    #[test]
    fn test_cpu_resize_planar_rgb() {
        // Letterbox a 4x4 RGBA source into a 5x5 planar-RGB destination:
        // the source is placed at dst offset (1,1) with size 4x4 and the
        // uncovered border is filled with the pad color (114).
        let src = TensorDyn::image(4, 4, PixelFormat::Rgba, DType::U8, None).unwrap();
        #[rustfmt::skip]
        let src_image = [
                    255, 0, 0, 255,     0, 255, 0, 255,     0, 0, 255, 255,     255, 255, 0, 255,
                    255, 0, 0, 0,       0, 0, 0, 255,       255,  0, 255, 0,    255, 0, 255, 255,
                    0, 0, 255, 0,       0, 255, 255, 255,   255, 255, 0, 0,     0, 0, 0, 255,
                    255, 0, 0, 0,       0, 0, 0, 255,       255,  0, 255, 0,    255, 0, 255, 255,
        ];
        src.as_u8()
            .unwrap()
            .map()
            .unwrap()
            .as_mut_slice()
            .copy_from_slice(&src_image);

        let cpu_dst = TensorDyn::image(5, 5, PixelFormat::PlanarRgb, DType::U8, None).unwrap();
        let mut cpu_converter = CPUProcessor::new();

        let (result, _src, cpu_dst) = convert_img(
            &mut cpu_converter,
            src,
            cpu_dst,
            Rotation::None,
            Flip::None,
            Crop::new()
                .with_dst_rect(Some(Rect {
                    left: 1,
                    top: 1,
                    width: 4,
                    height: 4,
                }))
                .with_dst_color(Some([114, 114, 114, 255])),
        );
        result.unwrap();

        // Expected planar output: 3 planes x 25 values (5x5) = 75 bytes;
        // each source line below holds one full plane.  First row and first
        // column of every plane carry the 114 pad color from the letterbox.
        #[rustfmt::skip]
        let expected_dst = [
            114, 114, 114, 114, 114,    114, 255, 0, 0, 255,    114, 255, 0, 255, 255,      114, 0, 0, 255, 0,        114, 255, 0, 255, 255,
            114, 114, 114, 114, 114,    114, 0, 255, 0, 255,    114, 0, 0, 0, 0,            114, 0, 255, 255, 0,      114, 0, 0, 0, 0,
            114, 114, 114, 114, 114,    114, 0, 0, 255, 0,      114, 0, 0, 255, 255,        114, 255, 255, 0, 0,      114, 0, 0, 255, 255,
        ];

        assert_eq!(
            cpu_dst.as_u8().unwrap().map().unwrap().as_slice(),
            &expected_dst
        );
    }
5297
    #[test]
    fn test_cpu_resize_planar_rgba() {
        // Same letterbox scenario as test_cpu_resize_planar_rgb, but the
        // destination is planar RGBA so an alpha plane is produced as well.
        let src = TensorDyn::image(4, 4, PixelFormat::Rgba, DType::U8, None).unwrap();
        #[rustfmt::skip]
        let src_image = [
                    255, 0, 0, 255,     0, 255, 0, 255,     0, 0, 255, 255,     255, 255, 0, 255,
                    255, 0, 0, 0,       0, 0, 0, 255,       255,  0, 255, 0,    255, 0, 255, 255,
                    0, 0, 255, 0,       0, 255, 255, 255,   255, 255, 0, 0,     0, 0, 0, 255,
                    255, 0, 0, 0,       0, 0, 0, 255,       255,  0, 255, 0,    255, 0, 255, 255,
        ];
        src.as_u8()
            .unwrap()
            .map()
            .unwrap()
            .as_mut_slice()
            .copy_from_slice(&src_image);

        let cpu_dst = TensorDyn::image(5, 5, PixelFormat::PlanarRgba, DType::U8, None).unwrap();
        let mut cpu_converter = CPUProcessor::new();

        let (result, _src, cpu_dst) = convert_img(
            &mut cpu_converter,
            src,
            cpu_dst,
            Rotation::None,
            Flip::None,
            Crop::new()
                .with_dst_rect(Some(Rect {
                    left: 1,
                    top: 1,
                    width: 4,
                    height: 4,
                }))
                .with_dst_color(Some([114, 114, 114, 255])),
        );
        result.unwrap();

        // Expected planar output: 4 planes x 25 values (5x5) = 100 bytes;
        // each source line below holds one full plane (R, G, B, then A).
        // The alpha plane's pad value is 255, matching dst_color[3].
        #[rustfmt::skip]
        let expected_dst = [
            114, 114, 114, 114, 114,    114, 255, 0, 0, 255,        114, 255, 0, 255, 255,      114, 0, 0, 255, 0,        114, 255, 0, 255, 255,
            114, 114, 114, 114, 114,    114, 0, 255, 0, 255,        114, 0, 0, 0, 0,            114, 0, 255, 255, 0,      114, 0, 0, 0, 0,
            114, 114, 114, 114, 114,    114, 0, 0, 255, 0,          114, 0, 0, 255, 255,        114, 255, 255, 0, 0,      114, 0, 0, 255, 255,
            255, 255, 255, 255, 255,    255, 255, 255, 255, 255,    255, 0, 255, 0, 255,        255, 0, 255, 0, 255,      255, 0, 255, 0, 255,
        ];

        assert_eq!(
            cpu_dst.as_u8().unwrap().map().unwrap().as_slice(),
            &expected_dst
        );
    }
5348
    #[test]
    #[cfg(target_os = "linux")]
    #[cfg(feature = "opengl")]
    fn test_opengl_resize_planar_rgb() {
        // Compares the OpenGL letterbox resize into PlanarRgb against the
        // CPU implementation of the same conversion.
        if !is_opengl_available() {
            eprintln!("SKIPPED: {} - OpenGL not available", function!());
            return;
        }

        if !is_dma_available() {
            eprintln!(
                "SKIPPED: {} - DMA memory allocation not available (permission denied or no DMA-BUF support)",
                function!()
            );
            return;
        }

        let dst_width = 640;
        let dst_height = 640;
        let file = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../testdata/test_image.jpg"
        ))
        .to_vec();
        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();

        // CPU reference destination (planar RGB).
        let cpu_dst = TensorDyn::image(
            dst_width,
            dst_height,
            PixelFormat::PlanarRgb,
            DType::U8,
            None,
        )
        .unwrap();
        let mut cpu_converter = CPUProcessor::new();
        // First pass: full-frame (no crop) conversion into cpu_dst.
        // NOTE(review): this pass appears redundant — the letterboxed pass
        // below fills the border via dst_color and overwrites the interior;
        // confirm it has no side effect before removing.
        let (result, src, cpu_dst) = convert_img(
            &mut cpu_converter,
            src,
            cpu_dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();
        // Letterbox: 440x440 content at offset (102,102), border filled
        // with [114, 114, 114, 114].
        let crop_letterbox = Crop::new()
            .with_dst_rect(Some(Rect {
                left: 102,
                top: 102,
                width: 440,
                height: 440,
            }))
            .with_dst_color(Some([114, 114, 114, 114]));
        let (result, src, cpu_dst) = convert_img(
            &mut cpu_converter,
            src,
            cpu_dst,
            Rotation::None,
            Flip::None,
            crop_letterbox,
        );
        result.unwrap();

        // OpenGL destination for the identical letterbox conversion.
        let gl_dst = TensorDyn::image(
            dst_width,
            dst_height,
            PixelFormat::PlanarRgb,
            DType::U8,
            None,
        )
        .unwrap();
        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();

        let (result, _src, gl_dst) = convert_img(
            &mut gl_converter,
            src,
            gl_dst,
            Rotation::None,
            Flip::None,
            crop_letterbox,
        );
        result.unwrap();
        compare_images(&gl_dst, &cpu_dst, 0.98, function!());
    }
5432
5433    #[test]
5434    fn test_cpu_resize_nv16() {
5435        let file = include_bytes!(concat!(
5436            env!("CARGO_MANIFEST_DIR"),
5437            "/../../testdata/zidane.jpg"
5438        ))
5439        .to_vec();
5440        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
5441
5442        let cpu_nv16_dst = TensorDyn::image(640, 640, PixelFormat::Nv16, DType::U8, None).unwrap();
5443        let cpu_rgb_dst = TensorDyn::image(640, 640, PixelFormat::Rgb, DType::U8, None).unwrap();
5444        let mut cpu_converter = CPUProcessor::new();
5445        let crop = Crop::new()
5446            .with_dst_rect(Some(Rect {
5447                left: 20,
5448                top: 140,
5449                width: 600,
5450                height: 360,
5451            }))
5452            .with_dst_color(Some([255, 128, 0, 255]));
5453
5454        let (result, src, cpu_nv16_dst) = convert_img(
5455            &mut cpu_converter,
5456            src,
5457            cpu_nv16_dst,
5458            Rotation::None,
5459            Flip::None,
5460            crop,
5461        );
5462        result.unwrap();
5463
5464        let (result, _src, cpu_rgb_dst) = convert_img(
5465            &mut cpu_converter,
5466            src,
5467            cpu_rgb_dst,
5468            Rotation::None,
5469            Flip::None,
5470            crop,
5471        );
5472        result.unwrap();
5473        compare_images_convert_to_rgb(&cpu_nv16_dst, &cpu_rgb_dst, 0.99, function!());
5474    }
5475
    /// Allocate a `width` x `height` image tensor with the given `format`
    /// and memory kind, then fill it with `bytes`.
    ///
    /// Returns the populated tensor, or the error from tensor creation /
    /// mapping.  Panics if `as_u8()` fails or if `bytes` does not exactly
    /// match the mapped slice length (`copy_from_slice` requires equal
    /// lengths).
    fn load_bytes_to_tensor(
        width: usize,
        height: usize,
        format: PixelFormat,
        memory: Option<TensorMemory>,
        bytes: &[u8],
    ) -> Result<TensorDyn, Error> {
        let src = TensorDyn::image(width, height, format, DType::U8, memory)?;
        src.as_u8()
            .unwrap()
            .map()?
            .as_mut_slice()
            .copy_from_slice(bytes);
        Ok(src)
    }
5491
5492    fn compare_images(img1: &TensorDyn, img2: &TensorDyn, threshold: f64, name: &str) {
5493        assert_eq!(img1.height(), img2.height(), "Heights differ");
5494        assert_eq!(img1.width(), img2.width(), "Widths differ");
5495        assert_eq!(
5496            img1.format().unwrap(),
5497            img2.format().unwrap(),
5498            "PixelFormat differ"
5499        );
5500        assert!(
5501            matches!(
5502                img1.format().unwrap(),
5503                PixelFormat::Rgb | PixelFormat::Rgba | PixelFormat::Grey | PixelFormat::PlanarRgb
5504            ),
5505            "format must be Rgb or Rgba for comparison"
5506        );
5507
5508        let image1 = match img1.format().unwrap() {
5509            PixelFormat::Rgb => image::RgbImage::from_vec(
5510                img1.width().unwrap() as u32,
5511                img1.height().unwrap() as u32,
5512                img1.as_u8().unwrap().map().unwrap().to_vec(),
5513            )
5514            .unwrap(),
5515            PixelFormat::Rgba => image::RgbaImage::from_vec(
5516                img1.width().unwrap() as u32,
5517                img1.height().unwrap() as u32,
5518                img1.as_u8().unwrap().map().unwrap().to_vec(),
5519            )
5520            .unwrap()
5521            .convert(),
5522            PixelFormat::Grey => image::GrayImage::from_vec(
5523                img1.width().unwrap() as u32,
5524                img1.height().unwrap() as u32,
5525                img1.as_u8().unwrap().map().unwrap().to_vec(),
5526            )
5527            .unwrap()
5528            .convert(),
5529            PixelFormat::PlanarRgb => image::GrayImage::from_vec(
5530                img1.width().unwrap() as u32,
5531                (img1.height().unwrap() * 3) as u32,
5532                img1.as_u8().unwrap().map().unwrap().to_vec(),
5533            )
5534            .unwrap()
5535            .convert(),
5536            _ => return,
5537        };
5538
5539        let image2 = match img2.format().unwrap() {
5540            PixelFormat::Rgb => image::RgbImage::from_vec(
5541                img2.width().unwrap() as u32,
5542                img2.height().unwrap() as u32,
5543                img2.as_u8().unwrap().map().unwrap().to_vec(),
5544            )
5545            .unwrap(),
5546            PixelFormat::Rgba => image::RgbaImage::from_vec(
5547                img2.width().unwrap() as u32,
5548                img2.height().unwrap() as u32,
5549                img2.as_u8().unwrap().map().unwrap().to_vec(),
5550            )
5551            .unwrap()
5552            .convert(),
5553            PixelFormat::Grey => image::GrayImage::from_vec(
5554                img2.width().unwrap() as u32,
5555                img2.height().unwrap() as u32,
5556                img2.as_u8().unwrap().map().unwrap().to_vec(),
5557            )
5558            .unwrap()
5559            .convert(),
5560            PixelFormat::PlanarRgb => image::GrayImage::from_vec(
5561                img2.width().unwrap() as u32,
5562                (img2.height().unwrap() * 3) as u32,
5563                img2.as_u8().unwrap().map().unwrap().to_vec(),
5564            )
5565            .unwrap()
5566            .convert(),
5567            _ => return,
5568        };
5569
5570        let similarity = image_compare::rgb_similarity_structure(
5571            &image_compare::Algorithm::RootMeanSquared,
5572            &image1,
5573            &image2,
5574        )
5575        .expect("Image Comparison failed");
5576        if similarity.score < threshold {
5577            // image1.save(format!("{name}_1.png"));
5578            // image2.save(format!("{name}_2.png"));
5579            similarity
5580                .image
5581                .to_color_map()
5582                .save(format!("{name}.png"))
5583                .unwrap();
5584            panic!(
5585                "{name}: converted image and target image have similarity score too low: {} < {}",
5586                similarity.score, threshold
5587            )
5588        }
5589    }
5590
    /// Compare two images of arbitrary (possibly non-RGB) formats by first
    /// converting both to packed RGB with the CPU processor, then checking
    /// RMS structural similarity against `threshold`.
    ///
    /// If either CPU conversion fails, falls back to comparing the raw
    /// bytes of both tensors as grey strips of width `img1.width()`.
    /// Panics when the similarity score is below `threshold`.
    fn compare_images_convert_to_rgb(
        img1: &TensorDyn,
        img2: &TensorDyn,
        threshold: f64,
        name: &str,
    ) {
        assert_eq!(img1.height(), img2.height(), "Heights differ");
        assert_eq!(img1.width(), img2.width(), "Widths differ");

        // Scratch RGB tensors both inputs are converted into.
        let mut img_rgb1 = TensorDyn::image(
            img1.width().unwrap(),
            img1.height().unwrap(),
            PixelFormat::Rgb,
            DType::U8,
            Some(TensorMemory::Mem),
        )
        .unwrap();
        let mut img_rgb2 = TensorDyn::image(
            img1.width().unwrap(),
            img1.height().unwrap(),
            PixelFormat::Rgb,
            DType::U8,
            Some(TensorMemory::Mem),
        )
        .unwrap();
        let mut __cv = CPUProcessor::default();
        let r1 = __cv.convert(
            img1,
            &mut img_rgb1,
            crate::Rotation::None,
            crate::Flip::None,
            crate::Crop::default(),
        );
        let r2 = __cv.convert(
            img2,
            &mut img_rgb2,
            crate::Rotation::None,
            crate::Flip::None,
            crate::Crop::default(),
        );
        if r1.is_err() || r2.is_err() {
            // Fallback: compare raw bytes as greyscale strip
            // Heights are derived from the byte counts, so the two strips
            // may differ in height when the formats have different sizes;
            // NOTE(review): image_compare is expected to reject mismatched
            // dimensions in that case — confirm this is the intended
            // failure mode.
            let w = img1.width().unwrap() as u32;
            let data1 = img1.as_u8().unwrap().map().unwrap().to_vec();
            let data2 = img2.as_u8().unwrap().map().unwrap().to_vec();
            let h1 = (data1.len() as u32) / w;
            let h2 = (data2.len() as u32) / w;
            let g1 = image::GrayImage::from_vec(w, h1, data1).unwrap();
            let g2 = image::GrayImage::from_vec(w, h2, data2).unwrap();
            let similarity = image_compare::gray_similarity_structure(
                &image_compare::Algorithm::RootMeanSquared,
                &g1,
                &g2,
            )
            .expect("Image Comparison failed");
            if similarity.score < threshold {
                panic!(
                    "{name}: converted image and target image have similarity score too low: {} < {}",
                    similarity.score, threshold
                )
            }
            return;
        }

        let image1 = image::RgbImage::from_vec(
            img_rgb1.width().unwrap() as u32,
            img_rgb1.height().unwrap() as u32,
            img_rgb1.as_u8().unwrap().map().unwrap().to_vec(),
        )
        .unwrap();

        let image2 = image::RgbImage::from_vec(
            img_rgb2.width().unwrap() as u32,
            img_rgb2.height().unwrap() as u32,
            img_rgb2.as_u8().unwrap().map().unwrap().to_vec(),
        )
        .unwrap();

        let similarity = image_compare::rgb_similarity_structure(
            &image_compare::Algorithm::RootMeanSquared,
            &image1,
            &image2,
        )
        .expect("Image Comparison failed");
        if similarity.score < threshold {
            // image1.save(format!("{name}_1.png"));
            // image2.save(format!("{name}_2.png"));
            // Persist the difference color map for post-mortem inspection.
            similarity
                .image
                .to_color_map()
                .save(format!("{name}.png"))
                .unwrap();
            panic!(
                "{name}: converted image and target image have similarity score too low: {} < {}",
                similarity.score, threshold
            )
        }
    }
5689
5690    // =========================================================================
5691    // PixelFormat::Nv12 Format Tests
5692    // =========================================================================
5693
5694    #[test]
5695    fn test_nv12_image_creation() {
5696        let width = 640;
5697        let height = 480;
5698        let img = TensorDyn::image(width, height, PixelFormat::Nv12, DType::U8, None).unwrap();
5699
5700        assert_eq!(img.width(), Some(width));
5701        assert_eq!(img.height(), Some(height));
5702        assert_eq!(img.format().unwrap(), PixelFormat::Nv12);
5703        // PixelFormat::Nv12 uses shape [H*3/2, W] to store Y plane + UV plane
5704        assert_eq!(img.as_u8().unwrap().shape(), &[height * 3 / 2, width]);
5705    }
5706
5707    #[test]
5708    fn test_nv12_channels() {
5709        let img = TensorDyn::image(640, 480, PixelFormat::Nv12, DType::U8, None).unwrap();
5710        // PixelFormat::Nv12.channels() returns 1 (luma plane)
5711        assert_eq!(img.format().unwrap().channels(), 1);
5712    }
5713
5714    // =========================================================================
5715    // Tensor Format Metadata Tests
5716    // =========================================================================
5717
5718    #[test]
5719    fn test_tensor_set_format_planar() {
5720        let mut tensor = Tensor::<u8>::new(&[3, 480, 640], None, None).unwrap();
5721        tensor.set_format(PixelFormat::PlanarRgb).unwrap();
5722        assert_eq!(tensor.format(), Some(PixelFormat::PlanarRgb));
5723        assert_eq!(tensor.width(), Some(640));
5724        assert_eq!(tensor.height(), Some(480));
5725    }
5726
5727    #[test]
5728    fn test_tensor_set_format_interleaved() {
5729        let mut tensor = Tensor::<u8>::new(&[480, 640, 4], None, None).unwrap();
5730        tensor.set_format(PixelFormat::Rgba).unwrap();
5731        assert_eq!(tensor.format(), Some(PixelFormat::Rgba));
5732        assert_eq!(tensor.width(), Some(640));
5733        assert_eq!(tensor.height(), Some(480));
5734    }
5735
5736    #[test]
5737    fn test_tensordyn_image_rgb() {
5738        let img = TensorDyn::image(640, 480, PixelFormat::Rgb, DType::U8, None).unwrap();
5739        assert_eq!(img.width(), Some(640));
5740        assert_eq!(img.height(), Some(480));
5741        assert_eq!(img.format(), Some(PixelFormat::Rgb));
5742    }
5743
5744    #[test]
5745    fn test_tensordyn_image_planar_rgb() {
5746        let img = TensorDyn::image(640, 480, PixelFormat::PlanarRgb, DType::U8, None).unwrap();
5747        assert_eq!(img.width(), Some(640));
5748        assert_eq!(img.height(), Some(480));
5749        assert_eq!(img.format(), Some(PixelFormat::PlanarRgb));
5750    }
5751
5752    #[test]
5753    fn test_rgb_int8_format() {
5754        // Int8 variant: same PixelFormat::Rgb but with DType::I8
5755        let img = TensorDyn::image(
5756            1280,
5757            720,
5758            PixelFormat::Rgb,
5759            DType::I8,
5760            Some(TensorMemory::Mem),
5761        )
5762        .unwrap();
5763        assert_eq!(img.width(), Some(1280));
5764        assert_eq!(img.height(), Some(720));
5765        assert_eq!(img.format(), Some(PixelFormat::Rgb));
5766        assert_eq!(img.dtype(), DType::I8);
5767    }
5768
5769    #[test]
5770    fn test_planar_rgb_int8_format() {
5771        let img = TensorDyn::image(
5772            1280,
5773            720,
5774            PixelFormat::PlanarRgb,
5775            DType::I8,
5776            Some(TensorMemory::Mem),
5777        )
5778        .unwrap();
5779        assert_eq!(img.width(), Some(1280));
5780        assert_eq!(img.height(), Some(720));
5781        assert_eq!(img.format(), Some(PixelFormat::PlanarRgb));
5782        assert_eq!(img.dtype(), DType::I8);
5783    }
5784
5785    #[test]
5786    fn test_rgb_from_tensor() {
5787        let mut tensor = Tensor::<u8>::new(&[720, 1280, 3], None, None).unwrap();
5788        tensor.set_format(PixelFormat::Rgb).unwrap();
5789        let img = TensorDyn::from(tensor);
5790        assert_eq!(img.width(), Some(1280));
5791        assert_eq!(img.height(), Some(720));
5792        assert_eq!(img.format(), Some(PixelFormat::Rgb));
5793    }
5794
5795    #[test]
5796    fn test_planar_rgb_from_tensor() {
5797        let mut tensor = Tensor::<u8>::new(&[3, 720, 1280], None, None).unwrap();
5798        tensor.set_format(PixelFormat::PlanarRgb).unwrap();
5799        let img = TensorDyn::from(tensor);
5800        assert_eq!(img.width(), Some(1280));
5801        assert_eq!(img.height(), Some(720));
5802        assert_eq!(img.format(), Some(PixelFormat::PlanarRgb));
5803    }
5804
5805    #[test]
5806    fn test_dtype_determines_int8() {
5807        // DType::I8 indicates int8 data
5808        let u8_img = TensorDyn::image(64, 64, PixelFormat::Rgb, DType::U8, None).unwrap();
5809        let i8_img = TensorDyn::image(64, 64, PixelFormat::Rgb, DType::I8, None).unwrap();
5810        assert_eq!(u8_img.dtype(), DType::U8);
5811        assert_eq!(i8_img.dtype(), DType::I8);
5812    }
5813
5814    #[test]
5815    fn test_pixel_layout_packed_vs_planar() {
5816        // Packed vs planar layout classification
5817        assert_eq!(PixelFormat::Rgb.layout(), PixelLayout::Packed);
5818        assert_eq!(PixelFormat::Rgba.layout(), PixelLayout::Packed);
5819        assert_eq!(PixelFormat::PlanarRgb.layout(), PixelLayout::Planar);
5820        assert_eq!(PixelFormat::Nv12.layout(), PixelLayout::SemiPlanar);
5821    }
5822
    /// Integration test that exercises the PBO-to-PBO convert path.
    /// Uses ImageProcessor::create_image() to allocate PBO-backed tensors,
    /// then converts between them. Skipped when GL is unavailable or the
    /// backend is not PBO (e.g. DMA-buf systems).
    #[cfg(target_os = "linux")]
    #[cfg(feature = "opengl")]
    #[test]
    fn test_convert_pbo_to_pbo() {
        let mut converter = ImageProcessor::new().unwrap();

        // Skip if GL is not available or backend is not PBO
        let is_pbo = converter
            .opengl
            .as_ref()
            .is_some_and(|gl| gl.transfer_backend() == opengl_headless::TransferBackend::Pbo);
        if !is_pbo {
            eprintln!("Skipping test_convert_pbo_to_pbo: backend is not PBO");
            return;
        }

        // 2:1 downscale in both dimensions (640x480 -> 320x240).
        let src_w = 640;
        let src_h = 480;
        let dst_w = 320;
        let dst_h = 240;

        // Create PBO-backed source image
        let pbo_src = converter
            .create_image(src_w, src_h, PixelFormat::Rgba, DType::U8, None)
            .unwrap();
        assert_eq!(
            pbo_src.as_u8().unwrap().memory(),
            TensorMemory::Pbo,
            "create_image should produce a PBO tensor"
        );

        // Fill source PBO with test pattern: load JPEG then convert Mem→PBO
        let file = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../testdata/zidane.jpg"
        ))
        .to_vec();
        let jpeg_src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();

        // Resize JPEG into a Mem temp of the right size, then copy into PBO
        let mem_src = TensorDyn::image(
            src_w,
            src_h,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Mem),
        )
        .unwrap();
        // convert_img takes the tensors by value and returns them; the
        // original jpeg_src is not needed again after the resize.
        let (result, _jpeg_src, mem_src) = convert_img(
            &mut CPUProcessor::new(),
            jpeg_src,
            mem_src,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        // Copy pixel data into the PBO source by mapping it.
        // Both tensors are src_w x src_h RGBA/U8, so the mapped byte
        // lengths match (copy_from_slice would panic otherwise).
        {
            let src_data = mem_src.as_u8().unwrap().map().unwrap();
            let mut pbo_map = pbo_src.as_u8().unwrap().map().unwrap();
            pbo_map.copy_from_slice(&src_data);
        }

        // Create PBO-backed destination image
        let pbo_dst = converter
            .create_image(dst_w, dst_h, PixelFormat::Rgba, DType::U8, None)
            .unwrap();
        assert_eq!(pbo_dst.as_u8().unwrap().memory(), TensorMemory::Pbo);

        // Convert PBO→PBO (this exercises convert_pbo_to_pbo)
        let mut pbo_dst = pbo_dst;
        let result = converter.convert(
            &pbo_src,
            &mut pbo_dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        // Verify: compare with CPU-only conversion of the same input
        let cpu_dst = TensorDyn::image(
            dst_w,
            dst_h,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Mem),
        )
        .unwrap();
        let (result, _mem_src, cpu_dst) = convert_img(
            &mut CPUProcessor::new(),
            mem_src,
            cpu_dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        // Rewrap the PBO result as a TensorDyn with the RGBA format
        // re-attached (set_format after into_u8) before comparing.
        let pbo_dst_img = {
            let mut __t = pbo_dst.into_u8().unwrap();
            __t.set_format(PixelFormat::Rgba).unwrap();
            TensorDyn::from(__t)
        };
        // 0.95 = minimum similarity score (metric defined by compare_images).
        compare_images(&pbo_dst_img, &cpu_dst, 0.95, function!());
        log::info!("test_convert_pbo_to_pbo: PASS — PBO-to-PBO convert matches CPU reference");
    }
5936
5937    #[test]
5938    fn test_image_bgra() {
5939        let img = TensorDyn::image(
5940            640,
5941            480,
5942            PixelFormat::Bgra,
5943            DType::U8,
5944            Some(edgefirst_tensor::TensorMemory::Mem),
5945        )
5946        .unwrap();
5947        assert_eq!(img.width(), Some(640));
5948        assert_eq!(img.height(), Some(480));
5949        assert_eq!(img.format().unwrap().channels(), 4);
5950        assert_eq!(img.format().unwrap(), PixelFormat::Bgra);
5951    }
5952
5953    // ========================================================================
5954    // Tests for EDGEFIRST_FORCE_BACKEND env var
5955    // ========================================================================
5956
5957    #[test]
5958    fn test_force_backend_cpu() {
5959        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
5960        unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", "cpu") };
5961        let result = ImageProcessor::new();
5962        match original {
5963            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
5964            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
5965        }
5966        let converter = result.unwrap();
5967        assert!(converter.cpu.is_some());
5968        assert_eq!(converter.forced_backend, Some(ForcedBackend::Cpu));
5969    }
5970
5971    #[test]
5972    fn test_force_backend_invalid() {
5973        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
5974        unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", "invalid") };
5975        let result = ImageProcessor::new();
5976        match original {
5977            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
5978            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
5979        }
5980        assert!(
5981            matches!(&result, Err(Error::ForcedBackendUnavailable(s)) if s.contains("unknown")),
5982            "invalid backend value should return ForcedBackendUnavailable error: {result:?}"
5983        );
5984    }
5985
5986    #[test]
5987    fn test_force_backend_unset() {
5988        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
5989        unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") };
5990        let result = ImageProcessor::new();
5991        match original {
5992            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
5993            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
5994        }
5995        let converter = result.unwrap();
5996        assert!(converter.forced_backend.is_none());
5997    }
5998
5999    // ========================================================================
6000    // Tests for hybrid mask path error handling
6001    // ========================================================================
6002
6003    #[test]
6004    fn test_draw_proto_masks_no_cpu_returns_error() {
6005        // Disable CPU backend to trigger the error path
6006        let original_cpu = std::env::var("EDGEFIRST_DISABLE_CPU").ok();
6007        unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", "1") };
6008        let original_gl = std::env::var("EDGEFIRST_DISABLE_GL").ok();
6009        unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", "1") };
6010        let original_g2d = std::env::var("EDGEFIRST_DISABLE_G2D").ok();
6011        unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", "1") };
6012
6013        let result = ImageProcessor::new();
6014
6015        match original_cpu {
6016            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", s) },
6017            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_CPU") },
6018        }
6019        match original_gl {
6020            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", s) },
6021            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_GL") },
6022        }
6023        match original_g2d {
6024            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", s) },
6025            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_G2D") },
6026        }
6027
6028        let mut converter = result.unwrap();
6029        assert!(converter.cpu.is_none(), "CPU should be disabled");
6030
6031        let dst = TensorDyn::image(
6032            640,
6033            480,
6034            PixelFormat::Rgba,
6035            DType::U8,
6036            Some(TensorMemory::Mem),
6037        )
6038        .unwrap();
6039        let mut dst_dyn = dst;
6040        let det = [DetectBox {
6041            bbox: edgefirst_decoder::BoundingBox {
6042                xmin: 0.1,
6043                ymin: 0.1,
6044                xmax: 0.5,
6045                ymax: 0.5,
6046            },
6047            score: 0.9,
6048            label: 0,
6049        }];
6050        let proto_data = ProtoData {
6051            mask_coefficients: vec![vec![0.5; 4]],
6052            protos: edgefirst_decoder::ProtoTensor::Float(ndarray::Array3::<f32>::zeros((8, 8, 4))),
6053        };
6054        let result =
6055            converter.draw_proto_masks(&mut dst_dyn, &det, &proto_data, Default::default());
6056        assert!(
6057            matches!(&result, Err(Error::Internal(s)) if s.contains("CPU backend")),
6058            "draw_proto_masks without CPU should return Internal error: {result:?}"
6059        );
6060    }
6061
6062    #[test]
6063    fn test_draw_proto_masks_cpu_fallback_works() {
6064        // Force CPU-only backend to ensure the CPU fallback path executes
6065        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
6066        unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", "cpu") };
6067        let result = ImageProcessor::new();
6068        match original {
6069            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
6070            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6071        }
6072
6073        let mut converter = result.unwrap();
6074        assert!(converter.cpu.is_some());
6075
6076        let dst = TensorDyn::image(
6077            64,
6078            64,
6079            PixelFormat::Rgba,
6080            DType::U8,
6081            Some(TensorMemory::Mem),
6082        )
6083        .unwrap();
6084        let mut dst_dyn = dst;
6085        let det = [DetectBox {
6086            bbox: edgefirst_decoder::BoundingBox {
6087                xmin: 0.1,
6088                ymin: 0.1,
6089                xmax: 0.5,
6090                ymax: 0.5,
6091            },
6092            score: 0.9,
6093            label: 0,
6094        }];
6095        let proto_data = ProtoData {
6096            mask_coefficients: vec![vec![0.5; 4]],
6097            protos: edgefirst_decoder::ProtoTensor::Float(ndarray::Array3::<f32>::zeros((8, 8, 4))),
6098        };
6099        let result =
6100            converter.draw_proto_masks(&mut dst_dyn, &det, &proto_data, Default::default());
6101        assert!(result.is_ok(), "CPU fallback path should work: {result:?}");
6102    }
6103
6104    // ============================================================
6105    // draw_decoded_masks / draw_proto_masks — 4-scenario pixel-
6106    // verified tests. Exercises each backend against the full
6107    // output-contract matrix:
6108    //
6109    //   | detections | background | expected dst             |
6110    //   |------------|------------|--------------------------|
6111    //   | empty      | none       | fully cleared (0x00)     |
6112    //   | empty      | set        | fully equal to bg        |
6113    //   | set        | none       | cleared outside box +    |
6114    //   |            |            | mask-coloured inside     |
6115    //   | set        | set        | bg outside box + mask    |
6116    //   |            |            | blended inside           |
6117    //
6118    // Every test pre-fills dst with a non-zero "dirty" pattern so
6119    // that any silent `return Ok(())` leaks the pattern into the
6120    // asserted output and fails loudly.
6121    // ============================================================
6122
6123    /// Run `body` with `EDGEFIRST_FORCE_BACKEND` temporarily set (or
6124    /// removed), restoring the prior value afterward. Tests are mutated
6125    /// env-serialized via the process-wide `FORCE_BACKEND_MUTEX`.
6126    fn with_force_backend<R>(value: Option<&str>, body: impl FnOnce() -> R) -> R {
6127        use std::sync::{Mutex, MutexGuard, OnceLock};
6128        static LOCK: OnceLock<Mutex<()>> = OnceLock::new();
6129        let _guard: MutexGuard<()> = LOCK
6130            .get_or_init(|| Mutex::new(()))
6131            .lock()
6132            .unwrap_or_else(|e| e.into_inner());
6133        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
6134        match value {
6135            Some(v) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", v) },
6136            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6137        }
6138        let r = body();
6139        match original {
6140            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
6141            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6142        }
6143        r
6144    }
6145
6146    /// Allocate an RGBA image tensor and pre-fill every byte with a
6147    /// distinctive non-zero pattern. Any test that relies on the old
6148    /// "dst is already cleared" assumption will see this pattern leak
6149    /// through to the output and fail.
6150    fn make_dirty_dst(w: usize, h: usize, mem: Option<TensorMemory>) -> TensorDyn {
6151        let dst = TensorDyn::image(w, h, PixelFormat::Rgba, DType::U8, mem).unwrap();
6152        {
6153            use edgefirst_tensor::TensorMapTrait;
6154            let u8t = dst.as_u8().unwrap();
6155            let mut map = u8t.map().unwrap();
6156            for (i, b) in map.as_mut_slice().iter_mut().enumerate() {
6157                *b = 0xA0u8.wrapping_add((i as u8) & 0x3F);
6158            }
6159        }
6160        dst
6161    }
6162
6163    /// Allocate an RGBA background filled with a constant colour.
6164    fn make_bg(w: usize, h: usize, mem: Option<TensorMemory>, rgba: [u8; 4]) -> TensorDyn {
6165        let bg = TensorDyn::image(w, h, PixelFormat::Rgba, DType::U8, mem).unwrap();
6166        {
6167            use edgefirst_tensor::TensorMapTrait;
6168            let u8t = bg.as_u8().unwrap();
6169            let mut map = u8t.map().unwrap();
6170            for chunk in map.as_mut_slice().chunks_exact_mut(4) {
6171                chunk.copy_from_slice(&rgba);
6172            }
6173        }
6174        bg
6175    }
6176
6177    fn pixel_at(dst: &TensorDyn, x: usize, y: usize) -> [u8; 4] {
6178        use edgefirst_tensor::TensorMapTrait;
6179        let w = dst.width().unwrap();
6180        let off = (y * w + x) * 4;
6181        let u8t = dst.as_u8().unwrap();
6182        let map = u8t.map().unwrap();
6183        let s = map.as_slice();
6184        [s[off], s[off + 1], s[off + 2], s[off + 3]]
6185    }
6186
6187    fn assert_every_pixel_eq(dst: &TensorDyn, expected: [u8; 4], case: &str) {
6188        use edgefirst_tensor::TensorMapTrait;
6189        let u8t = dst.as_u8().unwrap();
6190        let map = u8t.map().unwrap();
6191        for (i, chunk) in map.as_slice().chunks_exact(4).enumerate() {
6192            assert_eq!(
6193                chunk, &expected,
6194                "{case}: pixel idx {i} = {chunk:?}, expected {expected:?}"
6195            );
6196        }
6197    }
6198
6199    /// Scenario 1: empty detections, empty segmentation, no background
6200    /// → dst must be fully cleared to 0x00000000.
6201    fn scenario_empty_no_bg(processor: &mut ImageProcessor, case: &str) {
6202        let mut dst = make_dirty_dst(64, 64, None);
6203        processor
6204            .draw_decoded_masks(&mut dst, &[], &[], MaskOverlay::default())
6205            .unwrap_or_else(|e| panic!("{case}/decoded_masks empty+no-bg failed: {e:?}"));
6206        assert_every_pixel_eq(&dst, [0, 0, 0, 0], &format!("{case}/decoded"));
6207
6208        let mut dst = make_dirty_dst(64, 64, None);
6209        let proto = ProtoData {
6210            mask_coefficients: vec![],
6211            protos: edgefirst_decoder::ProtoTensor::Float(ndarray::Array3::<f32>::zeros((8, 8, 4))),
6212        };
6213        processor
6214            .draw_proto_masks(&mut dst, &[], &proto, MaskOverlay::default())
6215            .unwrap_or_else(|e| panic!("{case}/proto_masks empty+no-bg failed: {e:?}"));
6216        assert_every_pixel_eq(&dst, [0, 0, 0, 0], &format!("{case}/proto"));
6217    }
6218
6219    /// Scenario 2: empty detections, empty segmentation, background set
6220    /// → dst must be fully equal to bg.
6221    fn scenario_empty_with_bg(processor: &mut ImageProcessor, case: &str) {
6222        let bg_color = [42, 99, 200, 255];
6223        let bg = make_bg(64, 64, None, bg_color);
6224        let overlay = MaskOverlay::new().with_background(&bg);
6225
6226        let mut dst = make_dirty_dst(64, 64, None);
6227        processor
6228            .draw_decoded_masks(&mut dst, &[], &[], overlay)
6229            .unwrap_or_else(|e| panic!("{case}/decoded_masks empty+bg failed: {e:?}"));
6230        assert_every_pixel_eq(&dst, bg_color, &format!("{case}/decoded bg blit"));
6231
6232        let mut dst = make_dirty_dst(64, 64, None);
6233        let proto = ProtoData {
6234            mask_coefficients: vec![],
6235            protos: edgefirst_decoder::ProtoTensor::Float(ndarray::Array3::<f32>::zeros((8, 8, 4))),
6236        };
6237        processor
6238            .draw_proto_masks(&mut dst, &[], &proto, overlay)
6239            .unwrap_or_else(|e| panic!("{case}/proto_masks empty+bg failed: {e:?}"));
6240        assert_every_pixel_eq(&dst, bg_color, &format!("{case}/proto bg blit"));
6241    }
6242
    /// Scenario 3: one detection with a fully-opaque segmentation fill,
    /// no background → outside the box dst must be 0x00, inside it must
    /// be a non-zero mask colour (the render_segmentation output).
    fn scenario_detect_no_bg(processor: &mut ImageProcessor, case: &str) {
        use edgefirst_decoder::Segmentation;
        use ndarray::Array3;
        // Single class painted with a distinctive opaque colour.
        processor
            .set_class_colors(&[[200, 80, 40, 255]])
            .expect("set_class_colors");

        // Box covering the central half of the 64x64 frame
        // (coords in [0,1] — presumably normalized; matches other tests).
        let detect = DetectBox {
            bbox: [0.25, 0.25, 0.75, 0.75].into(),
            score: 0.99,
            label: 0,
        };
        // 4x4 single-channel mask, fully opaque (255) everywhere, mapped
        // onto the same region as the box.
        let seg_arr = Array3::from_shape_fn((4, 4, 1), |_| 255u8);
        let seg = Segmentation {
            segmentation: seg_arr,
            xmin: 0.25,
            ymin: 0.25,
            xmax: 0.75,
            ymax: 0.75,
        };

        let mut dst = make_dirty_dst(64, 64, None);
        processor
            .draw_decoded_masks(&mut dst, &[detect], &[seg], MaskOverlay::default())
            .unwrap_or_else(|e| panic!("{case}/decoded_masks detect+no-bg failed: {e:?}"));

        // Outside the bbox (corner): must be cleared black.
        let corner = pixel_at(&dst, 2, 2);
        assert_eq!(
            corner,
            [0, 0, 0, 0],
            "{case}/decoded: corner (2,2) leaked dirty pattern: {corner:?}"
        );
        // Inside the bbox (center): the mask colour must be visible.
        // Any non-zero pixel is acceptable — exact rendering varies
        // between backends (GL smoothstep, CPU nearest).
        let center = pixel_at(&dst, 32, 32);
        assert!(
            center != [0, 0, 0, 0],
            "{case}/decoded: center (32,32) was not coloured: {center:?}"
        );
    }
6288
    /// Scenario 4: detection + background. Outside the box must match
    /// bg; inside the box must NOT match bg (mask blended on top).
    fn scenario_detect_with_bg(processor: &mut ImageProcessor, case: &str) {
        use edgefirst_decoder::Segmentation;
        use ndarray::Array3;
        processor
            .set_class_colors(&[[200, 80, 40, 255]])
            .expect("set_class_colors");
        // Dark, fully-opaque background distinct from the class colour.
        let bg_color = [10, 20, 30, 255];
        let bg = make_bg(64, 64, None, bg_color);

        // Box covering the central half of the frame (coords in [0,1]).
        let detect = DetectBox {
            bbox: [0.25, 0.25, 0.75, 0.75].into(),
            score: 0.99,
            label: 0,
        };
        // 4x4 fully-opaque mask over the same region as the box.
        let seg_arr = Array3::from_shape_fn((4, 4, 1), |_| 255u8);
        let seg = Segmentation {
            segmentation: seg_arr,
            xmin: 0.25,
            ymin: 0.25,
            xmax: 0.75,
            ymax: 0.75,
        };

        let overlay = MaskOverlay::new().with_background(&bg);
        let mut dst = make_dirty_dst(64, 64, None);
        processor
            .draw_decoded_masks(&mut dst, &[detect], &[seg], overlay)
            .unwrap_or_else(|e| panic!("{case}/decoded_masks detect+bg failed: {e:?}"));

        // Outside the bbox (corner): bg colour.
        let corner = pixel_at(&dst, 2, 2);
        assert_eq!(
            corner, bg_color,
            "{case}/decoded: corner (2,2) should show bg {bg_color:?} got {corner:?}"
        );
        // Inside the bbox (center): mask blended on bg, must differ from
        // pure bg (alpha-blend with mask colour produces a distinct shade).
        let center = pixel_at(&dst, 32, 32);
        assert!(
            center != bg_color,
            "{case}/decoded: center (32,32) should differ from bg {bg_color:?}, got {center:?}"
        );
    }
6334
6335    /// Run all 4 scenarios against the processor. Skip gracefully if
6336    /// construction fails (backend unavailable on this host).
6337    fn run_all_scenarios(
6338        force_backend: Option<&'static str>,
6339        case: &'static str,
6340        require_dma_for_bg: bool,
6341    ) {
6342        if require_dma_for_bg && !edgefirst_tensor::is_dma_available() {
6343            eprintln!("SKIPPED: {case} — DMA not available on this host");
6344            return;
6345        }
6346        let processor_result = with_force_backend(force_backend, ImageProcessor::new);
6347        let mut processor = match processor_result {
6348            Ok(p) => p,
6349            Err(e) => {
6350                eprintln!("SKIPPED: {case} — backend init failed: {e:?}");
6351                return;
6352            }
6353        };
6354        scenario_empty_no_bg(&mut processor, case);
6355        scenario_empty_with_bg(&mut processor, case);
6356        scenario_detect_no_bg(&mut processor, case);
6357        scenario_detect_with_bg(&mut processor, case);
6358    }
6359
    #[test]
    fn test_draw_masks_4_scenarios_cpu() {
        // Forced CPU backend; DMA is not required for any scenario.
        run_all_scenarios(Some("cpu"), "cpu", false);
    }
6364
    #[test]
    fn test_draw_masks_4_scenarios_auto() {
        // No forced backend: exercises whatever auto-selection picks here.
        run_all_scenarios(None, "auto", false);
    }
6369
    // OpenGL path requires Linux and the "opengl" feature;
    // run_all_scenarios skips gracefully if GL init fails on this host.
    #[cfg(target_os = "linux")]
    #[cfg(feature = "opengl")]
    #[test]
    fn test_draw_masks_4_scenarios_opengl() {
        run_all_scenarios(Some("opengl"), "opengl", false);
    }
6376
    /// G2D forced backend: exercises the zero-detection empty-frame
    /// paths via `g2d_clear` and `g2d_blit`. Scenarios 3 and 4 (with
    /// detections) expect `NotImplemented` since G2D has no rasterizer
    /// for boxes / masks.
    #[cfg(target_os = "linux")]
    #[test]
    fn test_draw_masks_zero_detection_g2d_forced() {
        if !edgefirst_tensor::is_dma_available() {
            eprintln!("SKIPPED: g2d forced — DMA not available on this host");
            return;
        }
        let processor_result = with_force_backend(Some("g2d"), ImageProcessor::new);
        let mut processor = match processor_result {
            Ok(p) => p,
            Err(e) => {
                eprintln!("SKIPPED: g2d forced — init failed: {e:?}");
                return;
            }
        };

        // Case 1: empty + no bg. G2D requires DMA-backed dst.
        let mut dst = TensorDyn::image(
            64,
            64,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Dma),
        )
        .unwrap();
        // Pre-fill with a non-zero byte so a skipped clear is detected.
        {
            use edgefirst_tensor::TensorMapTrait;
            let u8t = dst.as_u8_mut().unwrap();
            let mut map = u8t.map().unwrap();
            map.as_mut_slice().fill(0xBB);
        }
        processor
            .draw_decoded_masks(&mut dst, &[], &[], MaskOverlay::default())
            .expect("g2d empty+no-bg");
        assert_every_pixel_eq(&dst, [0, 0, 0, 0], "g2d/case1 cleared");

        // Case 2: empty + bg. Both surfaces DMA-backed for g2d_blit.
        let bg_color = [7, 11, 13, 255];
        let bg = {
            let t = TensorDyn::image(
                64,
                64,
                PixelFormat::Rgba,
                DType::U8,
                Some(TensorMemory::Dma),
            )
            .unwrap();
            {
                use edgefirst_tensor::TensorMapTrait;
                let u8t = t.as_u8().unwrap();
                let mut map = u8t.map().unwrap();
                for chunk in map.as_mut_slice().chunks_exact_mut(4) {
                    chunk.copy_from_slice(&bg_color);
                }
            }
            t
        };
        let mut dst = TensorDyn::image(
            64,
            64,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Dma),
        )
        .unwrap();
        // Different dirty byte (0x55) so case 2 cannot pass by accident
        // if the blit silently no-ops.
        {
            use edgefirst_tensor::TensorMapTrait;
            let u8t = dst.as_u8_mut().unwrap();
            let mut map = u8t.map().unwrap();
            map.as_mut_slice().fill(0x55);
        }
        processor
            .draw_decoded_masks(&mut dst, &[], &[], MaskOverlay::new().with_background(&bg))
            .expect("g2d empty+bg");
        assert_every_pixel_eq(&dst, bg_color, "g2d/case2 bg blit");

        // Case 3 and 4: detect present — must return NotImplemented.
        let detect = DetectBox {
            bbox: [0.25, 0.25, 0.75, 0.75].into(),
            score: 0.9,
            label: 0,
        };
        let mut dst = TensorDyn::image(
            64,
            64,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Dma),
        )
        .unwrap();
        let err = processor
            .draw_decoded_masks(&mut dst, &[detect], &[], MaskOverlay::default())
            .expect_err("g2d must reject detect-present draw_decoded_masks");
        assert!(
            matches!(err, Error::NotImplemented(_)),
            "g2d case3 wrong error: {err:?}"
        );
    }
6479
6480    #[test]
6481    fn test_set_format_then_cpu_convert() {
6482        // Force CPU backend (save/restore to avoid leaking into other tests)
6483        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
6484        unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", "cpu") };
6485        let mut processor = ImageProcessor::new().unwrap();
6486        match original {
6487            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
6488            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6489        }
6490
6491        // Load a source image
6492        let image = include_bytes!(concat!(
6493            env!("CARGO_MANIFEST_DIR"),
6494            "/../../testdata/zidane.jpg"
6495        ));
6496        let src = load_image(image, Some(PixelFormat::Rgba), None).unwrap();
6497
6498        // Create a raw tensor, then attach format — simulating the from_fd workflow
6499        let mut dst =
6500            TensorDyn::new(&[640, 640, 3], DType::U8, Some(TensorMemory::Mem), None).unwrap();
6501        dst.set_format(PixelFormat::Rgb).unwrap();
6502
6503        // Convert should work with the set_format-annotated tensor
6504        processor
6505            .convert(&src, &mut dst, Rotation::None, Flip::None, Crop::default())
6506            .unwrap();
6507
6508        // Verify format survived conversion
6509        assert_eq!(dst.format(), Some(PixelFormat::Rgb));
6510        assert_eq!(dst.width(), Some(640));
6511        assert_eq!(dst.height(), Some(640));
6512    }
6513
6514    /// Verify that creating multiple ImageProcessors on the same thread and
6515    /// performing a resize on each does not deadlock or error.
6516    ///
6517    /// Uses automatic memory allocation (DMA → PBO → Mem fallback) so that
6518    /// hardware backends (OpenGL, G2D) are exercised on capable targets.
6519    #[test]
6520    fn test_multiple_image_processors_same_thread() {
6521        let mut processors: Vec<ImageProcessor> = (0..4)
6522            .map(|_| ImageProcessor::new().expect("ImageProcessor::new() failed"))
6523            .collect();
6524
6525        for proc in &mut processors {
6526            let src = proc
6527                .create_image(128, 128, PixelFormat::Rgb, DType::U8, None)
6528                .expect("create src failed");
6529            let mut dst = proc
6530                .create_image(64, 64, PixelFormat::Rgb, DType::U8, None)
6531                .expect("create dst failed");
6532            proc.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::default())
6533                .expect("convert failed");
6534            assert_eq!(dst.width(), Some(64));
6535            assert_eq!(dst.height(), Some(64));
6536        }
6537    }
6538
6539    /// Verify that creating ImageProcessors on separate threads and performing
6540    /// a resize on each does not deadlock or error.
6541    ///
6542    /// Uses automatic memory allocation (DMA → PBO → Mem fallback) so that
6543    /// hardware backends (OpenGL, G2D) are exercised on capable targets.
6544    /// A 60-second timeout prevents CI from hanging on deadlock regressions.
6545    #[test]
6546    fn test_multiple_image_processors_separate_threads() {
6547        use std::sync::mpsc;
6548        use std::time::Duration;
6549
6550        const TIMEOUT: Duration = Duration::from_secs(60);
6551
6552        let (tx, rx) = mpsc::channel::<()>();
6553
6554        std::thread::spawn(move || {
6555            let handles: Vec<_> = (0..4)
6556                .map(|i| {
6557                    std::thread::spawn(move || {
6558                        let mut proc = ImageProcessor::new().unwrap_or_else(|e| {
6559                            panic!("ImageProcessor::new() failed on thread {i}: {e}")
6560                        });
6561                        let src = proc
6562                            .create_image(128, 128, PixelFormat::Rgb, DType::U8, None)
6563                            .unwrap_or_else(|e| panic!("create src failed on thread {i}: {e}"));
6564                        let mut dst = proc
6565                            .create_image(64, 64, PixelFormat::Rgb, DType::U8, None)
6566                            .unwrap_or_else(|e| panic!("create dst failed on thread {i}: {e}"));
6567                        proc.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::default())
6568                            .unwrap_or_else(|e| panic!("convert failed on thread {i}: {e}"));
6569                        assert_eq!(dst.width(), Some(64));
6570                        assert_eq!(dst.height(), Some(64));
6571                    })
6572                })
6573                .collect();
6574
6575            for (i, h) in handles.into_iter().enumerate() {
6576                h.join()
6577                    .unwrap_or_else(|e| panic!("thread {i} panicked: {e:?}"));
6578            }
6579
6580            let _ = tx.send(());
6581        });
6582
6583        rx.recv_timeout(TIMEOUT).unwrap_or_else(|_| {
6584            panic!("test_multiple_image_processors_separate_threads timed out after {TIMEOUT:?}")
6585        });
6586    }
6587
6588    /// Verify that 4 fully-initialized ImageProcessors on separate threads can
6589    /// all operate concurrently without deadlocking each other.
6590    ///
6591    /// All processors are created first, then a barrier synchronizes them so
6592    /// they all start converting at the same instant — maximizing contention.
6593    /// A 60-second timeout prevents CI from hanging on deadlock regressions.
6594    #[test]
6595    fn test_image_processors_concurrent_operations() {
6596        use std::sync::{mpsc, Arc, Barrier};
6597        use std::time::Duration;
6598
6599        const N: usize = 4;
6600        const ROUNDS: usize = 10;
6601        const TIMEOUT: Duration = Duration::from_secs(60);
6602
6603        let (tx, rx) = mpsc::channel::<()>();
6604
6605        std::thread::spawn(move || {
6606            let barrier = Arc::new(Barrier::new(N));
6607
6608            let handles: Vec<_> = (0..N)
6609                .map(|i| {
6610                    let barrier = Arc::clone(&barrier);
6611                    std::thread::spawn(move || {
6612                        let mut proc = ImageProcessor::new().unwrap_or_else(|e| {
6613                            panic!("ImageProcessor::new() failed on thread {i}: {e}")
6614                        });
6615
6616                        // All threads wait here until every processor is initialized.
6617                        barrier.wait();
6618
6619                        // Now all 4 hammer the GPU concurrently.
6620                        for round in 0..ROUNDS {
6621                            let src = proc
6622                                .create_image(128, 128, PixelFormat::Rgb, DType::U8, None)
6623                                .unwrap_or_else(|e| {
6624                                    panic!("create src failed on thread {i} round {round}: {e}")
6625                                });
6626                            let mut dst = proc
6627                                .create_image(64, 64, PixelFormat::Rgb, DType::U8, None)
6628                                .unwrap_or_else(|e| {
6629                                    panic!("create dst failed on thread {i} round {round}: {e}")
6630                                });
6631                            proc.convert(
6632                                &src,
6633                                &mut dst,
6634                                Rotation::None,
6635                                Flip::None,
6636                                Crop::default(),
6637                            )
6638                            .unwrap_or_else(|e| {
6639                                panic!("convert failed on thread {i} round {round}: {e}")
6640                            });
6641                            assert_eq!(dst.width(), Some(64));
6642                            assert_eq!(dst.height(), Some(64));
6643                        }
6644                    })
6645                })
6646                .collect();
6647
6648            for (i, h) in handles.into_iter().enumerate() {
6649                h.join()
6650                    .unwrap_or_else(|e| panic!("thread {i} panicked: {e:?}"));
6651            }
6652
6653            let _ = tx.send(());
6654        });
6655
6656        rx.recv_timeout(TIMEOUT).unwrap_or_else(|_| {
6657            panic!("test_image_processors_concurrent_operations timed out after {TIMEOUT:?}")
6658        });
6659    }
6660}