Skip to main content

edgefirst_image/
lib.rs

1// SPDX-FileCopyrightText: Copyright 2025 Au-Zone Technologies
2// SPDX-License-Identifier: Apache-2.0
3
4/*!
5
6## EdgeFirst HAL - Image Converter
7
8The `edgefirst_image` crate is part of the EdgeFirst Hardware Abstraction
9Layer (HAL) and provides functionality for converting images between
10different formats and sizes.  The crate is designed to work with hardware
11acceleration when available, but also provides a CPU-based fallback for
12environments where hardware acceleration is not present or not suitable.
13
14The main features of the `edgefirst_image` crate include:
15- Support for various image formats, including YUYV, RGB, RGBA, and GREY.
16- Support for source crop, destination crop, rotation, and flipping.
17- Image conversion using hardware acceleration (G2D, OpenGL) when available.
18- CPU-based image conversion as a fallback option.
19
20The crate uses [`TensorDyn`] from `edgefirst_tensor` to represent images,
21with [`PixelFormat`] metadata describing the pixel layout. The
22[`ImageProcessor`] struct manages the conversion process, selecting
23the appropriate conversion method based on the available hardware.
24
25## Examples
26
27```rust
28# use edgefirst_image::{ImageProcessor, Rotation, Flip, Crop, ImageProcessorTrait, load_image};
29# use edgefirst_tensor::{PixelFormat, DType, TensorDyn};
30# fn main() -> Result<(), edgefirst_image::Error> {
31let image = include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/../../testdata/zidane.jpg"));
32let src = load_image(image, Some(PixelFormat::Rgba), None)?;
33let mut converter = ImageProcessor::new()?;
34let mut dst = converter.create_image(640, 480, PixelFormat::Rgb, DType::U8, None)?;
35converter.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::default())?;
36# Ok(())
37# }
38```
39
40## Environment Variables
41The behavior of the `edgefirst_image::ImageProcessor` struct can be influenced by the
42following environment variables:
43- `EDGEFIRST_FORCE_BACKEND`: When set to `cpu`, `g2d`, or `opengl` (case-insensitive),
44  only that single backend is initialized and no fallback chain is used. If the
45  forced backend fails to initialize, an error is returned immediately. This is
46  useful for benchmarking individual backends in isolation. When this variable is
47  set, the `EDGEFIRST_DISABLE_*` variables are ignored.
48- `EDGEFIRST_DISABLE_GL`: If set to `1`, disables the use of OpenGL for image
49  conversion, forcing the use of CPU or other available hardware methods.
50- `EDGEFIRST_DISABLE_G2D`: If set to `1`, disables the use of G2D for image
51  conversion, forcing the use of CPU or other available hardware methods.
52- `EDGEFIRST_DISABLE_CPU`: If set to `1`, disables the use of CPU for image
53  conversion, forcing the use of hardware acceleration methods. If no hardware
54  acceleration methods are available, an error will be returned when attempting
55  to create an `ImageProcessor`.
56
Additionally, the `TensorMemory` used for default allocations can be controlled with the
`EDGEFIRST_TENSOR_FORCE_MEM` environment variable. If set to `1`, default tensor
allocations use plain system memory. This disables the use of specialized memory regions
for tensors — and therefore hardware acceleration — but improves the performance of the
CPU converter.
*/
61*/
62#![cfg_attr(coverage_nightly, feature(coverage_attribute))]
63
/// Pitch alignment requirement, in bytes, for DMA-BUF tensors that may be
/// imported as EGLImages by the GL backend. Mali Valhall (i.MX 95 / G310)
/// rejects `eglCreateImageKHR` with `EGL_BAD_ALLOC` for any DMA-BUF whose
/// row pitch is not a multiple of 64 bytes; Vivante GC7000UL (i.MX 8MP)
/// accepts any pitch so the constant is harmless on that path. 64 is the
/// smallest alignment that satisfies every embedded ARM GPU we ship to.
///
/// Note this is a *byte* alignment: the corresponding pixel-count alignment
/// depends on the format's bytes-per-pixel (see [`primary_plane_bpp`]).
///
/// Applied automatically inside [`ImageProcessor::create_image`] when the
/// allocation lands on `TensorMemory::Dma`. External callers that allocate
/// their own DMA-BUF tensors (e.g. GStreamer plugins, video pipelines) can
/// use [`align_width_for_gpu_pitch`] to compute a width whose resulting row
/// stride satisfies this requirement.
pub const GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES: usize = 64;
77
78/// Round `width` (in pixels) up so the resulting row stride
79/// `width * bpp` is a multiple of [`GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES`]
80/// AND a multiple of `bpp` (so the rounded width is an integer pixel count).
81///
82/// `bpp` must be the per-pixel byte count for the image's primary plane
83/// (e.g. 4 for RGBA8/BGRA8, 3 for RGB888, 1 for Grey/NV12-luma).
84///
85/// External callers — GStreamer plugins, video pipelines, anyone wrapping a
86/// foreign DMA-BUF — should call this when sizing the destination so that
87/// `eglCreateImageKHR` doesn't reject the import on Mali. Pre-aligned widths
88/// (640, 1280, 1920, 3008, 3840 …) round-trip unchanged; misaligned widths
89/// are bumped up to the next valid value.
90///
91/// # Overflow behaviour
92///
93/// All arithmetic is checked. If the alignment computation or the rounded
94/// width would overflow `usize`, the function logs a warning and returns the
95/// original `width` unchanged rather than wrapping or producing a smaller
96/// value. Callers can rely on the returned width being **at least** the
97/// requested width.
98///
99/// `bpp == 0` and `width == 0` short-circuit to return the input unchanged.
100///
101/// # Examples
102///
103/// ```
104/// use edgefirst_image::align_width_for_gpu_pitch;
105///
106/// // RGBA8 (bpp=4): width must round to a multiple of 16 pixels (64-byte stride).
107/// assert_eq!(align_width_for_gpu_pitch(1920, 4), 1920); // already aligned
108/// assert_eq!(align_width_for_gpu_pitch(3004, 4), 3008); // crowd.png case: +4 px
109/// assert_eq!(align_width_for_gpu_pitch(1281, 4), 1296); // +15 px
110///
111/// // RGB888 (bpp=3): width must round to a multiple of 64 pixels (192-byte stride).
112/// assert_eq!(align_width_for_gpu_pitch(640, 3), 640);
113/// assert_eq!(align_width_for_gpu_pitch(641, 3), 704);
114/// ```
115pub fn align_width_for_gpu_pitch(width: usize, bpp: usize) -> usize {
116    if bpp == 0 || width == 0 {
117        return width;
118    }
119
120    // The minimum aligned stride must be a common multiple of both the
121    // GPU's pitch alignment and the per-pixel byte count. Using the LCM
122    // guarantees the rounded stride is an integer multiple of `bpp`, so
123    // converting back to a pixel count is exact.
124    //
125    // Compute the alignment in pixels (`width_alignment`) so we never need
126    // to multiply `width * bpp`, which is the only operation that could
127    // realistically overflow for large caller-supplied widths.
128    let Some(lcm_alignment) = checked_num_integer_lcm(GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES, bpp)
129    else {
130        log::warn!(
131            "align_width_for_gpu_pitch: lcm({GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES}, {bpp}) \
132             overflows usize, returning unaligned width {width}"
133        );
134        return width;
135    };
136    if lcm_alignment == 0 {
137        return width;
138    }
139
140    debug_assert_eq!(lcm_alignment % bpp, 0);
141    let width_alignment = lcm_alignment / bpp;
142    if width_alignment == 0 {
143        return width;
144    }
145
146    let remainder = width % width_alignment;
147    if remainder == 0 {
148        return width;
149    }
150
151    let pad = width_alignment - remainder;
152    match width.checked_add(pad) {
153        Some(aligned) => aligned,
154        None => {
155            log::warn!(
156                "align_width_for_gpu_pitch: width {width} + pad {pad} overflows usize, \
157                 returning unaligned (caller should use a smaller width or pre-aligned size)"
158            );
159            width
160        }
161    }
162}
163
164/// Round `min_pitch_bytes` up to the next multiple of
165/// [`GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES`]. Returns `None` if the rounded
166/// value would overflow `usize`. Returns `Some(0)` for input 0.
167///
168/// Used internally by [`ImageProcessor::create_image`] to compute the
169/// padded row stride for DMA-backed image allocations. External callers
170/// that need pixel-counted alignment (instead of raw byte pitch) should
171/// use [`align_width_for_gpu_pitch`] instead.
172#[cfg(target_os = "linux")]
173pub(crate) fn align_pitch_bytes_to_gpu_alignment(min_pitch_bytes: usize) -> Option<usize> {
174    let alignment = GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES;
175    if min_pitch_bytes == 0 {
176        return Some(0);
177    }
178    let remainder = min_pitch_bytes % alignment;
179    if remainder == 0 {
180        return Some(min_pitch_bytes);
181    }
182    min_pitch_bytes.checked_add(alignment - remainder)
183}
184
/// Overflow-safe least common multiple. Returns `None` when `(a / gcd) * b`
/// would wrap, and `Some(0)` when either argument is 0.
fn checked_num_integer_lcm(a: usize, b: usize) -> Option<usize> {
    if a == 0 || b == 0 {
        return Some(0);
    }
    let divisor = num_integer_gcd(a, b);
    // `a / divisor` is exact (the gcd divides `a` by definition) and no
    // larger than `a`, so only the multiplication can overflow.
    (a / divisor).checked_mul(b)
}

/// Euclid's algorithm, iteratively. `gcd(x, 0) == gcd(0, x) == x`.
fn num_integer_gcd(mut a: usize, mut b: usize) -> usize {
    while b != 0 {
        (a, b) = (b, a % b);
    }
    a
}
204
205/// Bytes-per-pixel for the primary plane of `format` at element size `elem`.
206/// Returns `None` for formats that don't have a single packed BPP (semi-planar
207/// chroma is handled separately, returning the luma-plane bpp).
208///
209/// External callers can use this together with [`align_width_for_gpu_pitch`]
210/// to size their own DMA-BUFs without having to remember per-format BPPs:
211///
212/// ```
213/// use edgefirst_image::{align_width_for_gpu_pitch, primary_plane_bpp};
214/// use edgefirst_tensor::PixelFormat;
215///
216/// let bpp = primary_plane_bpp(PixelFormat::Rgba, 1).unwrap();
217/// let aligned = align_width_for_gpu_pitch(3004, bpp);
218/// assert_eq!(aligned, 3008);
219/// ```
220pub fn primary_plane_bpp(format: PixelFormat, elem: usize) -> Option<usize> {
221    use edgefirst_tensor::PixelLayout;
222    match format.layout() {
223        PixelLayout::Packed => Some(format.channels() * elem),
224        PixelLayout::Planar => Some(elem),
225        // For NV12/NV16 the luma plane is single-channel so the pitch
226        // matches `elem`; the chroma plane uses the same pitch in bytes
227        // (UV is half-width but two interleaved channels = same pitch).
228        PixelLayout::SemiPlanar => Some(elem),
229        // `PixelLayout` is non-exhaustive — fall through unaligned for
230        // any future variant we don't yet recognise.
231        _ => None,
232    }
233}
234
235/// Return the GPU-aligned pitch in bytes when a DMA-backed image of
236/// `width × fmt` would need row-stride padding, or `None` when the
237/// natural pitch already satisfies `GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES`
238/// or the caller has explicitly requested non-DMA memory.
239///
240/// Mali G310 (i.MX 95) rejects `eglCreateImage` from DMA-BUFs whose
241/// `PLANE0_PITCH_EXT` is not a multiple of 64 bytes, surfacing as
242/// `EGL_BAD_ALLOC`. Decoders like [`load_jpeg`]/[`load_png`] use this
243/// helper to decide whether to route through the two-buffer padded
244/// decode path.
245#[cfg(target_os = "linux")]
246pub(crate) fn padded_dma_pitch_for(
247    fmt: PixelFormat,
248    width: usize,
249    memory: &Option<TensorMemory>,
250) -> Option<usize> {
251    // Only pad when the caller explicitly requested DMA, or when they
252    // left memory selection to the allocator AND DMA is actually
253    // available. `Tensor::image_with_stride(..., None)` always routes
254    // through DMA allocation, so treating `None` as "DMA wanted"
255    // unconditionally would convert a normally-working image load into
256    // a hard failure on systems where DMA is unavailable (sandboxed
257    // CI, missing `/dev/dma_heap`, permission-denied containers) —
258    // whereas `Tensor::image(..., None)` would have fallen back to
259    // SHM/Mem there.
260    match memory {
261        Some(TensorMemory::Dma) => {}
262        None if edgefirst_tensor::is_dma_available() => {}
263        _ => return None,
264    }
265    // Padding only applies to packed layouts — `Tensor::image_with_stride`
266    // rejects semi-planar / planar formats, and those take their own
267    // per-plane pitches on import anyway.
268    if fmt.layout() != PixelLayout::Packed {
269        return None;
270    }
271    let bpp = primary_plane_bpp(fmt, 1)?;
272    let natural = width.checked_mul(bpp)?;
273    let aligned = align_pitch_bytes_to_gpu_alignment(natural)?;
274    if aligned > natural {
275        Some(aligned)
276    } else {
277        None
278    }
279}
280
/// Row-copy a tightly-packed `src` tensor into a `dst` tensor that has a
/// larger row stride (typically a DMA-BUF allocated with GPU-aligned pitch).
///
/// Both tensors must share the same width, height and pixel format. The
/// bytes between the end of each source row and the next destination row
/// are left untouched — EGL import doesn't read past the row's valid
/// width, so the padding can remain whatever the allocator produced.
///
/// NOTE(review): `src`'s own row stride is never checked — the size check
/// below only verifies `src` holds at least `width*bpp*height` bytes, so a
/// `src` with its own padding would be copied incorrectly. Confirm all
/// callers pass tightly-packed staging tensors.
///
/// # Errors
/// - [`Error::NotAnImage`] when either tensor lacks image metadata.
/// - [`Error::Internal`] on metadata mismatch, arithmetic overflow, missing
///   destination stride, or undersized buffers.
/// - [`Error::NotSupported`] when the format has no known packed bpp.
#[cfg(target_os = "linux")]
pub(crate) fn copy_packed_to_padded_dma(src: &Tensor<u8>, dst: &mut Tensor<u8>) -> Result<()> {
    // Pull image metadata from both tensors; anything missing means the
    // tensor isn't an image at all.
    let width = dst.width().ok_or(Error::NotAnImage)?;
    let height = dst.height().ok_or(Error::NotAnImage)?;
    let fmt = dst.format().ok_or(Error::NotAnImage)?;
    let src_width = src.width().ok_or(Error::NotAnImage)?;
    let src_height = src.height().ok_or(Error::NotAnImage)?;
    let src_fmt = src.format().ok_or(Error::NotAnImage)?;
    // The copy is row-for-row, so dimensions and format must agree exactly.
    if src_width != width || src_height != height || src_fmt != fmt {
        return Err(Error::Internal(format!(
            "copy_packed_to_padded_dma: src and dst image metadata must match \
             (src: {src_width}x{src_height} {src_fmt:?}, dst: {width}x{height} {fmt:?})"
        )));
    }
    let bpp = primary_plane_bpp(fmt, 1).ok_or_else(|| {
        Error::NotSupported(format!(
            "copy_packed_to_padded_dma: unknown bpp for {fmt:?}"
        ))
    })?;
    // `natural` is the tightly-packed row length in bytes; checked so a
    // pathological width can't wrap the bounds checks below.
    let natural = width.checked_mul(bpp).ok_or_else(|| {
        Error::Internal(format!(
            "copy_packed_to_padded_dma: width {width} × bpp {bpp} overflows"
        ))
    })?;
    let dst_stride = dst.effective_row_stride().ok_or_else(|| {
        Error::Internal("copy_packed_to_padded_dma: dst has no effective row stride".into())
    })?;

    // `TensorMap` derefs to `[T]`, which gives us the slice without
    // needing to import the `TensorMapTrait` at this call site.
    let src_map = src.map()?;
    let src_bytes: &[u8] = &src_map;
    let mut dst_map = dst.map()?;
    let dst_bytes: &mut [u8] = &mut dst_map;

    // Validate both buffers up front so the copy loop below can index
    // without panicking. `saturating_mul` keeps the comparison safe even
    // for absurd heights (a saturated requirement simply can't be met).
    if src_bytes.len() < natural.saturating_mul(height) {
        return Err(Error::Internal(format!(
            "copy_packed_to_padded_dma: src has {} bytes, need {} ({}x{} @ {} bpp)",
            src_bytes.len(),
            natural.saturating_mul(height),
            width,
            height,
            bpp,
        )));
    }
    if dst_bytes.len() < dst_stride.saturating_mul(height) {
        return Err(Error::Internal(format!(
            "copy_packed_to_padded_dma: dst has {} bytes, need {} ({} stride × {} rows)",
            dst_bytes.len(),
            dst_stride.saturating_mul(height),
            dst_stride,
            height,
        )));
    }

    // Copy `natural` bytes per row from the packed source into the wider
    // destination rows; trailing padding bytes in each dst row stay as-is.
    for row in 0..height {
        let s = row * natural;
        let d = row * dst_stride;
        dst_bytes[d..d + natural].copy_from_slice(&src_bytes[s..s + natural]);
    }
    Ok(())
}
350
351#[cfg(test)]
352use edgefirst_decoder::ProtoLayout;
353use edgefirst_decoder::{DetectBox, ProtoData, Segmentation};
354use edgefirst_tensor::{
355    DType, PixelFormat, PixelLayout, Tensor, TensorDyn, TensorMemory, TensorTrait as _,
356};
357use enum_dispatch::enum_dispatch;
358use std::{fmt::Display, time::Instant};
359use zune_jpeg::{
360    zune_core::{bytestream::ZCursor, colorspace::ColorSpace, options::DecoderOptions},
361    JpegDecoder,
362};
363use zune_png::PngDecoder;
364
365pub use cpu::CPUProcessor;
366pub use error::{Error, Result};
367#[cfg(target_os = "linux")]
368pub use g2d::G2DProcessor;
369#[cfg(target_os = "linux")]
370#[cfg(feature = "opengl")]
371pub use opengl_headless::GLProcessorThreaded;
372#[cfg(target_os = "linux")]
373#[cfg(feature = "opengl")]
374pub use opengl_headless::Int8InterpolationMode;
375#[cfg(target_os = "linux")]
376#[cfg(feature = "opengl")]
377pub use opengl_headless::{probe_egl_displays, EglDisplayInfo, EglDisplayKind};
378
379mod cpu;
380mod error;
381mod g2d;
382#[path = "gl/mod.rs"]
383mod opengl_headless;
384
385// Use `edgefirst_tensor::PixelFormat` variants (Rgb, Rgba, Grey, etc.) and
386// `TensorDyn` / `Tensor<u8>` with `.format()` metadata instead.
387
388/// Flips the image data, then rotates it. Returns a new `TensorDyn`.
389fn rotate_flip_to_dyn(
390    src: &Tensor<u8>,
391    src_fmt: PixelFormat,
392    rotation: Rotation,
393    flip: Flip,
394    memory: Option<TensorMemory>,
395) -> Result<TensorDyn, Error> {
396    let src_w = src.width().unwrap();
397    let src_h = src.height().unwrap();
398    let channels = src_fmt.channels();
399
400    let (dst_w, dst_h) = match rotation {
401        Rotation::None | Rotation::Rotate180 => (src_w, src_h),
402        Rotation::Clockwise90 | Rotation::CounterClockwise90 => (src_h, src_w),
403    };
404
405    // Rotate/flip into Mem staging then row-copy into padded DMA when the
406    // caller wants DMA and the destination width would produce an
407    // unaligned pitch (see [`padded_dma_pitch_for`]).
408    #[cfg(target_os = "linux")]
409    if let Some(aligned_pitch) = padded_dma_pitch_for(src_fmt, dst_w, &memory) {
410        let tmp = Tensor::<u8>::image(dst_w, dst_h, src_fmt, Some(TensorMemory::Mem))?;
411        let src_map = src.map()?;
412        let mut tmp_map = tmp.map()?;
413        CPUProcessor::flip_rotate_ndarray_pf(
414            &src_map,
415            &mut tmp_map,
416            dst_w,
417            dst_h,
418            channels,
419            rotation,
420            flip,
421        )?;
422        drop(tmp_map);
423        drop(src_map);
424        let mut dma = Tensor::<u8>::image_with_stride(
425            dst_w,
426            dst_h,
427            src_fmt,
428            aligned_pitch,
429            Some(TensorMemory::Dma),
430        )?;
431        copy_packed_to_padded_dma(&tmp, &mut dma)?;
432        return Ok(TensorDyn::from(dma));
433    }
434
435    let dst = Tensor::<u8>::image(dst_w, dst_h, src_fmt, memory)?;
436    let src_map = src.map()?;
437    let mut dst_map = dst.map()?;
438
439    CPUProcessor::flip_rotate_ndarray_pf(
440        &src_map,
441        &mut dst_map,
442        dst_w,
443        dst_h,
444        channels,
445        rotation,
446        flip,
447    )?;
448    drop(dst_map);
449    drop(src_map);
450
451    Ok(TensorDyn::from(dst))
452}
453
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Rotation {
    None = 0,
    Clockwise90 = 1,
    Rotate180 = 2,
    CounterClockwise90 = 3,
}
impl Rotation {
    /// Creates a Rotation enum from an angle in degrees. The angle must be a
    /// multiple of 90.
    ///
    /// # Panics
    /// Panics if the angle is not a multiple of 90.
    ///
    /// # Examples
    /// ```rust
    /// # use edgefirst_image::Rotation;
    /// let rotation = Rotation::from_degrees_clockwise(270);
    /// assert_eq!(rotation, Rotation::CounterClockwise90);
    /// ```
    pub fn from_degrees_clockwise(angle: usize) -> Rotation {
        // `%` on usize is already Euclidean, so plain remainder suffices.
        let degrees = angle % 360;
        assert!(degrees % 90 == 0, "rotation angle is not a multiple of 90");
        match degrees / 90 {
            0 => Rotation::None,
            1 => Rotation::Clockwise90,
            2 => Rotation::Rotate180,
            // Only 0..=3 are possible after the modulo above.
            _ => Rotation::CounterClockwise90,
        }
    }
}
484
/// Mirror transform applied during conversion; per
/// [`ImageProcessorTrait::convert`], the image is cropped first, then
/// flipped, then rotated.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Flip {
    /// No mirroring.
    None = 0,
    // NOTE(review): presumed top↔bottom mirror — the actual axis convention
    // lives in `CPUProcessor::flip_rotate_ndarray_pf`; confirm there.
    Vertical = 1,
    // NOTE(review): presumed left↔right mirror — confirm against
    // `CPUProcessor::flip_rotate_ndarray_pf`.
    Horizontal = 2,
}
491
/// Selects which value indexes the color palette for each detected object.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum ColorMode {
    /// Color is chosen by object class label (`det.label`). Default.
    ///
    /// Preserves backward compatibility and is correct for semantic
    /// segmentation where colors carry class meaning.
    #[default]
    Class,
    /// Color is chosen by instance order (loop index, zero-based).
    ///
    /// Each detected object gets a unique color regardless of class,
    /// useful for instance segmentation.
    Instance,
    /// Color is chosen by track ID (future use; currently behaves like
    /// [`Instance`](Self::Instance)).
    Track,
}

impl ColorMode {
    /// Palette index for a detection: the class `label` in
    /// [`Class`](Self::Class) mode, the loop `idx` otherwise.
    #[inline]
    pub fn index(self, idx: usize, label: usize) -> usize {
        if self == ColorMode::Class {
            label
        } else {
            idx
        }
    }
}
521
/// Controls the resolution and coordinate frame of masks produced by
/// [`ImageProcessor::materialize_masks`]. Defaults to
/// [`Proto`](Self::Proto).
///
/// - [`Proto`](Self::Proto) returns per-detection tiles at proto-plane
///   resolution (e.g. 48×32 u8 for a typical COCO bbox on a 160×160 proto
///   plane). This is the historical behavior of `materialize_masks` and the
///   fastest path because no upsample runs inside HAL. Mask values are
///   continuous sigmoid output quantized to `uint8 [0, 255]`.
/// - [`Scaled`](Self::Scaled) returns per-detection tiles at caller-specified
///   pixel resolution by upsampling the full proto plane once and cropping by
///   bbox after sigmoid. The upsample uses bilinear interpolation with
///   edge-clamp sampling — semantically equivalent to Ultralytics'
///   `process_masks_retina` reference. When a `letterbox` is also passed to
///   [`materialize_masks`], the inverse letterbox transform is applied during
///   the upsample so mask pixels land in original-content coordinates
///   (drop-in for overlay on the original image). Mask values are binary
///   `uint8 {0, 255}` after thresholding sigmoid > 0.5 — interchangeable
///   with `Proto` output via the same `> 127` test.
///
/// [`materialize_masks`]: ImageProcessor::materialize_masks
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum MaskResolution {
    /// Per-detection tile at proto-plane resolution (default).
    #[default]
    Proto,
    /// Per-detection tile at `(width, height)` pixel resolution in the
    /// coordinate frame determined by the `letterbox` parameter of
    /// [`ImageProcessor::materialize_masks`].
    Scaled {
        /// Target pixel width of the output coordinate frame.
        width: u32,
        /// Target pixel height of the output coordinate frame.
        height: u32,
    },
}
557
/// Options for mask overlay rendering.
///
/// Controls how segmentation masks are composited onto the destination image:
/// - `background`: when set, the background image is drawn first and masks
///   are composited over it (result written to `dst`). When `None`, `dst` is
///   cleared to `0x00000000` (fully transparent) before masks are drawn.
///   **`dst` is always fully overwritten — its prior contents are never
///   preserved.** Callers who used to pre-load an image into `dst` before
///   calling `draw_decoded_masks` / `draw_proto_masks` must now supply that
///   image via `background` instead (behaviour changed in v0.16.4).
/// - `opacity`: scales the alpha of rendered mask colors. `1.0` (default)
///   preserves the class color's alpha unchanged; `0.5` makes masks
///   semi-transparent.
/// - `color_mode`: controls whether colors are assigned by class label,
///   instance index, or track ID. Defaults to [`ColorMode::Class`].
#[derive(Debug, Clone, Copy)]
pub struct MaskOverlay<'a> {
    /// Compositing source image. Must have the same dimensions and pixel
    /// format as `dst`. When `Some`, the output is `background + masks`.
    /// When `None`, `dst` is cleared to `0x00000000` before masks are drawn.
    pub background: Option<&'a TensorDyn>,
    /// Alpha scale for rendered mask colors; `1.0` keeps the palette
    /// color's alpha unchanged. [`with_opacity`](Self::with_opacity) clamps
    /// this to `[0.0, 1.0]`.
    pub opacity: f32,
    /// Normalized letterbox region `[xmin, ymin, xmax, ymax]` in model-input
    /// space that contains actual image content (the rest is padding).
    ///
    /// When set, bounding boxes and mask coordinates from the decoder (which
    /// are in model-input normalized space) are mapped back to the original
    /// image coordinate space before rendering.
    ///
    /// Use [`with_letterbox_crop`](Self::with_letterbox_crop) to compute this
    /// from the [`Crop`] that was used in the model input [`convert`](crate::ImageProcessorTrait::convert) call.
    pub letterbox: Option<[f32; 4]>,
    /// Palette-index assignment strategy; defaults to [`ColorMode::Class`].
    pub color_mode: ColorMode,
}
592
593impl Default for MaskOverlay<'_> {
594    fn default() -> Self {
595        Self {
596            background: None,
597            opacity: 1.0,
598            letterbox: None,
599            color_mode: ColorMode::Class,
600        }
601    }
602}
603
604impl<'a> MaskOverlay<'a> {
605    pub fn new() -> Self {
606        Self::default()
607    }
608
609    /// Set the compositing source image.
610    ///
611    /// `bg` must have the same dimensions and pixel format as the `dst` passed
612    /// to [`draw_decoded_masks`](crate::ImageProcessorTrait::draw_decoded_masks) /
613    /// [`draw_proto_masks`](crate::ImageProcessorTrait::draw_proto_masks).
614    /// The output will be `bg + masks`. Without a background, `dst` is cleared
615    /// to `0x00000000`.
616    pub fn with_background(mut self, bg: &'a TensorDyn) -> Self {
617        self.background = Some(bg);
618        self
619    }
620
621    pub fn with_opacity(mut self, opacity: f32) -> Self {
622        self.opacity = opacity.clamp(0.0, 1.0);
623        self
624    }
625
626    pub fn with_color_mode(mut self, mode: ColorMode) -> Self {
627        self.color_mode = mode;
628        self
629    }
630
631    /// Set the letterbox transform from the [`Crop`] used when preparing the
632    /// model input, so that bounding boxes and masks are correctly mapped back
633    /// to the original image coordinate space during rendering.
634    ///
635    /// Pass the same `crop` that was given to
636    /// [`convert`](crate::ImageProcessorTrait::convert) along with the model
637    /// input dimensions (`model_w` × `model_h`).
638    ///
639    /// Has no effect when `crop.dst_rect` is `None` (no letterbox applied).
640    pub fn with_letterbox_crop(mut self, crop: &Crop, model_w: usize, model_h: usize) -> Self {
641        if let Some(r) = crop.dst_rect {
642            self.letterbox = Some([
643                r.left as f32 / model_w as f32,
644                r.top as f32 / model_h as f32,
645                (r.left + r.width) as f32 / model_w as f32,
646                (r.top + r.height) as f32 / model_h as f32,
647            ]);
648        }
649        self
650    }
651}
652
653/// Apply the inverse letterbox transform to a bounding box.
654///
655/// `letterbox` is `[lx0, ly0, lx1, ly1]` — the normalized region of the model
656/// input that contains actual image content (output of
657/// [`MaskOverlay::with_letterbox_crop`]).
658///
659/// Converts model-input-normalized coords to output-image-normalized coords,
660/// clamped to `[0.0, 1.0]`. Also canonicalises the bbox (ensures xmin ≤ xmax).
661#[inline]
662fn unletter_bbox(bbox: DetectBox, lb: [f32; 4]) -> DetectBox {
663    let b = bbox.bbox.to_canonical();
664    let [lx0, ly0, lx1, ly1] = lb;
665    let inv_w = if lx1 > lx0 { 1.0 / (lx1 - lx0) } else { 1.0 };
666    let inv_h = if ly1 > ly0 { 1.0 / (ly1 - ly0) } else { 1.0 };
667    DetectBox {
668        bbox: edgefirst_decoder::BoundingBox {
669            xmin: ((b.xmin - lx0) * inv_w).clamp(0.0, 1.0),
670            ymin: ((b.ymin - ly0) * inv_h).clamp(0.0, 1.0),
671            xmax: ((b.xmax - lx0) * inv_w).clamp(0.0, 1.0),
672            ymax: ((b.ymax - ly0) * inv_h).clamp(0.0, 1.0),
673        },
674        ..bbox
675    }
676}
677
678#[derive(Debug, Clone, Copy, PartialEq, Eq)]
679pub struct Crop {
680    pub src_rect: Option<Rect>,
681    pub dst_rect: Option<Rect>,
682    pub dst_color: Option<[u8; 4]>,
683}
684
685impl Default for Crop {
686    fn default() -> Self {
687        Crop::new()
688    }
689}
690impl Crop {
691    // Creates a new Crop with default values (no cropping).
692    pub fn new() -> Self {
693        Crop {
694            src_rect: None,
695            dst_rect: None,
696            dst_color: None,
697        }
698    }
699
700    // Sets the source rectangle for cropping.
701    pub fn with_src_rect(mut self, src_rect: Option<Rect>) -> Self {
702        self.src_rect = src_rect;
703        self
704    }
705
706    // Sets the destination rectangle for cropping.
707    pub fn with_dst_rect(mut self, dst_rect: Option<Rect>) -> Self {
708        self.dst_rect = dst_rect;
709        self
710    }
711
712    // Sets the destination color for areas outside the cropped region.
713    pub fn with_dst_color(mut self, dst_color: Option<[u8; 4]>) -> Self {
714        self.dst_color = dst_color;
715        self
716    }
717
718    // Creates a new Crop with no cropping.
719    pub fn no_crop() -> Self {
720        Crop::new()
721    }
722
723    /// Validate crop rectangles against explicit dimensions.
724    pub(crate) fn check_crop_dims(
725        &self,
726        src_w: usize,
727        src_h: usize,
728        dst_w: usize,
729        dst_h: usize,
730    ) -> Result<(), Error> {
731        let src_ok = self
732            .src_rect
733            .is_none_or(|r| r.left + r.width <= src_w && r.top + r.height <= src_h);
734        let dst_ok = self
735            .dst_rect
736            .is_none_or(|r| r.left + r.width <= dst_w && r.top + r.height <= dst_h);
737        match (src_ok, dst_ok) {
738            (true, true) => Ok(()),
739            (true, false) => Err(Error::CropInvalid(format!(
740                "Dest crop invalid: {:?}",
741                self.dst_rect
742            ))),
743            (false, true) => Err(Error::CropInvalid(format!(
744                "Src crop invalid: {:?}",
745                self.src_rect
746            ))),
747            (false, false) => Err(Error::CropInvalid(format!(
748                "Dest and Src crop invalid: {:?} {:?}",
749                self.dst_rect, self.src_rect
750            ))),
751        }
752    }
753
754    /// Validate crop rectangles against TensorDyn source and destination.
755    pub fn check_crop_dyn(
756        &self,
757        src: &edgefirst_tensor::TensorDyn,
758        dst: &edgefirst_tensor::TensorDyn,
759    ) -> Result<(), Error> {
760        self.check_crop_dims(
761            src.width().unwrap_or(0),
762            src.height().unwrap_or(0),
763            dst.width().unwrap_or(0),
764            dst.height().unwrap_or(0),
765        )
766    }
767}
768
769#[derive(Debug, Clone, Copy, PartialEq, Eq)]
770pub struct Rect {
771    pub left: usize,
772    pub top: usize,
773    pub width: usize,
774    pub height: usize,
775}
776
777impl Rect {
778    // Creates a new Rect with the specified left, top, width, and height.
779    pub fn new(left: usize, top: usize, width: usize, height: usize) -> Self {
780        Self {
781            left,
782            top,
783            width,
784            height,
785        }
786    }
787
788    // Checks if the rectangle is valid for the given TensorDyn image.
789    pub fn check_rect_dyn(&self, image: &TensorDyn) -> bool {
790        let w = image.width().unwrap_or(0);
791        let h = image.height().unwrap_or(0);
792        self.left + self.width <= w && self.top + self.height <= h
793    }
794}
795
#[enum_dispatch(ImageProcessor)]
pub trait ImageProcessorTrait {
    /// Converts the source image to the destination image format and size.
    /// The image is cropped first, then flipped, then rotated.
    ///
    /// # Arguments
    ///
    /// * `src` - The source image to convert from.
    /// * `dst` - The destination image to be converted to.
    /// * `rotation` - The rotation to apply to the destination image.
    /// * `flip` - The horizontal/vertical flip to apply to the image.
    /// * `crop` - Source/destination crop rectangles to apply during the
    ///   conversion (use `Crop::default()` for no cropping).
    ///
    /// # Returns
    ///
    /// A `Result` indicating success or failure of the conversion.
    fn convert(
        &mut self,
        src: &TensorDyn,
        dst: &mut TensorDyn,
        rotation: Rotation,
        flip: Flip,
        crop: Crop,
    ) -> Result<()>;

    /// Draw pre-decoded detection boxes and segmentation masks onto `dst`.
    ///
    /// Supports two segmentation modes based on the mask channel count:
    /// - **Instance segmentation** (`C=1`): one `Segmentation` per detection,
    ///   `segmentation` and `detect` are zipped.
    /// - **Semantic segmentation** (`C>1`): a single `Segmentation` covering
    ///   all classes; only the first element is used.
    ///
    /// # Format requirements
    ///
    /// - CPU backend: `dst` must be `RGBA` or `RGB`.
    /// - OpenGL backend: `dst` must be `RGBA`, `BGRA`, or `RGB`.
    /// - G2D backend: only produces the base frame (empty detections);
    ///   returns `NotImplemented` when any detection or segmentation is
    ///   supplied.
    ///
    /// # Output contract
    ///
    /// This function always fully writes `dst` — it never relies on the
    /// caller having pre-cleared the destination. The four cases are:
    ///
    /// | detections | background | output                              |
    /// |------------|------------|-------------------------------------|
    /// | none       | none       | dst cleared to `0x00000000`         |
    /// | none       | set        | dst ← background                    |
    /// | set        | none       | masks drawn over cleared dst        |
    /// | set        | set        | masks drawn over background         |
    ///
    /// Each backend implements this with its native primitives: G2D uses
    /// `g2d_clear` / `g2d_blit`, OpenGL uses `glClear` / DMA-BUF GPU blit
    /// plus the mask program, and CPU uses direct buffer fill / memcpy as
    /// the terminal fallback. CPU-memcpy of DMA buffers is avoided on the
    /// accelerated paths.
    ///
    /// An empty `segmentation` slice is valid — only bounding boxes are drawn.
    ///
    /// `overlay` controls compositing: `background` is the compositing source
    /// (must match `dst` in size and format); `opacity` scales mask alpha.
    ///
    /// # Buffer aliasing
    ///
    /// `dst` and `overlay.background` must reference **distinct underlying
    /// buffers**. An aliased pair returns [`Error::AliasedBuffers`] without
    /// dispatching to any backend — the GL path would otherwise read and
    /// write the same texture in a single draw, which is undefined behaviour
    /// on most drivers. Aliasing is detected via
    /// [`TensorDyn::aliases`](edgefirst_tensor::TensorDyn::aliases), which
    /// catches both shared-allocation clones and separate imports over the
    /// same dmabuf fd.
    ///
    /// # Migration from v0.16.3 and earlier
    ///
    /// Prior to v0.16.4 the call silently preserved `dst`'s contents on empty
    /// detections. That invariant no longer holds — `dst` is always fully
    /// written. Callers who pre-loaded an image into `dst` before calling this
    /// function must now pass that image via `overlay.background` instead.
    fn draw_decoded_masks(
        &mut self,
        dst: &mut TensorDyn,
        detect: &[DetectBox],
        segmentation: &[Segmentation],
        overlay: MaskOverlay<'_>,
    ) -> Result<()>;

    /// Draw masks from proto data onto image (fused decode+draw).
    ///
    /// For YOLO segmentation models, this avoids materializing intermediate
    /// `Array3<u8>` masks. The `ProtoData` contains mask coefficients and the
    /// prototype tensor; the renderer computes `mask_coeff @ protos` directly
    /// at the output resolution using bilinear sampling.
    ///
    /// `detect` and `proto_data.mask_coefficients` must have the same length
    /// (enforced by zip — excess entries are silently ignored). An empty
    /// `detect` slice is valid and produces the base frame — cleared or
    /// background-blitted — via the selected backend's native primitive.
    ///
    /// # Format requirements and output contract
    ///
    /// Same as [`draw_decoded_masks`](Self::draw_decoded_masks), including
    /// the "always fully writes dst" guarantee across all four
    /// detection/background combinations.
    ///
    /// `overlay` controls compositing — see [`draw_decoded_masks`](Self::draw_decoded_masks).
    fn draw_proto_masks(
        &mut self,
        dst: &mut TensorDyn,
        detect: &[DetectBox],
        proto_data: &ProtoData,
        overlay: MaskOverlay<'_>,
    ) -> Result<()>;

    /// Sets the colors used for rendering segmentation masks. Up to 20 colors
    /// can be set.
    fn set_class_colors(&mut self, colors: &[[u8; 4]]) -> Result<()>;
}
917
/// Configuration for [`ImageProcessor`] construction.
///
/// Use with [`ImageProcessor::with_config`] to override the default EGL
/// display auto-detection and backend selection. The default configuration
/// preserves the existing auto-detection behaviour.
#[derive(Debug, Clone, Default)]
pub struct ImageProcessorConfig {
    /// Force OpenGL to use this EGL display type instead of auto-detecting.
    ///
    /// When `None`, the processor probes displays in priority order: GBM,
    /// PlatformDevice, Default. Use [`probe_egl_displays`] to discover
    /// which displays are available on the current system.
    ///
    /// Ignored when `EDGEFIRST_DISABLE_GL=1` is set.
    #[cfg(target_os = "linux")]
    #[cfg(feature = "opengl")]
    pub egl_display: Option<EglDisplayKind>,

    /// Preferred compute backend.
    ///
    /// When set to a specific backend (not [`ComputeBackend::Auto`]), the
    /// processor skips the `EDGEFIRST_FORCE_BACKEND` and
    /// `EDGEFIRST_DISABLE_*` environment variables and initializes the
    /// requested backend plus CPU as a fallback. If the requested hardware
    /// backend fails to initialize, a warning is logged and the CPU backend
    /// handles all conversions — construction itself does not fail.
    ///
    /// - [`ComputeBackend::OpenGl`]: init OpenGL + CPU, skip G2D
    /// - [`ComputeBackend::G2d`]: init G2D + CPU, skip OpenGL
    /// - [`ComputeBackend::Cpu`]: init CPU only
    /// - [`ComputeBackend::Auto`]: existing env-var-driven selection
    pub backend: ComputeBackend,
}
949
/// Compute backend selection for [`ImageProcessor`].
///
/// Use with [`ImageProcessorConfig::backend`] to select which backend the
/// processor should prefer. When a specific backend is selected, the
/// processor initializes that backend plus CPU as a fallback (if the
/// hardware backend fails to initialize, a warning is logged and CPU
/// handles all work). When `Auto` is used, the existing
/// environment-variable-driven selection applies.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum ComputeBackend {
    /// Auto-detect based on available hardware and environment variables.
    #[default]
    Auto,
    /// CPU-only processing (no hardware acceleration).
    Cpu,
    /// Prefer G2D hardware blitter (+ CPU fallback).
    G2d,
    /// Prefer OpenGL ES (+ CPU fallback).
    OpenGl,
}
968
/// Backend forced via the `EDGEFIRST_FORCE_BACKEND` environment variable.
///
/// When set, the [`ImageProcessor`] only initializes and dispatches to the
/// selected backend — no fallback chain is used. Note that
/// [`ImageProcessorConfig::backend`] does *not* set this marker: every
/// config-driven selection path in `with_config` leaves `forced_backend`
/// as `None` and keeps CPU available as a fallback.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ForcedBackend {
    /// CPU-only processing.
    Cpu,
    /// G2D hardware blitter only.
    G2d,
    /// OpenGL ES only.
    OpenGl,
}
980
/// Image converter that uses available hardware acceleration or CPU as a
/// fallback.
///
/// Each backend field is `None` when that backend is disabled, unavailable
/// on the platform, or failed to initialize.
#[derive(Debug)]
pub struct ImageProcessor {
    /// CPU-based image converter as a fallback. This is `None` when the
    /// EDGEFIRST_DISABLE_CPU environment variable is set, or when
    /// EDGEFIRST_FORCE_BACKEND selects a non-CPU backend.
    pub cpu: Option<CPUProcessor>,

    #[cfg(target_os = "linux")]
    /// G2D-based image converter for Linux systems. Populated when G2D is
    /// selected or auto-detected, EDGEFIRST_DISABLE_G2D is not set, and
    /// libg2d.so is available.
    pub g2d: Option<G2DProcessor>,
    #[cfg(target_os = "linux")]
    #[cfg(feature = "opengl")]
    /// OpenGL-based image converter for Linux systems. Populated when OpenGL
    /// is selected or auto-detected, EDGEFIRST_DISABLE_GL is not set, and
    /// OpenGL ES is available.
    pub opengl: Option<GLProcessorThreaded>,

    /// When set, only the specified backend is used — no fallback chain.
    /// Set only by the EDGEFIRST_FORCE_BACKEND environment variable;
    /// config-driven backend selection leaves this `None`.
    pub(crate) forced_backend: Option<ForcedBackend>,
}
1004
// SAFETY(review): these impls assert that every backend handle held by
// ImageProcessor (CPUProcessor, G2DProcessor, GLProcessorThreaded) may be
// moved to and shared between threads. Nothing visible in this file
// demonstrates that the underlying FFI/driver handles are thread-safe —
// TODO confirm against each backend's implementation before relying on
// cross-thread use.
unsafe impl Send for ImageProcessor {}
unsafe impl Sync for ImageProcessor {}
1007
1008impl ImageProcessor {
1009    /// Creates a new `ImageProcessor` instance, initializing available
1010    /// hardware converters based on the system capabilities and environment
1011    /// variables.
1012    ///
1013    /// # Examples
1014    /// ```rust
1015    /// # use edgefirst_image::{ImageProcessor, Rotation, Flip, Crop, ImageProcessorTrait, load_image};
1016    /// # use edgefirst_tensor::{PixelFormat, DType, TensorDyn};
1017    /// # fn main() -> Result<(), edgefirst_image::Error> {
1018    /// let image = include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/../../testdata/zidane.jpg"));
1019    /// let src = load_image(image, Some(PixelFormat::Rgba), None)?;
1020    /// let mut converter = ImageProcessor::new()?;
1021    /// let mut dst = converter.create_image(640, 480, PixelFormat::Rgb, DType::U8, None)?;
1022    /// converter.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::default())?;
1023    /// # Ok(())
1024    /// # }
1025    /// ```
1026    pub fn new() -> Result<Self> {
1027        Self::with_config(ImageProcessorConfig::default())
1028    }
1029
    /// Creates a new `ImageProcessor` with the given configuration.
    ///
    /// When [`ImageProcessorConfig::backend`] is set to a specific backend,
    /// environment variables are ignored and the processor initializes the
    /// requested backend plus CPU as a fallback.
    ///
    /// When `Auto`, the existing `EDGEFIRST_FORCE_BACKEND` and
    /// `EDGEFIRST_DISABLE_*` environment variables apply.
    ///
    /// Note: if a config-requested hardware backend fails to initialize, a
    /// warning is logged and the CPU backend handles all conversions —
    /// unlike `EDGEFIRST_FORCE_BACKEND`, config-driven selection never
    /// returns an error for backend init failure and never sets the
    /// `forced_backend` marker.
    // `config` is unused on build configurations without the Linux/opengl
    // cfg combinations (only `config.egl_display` is read, in GL paths).
    #[allow(unused_variables)]
    pub fn with_config(config: ImageProcessorConfig) -> Result<Self> {
        // ── Config-driven backend selection ──────────────────────────
        // When the caller explicitly requests a backend via the config,
        // skip all environment variable logic.
        match config.backend {
            ComputeBackend::Cpu => {
                log::info!("ComputeBackend::Cpu — CPU only");
                return Ok(Self {
                    cpu: Some(CPUProcessor::new()),
                    #[cfg(target_os = "linux")]
                    g2d: None,
                    #[cfg(target_os = "linux")]
                    #[cfg(feature = "opengl")]
                    opengl: None,
                    forced_backend: None,
                });
            }
            ComputeBackend::G2d => {
                log::info!("ComputeBackend::G2d — G2D + CPU fallback");
                #[cfg(target_os = "linux")]
                {
                    // Best-effort init: on failure, warn and leave g2d None
                    // so the CPU fallback handles conversions.
                    let g2d = match G2DProcessor::new() {
                        Ok(g) => Some(g),
                        Err(e) => {
                            log::warn!("G2D requested but failed to initialize: {e:?}");
                            None
                        }
                    };
                    return Ok(Self {
                        cpu: Some(CPUProcessor::new()),
                        g2d,
                        #[cfg(feature = "opengl")]
                        opengl: None,
                        forced_backend: None,
                    });
                }
                #[cfg(not(target_os = "linux"))]
                {
                    log::warn!("G2D requested but not available on this platform, using CPU");
                    return Ok(Self {
                        cpu: Some(CPUProcessor::new()),
                        forced_backend: None,
                    });
                }
            }
            ComputeBackend::OpenGl => {
                log::info!("ComputeBackend::OpenGl — OpenGL + CPU fallback");
                #[cfg(target_os = "linux")]
                {
                    // Best-effort init: on failure, warn and leave opengl
                    // None so the CPU fallback handles conversions.
                    #[cfg(feature = "opengl")]
                    let opengl = match GLProcessorThreaded::new(config.egl_display) {
                        Ok(gl) => Some(gl),
                        Err(e) => {
                            log::warn!("OpenGL requested but failed to initialize: {e:?}");
                            None
                        }
                    };
                    return Ok(Self {
                        cpu: Some(CPUProcessor::new()),
                        g2d: None,
                        #[cfg(feature = "opengl")]
                        opengl,
                        forced_backend: None,
                    });
                }
                #[cfg(not(target_os = "linux"))]
                {
                    log::warn!("OpenGL requested but not available on this platform, using CPU");
                    return Ok(Self {
                        cpu: Some(CPUProcessor::new()),
                        forced_backend: None,
                    });
                }
            }
            ComputeBackend::Auto => { /* fall through to env-var logic below */ }
        }

        // ── EDGEFIRST_FORCE_BACKEND ──────────────────────────────────
        // When set, only the requested backend is initialised and no
        // fallback chain is used. Accepted values (case-insensitive):
        //   "cpu", "g2d", "opengl"
        // Unlike the config path above, a forced backend that fails to
        // initialize is a hard error (ForcedBackendUnavailable).
        if let Ok(val) = std::env::var("EDGEFIRST_FORCE_BACKEND") {
            let val_lower = val.to_lowercase();
            let forced = match val_lower.as_str() {
                "cpu" => ForcedBackend::Cpu,
                "g2d" => ForcedBackend::G2d,
                "opengl" => ForcedBackend::OpenGl,
                other => {
                    return Err(Error::ForcedBackendUnavailable(format!(
                        "unknown EDGEFIRST_FORCE_BACKEND value: {other:?} (expected cpu, g2d, or opengl)"
                    )));
                }
            };

            log::info!("EDGEFIRST_FORCE_BACKEND={val} — only initializing {val_lower} backend");

            return match forced {
                ForcedBackend::Cpu => Ok(Self {
                    cpu: Some(CPUProcessor::new()),
                    #[cfg(target_os = "linux")]
                    g2d: None,
                    #[cfg(target_os = "linux")]
                    #[cfg(feature = "opengl")]
                    opengl: None,
                    forced_backend: Some(ForcedBackend::Cpu),
                }),
                ForcedBackend::G2d => {
                    #[cfg(target_os = "linux")]
                    {
                        let g2d = G2DProcessor::new().map_err(|e| {
                            Error::ForcedBackendUnavailable(format!(
                                "g2d forced but failed to initialize: {e:?}"
                            ))
                        })?;
                        Ok(Self {
                            cpu: None,
                            g2d: Some(g2d),
                            #[cfg(feature = "opengl")]
                            opengl: None,
                            forced_backend: Some(ForcedBackend::G2d),
                        })
                    }
                    #[cfg(not(target_os = "linux"))]
                    {
                        Err(Error::ForcedBackendUnavailable(
                            "g2d backend is only available on Linux".into(),
                        ))
                    }
                }
                ForcedBackend::OpenGl => {
                    #[cfg(target_os = "linux")]
                    #[cfg(feature = "opengl")]
                    {
                        let opengl = GLProcessorThreaded::new(config.egl_display).map_err(|e| {
                            Error::ForcedBackendUnavailable(format!(
                                "opengl forced but failed to initialize: {e:?}"
                            ))
                        })?;
                        Ok(Self {
                            cpu: None,
                            g2d: None,
                            opengl: Some(opengl),
                            forced_backend: Some(ForcedBackend::OpenGl),
                        })
                    }
                    #[cfg(not(all(target_os = "linux", feature = "opengl")))]
                    {
                        Err(Error::ForcedBackendUnavailable(
                            "opengl backend requires Linux with the 'opengl' feature enabled"
                                .into(),
                        ))
                    }
                }
            };
        }

        // ── Existing DISABLE logic (unchanged) ──────────────────────
        // Any DISABLE value other than "0" or "false" (case-insensitive)
        // counts as set; an unset variable counts as not set.
        #[cfg(target_os = "linux")]
        let g2d = if std::env::var("EDGEFIRST_DISABLE_G2D")
            .map(|x| x != "0" && x.to_lowercase() != "false")
            .unwrap_or(false)
        {
            log::debug!("EDGEFIRST_DISABLE_G2D is set");
            None
        } else {
            match G2DProcessor::new() {
                Ok(g2d_converter) => Some(g2d_converter),
                Err(err) => {
                    log::warn!("Failed to initialize G2D converter: {err:?}");
                    None
                }
            }
        };

        #[cfg(target_os = "linux")]
        #[cfg(feature = "opengl")]
        let opengl = if std::env::var("EDGEFIRST_DISABLE_GL")
            .map(|x| x != "0" && x.to_lowercase() != "false")
            .unwrap_or(false)
        {
            log::debug!("EDGEFIRST_DISABLE_GL is set");
            None
        } else {
            match GLProcessorThreaded::new(config.egl_display) {
                Ok(gl_converter) => Some(gl_converter),
                Err(err) => {
                    log::warn!("Failed to initialize GL converter: {err:?}");
                    None
                }
            }
        };

        let cpu = if std::env::var("EDGEFIRST_DISABLE_CPU")
            .map(|x| x != "0" && x.to_lowercase() != "false")
            .unwrap_or(false)
        {
            log::debug!("EDGEFIRST_DISABLE_CPU is set");
            None
        } else {
            Some(CPUProcessor::new())
        };
        Ok(Self {
            cpu,
            #[cfg(target_os = "linux")]
            g2d,
            #[cfg(target_os = "linux")]
            #[cfg(feature = "opengl")]
            opengl,
            forced_backend: None,
        })
    }
1250
1251    /// Sets the interpolation mode for int8 proto textures on the OpenGL
1252    /// backend. No-op if OpenGL is not available.
1253    #[cfg(target_os = "linux")]
1254    #[cfg(feature = "opengl")]
1255    pub fn set_int8_interpolation_mode(&mut self, mode: Int8InterpolationMode) -> Result<()> {
1256        if let Some(ref mut gl) = self.opengl {
1257            gl.set_int8_interpolation_mode(mode)?;
1258        }
1259        Ok(())
1260    }
1261
    /// Create a [`TensorDyn`] image with the best available memory backend.
    ///
    /// Priority: DMA-buf → PBO (byte-sized types: u8, i8) → system memory.
    ///
    /// Use this method instead of [`TensorDyn::image()`] when the tensor will
    /// be used with [`ImageProcessor::convert()`]. It selects the optimal
    /// memory backing (including PBO for GPU zero-copy) which direct
    /// allocation cannot achieve.
    ///
    /// This method is on [`ImageProcessor`] rather than [`ImageProcessorTrait`]
    /// because optimal allocation requires knowledge of the active compute
    /// backends (e.g. the GL context handle for PBO allocation). Individual
    /// backend implementations ([`CPUProcessor`], etc.) do not have this
    /// cross-backend visibility.
    ///
    /// # Arguments
    ///
    /// * `width` - Image width in pixels
    /// * `height` - Image height in pixels
    /// * `format` - Pixel format
    /// * `dtype` - Element data type (e.g. `DType::U8`, `DType::I8`)
    /// * `memory` - Optional memory type override; when `None`, the best
    ///   available backend is selected automatically.
    ///
    /// # Returns
    ///
    /// A [`TensorDyn`] backed by the highest-performance memory type
    /// available on this system.
    ///
    /// # Pitch alignment for DMA-backed allocations
    ///
    /// DMA-BUF imports into the GL backend (Mali Valhall on i.MX 95
    /// specifically) require every row pitch to be a multiple of
    /// [`GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES`] (currently 64). When this
    /// method lands on `TensorMemory::Dma`, the underlying allocation is
    /// silently padded so the row stride satisfies that requirement.
    ///
    /// **The user-requested `width` is preserved** — `tensor.width()`
    /// returns the same value you passed in. The padding is carried by
    /// [`TensorDyn::row_stride`] / `effective_row_stride()`, which the
    /// GL backend reads when importing the buffer as an EGLImage.
    /// Callers that compute byte offsets from the tensor must use the
    /// stride, not `width × bytes_per_pixel`; the CPU mapping spans the
    /// full `stride × height` bytes.
    ///
    /// Pre-aligned widths (640, 1280, 1920, 3008, 3840 …) allocate
    /// exactly `width × bpp × height` bytes with no padding. PBO and
    /// Mem fallbacks never pad — they don't go through EGLImage import.
    ///
    /// See also [`align_width_for_gpu_pitch`] for an advisory helper
    /// that external callers (GStreamer plugins, video pipelines) can
    /// use to size their own DMA-BUFs for GL compatibility.
    ///
    /// # Errors
    ///
    /// Returns an error if all allocation strategies fail.
    pub fn create_image(
        &self,
        width: usize,
        height: usize,
        format: PixelFormat,
        dtype: DType,
        memory: Option<TensorMemory>,
    ) -> Result<TensorDyn> {
        // Compute the GPU-aligned row stride in bytes for this image.
        // `None` means either the format has no defined primary-plane bpp
        // (unknown future layout) or the stride calculation would overflow
        // — in both cases we fall back to the natural layout via the plain
        // `TensorDyn::image` constructor, and the slow-path warning inside
        // `draw_*_masks` will fire if the subsequent GL import fails.
        //
        // DMA allocation is Linux-only (see `TensorMemory::Dma` cfg gate),
        // so both the stride computation and the helper closure are gated
        // accordingly — the callers below are already Linux-only.
        #[cfg(target_os = "linux")]
        let dma_stride_bytes: Option<usize> = primary_plane_bpp(format, dtype.size())
            .and_then(|bpp| width.checked_mul(bpp))
            .and_then(align_pitch_bytes_to_gpu_alignment);

        // Helper: allocate a DMA image, using the padded-stride constructor
        // when the computed stride exceeds the natural pitch, otherwise the
        // plain constructor (byte-identical result in the common case).
        #[cfg(target_os = "linux")]
        let try_dma = || -> Result<TensorDyn> {
            // Stride padding is only meaningful for packed pixel layouts
            // (RGBA8, BGRA8, RGB888, Grey) — the formats the GL backend
            // renders into. Semi-planar (NV12, NV16) and planar (PlanarRgb,
            // PlanarRgba) tensors go through `TensorDyn::image(...)` with
            // their natural layout; they're imported from camera capture
            // via `from_fd` far more often than allocated here, and
            // `Tensor::image_with_stride` explicitly rejects them.
            let packed = format.layout() == edgefirst_tensor::PixelLayout::Packed;
            match dma_stride_bytes {
                Some(stride)
                    if packed
                        && primary_plane_bpp(format, dtype.size())
                            .and_then(|bpp| width.checked_mul(bpp))
                            .is_some_and(|natural| stride > natural) =>
                {
                    log::debug!(
                        "create_image: padding row stride for {format:?} {width}x{height} \
                         from natural pitch to {stride} bytes for GPU alignment"
                    );
                    Ok(TensorDyn::image_with_stride(
                        width,
                        height,
                        format,
                        dtype,
                        stride,
                        Some(edgefirst_tensor::TensorMemory::Dma),
                    )?)
                }
                _ => Ok(TensorDyn::image(
                    width,
                    height,
                    format,
                    dtype,
                    Some(edgefirst_tensor::TensorMemory::Dma),
                )?),
            }
        };

        // If an explicit memory type is requested, honour it directly.
        // On Linux, `TensorMemory::Dma` gets the padded-stride treatment;
        // other memory types take the user-requested width verbatim.
        match memory {
            #[cfg(target_os = "linux")]
            Some(TensorMemory::Dma) => {
                return try_dma();
            }
            // Non-DMA explicit request: natural layout, no stride padding.
            Some(mem) => {
                return Ok(TensorDyn::image(width, height, format, dtype, Some(mem))?);
            }
            None => {}
        }

        // Try DMA first on Linux — skip only when GL has explicitly selected PBO
        // as the preferred transfer path (PBO is better than DMA in that case).
        #[cfg(target_os = "linux")]
        {
            #[cfg(feature = "opengl")]
            let gl_uses_pbo = self
                .opengl
                .as_ref()
                .is_some_and(|gl| gl.transfer_backend() == opengl_headless::TransferBackend::Pbo);
            #[cfg(not(feature = "opengl"))]
            let gl_uses_pbo = false;

            if !gl_uses_pbo {
                if let Ok(img) = try_dma() {
                    return Ok(img);
                }
            }
        }

        // Try PBO (if GL available).
        // PBO buffers are u8-sized; the int8 shader emulates i8 output via
        // XOR 0x80 on the same underlying buffer, so both U8 and I8 work.
        #[cfg(target_os = "linux")]
        #[cfg(feature = "opengl")]
        if dtype.size() == 1 {
            if let Some(gl) = &self.opengl {
                match gl.create_pbo_image(width, height, format) {
                    Ok(t) => {
                        if dtype == DType::I8 {
                            // SAFETY: Tensor<u8> and Tensor<i8> are layout-
                            // identical (same element size, no T-dependent
                            // drop glue). The int8 shader applies XOR 0x80
                            // on the same PBO buffer. Same rationale as
                            // gl::processor::tensor_i8_as_u8_mut.
                            // Invariant: PBO tensors never have chroma
                            // (create_pbo_image → Tensor::wrap sets it None).
                            debug_assert!(
                                t.chroma().is_none(),
                                "PBO i8 transmute requires chroma == None"
                            );
                            let t_i8: Tensor<i8> = unsafe { std::mem::transmute(t) };
                            return Ok(TensorDyn::from(t_i8));
                        }
                        return Ok(TensorDyn::from(t));
                    }
                    Err(e) => log::debug!("PBO image creation failed, falling back to Mem: {e:?}"),
                }
            }
        }

        // Fallback to Mem — plain system memory, the terminal option when
        // neither DMA nor PBO allocation succeeded (or applied).
        Ok(TensorDyn::image(
            width,
            height,
            format,
            dtype,
            Some(edgefirst_tensor::TensorMemory::Mem),
        )?)
    }
1457
    /// Import an external DMA-BUF image.
    ///
    /// Each [`PlaneDescriptor`] owns an already-duped fd; this method
    /// consumes the descriptors and takes ownership of those fds (whether
    /// the call succeeds or fails).
    ///
    /// The caller must ensure the DMA-BUF allocation is large enough for the
    /// specified width, height, format, and any stride/offset on the plane
    /// descriptors. No buffer-size validation is performed; an undersized
    /// buffer may cause GPU faults or EGL import failure.
    ///
    /// # Arguments
    ///
    /// * `image` - Plane descriptor for the primary (or only) plane
    /// * `chroma` - Optional plane descriptor for the UV chroma plane
    ///   (required for multiplane NV12)
    /// * `width` - Image width in pixels
    /// * `height` - Image height in pixels
    /// * `format` - Pixel format of the buffer
    /// * `dtype` - Element data type (e.g. `DType::U8`)
    ///
    /// # Returns
    ///
    /// A `TensorDyn` configured as an image.
    ///
    /// # Errors
    ///
    /// * [`Error::NotSupported`] if `chroma` is `Some` for a non-semi-planar
    ///   format, or multiplane NV16 (not yet supported), or the fd is not
    ///   DMA-backed
    /// * [`Error::InvalidShape`] if NV12 height is odd
    ///
    /// # Platform
    ///
    /// Linux only.
    ///
    /// # Examples
    ///
    /// ```rust,ignore
    /// use edgefirst_tensor::PlaneDescriptor;
    ///
    /// // Single-plane RGBA
    /// let pd = PlaneDescriptor::new(fd.as_fd())?;
    /// let src = proc.import_image(pd, None, 1920, 1080, PixelFormat::Rgba, DType::U8)?;
    ///
    /// // Multi-plane NV12 with stride
    /// let y_pd = PlaneDescriptor::new(y_fd.as_fd())?.with_stride(2048);
    /// let uv_pd = PlaneDescriptor::new(uv_fd.as_fd())?.with_stride(2048);
    /// let src = proc.import_image(y_pd, Some(uv_pd), 1920, 1080,
    ///                             PixelFormat::Nv12, DType::U8)?;
    /// ```
    #[cfg(target_os = "linux")]
    pub fn import_image(
        &self,
        image: edgefirst_tensor::PlaneDescriptor,
        chroma: Option<edgefirst_tensor::PlaneDescriptor>,
        width: usize,
        height: usize,
        format: PixelFormat,
        dtype: DType,
    ) -> Result<TensorDyn> {
        use edgefirst_tensor::{Tensor, TensorMemory};

        // Capture stride/offset from descriptors before consuming them
        // (the into_fd() calls below move the descriptors away).
        let image_stride = image.stride();
        let image_offset = image.offset();
        let chroma_stride = chroma.as_ref().and_then(|c| c.stride());
        let chroma_offset = chroma.as_ref().and_then(|c| c.offset());

        if let Some(chroma_pd) = chroma {
            // ── Multiplane path ──────────────────────────────────────
            // Multiplane tensors are backed by Tensor<u8> (or transmuted to
            // Tensor<i8>). Reject other dtypes to avoid silently returning a
            // tensor with the wrong element type.
            if dtype != DType::U8 && dtype != DType::I8 {
                return Err(Error::NotSupported(format!(
                    "multiplane import only supports U8/I8, got {dtype:?}"
                )));
            }
            if format.layout() != PixelLayout::SemiPlanar {
                return Err(Error::NotSupported(format!(
                    "import_image with chroma requires a semi-planar format, got {format:?}"
                )));
            }

            // Height (in rows) of the separate chroma plane.
            let chroma_h = match format {
                PixelFormat::Nv12 => {
                    // NV12 chroma is vertically subsampled by 2, so the luma
                    // height must be even for the half-height plane to exist.
                    if !height.is_multiple_of(2) {
                        return Err(Error::InvalidShape(format!(
                            "NV12 requires even height, got {height}"
                        )));
                    }
                    height / 2
                }
                // NV16 multiplane will be supported in a future release;
                // the GL backend currently only handles NV12 plane1 attributes.
                PixelFormat::Nv16 => {
                    return Err(Error::NotSupported(
                        "multiplane NV16 is not yet supported; use contiguous NV16 instead".into(),
                    ))
                }
                _ => {
                    return Err(Error::NotSupported(format!(
                        "unsupported semi-planar format: {format:?}"
                    )))
                }
            };

            // From here on each fd is owned by its tensor, so the error
            // returns below release the fds when the tensors drop.
            let luma = Tensor::<u8>::from_fd(image.into_fd(), &[height, width], Some("luma"))?;
            if luma.memory() != TensorMemory::Dma {
                return Err(Error::NotSupported(format!(
                    "luma fd must be DMA-backed, got {:?}",
                    luma.memory()
                )));
            }

            let chroma_tensor =
                Tensor::<u8>::from_fd(chroma_pd.into_fd(), &[chroma_h, width], Some("chroma"))?;
            if chroma_tensor.memory() != TensorMemory::Dma {
                return Err(Error::NotSupported(format!(
                    "chroma fd must be DMA-backed, got {:?}",
                    chroma_tensor.memory()
                )));
            }

            // from_planes creates the combined tensor with format set,
            // preserving luma's row_stride (currently None since luma was raw).
            let mut tensor = Tensor::<u8>::from_planes(luma, chroma_tensor, format)?;

            // Apply stride/offset to the combined tensor (luma plane)
            if let Some(s) = image_stride {
                tensor.set_row_stride(s)?;
            }
            if let Some(o) = image_offset {
                tensor.set_plane_offset(o);
            }

            // Apply stride/offset to the chroma sub-tensor.
            // The chroma tensor is a raw 2D [chroma_h, width] tensor without
            // format metadata, so we validate stride manually rather than
            // using set_row_stride (which requires format).
            if let Some(chroma_ref) = tensor.chroma_mut() {
                if let Some(s) = chroma_stride {
                    // The interleaved UV row spans `width` elements, so any
                    // caller-supplied stride must be at least that wide.
                    if s < width {
                        return Err(Error::InvalidShape(format!(
                            "chroma stride {s} < minimum {width} for {format:?}"
                        )));
                    }
                    chroma_ref.set_row_stride_unchecked(s);
                }
                if let Some(o) = chroma_offset {
                    chroma_ref.set_plane_offset(o);
                }
            }

            if dtype == DType::I8 {
                // SAFETY: Tensor<u8> and Tensor<i8> have identical layout because
                // the struct contains only type-erased storage (OwnedFd, shape, name),
                // no inline T values. This assertion catches layout drift at compile time.
                const {
                    assert!(std::mem::size_of::<Tensor<u8>>() == std::mem::size_of::<Tensor<i8>>());
                    assert!(
                        std::mem::align_of::<Tensor<u8>>() == std::mem::align_of::<Tensor<i8>>()
                    );
                }
                let tensor_i8: Tensor<i8> = unsafe { std::mem::transmute(tensor) };
                return Ok(TensorDyn::from(tensor_i8));
            }
            Ok(TensorDyn::from(tensor))
        } else {
            // ── Single-plane path ────────────────────────────────────
            // Derive the tensor shape implied by the pixel layout.
            let shape = match format.layout() {
                PixelLayout::Packed => vec![height, width, format.channels()],
                PixelLayout::Planar => vec![format.channels(), height, width],
                PixelLayout::SemiPlanar => {
                    // Contiguous semi-planar: luma rows followed by chroma
                    // rows in a single buffer, expressed as extra rows.
                    let total_h = match format {
                        PixelFormat::Nv12 => {
                            if !height.is_multiple_of(2) {
                                return Err(Error::InvalidShape(format!(
                                    "NV12 requires even height, got {height}"
                                )));
                            }
                            // Half-height chroma → 1.5× total rows.
                            height * 3 / 2
                        }
                        // Full-height chroma → 2× total rows.
                        PixelFormat::Nv16 => height * 2,
                        _ => {
                            return Err(Error::InvalidShape(format!(
                                "unknown semi-planar height multiplier for {format:?}"
                            )))
                        }
                    };
                    vec![total_h, width]
                }
                _ => {
                    return Err(Error::NotSupported(format!(
                        "unsupported pixel layout for import_image: {:?}",
                        format.layout()
                    )));
                }
            };
            let tensor = TensorDyn::from_fd(image.into_fd(), &shape, dtype, None)?;
            if tensor.memory() != TensorMemory::Dma {
                return Err(Error::NotSupported(format!(
                    "import_image requires DMA-backed fd, got {:?}",
                    tensor.memory()
                )));
            }
            let mut tensor = tensor.with_format(format)?;
            if let Some(s) = image_stride {
                tensor.set_row_stride(s)?;
            }
            if let Some(o) = image_offset {
                tensor.set_plane_offset(o);
            }
            Ok(tensor)
        }
    }
1675
1676    /// Decode model outputs and draw segmentation masks onto `dst`.
1677    ///
1678    /// This is the primary mask rendering API. The processor decodes via the
1679    /// provided [`Decoder`], selects the optimal rendering path (hybrid
1680    /// CPU+GL or fused GPU), and composites masks onto `dst`.
1681    ///
1682    /// Returns the detected bounding boxes.
1683    pub fn draw_masks(
1684        &mut self,
1685        decoder: &edgefirst_decoder::Decoder,
1686        outputs: &[&TensorDyn],
1687        dst: &mut TensorDyn,
1688        overlay: MaskOverlay<'_>,
1689    ) -> Result<Vec<DetectBox>> {
1690        let mut output_boxes = Vec::with_capacity(100);
1691
1692        // Try proto path first (fused rendering without materializing masks)
1693        let proto_result = decoder
1694            .decode_proto(outputs, &mut output_boxes)
1695            .map_err(|e| Error::Internal(format!("decode_proto: {e:#?}")))?;
1696
1697        if let Some(proto_data) = proto_result {
1698            self.draw_proto_masks(dst, &output_boxes, &proto_data, overlay)?;
1699        } else {
1700            // Detection-only or unsupported model: full decode + render
1701            let mut output_masks = Vec::with_capacity(100);
1702            decoder
1703                .decode(outputs, &mut output_boxes, &mut output_masks)
1704                .map_err(|e| Error::Internal(format!("decode: {e:#?}")))?;
1705            self.draw_decoded_masks(dst, &output_boxes, &output_masks, overlay)?;
1706        }
1707        Ok(output_boxes)
1708    }
1709
1710    /// Decode tracked model outputs and draw segmentation masks onto `dst`.
1711    ///
1712    /// Like [`draw_masks`](Self::draw_masks) but integrates a tracker for
1713    /// maintaining object identities across frames. The tracker runs after
1714    /// NMS but before mask extraction.
1715    ///
1716    /// Returns detected boxes and track info.
1717    #[cfg(feature = "tracker")]
1718    pub fn draw_masks_tracked<TR: edgefirst_tracker::Tracker<DetectBox>>(
1719        &mut self,
1720        decoder: &edgefirst_decoder::Decoder,
1721        tracker: &mut TR,
1722        timestamp: u64,
1723        outputs: &[&TensorDyn],
1724        dst: &mut TensorDyn,
1725        overlay: MaskOverlay<'_>,
1726    ) -> Result<(Vec<DetectBox>, Vec<edgefirst_tracker::TrackInfo>)> {
1727        let mut output_boxes = Vec::with_capacity(100);
1728        let mut output_tracks = Vec::new();
1729
1730        let proto_result = decoder
1731            .decode_proto_tracked(
1732                tracker,
1733                timestamp,
1734                outputs,
1735                &mut output_boxes,
1736                &mut output_tracks,
1737            )
1738            .map_err(|e| Error::Internal(format!("decode_proto_tracked: {e:#?}")))?;
1739
1740        if let Some(proto_data) = proto_result {
1741            self.draw_proto_masks(dst, &output_boxes, &proto_data, overlay)?;
1742        } else {
1743            // Note: decode_proto_tracked returns None for detection-only/ModelPack
1744            // models WITHOUT calling the tracker. The else branch below is the
1745            // first (and only) tracker call for those model types.
1746            let mut output_masks = Vec::with_capacity(100);
1747            decoder
1748                .decode_tracked(
1749                    tracker,
1750                    timestamp,
1751                    outputs,
1752                    &mut output_boxes,
1753                    &mut output_masks,
1754                    &mut output_tracks,
1755                )
1756                .map_err(|e| Error::Internal(format!("decode_tracked: {e:#?}")))?;
1757            self.draw_decoded_masks(dst, &output_boxes, &output_masks, overlay)?;
1758        }
1759        Ok((output_boxes, output_tracks))
1760    }
1761
1762    /// Materialize per-instance segmentation masks from raw prototype data.
1763    ///
1764    /// Computes `mask_coeff @ protos` with sigmoid activation for each detection,
1765    /// producing compact masks at prototype resolution (e.g., 160×160 crops).
1766    /// Mask values are continuous sigmoid confidence outputs quantized to u8
1767    /// (0 = background, 255 = full confidence), NOT binary thresholded.
1768    ///
1769    /// The returned [`Vec<Segmentation>`] can be:
1770    /// - Inspected or exported for analytics, IoU computation, etc.
1771    /// - Passed directly to [`ImageProcessorTrait::draw_decoded_masks`] for
1772    ///   GPU-interpolated rendering.
1773    ///
1774    /// # Performance Note
1775    ///
1776    /// Calling `materialize_masks` + `draw_decoded_masks` separately prevents
1777    /// the HAL from using its internal fused optimization path. For render-only
1778    /// use cases, prefer [`ImageProcessorTrait::draw_proto_masks`] which selects
1779    /// the fastest path automatically (currently 1.6×–27× faster on tested
1780    /// platforms). Use this method when you need access to the intermediate masks.
1781    ///
1782    /// # Errors
1783    ///
1784    /// Returns [`Error::NoConverter`] if the CPU backend is not available.
1785    pub fn materialize_masks(
1786        &mut self,
1787        detect: &[DetectBox],
1788        proto_data: &ProtoData,
1789        letterbox: Option<[f32; 4]>,
1790        resolution: MaskResolution,
1791    ) -> Result<Vec<Segmentation>> {
1792        let cpu = self.cpu.as_mut().ok_or(Error::NoConverter)?;
1793        match resolution {
1794            MaskResolution::Proto => cpu.materialize_segmentations(detect, proto_data, letterbox),
1795            MaskResolution::Scaled { width, height } => {
1796                cpu.materialize_scaled_segmentations(detect, proto_data, letterbox, width, height)
1797            }
1798        }
1799    }
1800}
1801
1802impl ImageProcessorTrait for ImageProcessor {
1803    /// Converts the source image to the destination image format and size. The
1804    /// image is cropped first, then flipped, then rotated
1805    ///
1806    /// Prefer hardware accelerators when available, falling back to CPU if
1807    /// necessary.
1808    fn convert(
1809        &mut self,
1810        src: &TensorDyn,
1811        dst: &mut TensorDyn,
1812        rotation: Rotation,
1813        flip: Flip,
1814        crop: Crop,
1815    ) -> Result<()> {
1816        let start = Instant::now();
1817        let src_fmt = src.format();
1818        let dst_fmt = dst.format();
1819        let _span = tracing::trace_span!(
1820            "image_convert",
1821            ?src_fmt,
1822            ?dst_fmt,
1823            src_memory = ?src.memory(),
1824            dst_memory = ?dst.memory(),
1825            ?rotation,
1826            ?flip,
1827        )
1828        .entered();
1829        log::trace!(
1830            "convert: {src_fmt:?}({:?}/{:?}) → {dst_fmt:?}({:?}/{:?}), \
1831             rotation={rotation:?}, flip={flip:?}, backend={:?}",
1832            src.dtype(),
1833            src.memory(),
1834            dst.dtype(),
1835            dst.memory(),
1836            self.forced_backend,
1837        );
1838
1839        // ── Forced backend: no fallback chain ────────────────────────
1840        if let Some(forced) = self.forced_backend {
1841            return match forced {
1842                ForcedBackend::Cpu => {
1843                    if let Some(cpu) = self.cpu.as_mut() {
1844                        let r = cpu.convert(src, dst, rotation, flip, crop);
1845                        log::trace!(
1846                            "convert: forced=cpu result={} ({:?})",
1847                            if r.is_ok() { "ok" } else { "err" },
1848                            start.elapsed()
1849                        );
1850                        return r;
1851                    }
1852                    Err(Error::ForcedBackendUnavailable("cpu".into()))
1853                }
1854                ForcedBackend::G2d => {
1855                    #[cfg(target_os = "linux")]
1856                    if let Some(g2d) = self.g2d.as_mut() {
1857                        let r = g2d.convert(src, dst, rotation, flip, crop);
1858                        log::trace!(
1859                            "convert: forced=g2d result={} ({:?})",
1860                            if r.is_ok() { "ok" } else { "err" },
1861                            start.elapsed()
1862                        );
1863                        return r;
1864                    }
1865                    Err(Error::ForcedBackendUnavailable("g2d".into()))
1866                }
1867                ForcedBackend::OpenGl => {
1868                    #[cfg(target_os = "linux")]
1869                    #[cfg(feature = "opengl")]
1870                    if let Some(opengl) = self.opengl.as_mut() {
1871                        let r = opengl.convert(src, dst, rotation, flip, crop);
1872                        log::trace!(
1873                            "convert: forced=opengl result={} ({:?})",
1874                            if r.is_ok() { "ok" } else { "err" },
1875                            start.elapsed()
1876                        );
1877                        return r;
1878                    }
1879                    Err(Error::ForcedBackendUnavailable("opengl".into()))
1880                }
1881            };
1882        }
1883
1884        // ── Auto fallback chain: OpenGL → G2D → CPU ──────────────────
1885        #[cfg(target_os = "linux")]
1886        #[cfg(feature = "opengl")]
1887        if let Some(opengl) = self.opengl.as_mut() {
1888            match opengl.convert(src, dst, rotation, flip, crop) {
1889                Ok(_) => {
1890                    log::trace!(
1891                        "convert: auto selected=opengl for {src_fmt:?}→{dst_fmt:?} ({:?})",
1892                        start.elapsed()
1893                    );
1894                    return Ok(());
1895                }
1896                Err(e) => {
1897                    log::trace!("convert: auto opengl declined {src_fmt:?}→{dst_fmt:?}: {e}");
1898                }
1899            }
1900        }
1901
1902        #[cfg(target_os = "linux")]
1903        if let Some(g2d) = self.g2d.as_mut() {
1904            match g2d.convert(src, dst, rotation, flip, crop) {
1905                Ok(_) => {
1906                    log::trace!(
1907                        "convert: auto selected=g2d for {src_fmt:?}→{dst_fmt:?} ({:?})",
1908                        start.elapsed()
1909                    );
1910                    return Ok(());
1911                }
1912                Err(e) => {
1913                    log::trace!("convert: auto g2d declined {src_fmt:?}→{dst_fmt:?}: {e}");
1914                }
1915            }
1916        }
1917
1918        if let Some(cpu) = self.cpu.as_mut() {
1919            match cpu.convert(src, dst, rotation, flip, crop) {
1920                Ok(_) => {
1921                    log::trace!(
1922                        "convert: auto selected=cpu for {src_fmt:?}→{dst_fmt:?} ({:?})",
1923                        start.elapsed()
1924                    );
1925                    return Ok(());
1926                }
1927                Err(e) => {
1928                    log::trace!("convert: auto cpu failed {src_fmt:?}→{dst_fmt:?}: {e}");
1929                    return Err(e);
1930                }
1931            }
1932        }
1933        Err(Error::NoConverter)
1934    }
1935
    /// Render pre-materialized segmentation masks onto `dst`.
    ///
    /// Dispatch order: a forced backend (if configured) is used exclusively;
    /// otherwise empty frames prefer G2D, and populated frames try OpenGL
    /// before falling back to the CPU renderer.
    fn draw_decoded_masks(
        &mut self,
        dst: &mut TensorDyn,
        detect: &[DetectBox],
        segmentation: &[Segmentation],
        overlay: MaskOverlay<'_>,
    ) -> Result<()> {
        let _span = tracing::trace_span!(
            "draw_masks",
            n_detections = detect.len(),
            n_segmentations = segmentation.len(),
        )
        .entered();
        let start = Instant::now();

        // The background is read while `dst` is written, so overlapping
        // buffers are rejected up front.
        if let Some(bg) = overlay.background {
            if bg.aliases(dst) {
                return Err(Error::AliasedBuffers(
                    "background must not reference the same buffer as dst".to_string(),
                ));
            }
        }

        // Un-letterbox detect boxes and segmentation bboxes for rendering when
        // a letterbox was applied to prepare the model input.
        // `lb_boxes`/`lb_segs` are declared in the outer scope so the slices
        // borrowed from them below outlive the `if let`.
        let lb_boxes: Vec<DetectBox>;
        let lb_segs: Vec<Segmentation>;
        let (detect, segmentation) = if let Some(lb) = overlay.letterbox {
            lb_boxes = detect.iter().map(|&d| unletter_bbox(d, lb)).collect();
            // Keep segmentation bboxes in sync with the transformed detect boxes
            // when we have a 1:1 correspondence (instance segmentation).
            lb_segs = if segmentation.len() == lb_boxes.len() {
                segmentation
                    .iter()
                    .zip(lb_boxes.iter())
                    .map(|(s, d)| Segmentation {
                        xmin: d.bbox.xmin,
                        ymin: d.bbox.ymin,
                        xmax: d.bbox.xmax,
                        ymax: d.bbox.ymax,
                        segmentation: s.segmentation.clone(),
                    })
                    .collect()
            } else {
                // Counts differ (no per-box correspondence): pass the
                // segmentations through unchanged.
                segmentation.to_vec()
            };
            (lb_boxes.as_slice(), lb_segs.as_slice())
        } else {
            (detect, segmentation)
        };
        // Only consulted by the Linux-only G2D fast path below.
        #[cfg(target_os = "linux")]
        let is_empty_frame = detect.is_empty() && segmentation.is_empty();

        // ── Forced backend: no fallback chain ────────────────────────
        if let Some(forced) = self.forced_backend {
            return match forced {
                ForcedBackend::Cpu => {
                    if let Some(cpu) = self.cpu.as_mut() {
                        return cpu.draw_decoded_masks(dst, detect, segmentation, overlay);
                    }
                    Err(Error::ForcedBackendUnavailable("cpu".into()))
                }
                ForcedBackend::G2d => {
                    // G2D can only produce empty frames (clear / bg blit).
                    // For populated frames it has no rasterizer — fail loudly.
                    #[cfg(target_os = "linux")]
                    if let Some(g2d) = self.g2d.as_mut() {
                        return g2d.draw_decoded_masks(dst, detect, segmentation, overlay);
                    }
                    Err(Error::ForcedBackendUnavailable("g2d".into()))
                }
                ForcedBackend::OpenGl => {
                    // GL handles background natively via GPU blit, and now
                    // actively clears when there is no background.
                    #[cfg(target_os = "linux")]
                    #[cfg(feature = "opengl")]
                    if let Some(opengl) = self.opengl.as_mut() {
                        return opengl.draw_decoded_masks(dst, detect, segmentation, overlay);
                    }
                    Err(Error::ForcedBackendUnavailable("opengl".into()))
                }
            };
        }

        // ── Auto dispatch ──────────────────────────────────────────
        // Empty frames prefer G2D when available — a single g2d_clear or
        // g2d_blit is the cheapest HW path to produce the correct output
        // and avoids spinning up the GL pipeline every zero-detection
        // frame in a triple-buffered display loop.
        #[cfg(target_os = "linux")]
        if is_empty_frame {
            if let Some(g2d) = self.g2d.as_mut() {
                match g2d.draw_decoded_masks(dst, detect, segmentation, overlay) {
                    Ok(_) => {
                        log::trace!(
                            "draw_decoded_masks empty frame via g2d in {:?}",
                            start.elapsed()
                        );
                        return Ok(());
                    }
                    // Not fatal: fall through to the GL/CPU paths below.
                    Err(e) => log::trace!("g2d empty-frame path unavailable: {e:?}"),
                }
            }
        }

        // Populated frames (or G2D unavailable): GL first, CPU fallback.
        // Both backends now own their own base-layer handling (bg blit
        // or clear), so we hand the overlay through untouched.
        #[cfg(target_os = "linux")]
        #[cfg(feature = "opengl")]
        if let Some(opengl) = self.opengl.as_mut() {
            log::trace!(
                "draw_decoded_masks started with opengl in {:?}",
                start.elapsed()
            );
            match opengl.draw_decoded_masks(dst, detect, segmentation, overlay) {
                Ok(_) => {
                    log::trace!("draw_decoded_masks with opengl in {:?}", start.elapsed());
                    return Ok(());
                }
                Err(e) => {
                    log::trace!("draw_decoded_masks didn't work with opengl: {e:?}")
                }
            }
        }

        log::trace!(
            "draw_decoded_masks started with cpu in {:?}",
            start.elapsed()
        );
        if let Some(cpu) = self.cpu.as_mut() {
            match cpu.draw_decoded_masks(dst, detect, segmentation, overlay) {
                Ok(_) => {
                    log::trace!("draw_decoded_masks with cpu in {:?}", start.elapsed());
                    return Ok(());
                }
                Err(e) => {
                    // CPU is the last backend; its error is returned as-is.
                    log::trace!("draw_decoded_masks didn't work with cpu: {e:?}");
                    return Err(e);
                }
            }
        }
        Err(Error::NoConverter)
    }
2080
    /// Render segmentation masks directly from prototype data onto `dst`.
    ///
    /// Dispatch order: a forced backend (if configured) is used exclusively;
    /// otherwise empty frames prefer G2D, populated frames use the hybrid
    /// CPU-materialize + GL-overlay path, and the pure-CPU renderer is the
    /// final fallback.
    fn draw_proto_masks(
        &mut self,
        dst: &mut TensorDyn,
        detect: &[DetectBox],
        proto_data: &ProtoData,
        overlay: MaskOverlay<'_>,
    ) -> Result<()> {
        let start = Instant::now();

        // The background is read while `dst` is written, so overlapping
        // buffers are rejected up front.
        if let Some(bg) = overlay.background {
            if bg.aliases(dst) {
                return Err(Error::AliasedBuffers(
                    "background must not reference the same buffer as dst".to_string(),
                ));
            }
        }

        // Un-letterbox detect boxes for rendering when a letterbox was applied
        // to prepare the model input.  The original `detect` coords are still
        // passed to `materialize_segmentations` (which needs model-space coords
        // to correctly crop the proto tensor) alongside `overlay.letterbox` so
        // it can emit `Segmentation` structs in output-image space.
        let lb_boxes: Vec<DetectBox>;
        let render_detect = if let Some(lb) = overlay.letterbox {
            lb_boxes = detect.iter().map(|&d| unletter_bbox(d, lb)).collect();
            lb_boxes.as_slice()
        } else {
            detect
        };
        // Only consulted by the Linux-only G2D fast path below.
        #[cfg(target_os = "linux")]
        let is_empty_frame = detect.is_empty();

        // ── Forced backend: no fallback chain ────────────────────────
        if let Some(forced) = self.forced_backend {
            return match forced {
                ForcedBackend::Cpu => {
                    if let Some(cpu) = self.cpu.as_mut() {
                        return cpu.draw_proto_masks(dst, render_detect, proto_data, overlay);
                    }
                    Err(Error::ForcedBackendUnavailable("cpu".into()))
                }
                ForcedBackend::G2d => {
                    #[cfg(target_os = "linux")]
                    if let Some(g2d) = self.g2d.as_mut() {
                        return g2d.draw_proto_masks(dst, render_detect, proto_data, overlay);
                    }
                    Err(Error::ForcedBackendUnavailable("g2d".into()))
                }
                ForcedBackend::OpenGl => {
                    #[cfg(target_os = "linux")]
                    #[cfg(feature = "opengl")]
                    if let Some(opengl) = self.opengl.as_mut() {
                        return opengl.draw_proto_masks(dst, render_detect, proto_data, overlay);
                    }
                    Err(Error::ForcedBackendUnavailable("opengl".into()))
                }
            };
        }

        // ── Auto dispatch ──────────────────────────────────────────
        // Empty frames: prefer G2D — cheapest HW path (clear or bg blit).
        #[cfg(target_os = "linux")]
        if is_empty_frame {
            if let Some(g2d) = self.g2d.as_mut() {
                match g2d.draw_proto_masks(dst, render_detect, proto_data, overlay) {
                    Ok(_) => {
                        log::trace!(
                            "draw_proto_masks empty frame via g2d in {:?}",
                            start.elapsed()
                        );
                        return Ok(());
                    }
                    // Not fatal: fall through to the hybrid/CPU paths below.
                    Err(e) => log::trace!("g2d empty-frame path unavailable: {e:?}"),
                }
            }
        }

        // Hybrid path: CPU materialize + GL overlay (benchmarked faster than
        // full-GPU draw_proto_masks on all tested platforms: 27× on imx8mp,
        // 4× on imx95, 2.5× on rpi5, 1.6× on x86).
        // GL owns its own bg-blit / glClear — we pass the overlay through.
        //
        // CPU materialize needs `&mut` for its MaskScratch buffers; GL also
        // needs `&mut`. The CPU borrow is scoped to its block so the
        // subsequent GL borrow is free to take over `self`.
        #[cfg(target_os = "linux")]
        #[cfg(feature = "opengl")]
        if let (Some(_), Some(_)) = (self.cpu.as_ref(), self.opengl.as_ref()) {
            let segmentation = match self.cpu.as_mut() {
                Some(cpu) => {
                    log::trace!(
                        "draw_proto_masks started with hybrid (cpu+opengl) in {:?}",
                        start.elapsed()
                    );
                    // Note: model-space `detect` (not `render_detect`) goes to
                    // materialization; the letterbox handles the remapping.
                    cpu.materialize_segmentations(detect, proto_data, overlay.letterbox)?
                }
                None => unreachable!("cpu presence checked above"),
            };
            if let Some(opengl) = self.opengl.as_mut() {
                match opengl.draw_decoded_masks(dst, render_detect, &segmentation, overlay) {
                    Ok(_) => {
                        log::trace!(
                            "draw_proto_masks with hybrid (cpu+opengl) in {:?}",
                            start.elapsed()
                        );
                        return Ok(());
                    }
                    Err(e) => {
                        log::trace!(
                            "draw_proto_masks hybrid path failed, falling back to cpu: {e:?}"
                        );
                    }
                }
            }
        }

        // Pure-CPU fallback (also the only path on non-Linux / non-GL builds).
        let Some(cpu) = self.cpu.as_mut() else {
            return Err(Error::Internal(
                "draw_proto_masks requires CPU backend for fallback path".into(),
            ));
        };
        log::trace!("draw_proto_masks started with cpu in {:?}", start.elapsed());
        cpu.draw_proto_masks(dst, render_detect, proto_data, overlay)
    }
2205
2206    fn set_class_colors(&mut self, colors: &[[u8; 4]]) -> Result<()> {
2207        let start = Instant::now();
2208
2209        // ── Forced backend: no fallback chain ────────────────────────
2210        if let Some(forced) = self.forced_backend {
2211            return match forced {
2212                ForcedBackend::Cpu => {
2213                    if let Some(cpu) = self.cpu.as_mut() {
2214                        return cpu.set_class_colors(colors);
2215                    }
2216                    Err(Error::ForcedBackendUnavailable("cpu".into()))
2217                }
2218                ForcedBackend::G2d => Err(Error::NotSupported(
2219                    "g2d does not support set_class_colors".into(),
2220                )),
2221                ForcedBackend::OpenGl => {
2222                    #[cfg(target_os = "linux")]
2223                    #[cfg(feature = "opengl")]
2224                    if let Some(opengl) = self.opengl.as_mut() {
2225                        return opengl.set_class_colors(colors);
2226                    }
2227                    Err(Error::ForcedBackendUnavailable("opengl".into()))
2228                }
2229            };
2230        }
2231
2232        // skip G2D as it doesn't support rendering to image
2233
2234        #[cfg(target_os = "linux")]
2235        #[cfg(feature = "opengl")]
2236        if let Some(opengl) = self.opengl.as_mut() {
2237            log::trace!("image started with opengl in {:?}", start.elapsed());
2238            match opengl.set_class_colors(colors) {
2239                Ok(_) => {
2240                    log::trace!("colors set with opengl in {:?}", start.elapsed());
2241                    return Ok(());
2242                }
2243                Err(e) => {
2244                    log::trace!("colors didn't set with opengl: {e:?}")
2245                }
2246            }
2247        }
2248        log::trace!("image started with cpu in {:?}", start.elapsed());
2249        if let Some(cpu) = self.cpu.as_mut() {
2250            match cpu.set_class_colors(colors) {
2251                Ok(_) => {
2252                    log::trace!("colors set with cpu in {:?}", start.elapsed());
2253                    return Ok(());
2254                }
2255                Err(e) => {
2256                    log::trace!("colors didn't set with cpu: {e:?}");
2257                    return Err(e);
2258                }
2259            }
2260        }
2261        Err(Error::NoConverter)
2262    }
2263}
2264
2265// ---------------------------------------------------------------------------
2266// Image loading / saving helpers
2267// ---------------------------------------------------------------------------
2268
2269/// Read EXIF orientation from raw EXIF bytes and return (Rotation, Flip).
2270fn read_exif_orientation(exif_bytes: &[u8]) -> (Rotation, Flip) {
2271    let exifreader = exif::Reader::new();
2272    let Ok(exif_) = exifreader.read_raw(exif_bytes.to_vec()) else {
2273        return (Rotation::None, Flip::None);
2274    };
2275    let Some(orientation) = exif_.get_field(exif::Tag::Orientation, exif::In::PRIMARY) else {
2276        return (Rotation::None, Flip::None);
2277    };
2278    match orientation.value.get_uint(0) {
2279        Some(1) => (Rotation::None, Flip::None),
2280        Some(2) => (Rotation::None, Flip::Horizontal),
2281        Some(3) => (Rotation::Rotate180, Flip::None),
2282        Some(4) => (Rotation::Rotate180, Flip::Horizontal),
2283        Some(5) => (Rotation::Clockwise90, Flip::Horizontal),
2284        Some(6) => (Rotation::Clockwise90, Flip::None),
2285        Some(7) => (Rotation::CounterClockwise90, Flip::Horizontal),
2286        Some(8) => (Rotation::CounterClockwise90, Flip::None),
2287        Some(v) => {
2288            log::warn!("broken orientation EXIF value: {v}");
2289            (Rotation::None, Flip::None)
2290        }
2291        None => (Rotation::None, Flip::None),
2292    }
2293}
2294
2295/// Map a [`PixelFormat`] to the zune-jpeg `ColorSpace` for decoding.
2296/// Returns `None` for formats that the JPEG decoder cannot output directly.
2297fn pixelfmt_to_colorspace(fmt: PixelFormat) -> Option<ColorSpace> {
2298    match fmt {
2299        PixelFormat::Rgb => Some(ColorSpace::RGB),
2300        PixelFormat::Rgba => Some(ColorSpace::RGBA),
2301        PixelFormat::Grey => Some(ColorSpace::Luma),
2302        _ => None,
2303    }
2304}
2305
2306/// Map a zune-jpeg `ColorSpace` to a [`PixelFormat`].
2307fn colorspace_to_pixelfmt(cs: ColorSpace) -> Option<PixelFormat> {
2308    match cs {
2309        ColorSpace::RGB => Some(PixelFormat::Rgb),
2310        ColorSpace::RGBA => Some(PixelFormat::Rgba),
2311        ColorSpace::Luma => Some(PixelFormat::Grey),
2312        _ => None,
2313    }
2314}
2315
/// Load a JPEG image from raw bytes and return a [`TensorDyn`].
///
/// * `image`  — raw JPEG file bytes.
/// * `format` — desired output pixel format; `None` decodes to RGB.
/// * `memory` — desired tensor memory domain; `None` uses the default.
///
/// EXIF orientation is honored: oriented files take a slower path through a
/// Mem staging buffer and `rotate_flip_to_dyn`.
// TODO: evaluate replacing zune-jpeg with libjpeg-turbo (via `turbojpeg`
// crate). `tjDecompress2` accepts an explicit `pitch` parameter, which
// would let us decode directly into a pitch-padded DMA-BUF and drop the
// Mem-staging + row-copy introduced below for Mali G310 pitch alignment.
// Dropping zune-jpeg also gets us a 2-4× faster SIMD decode on AArch64.
// Blockers: adds a C dep (mozjpeg-sys / libturbojpeg) to the build;
// cross-compilation story needs validating with zigbuild.
fn load_jpeg(
    image: &[u8],
    format: Option<PixelFormat>,
    memory: Option<TensorMemory>,
) -> Result<TensorDyn> {
    // Ask the decoder to emit the requested colorspace directly when it can;
    // otherwise we convert after decode via the CPU backend.
    let colour = match format {
        Some(f) => pixelfmt_to_colorspace(f)
            .ok_or_else(|| Error::NotSupported(format!("Unsupported image format {f:?}")))?,
        None => ColorSpace::RGB,
    };
    let options = DecoderOptions::default().jpeg_set_out_colorspace(colour);
    let mut decoder = JpegDecoder::new_with_options(ZCursor::new(image), options);
    decoder.decode_headers()?;

    let image_info = decoder.info().ok_or(Error::Internal(
        "JPEG did not return decoded image info".to_string(),
    ))?;

    // The decoder may not honor the requested colorspace exactly; use what
    // it actually produced as the source format for any later conversion.
    let converted_cs = decoder
        .output_colorspace()
        .ok_or(Error::Internal("No output colorspace".to_string()))?;

    let converted_fmt = colorspace_to_pixelfmt(converted_cs).ok_or(Error::NotSupported(
        "Unsupported JPEG decoder output".to_string(),
    ))?;

    let dest_fmt = format.unwrap_or(converted_fmt);

    // EXIF orientation decides whether we can decode straight into the final
    // tensor or need the rotate/flip staging path at the bottom.
    let (rotation, flip) = decoder
        .exif()
        .map(|x| read_exif_orientation(x))
        .unwrap_or((Rotation::None, Flip::None));

    let w = image_info.width as usize;
    let h = image_info.height as usize;

    if (rotation, flip) == (Rotation::None, Flip::None) {
        // When caller wants DMA and the natural pitch would be rejected by
        // the GPU's DMA-BUF import (Mali G310 needs 64-byte pitch), decode
        // into a tightly-packed Mem staging buffer and row-copy into a
        // pitch-padded DMA tensor. zune-jpeg has no stride-aware decode,
        // so the Mem intermediate is unavoidable until we swap decoders
        // (see the TODO above this function).
        #[cfg(target_os = "linux")]
        if let Some(aligned_pitch) = padded_dma_pitch_for(dest_fmt, w, &memory) {
            let staging = Tensor::<u8>::image(w, h, converted_fmt, Some(TensorMemory::Mem))?;
            decoder.decode_into(&mut staging.map()?)?;
            // Convert format in Mem first if the decoder couldn't emit it.
            let packed = if converted_fmt != dest_fmt {
                let mut tmp = Tensor::<u8>::image(w, h, dest_fmt, Some(TensorMemory::Mem))?;
                CPUProcessor::convert_format_pf(&staging, &mut tmp, converted_fmt, dest_fmt)?;
                tmp
            } else {
                staging
            };
            let mut dma = Tensor::<u8>::image_with_stride(
                w,
                h,
                dest_fmt,
                aligned_pitch,
                Some(TensorMemory::Dma),
            )?;
            copy_packed_to_padded_dma(&packed, &mut dma)?;
            return Ok(TensorDyn::from(dma));
        }

        // Natural pitch is acceptable: decode into the final tensor,
        // with an optional Mem intermediate for the format conversion.
        let mut img = Tensor::<u8>::image(w, h, dest_fmt, memory)?;

        if converted_fmt != dest_fmt {
            let tmp = Tensor::<u8>::image(w, h, converted_fmt, Some(TensorMemory::Mem))?;
            decoder.decode_into(&mut tmp.map()?)?;
            CPUProcessor::convert_format_pf(&tmp, &mut img, converted_fmt, dest_fmt)?;
            return Ok(TensorDyn::from(img));
        }
        decoder.decode_into(&mut img.map()?)?;
        return Ok(TensorDyn::from(img));
    }

    // EXIF-oriented path: decode (and optionally convert) in Mem, then let
    // rotate_flip_to_dyn produce the final tensor in the requested memory.
    let mut tmp = Tensor::<u8>::image(w, h, dest_fmt, Some(TensorMemory::Mem))?;

    if converted_fmt != dest_fmt {
        let tmp2 = Tensor::<u8>::image(w, h, converted_fmt, Some(TensorMemory::Mem))?;
        decoder.decode_into(&mut tmp2.map()?)?;
        CPUProcessor::convert_format_pf(&tmp2, &mut tmp, converted_fmt, dest_fmt)?;
    } else {
        decoder.decode_into(&mut tmp.map()?)?;
    }

    rotate_flip_to_dyn(&tmp, dest_fmt, rotation, flip, memory)
}
2413
/// Load a PNG image from raw bytes and return a [`TensorDyn`].
///
/// * `image`  — raw PNG file bytes.
/// * `format` — desired output pixel format; `None` defaults to RGB.
/// * `memory` — desired tensor memory domain; `None` uses the default.
///
/// Supports the same destination formats as the CPU backend's format
/// converter (`Rgb`, `Rgba`, `Bgra`, `Grey`, etc.). Earlier revisions only
/// accepted `Rgb`/`Rgba`; greyscale PNGs decoded to `Grey` now work through
/// the same pitch-aware DMA path as JPEG. LumaA PNGs are normalised to
/// `Grey` inline (alpha stripped) before going through the shared CPU
/// converter.
///
/// # Errors
/// Returns [`Error::NotSupported`] when the decoder's native colorspace
/// cannot be converted to the requested format, and [`Error::Internal`]
/// when the decoder withholds header/colorspace information.
fn load_png(
    image: &[u8],
    format: Option<PixelFormat>,
    memory: Option<TensorMemory>,
) -> Result<TensorDyn> {
    let dest_fmt = format.unwrap_or(PixelFormat::Rgb);

    // Decode with add_alpha=false — any alpha upgrade/strip happens via
    // the CPU converter downstream so we share one code path with
    // load_jpeg instead of duplicating promotion logic here.
    let options = DecoderOptions::default()
        .png_set_add_alpha_channel(false)
        .png_set_decode_animated(false);
    let mut decoder = PngDecoder::new_with_options(ZCursor::new(image), options);
    decoder.decode_headers()?;

    // Pull dimensions and EXIF orientation out of the header in one scope
    // so the `info()` borrow ends before we touch the decoder again.
    let (width, height, rotation, flip) = {
        let info = decoder
            .info()
            .ok_or_else(|| Error::Internal("PNG did not return decoded image info".to_string()))?;
        let (rot, flip) = info
            .exif
            .as_ref()
            .map(|x| read_exif_orientation(x))
            .unwrap_or((Rotation::None, Flip::None));
        (info.width, info.height, rot, flip)
    };

    // Map the decoder's native colorspace onto a PixelFormat that the CPU
    // converter understands. LumaA has no direct PixelFormat variant so we
    // decode as LumaA and then strip alpha inline to get Grey.
    let decoder_cs = decoder
        .colorspace()
        .ok_or_else(|| Error::Internal("PNG decoder did not return colorspace".to_string()))?;
    let (decoded_fmt, strip_luma_alpha) = match decoder_cs {
        ColorSpace::Luma => (PixelFormat::Grey, false),
        ColorSpace::LumaA => (PixelFormat::Grey, true),
        ColorSpace::RGB => (PixelFormat::Rgb, false),
        ColorSpace::RGBA => (PixelFormat::Rgba, false),
        other => {
            return Err(Error::NotSupported(format!(
                "PNG decoder produced unsupported colorspace {other:?}"
            )));
        }
    };

    // Reject destinations the CPU converter can't reach from the decoder's
    // output so callers get a precise error rather than a downstream map
    // failure. (`Grey → Grey` / `Rgb → Rgb` / etc. are identity pairs and
    // are always valid.)
    if decoded_fmt != dest_fmt
        && !crate::cpu::CPUProcessor::support_conversion_pf(decoded_fmt, dest_fmt)
    {
        return Err(Error::NotSupported(format!(
            "load_png: cannot convert decoder output {decoded_fmt:?} to {dest_fmt:?}"
        )));
    }

    // Decode into a Mem staging buffer in the decoder's native format. For
    // LumaA we allocate an extra byte-pair-per-pixel buffer since our Tensor
    // API only knows 1-channel (Grey); after decode we compact to Grey.
    let staging = if strip_luma_alpha {
        // LumaA is 2 bytes per pixel in the raw decode; allocate a flat
        // Tensor large enough to hold it, then compact to Grey in place.
        let raw = Tensor::<u8>::new(&[height, width, 2], Some(TensorMemory::Mem), None)?;
        decoder.decode_into(&mut raw.map()?)?;
        let grey = Tensor::<u8>::image(width, height, PixelFormat::Grey, Some(TensorMemory::Mem))?;
        {
            let raw_map = raw.map()?;
            let mut grey_map = grey.map()?;
            let raw_bytes: &[u8] = &raw_map;
            let grey_bytes: &mut [u8] = &mut grey_map;
            // Keep the luma byte of each (luma, alpha) pair; drop the alpha.
            for (pair, out) in raw_bytes.chunks_exact(2).zip(grey_bytes.iter_mut()) {
                *out = pair[0];
            }
        }
        grey
    } else {
        let staging = Tensor::<u8>::image(width, height, decoded_fmt, Some(TensorMemory::Mem))?;
        decoder.decode_into(&mut staging.map()?)?;
        staging
    };

    // Optional CPU format conversion before the final memory placement.
    let packed = if decoded_fmt != dest_fmt {
        let mut tmp = Tensor::<u8>::image(width, height, dest_fmt, Some(TensorMemory::Mem))?;
        CPUProcessor::convert_format_pf(&staging, &mut tmp, decoded_fmt, dest_fmt)?;
        tmp
    } else {
        staging
    };

    // EXIF-oriented PNGs take the rotate/flip path, which also handles the
    // final memory placement.
    if (rotation, flip) != (Rotation::None, Flip::None) {
        return rotate_flip_to_dyn(&packed, dest_fmt, rotation, flip, memory);
    }

    // Final placement. When the caller wants DMA and the natural pitch
    // would be rejected by the GPU's DMA-BUF import (see
    // `padded_dma_pitch_for`), allocate a pitch-padded DMA tensor and
    // row-copy. Otherwise allocate in the requested memory domain and
    // linear-copy — or, when the caller asked for Mem, just return the
    // staging tensor directly.
    #[cfg(target_os = "linux")]
    if let Some(aligned_pitch) = padded_dma_pitch_for(dest_fmt, width, &memory) {
        let mut dma = Tensor::<u8>::image_with_stride(
            width,
            height,
            dest_fmt,
            aligned_pitch,
            Some(TensorMemory::Dma),
        )?;
        copy_packed_to_padded_dma(&packed, &mut dma)?;
        return Ok(TensorDyn::from(dma));
    }

    if matches!(memory, Some(TensorMemory::Mem)) {
        return Ok(TensorDyn::from(packed));
    }
    // DMA (default on Linux) or Shm with naturally-aligned pitch.
    let out = Tensor::<u8>::image(width, height, dest_fmt, memory)?;
    {
        let src_map = packed.map()?;
        let mut dst_map = out.map()?;
        let src_bytes: &[u8] = &src_map;
        let dst_bytes: &mut [u8] = &mut dst_map;
        dst_bytes.copy_from_slice(src_bytes);
    }
    Ok(TensorDyn::from(out))
}
2551
2552/// Load an image from raw bytes (JPEG or PNG) and return a [`TensorDyn`].
2553///
2554/// The optional `format` specifies the desired output pixel format (e.g.,
2555/// [`PixelFormat::Rgb`], [`PixelFormat::Rgba`]); if `None`, the native
2556/// format of the file is used (typically RGB for JPEG).
2557///
2558/// # Examples
2559/// ```rust
2560/// use edgefirst_image::load_image;
2561/// use edgefirst_tensor::PixelFormat;
2562/// # fn main() -> Result<(), edgefirst_image::Error> {
2563/// let jpeg = include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/../../testdata/zidane.jpg"));
2564/// let img = load_image(jpeg, Some(PixelFormat::Rgb), None)?;
2565/// assert_eq!(img.width(), Some(1280));
2566/// assert_eq!(img.height(), Some(720));
2567/// # Ok(())
2568/// # }
2569/// ```
2570pub fn load_image(
2571    image: &[u8],
2572    format: Option<PixelFormat>,
2573    memory: Option<TensorMemory>,
2574) -> Result<TensorDyn> {
2575    if let Ok(i) = load_jpeg(image, format, memory) {
2576        return Ok(i);
2577    }
2578    if let Ok(i) = load_png(image, format, memory) {
2579        return Ok(i);
2580    }
2581    Err(Error::NotSupported(
2582        "Could not decode as jpeg or png".to_string(),
2583    ))
2584}
2585
2586/// Save a [`TensorDyn`] image as a JPEG file.
2587///
2588/// Only packed RGB and RGBA formats are supported.
2589pub fn save_jpeg(tensor: &TensorDyn, path: impl AsRef<std::path::Path>, quality: u8) -> Result<()> {
2590    let t = tensor.as_u8().ok_or(Error::UnsupportedFormat(
2591        "save_jpeg requires u8 tensor".to_string(),
2592    ))?;
2593    let fmt = t.format().ok_or(Error::NotAnImage)?;
2594    if fmt.layout() != PixelLayout::Packed {
2595        return Err(Error::NotImplemented(
2596            "Saving planar images is not supported".to_string(),
2597        ));
2598    }
2599
2600    let colour = match fmt {
2601        PixelFormat::Rgb => jpeg_encoder::ColorType::Rgb,
2602        PixelFormat::Rgba => jpeg_encoder::ColorType::Rgba,
2603        _ => {
2604            return Err(Error::NotImplemented(
2605                "Unsupported image format for saving".to_string(),
2606            ));
2607        }
2608    };
2609
2610    let w = t.width().ok_or(Error::NotAnImage)?;
2611    let h = t.height().ok_or(Error::NotAnImage)?;
2612    let encoder = jpeg_encoder::Encoder::new_file(path, quality)?;
2613    let tensor_map = t.map()?;
2614
2615    encoder.encode(&tensor_map, w as u16, h as u16, colour)?;
2616
2617    Ok(())
2618}
2619
2620pub(crate) struct FunctionTimer<T: Display> {
2621    name: T,
2622    start: std::time::Instant,
2623}
2624
2625impl<T: Display> FunctionTimer<T> {
2626    pub fn new(name: T) -> Self {
2627        Self {
2628            name,
2629            start: std::time::Instant::now(),
2630        }
2631    }
2632}
2633
2634impl<T: Display> Drop for FunctionTimer<T> {
2635    fn drop(&mut self) {
2636        log::trace!("{} elapsed: {:?}", self.name, self.start.elapsed())
2637    }
2638}
2639
/// Default overlay palette: 20 RGBA colors as normalized `[0, 1]` floats,
/// each with 0.7 alpha. Scaled to `u8` at compile time by `denorm` to
/// produce `DEFAULT_COLORS_U8`.
const DEFAULT_COLORS: [[f32; 4]; 20] = [
    [0., 1., 0., 0.7],
    [1., 0.5568628, 0., 0.7],
    [0.25882353, 0.15294118, 0.13333333, 0.7],
    [0.8, 0.7647059, 0.78039216, 0.7],
    [0.3137255, 0.3137255, 0.3137255, 0.7],
    [0.1411765, 0.3098039, 0.1215686, 0.7],
    [1., 0.95686275, 0.5137255, 0.7],
    [0.3529412, 0.32156863, 0., 0.7],
    [0.4235294, 0.6235294, 0.6509804, 0.7],
    [0.5098039, 0.5098039, 0.7294118, 0.7],
    [0.00784314, 0.18823529, 0.29411765, 0.7],
    [0.0, 0.2706, 1.0, 0.7],
    [0.0, 0.0, 0.0, 0.7],
    [0.0, 0.5, 0.0, 0.7],
    [1.0, 0.0, 0.0, 0.7],
    [0.0, 0.0, 1.0, 0.7],
    [1.0, 0.5, 0.5, 0.7],
    [0.1333, 0.5451, 0.1333, 0.7],
    [0.1176, 0.4118, 0.8235, 0.7],
    [1., 1., 1., 0.7],
];
2662
2663const fn denorm<const M: usize, const N: usize>(a: [[f32; M]; N]) -> [[u8; M]; N] {
2664    let mut result = [[0; M]; N];
2665    let mut i = 0;
2666    while i < N {
2667        let mut j = 0;
2668        while j < M {
2669            result[i][j] = (a[i][j] * 255.0).round() as u8;
2670            j += 1;
2671        }
2672        i += 1;
2673    }
2674    result
2675}
2676
/// `DEFAULT_COLORS` with its components scaled to `u8` at compile time.
const DEFAULT_COLORS_U8: [[u8; 4]; 20] = denorm(DEFAULT_COLORS);
2678
#[cfg(test)]
#[cfg_attr(coverage_nightly, coverage(off))]
mod alignment_tests {
    use super::*;

    #[test]
    fn align_width_rgba8_common_widths() {
        // RGBA8 (bpp=4, lcm(64,4)=64, so width must round to multiple of 16 px).
        assert_eq!(align_width_for_gpu_pitch(640, 4), 640); // 2560 byte pitch — already aligned
        assert_eq!(align_width_for_gpu_pitch(1280, 4), 1280); // 5120
        assert_eq!(align_width_for_gpu_pitch(1920, 4), 1920); // 7680
        assert_eq!(align_width_for_gpu_pitch(3840, 4), 3840); // 15360
        // crowd.png case from the imx95 investigation:
        assert_eq!(align_width_for_gpu_pitch(3004, 4), 3008); // 12016 → 12032
        assert_eq!(align_width_for_gpu_pitch(3000, 4), 3008); // 12000 → 12032
        assert_eq!(align_width_for_gpu_pitch(17, 4), 32); // 68 → 128
        assert_eq!(align_width_for_gpu_pitch(1, 4), 16); // 4 → 64
    }

    #[test]
    fn align_width_rgb888_packed() {
        // RGB888 (bpp=3, lcm(64,3)=192, so width must round to multiple of 64 px).
        assert_eq!(align_width_for_gpu_pitch(64, 3), 64); // 192 byte pitch
        assert_eq!(align_width_for_gpu_pitch(640, 3), 640); // 1920
        assert_eq!(align_width_for_gpu_pitch(1, 3), 64); // 3 → 192
        assert_eq!(align_width_for_gpu_pitch(65, 3), 128); // 195 → 384
        // Verify the rounded width × bpp is a clean multiple of the LCM.
        for w in [3004usize, 1281, 100, 17] {
            let padded = align_width_for_gpu_pitch(w, 3);
            assert!(padded >= w);
            assert_eq!((padded * 3) % 64, 0);
            assert_eq!((padded * 3) % 3, 0);
        }
    }

    #[test]
    fn align_width_grey_u8() {
        // Grey (bpp=1, lcm(64,1)=64, so width must round to multiple of 64 px).
        assert_eq!(align_width_for_gpu_pitch(64, 1), 64);
        assert_eq!(align_width_for_gpu_pitch(640, 1), 640);
        assert_eq!(align_width_for_gpu_pitch(1, 1), 64);
        assert_eq!(align_width_for_gpu_pitch(65, 1), 128);
    }

    #[test]
    fn align_width_zero_inputs() {
        assert_eq!(align_width_for_gpu_pitch(0, 4), 0);
        assert_eq!(align_width_for_gpu_pitch(640, 0), 640);
    }

    #[test]
    fn align_width_never_returns_smaller_than_input() {
        // Spot-check the "returned width >= input width" contract across a
        // range of values that would previously have hit `width * bpp`
        // overflow paths.
        for &bpp in &[1usize, 2, 3, 4, 8] {
            for &w in &[
                1usize,
                17,
                64,
                65,
                100,
                1280,
                1281,
                1920,
                3004,
                3072,
                3840,
                usize::MAX / 8,
                usize::MAX / 4,
                usize::MAX / 2,
                usize::MAX - 1,
                usize::MAX,
            ] {
                let aligned = align_width_for_gpu_pitch(w, bpp);
                assert!(
                    aligned >= w,
                    "align_width_for_gpu_pitch({w}, {bpp}) = {aligned} < {w}"
                );
            }
        }
    }

    #[test]
    fn align_width_overflow_returns_unaligned_not_smaller() {
        // For width values close to usize::MAX, padding up would wrap. The
        // function must return the original width rather than wrapping or
        // panicking. A pre-aligned width round-trips unchanged even at the
        // extreme.
        let aligned_extreme = usize::MAX - 15; // 16-pixel boundary for RGBA8
        assert_eq!(
            align_width_for_gpu_pitch(aligned_extreme, 4),
            aligned_extreme
        );
        // A misaligned extreme value cannot be rounded up — the function
        // returns the original. (The previous `result == x || result >= x`
        // disjunction was redundant: `==` implies `>=`.)
        let misaligned_extreme = usize::MAX - 1;
        let result = align_width_for_gpu_pitch(misaligned_extreme, 4);
        assert!(
            result >= misaligned_extreme,
            "extreme misaligned width must not be rounded down to {result}"
        );
    }

    #[test]
    fn checked_lcm_basic_and_overflow() {
        assert_eq!(checked_num_integer_lcm(64, 4), Some(64));
        assert_eq!(checked_num_integer_lcm(64, 3), Some(192));
        assert_eq!(checked_num_integer_lcm(64, 1), Some(64));
        assert_eq!(checked_num_integer_lcm(0, 4), Some(0));
        assert_eq!(checked_num_integer_lcm(64, 0), Some(0));
        // Coprime values whose product exceeds usize::MAX must return None.
        assert_eq!(
            checked_num_integer_lcm(usize::MAX, usize::MAX - 1),
            None,
            "coprime extreme values must overflow detect, not panic"
        );
    }

    #[test]
    fn primary_plane_bpp_known_formats() {
        // Packed formats use channels × elem_size.
        assert_eq!(primary_plane_bpp(PixelFormat::Rgba, 1), Some(4));
        assert_eq!(primary_plane_bpp(PixelFormat::Bgra, 1), Some(4));
        assert_eq!(primary_plane_bpp(PixelFormat::Rgb, 1), Some(3));
        assert_eq!(primary_plane_bpp(PixelFormat::Grey, 1), Some(1));
        // Semi-planar (NV12) reports the luma plane's bpp.
        assert_eq!(primary_plane_bpp(PixelFormat::Nv12, 1), Some(1));
    }
}
2809
2810#[cfg(test)]
2811#[cfg_attr(coverage_nightly, coverage(off))]
2812mod image_tests {
2813    use super::*;
2814    use crate::{CPUProcessor, Rotation};
2815    #[cfg(target_os = "linux")]
2816    use edgefirst_tensor::is_dma_available;
2817    use edgefirst_tensor::{TensorMapTrait, TensorMemory, TensorTrait};
2818    use image::buffer::ConvertBuffer;
2819
2820    /// Test helper: call `ImageProcessorTrait::convert()` on two `TensorDyn`s
2821    /// by going through the `TensorDyn` API.
2822    ///
2823    /// Returns the `(src_image, dst_image)` reconstructed from the TensorDyn
2824    /// round-trip so the caller can feed them to `compare_images` etc.
2825    fn convert_img(
2826        proc: &mut dyn ImageProcessorTrait,
2827        src: TensorDyn,
2828        dst: TensorDyn,
2829        rotation: Rotation,
2830        flip: Flip,
2831        crop: Crop,
2832    ) -> (Result<()>, TensorDyn, TensorDyn) {
2833        let src_fourcc = src.format().unwrap();
2834        let dst_fourcc = dst.format().unwrap();
2835        let src_dyn = src;
2836        let mut dst_dyn = dst;
2837        let result = proc.convert(&src_dyn, &mut dst_dyn, rotation, flip, crop);
2838        let src_back = {
2839            let mut __t = src_dyn.into_u8().unwrap();
2840            __t.set_format(src_fourcc).unwrap();
2841            TensorDyn::from(__t)
2842        };
2843        let dst_back = {
2844            let mut __t = dst_dyn.into_u8().unwrap();
2845            __t.set_format(dst_fourcc).unwrap();
2846            TensorDyn::from(__t)
2847        };
2848        (result, src_back, dst_back)
2849    }
2850
    /// Initialize logging once for the whole test binary (runs before main
    /// via `ctor`); filter defaults to `info` unless overridden by the
    /// environment.
    #[ctor::ctor]
    fn init() {
        env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
    }
2855
    /// Expands to the name of the enclosing function (module path stripped),
    /// derived from `type_name` of a nested item inside that function.
    macro_rules! function {
        () => {{
            fn f() {}
            fn type_name_of<T>(_: T) -> &'static str {
                std::any::type_name::<T>()
            }
            // `name` is ".../enclosing_fn::f"; trim the trailing "::f" and
            // everything before the last path separator.
            let name = type_name_of(f);

            // Find and cut the rest of the path
            match &name[..name.len() - 3].rfind(':') {
                Some(pos) => &name[pos + 1..name.len() - 3],
                None => &name[..name.len() - 3],
            }
        }};
    }
2871
    /// Exercises `Crop::check_crop_dyn` error classification: dest+src both
    /// invalid, dest-only invalid, src-only invalid, then a valid crop.
    #[test]
    fn test_invalid_crop() {
        let src = TensorDyn::image(100, 100, PixelFormat::Rgb, DType::U8, None).unwrap();
        let dst = TensorDyn::image(100, 100, PixelFormat::Rgb, DType::U8, None).unwrap();

        // Src rect exceeds the 100×100 source AND dst rect exceeds the dest.
        let crop = Crop::new()
            .with_src_rect(Some(Rect::new(50, 50, 60, 60)))
            .with_dst_rect(Some(Rect::new(0, 0, 150, 150)));

        let result = crop.check_crop_dyn(&src, &dst);
        assert!(matches!(
            result,
            Err(Error::CropInvalid(e)) if e.starts_with("Dest and Src crop invalid")
        ));

        // Fix the src rect; only the dest rect remains out of bounds.
        let crop = crop.with_src_rect(Some(Rect::new(0, 0, 10, 10)));
        let result = crop.check_crop_dyn(&src, &dst);
        assert!(matches!(
            result,
            Err(Error::CropInvalid(e)) if e.starts_with("Dest crop invalid")
        ));

        // Fix the dest rect; only the src rect remains out of bounds.
        let crop = crop
            .with_src_rect(Some(Rect::new(50, 50, 60, 60)))
            .with_dst_rect(Some(Rect::new(0, 0, 50, 50)));
        let result = crop.check_crop_dyn(&src, &dst);
        assert!(matches!(
            result,
            Err(Error::CropInvalid(e)) if e.starts_with("Src crop invalid")
        ));

        // 50×50 rect at (50, 50) fits exactly inside a 100×100 image.
        let crop = crop.with_src_rect(Some(Rect::new(50, 50, 50, 50)));

        let result = crop.check_crop_dyn(&src, &dst);
        assert!(result.is_ok());
    }
2908
    /// `Tensor::set_format` must reject tensors whose shape cannot carry the
    /// requested pixel format.
    #[test]
    fn test_invalid_tensor_format() -> Result<(), Error> {
        // 4D tensor cannot be set to a 3-channel pixel format
        let mut tensor = Tensor::<u8>::new(&[720, 1280, 4, 1], None, None)?;
        let result = tensor.set_format(PixelFormat::Rgb);
        assert!(result.is_err(), "4D tensor should reject set_format");

        // Tensor with wrong channel count for the format
        let mut tensor = Tensor::<u8>::new(&[720, 1280, 4], None, None)?;
        let result = tensor.set_format(PixelFormat::Rgb);
        assert!(result.is_err(), "4-channel tensor should reject RGB format");

        Ok(())
    }
2923
2924    #[test]
2925    fn test_invalid_image_file() -> Result<(), Error> {
2926        let result = crate::load_image(&[123; 5000], None, None);
2927        assert!(matches!(
2928            result,
2929            Err(Error::NotSupported(e)) if e == "Could not decode as jpeg or png"));
2930
2931        Ok(())
2932    }
2933
2934    #[test]
2935    fn test_invalid_jpeg_format() -> Result<(), Error> {
2936        let result = crate::load_image(&[123; 5000], Some(PixelFormat::Yuyv), None);
2937        assert!(matches!(
2938            result,
2939            Err(Error::NotSupported(e)) if e == "Could not decode as jpeg or png"));
2940
2941        Ok(())
2942    }
2943
2944    #[test]
2945    fn test_load_resize_save() {
2946        let file = include_bytes!(concat!(
2947            env!("CARGO_MANIFEST_DIR"),
2948            "/../../testdata/zidane.jpg"
2949        ));
2950        let img = crate::load_image(file, Some(PixelFormat::Rgba), None).unwrap();
2951        assert_eq!(img.width(), Some(1280));
2952        assert_eq!(img.height(), Some(720));
2953
2954        let dst = TensorDyn::image(640, 360, PixelFormat::Rgba, DType::U8, None).unwrap();
2955        let mut converter = CPUProcessor::new();
2956        let (result, _img, dst) = convert_img(
2957            &mut converter,
2958            img,
2959            dst,
2960            Rotation::None,
2961            Flip::None,
2962            Crop::no_crop(),
2963        );
2964        result.unwrap();
2965        assert_eq!(dst.width(), Some(640));
2966        assert_eq!(dst.height(), Some(360));
2967
2968        crate::save_jpeg(&dst, "zidane_resized.jpg", 80).unwrap();
2969
2970        let file = std::fs::read("zidane_resized.jpg").unwrap();
2971        let img = crate::load_image(&file, None, None).unwrap();
2972        assert_eq!(img.width(), Some(640));
2973        assert_eq!(img.height(), Some(360));
2974        assert_eq!(img.format().unwrap(), PixelFormat::Rgb);
2975    }
2976
2977    #[test]
2978    fn test_from_tensor_planar() -> Result<(), Error> {
2979        let mut tensor = Tensor::new(&[3, 720, 1280], None, None)?;
2980        tensor.map()?.copy_from_slice(include_bytes!(concat!(
2981            env!("CARGO_MANIFEST_DIR"),
2982            "/../../testdata/camera720p.8bps"
2983        )));
2984        let planar = {
2985            tensor
2986                .set_format(PixelFormat::PlanarRgb)
2987                .map_err(|e| crate::Error::Internal(e.to_string()))?;
2988            TensorDyn::from(tensor)
2989        };
2990
2991        let rbga = load_bytes_to_tensor(
2992            1280,
2993            720,
2994            PixelFormat::Rgba,
2995            None,
2996            include_bytes!(concat!(
2997                env!("CARGO_MANIFEST_DIR"),
2998                "/../../testdata/camera720p.rgba"
2999            )),
3000        )?;
3001        compare_images_convert_to_rgb(&planar, &rbga, 0.98, function!());
3002
3003        Ok(())
3004    }
3005
3006    #[test]
3007    fn test_from_tensor_invalid_format() {
3008        // PixelFormat::from_fourcc_str returns None for unknown FourCC codes.
3009        // Since there's no "TEST" pixel format, this validates graceful handling.
3010        assert!(PixelFormat::from_fourcc(u32::from_le_bytes(*b"TEST")).is_none());
3011    }
3012
3013    #[test]
3014    #[should_panic(expected = "Failed to save planar RGB image")]
3015    fn test_save_planar() {
3016        let planar_img = load_bytes_to_tensor(
3017            1280,
3018            720,
3019            PixelFormat::PlanarRgb,
3020            None,
3021            include_bytes!(concat!(
3022                env!("CARGO_MANIFEST_DIR"),
3023                "/../../testdata/camera720p.8bps"
3024            )),
3025        )
3026        .unwrap();
3027
3028        let save_path = "/tmp/planar_rgb.jpg";
3029        crate::save_jpeg(&planar_img, save_path, 90).expect("Failed to save planar RGB image");
3030    }
3031
3032    #[test]
3033    #[should_panic(expected = "Failed to save YUYV image")]
3034    fn test_save_yuyv() {
3035        let planar_img = load_bytes_to_tensor(
3036            1280,
3037            720,
3038            PixelFormat::Yuyv,
3039            None,
3040            include_bytes!(concat!(
3041                env!("CARGO_MANIFEST_DIR"),
3042                "/../../testdata/camera720p.yuyv"
3043            )),
3044        )
3045        .unwrap();
3046
3047        let save_path = "/tmp/yuyv.jpg";
3048        crate::save_jpeg(&planar_img, save_path, 90).expect("Failed to save YUYV image");
3049    }
3050
3051    #[test]
3052    fn test_rotation_angle() {
3053        assert_eq!(Rotation::from_degrees_clockwise(0), Rotation::None);
3054        assert_eq!(Rotation::from_degrees_clockwise(90), Rotation::Clockwise90);
3055        assert_eq!(Rotation::from_degrees_clockwise(180), Rotation::Rotate180);
3056        assert_eq!(
3057            Rotation::from_degrees_clockwise(270),
3058            Rotation::CounterClockwise90
3059        );
3060        assert_eq!(Rotation::from_degrees_clockwise(360), Rotation::None);
3061        assert_eq!(Rotation::from_degrees_clockwise(450), Rotation::Clockwise90);
3062        assert_eq!(Rotation::from_degrees_clockwise(540), Rotation::Rotate180);
3063        assert_eq!(
3064            Rotation::from_degrees_clockwise(630),
3065            Rotation::CounterClockwise90
3066        );
3067    }
3068
    #[test]
    #[should_panic(expected = "rotation angle is not a multiple of 90")]
    fn test_rotation_angle_panic() {
        // Angles that are not multiples of 90 degrees are a programmer error
        // and must panic rather than silently round to a valid rotation.
        Rotation::from_degrees_clockwise(361);
    }
3074
3075    #[test]
3076    fn test_disable_env_var() -> Result<(), Error> {
3077        // EDGEFIRST_FORCE_BACKEND takes precedence over EDGEFIRST_DISABLE_*,
3078        // so clear it for the duration of this test to avoid races with
3079        // test_force_backend_cpu running in parallel.
3080        let saved_force = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
3081        unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") };
3082
3083        #[cfg(target_os = "linux")]
3084        {
3085            let original = std::env::var("EDGEFIRST_DISABLE_G2D").ok();
3086            unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", "1") };
3087            let converter = ImageProcessor::new()?;
3088            match original {
3089                Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", s) },
3090                None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_G2D") },
3091            }
3092            assert!(converter.g2d.is_none());
3093        }
3094
3095        #[cfg(target_os = "linux")]
3096        #[cfg(feature = "opengl")]
3097        {
3098            let original = std::env::var("EDGEFIRST_DISABLE_GL").ok();
3099            unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", "1") };
3100            let converter = ImageProcessor::new()?;
3101            match original {
3102                Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", s) },
3103                None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_GL") },
3104            }
3105            assert!(converter.opengl.is_none());
3106        }
3107
3108        let original = std::env::var("EDGEFIRST_DISABLE_CPU").ok();
3109        unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", "1") };
3110        let converter = ImageProcessor::new()?;
3111        match original {
3112            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", s) },
3113            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_CPU") },
3114        }
3115        assert!(converter.cpu.is_none());
3116
3117        let original_cpu = std::env::var("EDGEFIRST_DISABLE_CPU").ok();
3118        unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", "1") };
3119        let original_gl = std::env::var("EDGEFIRST_DISABLE_GL").ok();
3120        unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", "1") };
3121        let original_g2d = std::env::var("EDGEFIRST_DISABLE_G2D").ok();
3122        unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", "1") };
3123        let mut converter = ImageProcessor::new()?;
3124
3125        let src = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None)?;
3126        let dst = TensorDyn::image(640, 360, PixelFormat::Rgba, DType::U8, None)?;
3127        let (result, _src, _dst) = convert_img(
3128            &mut converter,
3129            src,
3130            dst,
3131            Rotation::None,
3132            Flip::None,
3133            Crop::no_crop(),
3134        );
3135        assert!(matches!(result, Err(Error::NoConverter)));
3136
3137        match original_cpu {
3138            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", s) },
3139            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_CPU") },
3140        }
3141        match original_gl {
3142            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", s) },
3143            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_GL") },
3144        }
3145        match original_g2d {
3146            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", s) },
3147            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_G2D") },
3148        }
3149        match saved_force {
3150            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
3151            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
3152        }
3153
3154        Ok(())
3155    }
3156
3157    #[test]
3158    fn test_unsupported_conversion() {
3159        let src = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
3160        let dst = TensorDyn::image(640, 360, PixelFormat::Nv12, DType::U8, None).unwrap();
3161        let mut converter = ImageProcessor::new().unwrap();
3162        let (result, _src, _dst) = convert_img(
3163            &mut converter,
3164            src,
3165            dst,
3166            Rotation::None,
3167            Flip::None,
3168            Crop::no_crop(),
3169        );
3170        log::debug!("result: {:?}", result);
3171        assert!(matches!(
3172            result,
3173            Err(Error::NotSupported(e)) if e.starts_with("Conversion from NV12 to NV12")
3174        ));
3175    }
3176
3177    #[test]
3178    fn test_load_grey() {
3179        let grey_img = crate::load_image(
3180            include_bytes!(concat!(
3181                env!("CARGO_MANIFEST_DIR"),
3182                "/../../testdata/grey.jpg"
3183            )),
3184            Some(PixelFormat::Rgba),
3185            None,
3186        )
3187        .unwrap();
3188
3189        let grey_but_rgb_img = crate::load_image(
3190            include_bytes!(concat!(
3191                env!("CARGO_MANIFEST_DIR"),
3192                "/../../testdata/grey-rgb.jpg"
3193            )),
3194            Some(PixelFormat::Rgba),
3195            None,
3196        )
3197        .unwrap();
3198
3199        compare_images(&grey_img, &grey_but_rgb_img, 0.99, function!());
3200    }
3201
3202    #[test]
3203    fn test_new_nv12() {
3204        let nv12 = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
3205        assert_eq!(nv12.height(), Some(720));
3206        assert_eq!(nv12.width(), Some(1280));
3207        assert_eq!(nv12.format().unwrap(), PixelFormat::Nv12);
3208        // PixelFormat::Nv12.channels() returns 1 (luma plane channel count)
3209        assert_eq!(nv12.format().unwrap().channels(), 1);
3210        assert!(nv12.format().is_some_and(
3211            |f| f.layout() == PixelLayout::Planar || f.layout() == PixelLayout::SemiPlanar
3212        ))
3213    }
3214
3215    #[test]
3216    #[cfg(target_os = "linux")]
3217    fn test_new_image_converter() {
3218        let dst_width = 640;
3219        let dst_height = 360;
3220        let file = include_bytes!(concat!(
3221            env!("CARGO_MANIFEST_DIR"),
3222            "/../../testdata/zidane.jpg"
3223        ))
3224        .to_vec();
3225        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
3226
3227        let mut converter = ImageProcessor::new().unwrap();
3228        let converter_dst = converter
3229            .create_image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None)
3230            .unwrap();
3231        let (result, src, converter_dst) = convert_img(
3232            &mut converter,
3233            src,
3234            converter_dst,
3235            Rotation::None,
3236            Flip::None,
3237            Crop::no_crop(),
3238        );
3239        result.unwrap();
3240
3241        let cpu_dst =
3242            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3243        let mut cpu_converter = CPUProcessor::new();
3244        let (result, _src, cpu_dst) = convert_img(
3245            &mut cpu_converter,
3246            src,
3247            cpu_dst,
3248            Rotation::None,
3249            Flip::None,
3250            Crop::no_crop(),
3251        );
3252        result.unwrap();
3253
3254        compare_images(&converter_dst, &cpu_dst, 0.98, function!());
3255    }
3256
3257    #[test]
3258    #[cfg(target_os = "linux")]
3259    fn test_create_image_dtype_i8() {
3260        let mut converter = ImageProcessor::new().unwrap();
3261
3262        // I8 image should allocate successfully via create_image
3263        let dst = converter
3264            .create_image(320, 240, PixelFormat::Rgb, DType::I8, None)
3265            .unwrap();
3266        assert_eq!(dst.dtype(), DType::I8);
3267        assert!(dst.width() == Some(320));
3268        assert!(dst.height() == Some(240));
3269        assert_eq!(dst.format(), Some(PixelFormat::Rgb));
3270
3271        // U8 for comparison
3272        let dst_u8 = converter
3273            .create_image(320, 240, PixelFormat::Rgb, DType::U8, None)
3274            .unwrap();
3275        assert_eq!(dst_u8.dtype(), DType::U8);
3276
3277        // Convert into I8 dst should succeed
3278        let file = include_bytes!(concat!(
3279            env!("CARGO_MANIFEST_DIR"),
3280            "/../../testdata/zidane.jpg"
3281        ))
3282        .to_vec();
3283        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
3284        let mut dst_i8 = converter
3285            .create_image(320, 240, PixelFormat::Rgb, DType::I8, None)
3286            .unwrap();
3287        converter
3288            .convert(
3289                &src,
3290                &mut dst_i8,
3291                Rotation::None,
3292                Flip::None,
3293                Crop::no_crop(),
3294            )
3295            .unwrap();
3296    }
3297
3298    #[test]
3299    #[cfg(target_os = "linux")]
3300    fn test_create_image_nv12_dma_non_aligned_width() {
3301        // Regression for C2: create_image must not apply stride padding to
3302        // non-packed formats. NV12 is semi-planar (PixelLayout::SemiPlanar),
3303        // so the try_dma path should fall through to the plain
3304        // TensorDyn::image allocation for any width, regardless of the
3305        // 64-byte GPU pitch alignment.
3306        let converter = ImageProcessor::new().unwrap();
3307
3308        // 100 is intentionally not a multiple of 64 (the Mali pitch
3309        // alignment) to prove that non-packed layouts do not take the
3310        // stride-padded branch.
3311        let result = converter.create_image(
3312            100,
3313            64,
3314            PixelFormat::Nv12,
3315            DType::U8,
3316            Some(TensorMemory::Dma),
3317        );
3318
3319        match result {
3320            Ok(img) => {
3321                assert_eq!(img.width(), Some(100));
3322                assert_eq!(img.height(), Some(64));
3323                assert_eq!(img.format(), Some(PixelFormat::Nv12));
3324                // Non-packed formats must never carry a row_stride override.
3325                assert!(
3326                    img.row_stride().is_none(),
3327                    "NV12 must not be stride-padded by create_image",
3328                );
3329            }
3330            Err(e) => {
3331                // Accept skip on hosts without a dma-heap, but never the
3332                // "NotImplemented" we used to return for non-packed layouts.
3333                let msg = format!("{e}");
3334                assert!(
3335                    !msg.contains("image_with_stride"),
3336                    "NV12 should not hit the stride-padded path: {msg}",
3337                );
3338            }
3339        }
3340    }
3341
3342    #[test]
3343    #[ignore] // Hangs on desktop platforms where DMA-buf is unavailable and PBO
3344              // fallback triggers a GPU driver hang during SHM→texture upload (e.g.,
3345              // NVIDIA without /dev/dma_heap permissions). Works on embedded targets.
3346    fn test_crop_skip() {
3347        let file = include_bytes!(concat!(
3348            env!("CARGO_MANIFEST_DIR"),
3349            "/../../testdata/zidane.jpg"
3350        ))
3351        .to_vec();
3352        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
3353
3354        let mut converter = ImageProcessor::new().unwrap();
3355        let converter_dst = converter
3356            .create_image(1280, 720, PixelFormat::Rgba, DType::U8, None)
3357            .unwrap();
3358        let crop = Crop::new()
3359            .with_src_rect(Some(Rect::new(0, 0, 640, 640)))
3360            .with_dst_rect(Some(Rect::new(0, 0, 640, 640)));
3361        let (result, src, converter_dst) = convert_img(
3362            &mut converter,
3363            src,
3364            converter_dst,
3365            Rotation::None,
3366            Flip::None,
3367            crop,
3368        );
3369        result.unwrap();
3370
3371        let cpu_dst = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
3372        let mut cpu_converter = CPUProcessor::new();
3373        let (result, _src, cpu_dst) = convert_img(
3374            &mut cpu_converter,
3375            src,
3376            cpu_dst,
3377            Rotation::None,
3378            Flip::None,
3379            crop,
3380        );
3381        result.unwrap();
3382
3383        compare_images(&converter_dst, &cpu_dst, 0.99999, function!());
3384    }
3385
3386    #[test]
3387    fn test_invalid_pixel_format() {
3388        // PixelFormat::from_fourcc returns None for unknown formats,
3389        // so TensorDyn::image cannot be called with an invalid format.
3390        assert!(PixelFormat::from_fourcc(u32::from_le_bytes(*b"TEST")).is_none());
3391    }
3392
    // Cached result of probing for the G2D library (Linux/i.MX8 only): the
    // probe constructs a G2DProcessor once and remembers whether it worked.
    #[cfg(target_os = "linux")]
    static G2D_AVAILABLE: std::sync::OnceLock<bool> = std::sync::OnceLock::new();

    // Helper function to check if the G2D library is available; the first
    // call runs the probe, subsequent calls return the cached result.
    #[cfg(target_os = "linux")]
    fn is_g2d_available() -> bool {
        *G2D_AVAILABLE.get_or_init(|| G2DProcessor::new().is_ok())
    }
3401
3402    #[cfg(target_os = "linux")]
3403    #[cfg(feature = "opengl")]
3404    static GL_AVAILABLE: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
3405
3406    #[cfg(target_os = "linux")]
3407    #[cfg(feature = "opengl")]
3408    // Helper function to check if OpenGL is available
3409    fn is_opengl_available() -> bool {
3410        #[cfg(all(target_os = "linux", feature = "opengl"))]
3411        {
3412            *GL_AVAILABLE.get_or_init(|| GLProcessorThreaded::new(None).is_ok())
3413        }
3414
3415        #[cfg(not(all(target_os = "linux", feature = "opengl")))]
3416        {
3417            false
3418        }
3419    }
3420
3421    #[test]
3422    fn test_load_jpeg_with_exif() {
3423        let file = include_bytes!(concat!(
3424            env!("CARGO_MANIFEST_DIR"),
3425            "/../../testdata/zidane_rotated_exif.jpg"
3426        ))
3427        .to_vec();
3428        let loaded = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
3429
3430        assert_eq!(loaded.height(), Some(1280));
3431        assert_eq!(loaded.width(), Some(720));
3432
3433        let file = include_bytes!(concat!(
3434            env!("CARGO_MANIFEST_DIR"),
3435            "/../../testdata/zidane.jpg"
3436        ))
3437        .to_vec();
3438        let cpu_src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
3439
3440        let (dst_width, dst_height) = (cpu_src.height().unwrap(), cpu_src.width().unwrap());
3441
3442        let cpu_dst =
3443            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3444        let mut cpu_converter = CPUProcessor::new();
3445
3446        let (result, _cpu_src, cpu_dst) = convert_img(
3447            &mut cpu_converter,
3448            cpu_src,
3449            cpu_dst,
3450            Rotation::Clockwise90,
3451            Flip::None,
3452            Crop::no_crop(),
3453        );
3454        result.unwrap();
3455
3456        compare_images(&loaded, &cpu_dst, 0.98, function!());
3457    }
3458
3459    #[test]
3460    fn test_load_png_with_exif() {
3461        let file = include_bytes!(concat!(
3462            env!("CARGO_MANIFEST_DIR"),
3463            "/../../testdata/zidane_rotated_exif_180.png"
3464        ))
3465        .to_vec();
3466        let loaded = crate::load_png(&file, Some(PixelFormat::Rgba), None).unwrap();
3467
3468        assert_eq!(loaded.height(), Some(720));
3469        assert_eq!(loaded.width(), Some(1280));
3470
3471        let file = include_bytes!(concat!(
3472            env!("CARGO_MANIFEST_DIR"),
3473            "/../../testdata/zidane.jpg"
3474        ))
3475        .to_vec();
3476        let cpu_src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
3477
3478        let cpu_dst = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
3479        let mut cpu_converter = CPUProcessor::new();
3480
3481        let (result, _cpu_src, cpu_dst) = convert_img(
3482            &mut cpu_converter,
3483            cpu_src,
3484            cpu_dst,
3485            Rotation::Rotate180,
3486            Flip::None,
3487            Crop::no_crop(),
3488        );
3489        result.unwrap();
3490
3491        compare_images(&loaded, &cpu_dst, 0.98, function!());
3492    }
3493
3494    /// Synthesise an RGB JPEG with a deterministic pattern at `(width, height)`
3495    /// using the workspace's `jpeg-encoder` crate (the `image` crate is
3496    /// compiled without its JPEG feature). Used to exercise the decoder /
3497    /// pitch-padding paths for arbitrary dimensions without having to bundle
3498    /// a fixture file per test size.
3499    #[cfg(target_os = "linux")]
3500    fn make_rgb_jpeg(width: u32, height: u32) -> Vec<u8> {
3501        let mut bytes = Vec::with_capacity((width * height * 3) as usize);
3502        for y in 0..height {
3503            for x in 0..width {
3504                bytes.push(((x + y) & 0xFF) as u8);
3505                bytes.push(((x.wrapping_mul(3)) & 0xFF) as u8);
3506                bytes.push(((y.wrapping_mul(5)) & 0xFF) as u8);
3507            }
3508        }
3509        let mut out = Vec::new();
3510        let encoder = jpeg_encoder::Encoder::new(&mut out, 85);
3511        encoder
3512            .encode(
3513                &bytes,
3514                width as u16,
3515                height as u16,
3516                jpeg_encoder::ColorType::Rgb,
3517            )
3518            .expect("jpeg-encoder must succeed on trivial input");
3519        out
3520    }
3521
    /// End-to-end: a 375×333 RGBA JPEG (width NOT divisible by 4) loaded
    /// via the pitch-padded DMA path and letterboxed through the GL
    /// backend must produce correct output. Before the Rgba/Bgra
    /// width%4 relaxation in `DmaImportAttrs::from_tensor`, this case
    /// failed the pre-check and forced a CPU texture upload fallback;
    /// with the relaxation, EGL import succeeds at the driver level and
    /// the GL fast path runs. Output correctness is checked against a
    /// CPU reference built below with
    /// `ImageProcessor::with_config(backend = ComputeBackend::Cpu)`.
    #[test]
    #[cfg(target_os = "linux")]
    #[cfg(feature = "opengl")]
    fn test_convert_rgba_non_4_aligned_width_end_to_end() {
        use edgefirst_tensor::is_dma_available;
        // Without DMA the pitch-padded path under test doesn't exist; skip.
        if !is_dma_available() {
            eprintln!(
                "SKIPPED: test_convert_rgba_non_4_aligned_width_end_to_end — DMA not available"
            );
            return;
        }
        // 375 is the canonical failure width from dataset loaders —
        // 375 * 4 = 1500 bytes/row, pitch-padded to 1536. Width%4 = 3,
        // so the old pre-check rejected it; new code accepts it.
        let jpeg = make_rgb_jpeg(375, 333);
        let src_gl = crate::load_jpeg(&jpeg, Some(PixelFormat::Rgba), None).unwrap();
        assert_eq!(src_gl.width(), Some(375));
        // Row stride must still be pitch-padded (separate concern from width).
        let stride = src_gl.row_stride().unwrap();
        assert_eq!(stride, 1536, "expected padded pitch 1536, got {stride}");

        // GL-backed convert into a pitch-aligned 640×640 Rgba dest.
        let mut gl_proc = ImageProcessor::new().unwrap();
        let gl_dst = gl_proc
            .create_image(640, 640, PixelFormat::Rgba, DType::U8, None)
            .unwrap();
        let (r_gl, _src_gl, gl_dst) = convert_img(
            &mut gl_proc,
            src_gl,
            gl_dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        r_gl.expect("GL-backed convert must succeed for 375x333 Rgba src");

        // CPU reference via a fresh load so the two paths start from
        // byte-identical inputs. `with_config(backend=Cpu)` forces the
        // CPU-only processor regardless of which backends the host has
        // available.
        let src_cpu =
            crate::load_jpeg(&jpeg, Some(PixelFormat::Rgba), Some(TensorMemory::Mem)).unwrap();
        let mut cpu_proc = ImageProcessor::with_config(ImageProcessorConfig {
            backend: ComputeBackend::Cpu,
            ..Default::default()
        })
        .unwrap();
        let cpu_dst = TensorDyn::image(
            640,
            640,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Mem),
        )
        .unwrap();
        let (r_cpu, _src_cpu, cpu_dst) = convert_img(
            &mut cpu_proc,
            src_cpu,
            cpu_dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        r_cpu.unwrap();

        // Structural similarity: the GL path may have gone through EGL
        // import OR fallen back to CPU texture upload — either way, the
        // output must match the CPU reference closely.
        compare_images(&gl_dst, &cpu_dst, 0.95, function!());
    }
3600
    /// Regression lock: loading a JPEG at a non-64-aligned RGBA pitch (e.g.
    /// 500×333 → natural pitch 2000, needs to be padded to 2048) must go
    /// through `image_with_stride` and set `row_stride()` / `effective_row_stride()`
    /// to the padded value. The earlier pitch-padding commit fixed this in
    /// `load_jpeg`; a regression would surface as `row_stride == None` or
    /// `effective_row_stride == 2000`.
    #[test]
    #[cfg(target_os = "linux")]
    fn test_load_jpeg_rgba_non_aligned_pitch_padded_dma() {
        use edgefirst_tensor::is_dma_available;
        // The padded path requires DMA allocation; skip on hosts without it.
        if !is_dma_available() {
            eprintln!(
                "SKIPPED: test_load_jpeg_rgba_non_aligned_pitch_padded_dma — DMA not available"
            );
            return;
        }
        // Widths that force a non-64-aligned natural RGBA pitch. All three
        // are divisible by 4 so the EGL width-alignment pre-check passes.
        // The pitch-padding fix is what makes these importable at all.
        for &w in &[500u32, 612, 428] {
            let jpeg = make_rgb_jpeg(w, 333);
            let loaded = crate::load_jpeg(&jpeg, Some(PixelFormat::Rgba), None).unwrap();
            // Natural pitch is width * 4 bytes (RGBA); alignment pads it up.
            let natural = (w as usize) * 4;
            let aligned = crate::align_pitch_bytes_to_gpu_alignment(natural).unwrap();
            assert!(
                aligned > natural,
                "test sanity: width {w} should be unaligned"
            );
            let stride = loaded
                .row_stride()
                .expect("padded DMA path must set an explicit row_stride — regression if None");
            assert_eq!(
                stride, aligned,
                "width {w}: expected padded stride {aligned}, got {stride} \
                 (regression: pitch-padding branch skipped?)"
            );
            let eff = loaded.effective_row_stride().unwrap();
            assert_eq!(
                eff, aligned,
                "effective_row_stride must match stored stride"
            );
            // Padding must not change the logical image dimensions.
            assert_eq!(loaded.width(), Some(w as usize));
            assert_eq!(loaded.height(), Some(333));
        }
    }
3646
    /// `padded_dma_pitch_for` must respect the caller's memory choice and
    /// must NOT route into the pitch-padded DMA path when the caller left
    /// the choice to the allocator (`None`) but DMA is unavailable on the
    /// host. The padded path requires `image_with_stride`, which always
    /// allocates DMA — taking it on a system without `/dev/dma_heap`
    /// would convert a normally-working image load into a hard failure
    /// (since `Tensor::image(..., None)` would have fallen back to
    /// SHM/Mem).
    #[test]
    #[cfg(target_os = "linux")]
    fn test_padded_dma_pitch_for_respects_memory_choice() {
        use edgefirst_tensor::{is_dma_available, TensorMemory};

        // 500×4 = 2000 → padded to 2048 by GPU alignment. Use it for
        // every case so any "no padding" answer is unambiguous.
        let unaligned_w = 500;

        // Caller asks for Mem / Shm: never pad, regardless of DMA.
        assert_eq!(
            crate::padded_dma_pitch_for(PixelFormat::Rgba, unaligned_w, &Some(TensorMemory::Mem),),
            None,
            "Mem must never trigger DMA padding"
        );
        assert_eq!(
            crate::padded_dma_pitch_for(PixelFormat::Rgba, unaligned_w, &Some(TensorMemory::Shm),),
            None,
            "Shm must never trigger DMA padding"
        );

        // Caller explicitly asks for DMA: always pad if width needs it.
        // Even if the runtime can't actually allocate DMA, the caller
        // owns that decision and the resulting allocation error is
        // their problem, not ours.
        assert_eq!(
            crate::padded_dma_pitch_for(PixelFormat::Rgba, unaligned_w, &Some(TensorMemory::Dma),),
            Some(2048),
            "explicit Dma must pad regardless of runtime DMA availability"
        );

        // Caller leaves it to the allocator: behaviour depends on
        // host-runtime DMA availability. This is the case the fix
        // guards against, so the expectation branches on the probe.
        let none_result = crate::padded_dma_pitch_for(PixelFormat::Rgba, unaligned_w, &None);
        if is_dma_available() {
            assert_eq!(
                none_result,
                Some(2048),
                "memory=None + DMA available → pad (will route through DMA)"
            );
        } else {
            assert_eq!(
                none_result, None,
                "memory=None + DMA unavailable → must NOT pad (would force \
                 image_with_stride into a DMA-only allocation that fails). \
                 Regression: padded_dma_pitch_for ignored is_dma_available()."
            );
        }
    }
3705
3706    // Synthesise a small greyscale PNG in memory at `(width, height)` with a
3707    // deterministic ramp pattern so multiple tests can cross-check output
3708    // without bundling an extra fixture file.
3709    fn make_grey_png(width: u32, height: u32) -> Vec<u8> {
3710        let mut bytes = Vec::with_capacity((width * height) as usize);
3711        for y in 0..height {
3712            for x in 0..width {
3713                bytes.push(((x + y) & 0xFF) as u8);
3714            }
3715        }
3716        let img = image::GrayImage::from_vec(width, height, bytes).unwrap();
3717        let mut buf = Vec::new();
3718        img.write_to(&mut std::io::Cursor::new(&mut buf), image::ImageFormat::Png)
3719            .unwrap();
3720        buf
3721    }
3722
3723    /// Greyscale PNG with a width that forces a pitch-misaligned natural
3724    /// row stride (612 bytes is not a multiple of the 64-byte GPU pitch
3725    /// alignment) must still load via the pitch-padded DMA path. Gated on
3726    /// DMA availability because `image_with_stride` is DMA-only.
3727    #[test]
3728    #[cfg(target_os = "linux")]
3729    fn test_load_png_grey_misaligned_width_dma() {
3730        use edgefirst_tensor::is_dma_available;
3731        if !is_dma_available() {
3732            eprintln!("SKIPPED: test_load_png_grey_misaligned_width_dma — DMA not available");
3733            return;
3734        }
3735        let png = make_grey_png(612, 388);
3736        let loaded = crate::load_png(&png, Some(PixelFormat::Grey), None).unwrap();
3737        assert_eq!(loaded.width(), Some(612));
3738        assert_eq!(loaded.height(), Some(388));
3739        assert_eq!(loaded.format(), Some(PixelFormat::Grey));
3740
3741        // Round-trip pixels — natural-pitch DMA-BUFs pad the stride so we
3742        // must indirect through row_stride() rather than assume width.
3743        let map = loaded.as_u8().unwrap().map().unwrap();
3744        let stride = loaded.row_stride().unwrap_or(612);
3745        assert!(stride >= 612);
3746        let bytes: &[u8] = &map;
3747        for y in 0..388usize {
3748            for x in 0..612usize {
3749                let expected = ((x + y) & 0xFF) as u8;
3750                let got = bytes[y * stride + x];
3751                assert_eq!(
3752                    got, expected,
3753                    "grey png mismatch at ({x},{y}): got {got} expected {expected}"
3754                );
3755            }
3756        }
3757    }
3758
3759    /// Greyscale PNG loaded with explicit Mem backing — runs on any
3760    /// platform (no DMA permission requirement) and covers the
3761    /// decoder-native Luma → Grey no-conversion path.
3762    #[test]
3763    fn test_load_png_grey_mem() {
3764        use edgefirst_tensor::TensorMemory;
3765        let png = make_grey_png(612, 100);
3766        let loaded =
3767            crate::load_png(&png, Some(PixelFormat::Grey), Some(TensorMemory::Mem)).unwrap();
3768        assert_eq!(loaded.width(), Some(612));
3769        assert_eq!(loaded.height(), Some(100));
3770        assert_eq!(loaded.format(), Some(PixelFormat::Grey));
3771        let map = loaded.as_u8().unwrap().map().unwrap();
3772        let bytes: &[u8] = &map;
3773        // Mem allocation uses the natural pitch — 612 bytes per row, exact.
3774        assert_eq!(bytes.len(), 612 * 100);
3775        for y in 0..100 {
3776            for x in 0..612 {
3777                assert_eq!(bytes[y * 612 + x], ((x + y) & 0xFF) as u8);
3778            }
3779        }
3780    }
3781
3782    /// Greyscale PNG decoded into RGB — exercises the decoder-colorspace
3783    /// mismatch path (Luma → Rgb via CPU converter). Uses Mem memory to
3784    /// stay portable to host-side test environments.
3785    #[test]
3786    fn test_load_png_grey_to_rgb_mem() {
3787        use edgefirst_tensor::TensorMemory;
3788        let png = make_grey_png(620, 240);
3789        let loaded =
3790            crate::load_png(&png, Some(PixelFormat::Rgb), Some(TensorMemory::Mem)).unwrap();
3791        assert_eq!(loaded.width(), Some(620));
3792        assert_eq!(loaded.height(), Some(240));
3793        assert_eq!(loaded.format(), Some(PixelFormat::Rgb));
3794
3795        // Greyscale promoted to RGB replicates luma into each channel.
3796        let map = loaded.as_u8().unwrap().map().unwrap();
3797        let bytes: &[u8] = &map;
3798        for (x, y) in [(0usize, 0usize), (100, 50), (619, 239)] {
3799            let expected = ((x + y) & 0xFF) as u8;
3800            let off = (y * 620 + x) * 3;
3801            assert_eq!(bytes[off], expected, "R@{x},{y}");
3802            assert_eq!(bytes[off + 1], expected, "G@{x},{y}");
3803            assert_eq!(bytes[off + 2], expected, "B@{x},{y}");
3804        }
3805    }
3806
3807    #[test]
3808    #[cfg(target_os = "linux")]
3809    fn test_g2d_resize() {
3810        if !is_g2d_available() {
3811            eprintln!("SKIPPED: test_g2d_resize - G2D library (libg2d.so.2) not available");
3812            return;
3813        }
3814        if !is_dma_available() {
3815            eprintln!(
3816                "SKIPPED: test_g2d_resize - DMA memory allocation not available (permission denied or no DMA-BUF support)"
3817            );
3818            return;
3819        }
3820
3821        let dst_width = 640;
3822        let dst_height = 360;
3823        let file = include_bytes!(concat!(
3824            env!("CARGO_MANIFEST_DIR"),
3825            "/../../testdata/zidane.jpg"
3826        ))
3827        .to_vec();
3828        let src =
3829            crate::load_image(&file, Some(PixelFormat::Rgba), Some(TensorMemory::Dma)).unwrap();
3830
3831        let g2d_dst = TensorDyn::image(
3832            dst_width,
3833            dst_height,
3834            PixelFormat::Rgba,
3835            DType::U8,
3836            Some(TensorMemory::Dma),
3837        )
3838        .unwrap();
3839        let mut g2d_converter = G2DProcessor::new().unwrap();
3840        let (result, src, g2d_dst) = convert_img(
3841            &mut g2d_converter,
3842            src,
3843            g2d_dst,
3844            Rotation::None,
3845            Flip::None,
3846            Crop::no_crop(),
3847        );
3848        result.unwrap();
3849
3850        let cpu_dst =
3851            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3852        let mut cpu_converter = CPUProcessor::new();
3853        let (result, _src, cpu_dst) = convert_img(
3854            &mut cpu_converter,
3855            src,
3856            cpu_dst,
3857            Rotation::None,
3858            Flip::None,
3859            Crop::no_crop(),
3860        );
3861        result.unwrap();
3862
3863        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
3864    }
3865
3866    #[test]
3867    #[cfg(target_os = "linux")]
3868    #[cfg(feature = "opengl")]
3869    fn test_opengl_resize() {
3870        if !is_opengl_available() {
3871            eprintln!("SKIPPED: {} - OpenGL not available", function!());
3872            return;
3873        }
3874
3875        let dst_width = 640;
3876        let dst_height = 360;
3877        let file = include_bytes!(concat!(
3878            env!("CARGO_MANIFEST_DIR"),
3879            "/../../testdata/zidane.jpg"
3880        ))
3881        .to_vec();
3882        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
3883
3884        let cpu_dst =
3885            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3886        let mut cpu_converter = CPUProcessor::new();
3887        let (result, src, cpu_dst) = convert_img(
3888            &mut cpu_converter,
3889            src,
3890            cpu_dst,
3891            Rotation::None,
3892            Flip::None,
3893            Crop::no_crop(),
3894        );
3895        result.unwrap();
3896
3897        let mut src = src;
3898        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
3899
3900        for _ in 0..5 {
3901            let gl_dst =
3902                TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None)
3903                    .unwrap();
3904            let (result, src_back, gl_dst) = convert_img(
3905                &mut gl_converter,
3906                src,
3907                gl_dst,
3908                Rotation::None,
3909                Flip::None,
3910                Crop::no_crop(),
3911            );
3912            result.unwrap();
3913            src = src_back;
3914
3915            compare_images(&gl_dst, &cpu_dst, 0.98, function!());
3916        }
3917    }
3918
3919    #[test]
3920    #[cfg(target_os = "linux")]
3921    #[cfg(feature = "opengl")]
3922    fn test_opengl_10_threads() {
3923        if !is_opengl_available() {
3924            eprintln!("SKIPPED: {} - OpenGL not available", function!());
3925            return;
3926        }
3927
3928        let handles: Vec<_> = (0..10)
3929            .map(|i| {
3930                std::thread::Builder::new()
3931                    .name(format!("Thread {i}"))
3932                    .spawn(test_opengl_resize)
3933                    .unwrap()
3934            })
3935            .collect();
3936        handles.into_iter().for_each(|h| {
3937            if let Err(e) = h.join() {
3938                std::panic::resume_unwind(e)
3939            }
3940        });
3941    }
3942
3943    #[test]
3944    #[cfg(target_os = "linux")]
3945    #[cfg(feature = "opengl")]
3946    fn test_opengl_grey() {
3947        if !is_opengl_available() {
3948            eprintln!("SKIPPED: {} - OpenGL not available", function!());
3949            return;
3950        }
3951
3952        let img = crate::load_image(
3953            include_bytes!(concat!(
3954                env!("CARGO_MANIFEST_DIR"),
3955                "/../../testdata/grey.jpg"
3956            )),
3957            Some(PixelFormat::Grey),
3958            None,
3959        )
3960        .unwrap();
3961
3962        let gl_dst = TensorDyn::image(640, 640, PixelFormat::Grey, DType::U8, None).unwrap();
3963        let cpu_dst = TensorDyn::image(640, 640, PixelFormat::Grey, DType::U8, None).unwrap();
3964
3965        let mut converter = CPUProcessor::new();
3966
3967        let (result, img, cpu_dst) = convert_img(
3968            &mut converter,
3969            img,
3970            cpu_dst,
3971            Rotation::None,
3972            Flip::None,
3973            Crop::no_crop(),
3974        );
3975        result.unwrap();
3976
3977        let mut gl = GLProcessorThreaded::new(None).unwrap();
3978        let (result, _img, gl_dst) = convert_img(
3979            &mut gl,
3980            img,
3981            gl_dst,
3982            Rotation::None,
3983            Flip::None,
3984            Crop::no_crop(),
3985        );
3986        result.unwrap();
3987
3988        compare_images(&gl_dst, &cpu_dst, 0.98, function!());
3989    }
3990
3991    #[test]
3992    #[cfg(target_os = "linux")]
3993    fn test_g2d_src_crop() {
3994        if !is_g2d_available() {
3995            eprintln!("SKIPPED: test_g2d_src_crop - G2D library (libg2d.so.2) not available");
3996            return;
3997        }
3998        if !is_dma_available() {
3999            eprintln!(
4000                "SKIPPED: test_g2d_src_crop - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4001            );
4002            return;
4003        }
4004
4005        let dst_width = 640;
4006        let dst_height = 640;
4007        let file = include_bytes!(concat!(
4008            env!("CARGO_MANIFEST_DIR"),
4009            "/../../testdata/zidane.jpg"
4010        ))
4011        .to_vec();
4012        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
4013
4014        let cpu_dst =
4015            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4016        let mut cpu_converter = CPUProcessor::new();
4017        let crop = Crop {
4018            src_rect: Some(Rect {
4019                left: 0,
4020                top: 0,
4021                width: 640,
4022                height: 360,
4023            }),
4024            dst_rect: None,
4025            dst_color: None,
4026        };
4027        let (result, src, cpu_dst) = convert_img(
4028            &mut cpu_converter,
4029            src,
4030            cpu_dst,
4031            Rotation::None,
4032            Flip::None,
4033            crop,
4034        );
4035        result.unwrap();
4036
4037        let g2d_dst =
4038            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4039        let mut g2d_converter = G2DProcessor::new().unwrap();
4040        let (result, _src, g2d_dst) = convert_img(
4041            &mut g2d_converter,
4042            src,
4043            g2d_dst,
4044            Rotation::None,
4045            Flip::None,
4046            crop,
4047        );
4048        result.unwrap();
4049
4050        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
4051    }
4052
4053    #[test]
4054    #[cfg(target_os = "linux")]
4055    fn test_g2d_dst_crop() {
4056        if !is_g2d_available() {
4057            eprintln!("SKIPPED: test_g2d_dst_crop - G2D library (libg2d.so.2) not available");
4058            return;
4059        }
4060        if !is_dma_available() {
4061            eprintln!(
4062                "SKIPPED: test_g2d_dst_crop - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4063            );
4064            return;
4065        }
4066
4067        let dst_width = 640;
4068        let dst_height = 640;
4069        let file = include_bytes!(concat!(
4070            env!("CARGO_MANIFEST_DIR"),
4071            "/../../testdata/zidane.jpg"
4072        ))
4073        .to_vec();
4074        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
4075
4076        let cpu_dst =
4077            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4078        let mut cpu_converter = CPUProcessor::new();
4079        let crop = Crop {
4080            src_rect: None,
4081            dst_rect: Some(Rect::new(100, 100, 512, 288)),
4082            dst_color: None,
4083        };
4084        let (result, src, cpu_dst) = convert_img(
4085            &mut cpu_converter,
4086            src,
4087            cpu_dst,
4088            Rotation::None,
4089            Flip::None,
4090            crop,
4091        );
4092        result.unwrap();
4093
4094        let g2d_dst =
4095            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4096        let mut g2d_converter = G2DProcessor::new().unwrap();
4097        let (result, _src, g2d_dst) = convert_img(
4098            &mut g2d_converter,
4099            src,
4100            g2d_dst,
4101            Rotation::None,
4102            Flip::None,
4103            crop,
4104        );
4105        result.unwrap();
4106
4107        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
4108    }
4109
4110    #[test]
4111    #[cfg(target_os = "linux")]
4112    fn test_g2d_all_rgba() {
4113        if !is_g2d_available() {
4114            eprintln!("SKIPPED: test_g2d_all_rgba - G2D library (libg2d.so.2) not available");
4115            return;
4116        }
4117        if !is_dma_available() {
4118            eprintln!(
4119                "SKIPPED: test_g2d_all_rgba - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4120            );
4121            return;
4122        }
4123
4124        let dst_width = 640;
4125        let dst_height = 640;
4126        let file = include_bytes!(concat!(
4127            env!("CARGO_MANIFEST_DIR"),
4128            "/../../testdata/zidane.jpg"
4129        ))
4130        .to_vec();
4131        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
4132        let src_dyn = src;
4133
4134        let mut cpu_dst =
4135            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4136        let mut cpu_converter = CPUProcessor::new();
4137        let mut g2d_dst =
4138            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4139        let mut g2d_converter = G2DProcessor::new().unwrap();
4140
4141        let crop = Crop {
4142            src_rect: Some(Rect::new(50, 120, 1024, 576)),
4143            dst_rect: Some(Rect::new(100, 100, 512, 288)),
4144            dst_color: None,
4145        };
4146
4147        for rot in [
4148            Rotation::None,
4149            Rotation::Clockwise90,
4150            Rotation::Rotate180,
4151            Rotation::CounterClockwise90,
4152        ] {
4153            cpu_dst
4154                .as_u8()
4155                .unwrap()
4156                .map()
4157                .unwrap()
4158                .as_mut_slice()
4159                .fill(114);
4160            g2d_dst
4161                .as_u8()
4162                .unwrap()
4163                .map()
4164                .unwrap()
4165                .as_mut_slice()
4166                .fill(114);
4167            for flip in [Flip::None, Flip::Horizontal, Flip::Vertical] {
4168                let mut cpu_dst_dyn = cpu_dst;
4169                cpu_converter
4170                    .convert(&src_dyn, &mut cpu_dst_dyn, Rotation::None, Flip::None, crop)
4171                    .unwrap();
4172                cpu_dst = {
4173                    let mut __t = cpu_dst_dyn.into_u8().unwrap();
4174                    __t.set_format(PixelFormat::Rgba).unwrap();
4175                    TensorDyn::from(__t)
4176                };
4177
4178                let mut g2d_dst_dyn = g2d_dst;
4179                g2d_converter
4180                    .convert(&src_dyn, &mut g2d_dst_dyn, Rotation::None, Flip::None, crop)
4181                    .unwrap();
4182                g2d_dst = {
4183                    let mut __t = g2d_dst_dyn.into_u8().unwrap();
4184                    __t.set_format(PixelFormat::Rgba).unwrap();
4185                    TensorDyn::from(__t)
4186                };
4187
4188                compare_images(
4189                    &g2d_dst,
4190                    &cpu_dst,
4191                    0.98,
4192                    &format!("{} {:?} {:?}", function!(), rot, flip),
4193                );
4194            }
4195        }
4196    }
4197
4198    #[test]
4199    #[cfg(target_os = "linux")]
4200    #[cfg(feature = "opengl")]
4201    fn test_opengl_src_crop() {
4202        if !is_opengl_available() {
4203            eprintln!("SKIPPED: {} - OpenGL not available", function!());
4204            return;
4205        }
4206
4207        let dst_width = 640;
4208        let dst_height = 360;
4209        let file = include_bytes!(concat!(
4210            env!("CARGO_MANIFEST_DIR"),
4211            "/../../testdata/zidane.jpg"
4212        ))
4213        .to_vec();
4214        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
4215        let crop = Crop {
4216            src_rect: Some(Rect {
4217                left: 320,
4218                top: 180,
4219                width: 1280 - 320,
4220                height: 720 - 180,
4221            }),
4222            dst_rect: None,
4223            dst_color: None,
4224        };
4225
4226        let cpu_dst =
4227            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4228        let mut cpu_converter = CPUProcessor::new();
4229        let (result, src, cpu_dst) = convert_img(
4230            &mut cpu_converter,
4231            src,
4232            cpu_dst,
4233            Rotation::None,
4234            Flip::None,
4235            crop,
4236        );
4237        result.unwrap();
4238
4239        let gl_dst =
4240            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4241        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
4242        let (result, _src, gl_dst) = convert_img(
4243            &mut gl_converter,
4244            src,
4245            gl_dst,
4246            Rotation::None,
4247            Flip::None,
4248            crop,
4249        );
4250        result.unwrap();
4251
4252        compare_images(&gl_dst, &cpu_dst, 0.98, function!());
4253    }
4254
4255    #[test]
4256    #[cfg(target_os = "linux")]
4257    #[cfg(feature = "opengl")]
4258    fn test_opengl_dst_crop() {
4259        if !is_opengl_available() {
4260            eprintln!("SKIPPED: {} - OpenGL not available", function!());
4261            return;
4262        }
4263
4264        let dst_width = 640;
4265        let dst_height = 640;
4266        let file = include_bytes!(concat!(
4267            env!("CARGO_MANIFEST_DIR"),
4268            "/../../testdata/zidane.jpg"
4269        ))
4270        .to_vec();
4271        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
4272
4273        let cpu_dst =
4274            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4275        let mut cpu_converter = CPUProcessor::new();
4276        let crop = Crop {
4277            src_rect: None,
4278            dst_rect: Some(Rect::new(100, 100, 512, 288)),
4279            dst_color: None,
4280        };
4281        let (result, src, cpu_dst) = convert_img(
4282            &mut cpu_converter,
4283            src,
4284            cpu_dst,
4285            Rotation::None,
4286            Flip::None,
4287            crop,
4288        );
4289        result.unwrap();
4290
4291        let gl_dst =
4292            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4293        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
4294        let (result, _src, gl_dst) = convert_img(
4295            &mut gl_converter,
4296            src,
4297            gl_dst,
4298            Rotation::None,
4299            Flip::None,
4300            crop,
4301        );
4302        result.unwrap();
4303
4304        compare_images(&gl_dst, &cpu_dst, 0.98, function!());
4305    }
4306
4307    #[test]
4308    #[cfg(target_os = "linux")]
4309    #[cfg(feature = "opengl")]
4310    fn test_opengl_all_rgba() {
4311        if !is_opengl_available() {
4312            eprintln!("SKIPPED: {} - OpenGL not available", function!());
4313            return;
4314        }
4315
4316        let dst_width = 640;
4317        let dst_height = 640;
4318        let file = include_bytes!(concat!(
4319            env!("CARGO_MANIFEST_DIR"),
4320            "/../../testdata/zidane.jpg"
4321        ))
4322        .to_vec();
4323
4324        let mut cpu_converter = CPUProcessor::new();
4325
4326        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
4327
4328        let mut mem = vec![None, Some(TensorMemory::Mem), Some(TensorMemory::Shm)];
4329        if is_dma_available() {
4330            mem.push(Some(TensorMemory::Dma));
4331        }
4332        let crop = Crop {
4333            src_rect: Some(Rect::new(50, 120, 1024, 576)),
4334            dst_rect: Some(Rect::new(100, 100, 512, 288)),
4335            dst_color: None,
4336        };
4337        for m in mem {
4338            let src = crate::load_image(&file, Some(PixelFormat::Rgba), m).unwrap();
4339            let src_dyn = src;
4340
4341            for rot in [
4342                Rotation::None,
4343                Rotation::Clockwise90,
4344                Rotation::Rotate180,
4345                Rotation::CounterClockwise90,
4346            ] {
4347                for flip in [Flip::None, Flip::Horizontal, Flip::Vertical] {
4348                    let cpu_dst =
4349                        TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, m)
4350                            .unwrap();
4351                    let gl_dst =
4352                        TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, m)
4353                            .unwrap();
4354                    cpu_dst
4355                        .as_u8()
4356                        .unwrap()
4357                        .map()
4358                        .unwrap()
4359                        .as_mut_slice()
4360                        .fill(114);
4361                    gl_dst
4362                        .as_u8()
4363                        .unwrap()
4364                        .map()
4365                        .unwrap()
4366                        .as_mut_slice()
4367                        .fill(114);
4368
4369                    let mut cpu_dst_dyn = cpu_dst;
4370                    cpu_converter
4371                        .convert(&src_dyn, &mut cpu_dst_dyn, Rotation::None, Flip::None, crop)
4372                        .unwrap();
4373                    let cpu_dst = {
4374                        let mut __t = cpu_dst_dyn.into_u8().unwrap();
4375                        __t.set_format(PixelFormat::Rgba).unwrap();
4376                        TensorDyn::from(__t)
4377                    };
4378
4379                    let mut gl_dst_dyn = gl_dst;
4380                    gl_converter
4381                        .convert(&src_dyn, &mut gl_dst_dyn, Rotation::None, Flip::None, crop)
4382                        .map_err(|e| {
4383                            log::error!("error mem {m:?} rot {rot:?} error: {e:?}");
4384                            e
4385                        })
4386                        .unwrap();
4387                    let gl_dst = {
4388                        let mut __t = gl_dst_dyn.into_u8().unwrap();
4389                        __t.set_format(PixelFormat::Rgba).unwrap();
4390                        TensorDyn::from(__t)
4391                    };
4392
4393                    compare_images(
4394                        &gl_dst,
4395                        &cpu_dst,
4396                        0.98,
4397                        &format!("{} {:?} {:?}", function!(), rot, flip),
4398                    );
4399                }
4400            }
4401        }
4402    }
4403
4404    #[test]
4405    #[cfg(target_os = "linux")]
4406    fn test_cpu_rotate() {
4407        for rot in [
4408            Rotation::Clockwise90,
4409            Rotation::Rotate180,
4410            Rotation::CounterClockwise90,
4411        ] {
4412            test_cpu_rotate_(rot);
4413        }
4414    }
4415
4416    #[cfg(target_os = "linux")]
4417    fn test_cpu_rotate_(rot: Rotation) {
4418        // This test rotates the image 4 times and checks that the image was returned to
4419        // be the same Currently doesn't check if rotations actually rotated in
4420        // right direction
4421        let file = include_bytes!(concat!(
4422            env!("CARGO_MANIFEST_DIR"),
4423            "/../../testdata/zidane.jpg"
4424        ))
4425        .to_vec();
4426
4427        let unchanged_src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
4428        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
4429
4430        let (dst_width, dst_height) = match rot {
4431            Rotation::None | Rotation::Rotate180 => (src.width().unwrap(), src.height().unwrap()),
4432            Rotation::Clockwise90 | Rotation::CounterClockwise90 => {
4433                (src.height().unwrap(), src.width().unwrap())
4434            }
4435        };
4436
4437        let cpu_dst =
4438            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4439        let mut cpu_converter = CPUProcessor::new();
4440
4441        // After rotating 4 times, the image should be the same as the original
4442
4443        let (result, src, cpu_dst) = convert_img(
4444            &mut cpu_converter,
4445            src,
4446            cpu_dst,
4447            rot,
4448            Flip::None,
4449            Crop::no_crop(),
4450        );
4451        result.unwrap();
4452
4453        let (result, cpu_dst, src) = convert_img(
4454            &mut cpu_converter,
4455            cpu_dst,
4456            src,
4457            rot,
4458            Flip::None,
4459            Crop::no_crop(),
4460        );
4461        result.unwrap();
4462
4463        let (result, src, cpu_dst) = convert_img(
4464            &mut cpu_converter,
4465            src,
4466            cpu_dst,
4467            rot,
4468            Flip::None,
4469            Crop::no_crop(),
4470        );
4471        result.unwrap();
4472
4473        let (result, _cpu_dst, src) = convert_img(
4474            &mut cpu_converter,
4475            cpu_dst,
4476            src,
4477            rot,
4478            Flip::None,
4479            Crop::no_crop(),
4480        );
4481        result.unwrap();
4482
4483        compare_images(&src, &unchanged_src, 0.98, function!());
4484    }
4485
4486    #[test]
4487    #[cfg(target_os = "linux")]
4488    #[cfg(feature = "opengl")]
4489    fn test_opengl_rotate() {
4490        if !is_opengl_available() {
4491            eprintln!("SKIPPED: {} - OpenGL not available", function!());
4492            return;
4493        }
4494
4495        let size = (1280, 720);
4496        let mut mem = vec![None, Some(TensorMemory::Shm), Some(TensorMemory::Mem)];
4497
4498        if is_dma_available() {
4499            mem.push(Some(TensorMemory::Dma));
4500        }
4501        for m in mem {
4502            for rot in [
4503                Rotation::Clockwise90,
4504                Rotation::Rotate180,
4505                Rotation::CounterClockwise90,
4506            ] {
4507                test_opengl_rotate_(size, rot, m);
4508            }
4509        }
4510    }
4511
4512    #[cfg(target_os = "linux")]
4513    #[cfg(feature = "opengl")]
4514    fn test_opengl_rotate_(
4515        size: (usize, usize),
4516        rot: Rotation,
4517        tensor_memory: Option<TensorMemory>,
4518    ) {
4519        let (dst_width, dst_height) = match rot {
4520            Rotation::None | Rotation::Rotate180 => size,
4521            Rotation::Clockwise90 | Rotation::CounterClockwise90 => (size.1, size.0),
4522        };
4523
4524        let file = include_bytes!(concat!(
4525            env!("CARGO_MANIFEST_DIR"),
4526            "/../../testdata/zidane.jpg"
4527        ))
4528        .to_vec();
4529        let src = crate::load_image(&file, Some(PixelFormat::Rgba), tensor_memory).unwrap();
4530
4531        let cpu_dst =
4532            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4533        let mut cpu_converter = CPUProcessor::new();
4534
4535        let (result, mut src, cpu_dst) = convert_img(
4536            &mut cpu_converter,
4537            src,
4538            cpu_dst,
4539            rot,
4540            Flip::None,
4541            Crop::no_crop(),
4542        );
4543        result.unwrap();
4544
4545        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
4546
4547        for _ in 0..5 {
4548            let gl_dst = TensorDyn::image(
4549                dst_width,
4550                dst_height,
4551                PixelFormat::Rgba,
4552                DType::U8,
4553                tensor_memory,
4554            )
4555            .unwrap();
4556            let (result, src_back, gl_dst) = convert_img(
4557                &mut gl_converter,
4558                src,
4559                gl_dst,
4560                rot,
4561                Flip::None,
4562                Crop::no_crop(),
4563            );
4564            result.unwrap();
4565            src = src_back;
4566            compare_images(&gl_dst, &cpu_dst, 0.98, function!());
4567        }
4568    }
4569
4570    #[test]
4571    #[cfg(target_os = "linux")]
4572    fn test_g2d_rotate() {
4573        if !is_g2d_available() {
4574            eprintln!("SKIPPED: test_g2d_rotate - G2D library (libg2d.so.2) not available");
4575            return;
4576        }
4577        if !is_dma_available() {
4578            eprintln!(
4579                "SKIPPED: test_g2d_rotate - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4580            );
4581            return;
4582        }
4583
4584        let size = (1280, 720);
4585        for rot in [
4586            Rotation::Clockwise90,
4587            Rotation::Rotate180,
4588            Rotation::CounterClockwise90,
4589        ] {
4590            test_g2d_rotate_(size, rot);
4591        }
4592    }
4593
4594    #[cfg(target_os = "linux")]
4595    fn test_g2d_rotate_(size: (usize, usize), rot: Rotation) {
4596        let (dst_width, dst_height) = match rot {
4597            Rotation::None | Rotation::Rotate180 => size,
4598            Rotation::Clockwise90 | Rotation::CounterClockwise90 => (size.1, size.0),
4599        };
4600
4601        let file = include_bytes!(concat!(
4602            env!("CARGO_MANIFEST_DIR"),
4603            "/../../testdata/zidane.jpg"
4604        ))
4605        .to_vec();
4606        let src =
4607            crate::load_image(&file, Some(PixelFormat::Rgba), Some(TensorMemory::Dma)).unwrap();
4608
4609        let cpu_dst =
4610            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4611        let mut cpu_converter = CPUProcessor::new();
4612
4613        let (result, src, cpu_dst) = convert_img(
4614            &mut cpu_converter,
4615            src,
4616            cpu_dst,
4617            rot,
4618            Flip::None,
4619            Crop::no_crop(),
4620        );
4621        result.unwrap();
4622
4623        let g2d_dst = TensorDyn::image(
4624            dst_width,
4625            dst_height,
4626            PixelFormat::Rgba,
4627            DType::U8,
4628            Some(TensorMemory::Dma),
4629        )
4630        .unwrap();
4631        let mut g2d_converter = G2DProcessor::new().unwrap();
4632
4633        let (result, _src, g2d_dst) = convert_img(
4634            &mut g2d_converter,
4635            src,
4636            g2d_dst,
4637            rot,
4638            Flip::None,
4639            Crop::no_crop(),
4640        );
4641        result.unwrap();
4642
4643        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
4644    }
4645
    /// RGBA -> YUYV -> RGBA round trip on the CPU with a 1280x720 -> 640x360
    /// resize on the first hop.
    ///
    /// Verifies that resizing through an intermediate YUYV image produces
    /// approximately the same RGBA result as resizing the RGBA source
    /// directly (>= 98% similarity).
    #[test]
    fn test_rgba_to_yuyv_resize_cpu() {
        let src = load_bytes_to_tensor(
            1280,
            720,
            PixelFormat::Rgba,
            None,
            include_bytes!(concat!(
                env!("CARGO_MANIFEST_DIR"),
                "/../../testdata/camera720p.rgba"
            )),
        )
        .unwrap();

        let (dst_width, dst_height) = (640, 360);

        // Intermediate YUYV image for the round trip.
        let dst =
            TensorDyn::image(dst_width, dst_height, PixelFormat::Yuyv, DType::U8, None).unwrap();

        let dst_through_yuyv =
            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
        let dst_direct =
            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();

        let mut cpu_converter = CPUProcessor::new();

        // Hop 1: RGBA 720p -> YUYV 640x360 (resize + format change).
        // convert_img returns ownership of both tensors.
        let (result, src, dst) = convert_img(
            &mut cpu_converter,
            src,
            dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        // Hop 2: the YUYV image (`dst`) becomes the source, converted back to
        // RGBA at the same size.
        let (result, _dst, dst_through_yuyv) = convert_img(
            &mut cpu_converter,
            dst,
            dst_through_yuyv,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        // Reference: resize the original RGBA source directly.
        let (result, _src, dst_direct) = convert_img(
            &mut cpu_converter,
            src,
            dst_direct,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        compare_images(&dst_through_yuyv, &dst_direct, 0.98, function!());
    }
4704
4705    #[test]
4706    #[cfg(target_os = "linux")]
4707    #[cfg(feature = "opengl")]
4708    #[ignore = "opengl doesn't support rendering to PixelFormat::Yuyv texture"]
4709    fn test_rgba_to_yuyv_resize_opengl() {
4710        if !is_opengl_available() {
4711            eprintln!("SKIPPED: {} - OpenGL not available", function!());
4712            return;
4713        }
4714
4715        if !is_dma_available() {
4716            eprintln!(
4717                "SKIPPED: {} - DMA memory allocation not available (permission denied or no DMA-BUF support)",
4718                function!()
4719            );
4720            return;
4721        }
4722
4723        let src = load_bytes_to_tensor(
4724            1280,
4725            720,
4726            PixelFormat::Rgba,
4727            None,
4728            include_bytes!(concat!(
4729                env!("CARGO_MANIFEST_DIR"),
4730                "/../../testdata/camera720p.rgba"
4731            )),
4732        )
4733        .unwrap();
4734
4735        let (dst_width, dst_height) = (640, 360);
4736
4737        let dst = TensorDyn::image(
4738            dst_width,
4739            dst_height,
4740            PixelFormat::Yuyv,
4741            DType::U8,
4742            Some(TensorMemory::Dma),
4743        )
4744        .unwrap();
4745
4746        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
4747
4748        let (result, src, dst) = convert_img(
4749            &mut gl_converter,
4750            src,
4751            dst,
4752            Rotation::None,
4753            Flip::None,
4754            Crop::new()
4755                .with_dst_rect(Some(Rect::new(100, 100, 100, 100)))
4756                .with_dst_color(Some([255, 255, 255, 255])),
4757        );
4758        result.unwrap();
4759
4760        std::fs::write(
4761            "rgba_to_yuyv_opengl.yuyv",
4762            dst.as_u8().unwrap().map().unwrap().as_slice(),
4763        )
4764        .unwrap();
4765        let cpu_dst = TensorDyn::image(
4766            dst_width,
4767            dst_height,
4768            PixelFormat::Yuyv,
4769            DType::U8,
4770            Some(TensorMemory::Dma),
4771        )
4772        .unwrap();
4773        let (result, _src, cpu_dst) = convert_img(
4774            &mut CPUProcessor::new(),
4775            src,
4776            cpu_dst,
4777            Rotation::None,
4778            Flip::None,
4779            Crop::no_crop(),
4780        );
4781        result.unwrap();
4782
4783        compare_images_convert_to_rgb(&dst, &cpu_dst, 0.98, function!());
4784    }
4785
4786    #[test]
4787    #[cfg(target_os = "linux")]
4788    fn test_rgba_to_yuyv_resize_g2d() {
4789        if !is_g2d_available() {
4790            eprintln!(
4791                "SKIPPED: test_rgba_to_yuyv_resize_g2d - G2D library (libg2d.so.2) not available"
4792            );
4793            return;
4794        }
4795        if !is_dma_available() {
4796            eprintln!(
4797                "SKIPPED: test_rgba_to_yuyv_resize_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4798            );
4799            return;
4800        }
4801
4802        let src = load_bytes_to_tensor(
4803            1280,
4804            720,
4805            PixelFormat::Rgba,
4806            Some(TensorMemory::Dma),
4807            include_bytes!(concat!(
4808                env!("CARGO_MANIFEST_DIR"),
4809                "/../../testdata/camera720p.rgba"
4810            )),
4811        )
4812        .unwrap();
4813
4814        let (dst_width, dst_height) = (1280, 720);
4815
4816        let cpu_dst = TensorDyn::image(
4817            dst_width,
4818            dst_height,
4819            PixelFormat::Yuyv,
4820            DType::U8,
4821            Some(TensorMemory::Dma),
4822        )
4823        .unwrap();
4824
4825        let g2d_dst = TensorDyn::image(
4826            dst_width,
4827            dst_height,
4828            PixelFormat::Yuyv,
4829            DType::U8,
4830            Some(TensorMemory::Dma),
4831        )
4832        .unwrap();
4833
4834        let mut g2d_converter = G2DProcessor::new().unwrap();
4835        let crop = Crop {
4836            src_rect: None,
4837            dst_rect: Some(Rect::new(100, 100, 2, 2)),
4838            dst_color: None,
4839        };
4840
4841        g2d_dst
4842            .as_u8()
4843            .unwrap()
4844            .map()
4845            .unwrap()
4846            .as_mut_slice()
4847            .fill(128);
4848        let (result, src, g2d_dst) = convert_img(
4849            &mut g2d_converter,
4850            src,
4851            g2d_dst,
4852            Rotation::None,
4853            Flip::None,
4854            crop,
4855        );
4856        result.unwrap();
4857
4858        let cpu_dst_img = cpu_dst;
4859        cpu_dst_img
4860            .as_u8()
4861            .unwrap()
4862            .map()
4863            .unwrap()
4864            .as_mut_slice()
4865            .fill(128);
4866        let (result, _src, cpu_dst) = convert_img(
4867            &mut CPUProcessor::new(),
4868            src,
4869            cpu_dst_img,
4870            Rotation::None,
4871            Flip::None,
4872            crop,
4873        );
4874        result.unwrap();
4875
4876        compare_images_convert_to_rgb(&cpu_dst, &g2d_dst, 0.98, function!());
4877    }
4878
4879    #[test]
4880    fn test_yuyv_to_rgba_cpu() {
4881        let file = include_bytes!(concat!(
4882            env!("CARGO_MANIFEST_DIR"),
4883            "/../../testdata/camera720p.yuyv"
4884        ))
4885        .to_vec();
4886        let src = TensorDyn::image(1280, 720, PixelFormat::Yuyv, DType::U8, None).unwrap();
4887        src.as_u8()
4888            .unwrap()
4889            .map()
4890            .unwrap()
4891            .as_mut_slice()
4892            .copy_from_slice(&file);
4893
4894        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
4895        let mut cpu_converter = CPUProcessor::new();
4896
4897        let (result, _src, dst) = convert_img(
4898            &mut cpu_converter,
4899            src,
4900            dst,
4901            Rotation::None,
4902            Flip::None,
4903            Crop::no_crop(),
4904        );
4905        result.unwrap();
4906
4907        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
4908        target_image
4909            .as_u8()
4910            .unwrap()
4911            .map()
4912            .unwrap()
4913            .as_mut_slice()
4914            .copy_from_slice(include_bytes!(concat!(
4915                env!("CARGO_MANIFEST_DIR"),
4916                "/../../testdata/camera720p.rgba"
4917            )));
4918
4919        compare_images(&dst, &target_image, 0.98, function!());
4920    }
4921
    /// Converts a full 720p YUYV frame to RGB (no alpha) on the CPU and
    /// compares against the RGBA reference frame with its alpha channel
    /// stripped.
    #[test]
    fn test_yuyv_to_rgb_cpu() {
        let file = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../testdata/camera720p.yuyv"
        ))
        .to_vec();
        let src = TensorDyn::image(1280, 720, PixelFormat::Yuyv, DType::U8, None).unwrap();
        // Copy the raw YUYV bytes into the mapped tensor buffer.
        src.as_u8()
            .unwrap()
            .map()
            .unwrap()
            .as_mut_slice()
            .copy_from_slice(&file);

        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
        let mut cpu_converter = CPUProcessor::new();

        let (result, _src, dst) = convert_img(
            &mut cpu_converter,
            src,
            dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        // Build the expected RGB image by dropping the alpha byte from each
        // 4-byte RGBA pixel of the reference frame (RGB triple <- RGBA quad).
        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
        target_image
            .as_u8()
            .unwrap()
            .map()
            .unwrap()
            .as_mut_slice()
            .as_chunks_mut::<3>()
            .0
            .iter_mut()
            .zip(
                include_bytes!(concat!(
                    env!("CARGO_MANIFEST_DIR"),
                    "/../../testdata/camera720p.rgba"
                ))
                .as_chunks::<4>()
                .0,
            )
            .for_each(|(dst, src)| *dst = [src[0], src[1], src[2]]);

        compare_images(&dst, &target_image, 0.98, function!());
    }
4972
    /// Converts a 720p YUYV frame to RGBA with the G2D blitter and compares
    /// the result against the pre-converted RGBA reference frame.
    ///
    /// Skips when the G2D library or DMA-BUF allocation is unavailable.
    #[test]
    #[cfg(target_os = "linux")]
    fn test_yuyv_to_rgba_g2d() {
        if !is_g2d_available() {
            eprintln!("SKIPPED: test_yuyv_to_rgba_g2d - G2D library (libg2d.so.2) not available");
            return;
        }
        if !is_dma_available() {
            eprintln!(
                "SKIPPED: test_yuyv_to_rgba_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
            );
            return;
        }

        let src = load_bytes_to_tensor(
            1280,
            720,
            PixelFormat::Yuyv,
            None,
            include_bytes!(concat!(
                env!("CARGO_MANIFEST_DIR"),
                "/../../testdata/camera720p.yuyv"
            )),
        )
        .unwrap();

        // G2D output is written into DMA-backed memory.
        let dst = TensorDyn::image(
            1280,
            720,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Dma),
        )
        .unwrap();
        let mut g2d_converter = G2DProcessor::new().unwrap();

        let (result, _src, dst) = convert_img(
            &mut g2d_converter,
            src,
            dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        // Expected output: the same frame captured directly as RGBA.
        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
        target_image
            .as_u8()
            .unwrap()
            .map()
            .unwrap()
            .as_mut_slice()
            .copy_from_slice(include_bytes!(concat!(
                env!("CARGO_MANIFEST_DIR"),
                "/../../testdata/camera720p.rgba"
            )));

        compare_images(&dst, &target_image, 0.98, function!());
    }
5033
    /// Converts a 720p YUYV frame to RGBA with the OpenGL converter and
    /// compares the result against the pre-converted RGBA reference frame.
    ///
    /// Skips when OpenGL or DMA-BUF allocation is unavailable.
    #[test]
    #[cfg(target_os = "linux")]
    #[cfg(feature = "opengl")]
    fn test_yuyv_to_rgba_opengl() {
        if !is_opengl_available() {
            eprintln!("SKIPPED: {} - OpenGL not available", function!());
            return;
        }
        if !is_dma_available() {
            eprintln!(
                "SKIPPED: {} - DMA memory allocation not available (permission denied or no DMA-BUF support)",
                function!()
            );
            return;
        }

        // Source and destination are both DMA-backed for the GL path.
        let src = load_bytes_to_tensor(
            1280,
            720,
            PixelFormat::Yuyv,
            Some(TensorMemory::Dma),
            include_bytes!(concat!(
                env!("CARGO_MANIFEST_DIR"),
                "/../../testdata/camera720p.yuyv"
            )),
        )
        .unwrap();

        let dst = TensorDyn::image(
            1280,
            720,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Dma),
        )
        .unwrap();
        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();

        let (result, _src, dst) = convert_img(
            &mut gl_converter,
            src,
            dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        // Expected output: the same frame captured directly as RGBA.
        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
        target_image
            .as_u8()
            .unwrap()
            .map()
            .unwrap()
            .as_mut_slice()
            .copy_from_slice(include_bytes!(concat!(
                env!("CARGO_MANIFEST_DIR"),
                "/../../testdata/camera720p.rgba"
            )));

        compare_images(&dst, &target_image, 0.98, function!());
    }
5096
5097    #[test]
5098    #[cfg(target_os = "linux")]
5099    fn test_yuyv_to_rgb_g2d() {
5100        if !is_g2d_available() {
5101            eprintln!("SKIPPED: test_yuyv_to_rgb_g2d - G2D library (libg2d.so.2) not available");
5102            return;
5103        }
5104        if !is_dma_available() {
5105            eprintln!(
5106                "SKIPPED: test_yuyv_to_rgb_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
5107            );
5108            return;
5109        }
5110
5111        let src = load_bytes_to_tensor(
5112            1280,
5113            720,
5114            PixelFormat::Yuyv,
5115            None,
5116            include_bytes!(concat!(
5117                env!("CARGO_MANIFEST_DIR"),
5118                "/../../testdata/camera720p.yuyv"
5119            )),
5120        )
5121        .unwrap();
5122
5123        let g2d_dst = TensorDyn::image(
5124            1280,
5125            720,
5126            PixelFormat::Rgb,
5127            DType::U8,
5128            Some(TensorMemory::Dma),
5129        )
5130        .unwrap();
5131        let mut g2d_converter = G2DProcessor::new().unwrap();
5132
5133        let (result, src, g2d_dst) = convert_img(
5134            &mut g2d_converter,
5135            src,
5136            g2d_dst,
5137            Rotation::None,
5138            Flip::None,
5139            Crop::no_crop(),
5140        );
5141        result.unwrap();
5142
5143        let cpu_dst = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
5144        let mut cpu_converter: CPUProcessor = CPUProcessor::new();
5145
5146        let (result, _src, cpu_dst) = convert_img(
5147            &mut cpu_converter,
5148            src,
5149            cpu_dst,
5150            Rotation::None,
5151            Flip::None,
5152            Crop::no_crop(),
5153        );
5154        result.unwrap();
5155
5156        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
5157    }
5158
    /// Resizes a 720p YUYV frame to 600x400 YUYV with G2D and compares the
    /// result against the CPU converter's output for the same resize.
    ///
    /// Skips when the G2D library or DMA-BUF allocation is unavailable.
    #[test]
    #[cfg(target_os = "linux")]
    fn test_yuyv_to_yuyv_resize_g2d() {
        if !is_g2d_available() {
            eprintln!(
                "SKIPPED: test_yuyv_to_yuyv_resize_g2d - G2D library (libg2d.so.2) not available"
            );
            return;
        }
        if !is_dma_available() {
            eprintln!(
                "SKIPPED: test_yuyv_to_yuyv_resize_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
            );
            return;
        }

        let src = load_bytes_to_tensor(
            1280,
            720,
            PixelFormat::Yuyv,
            None,
            include_bytes!(concat!(
                env!("CARGO_MANIFEST_DIR"),
                "/../../testdata/camera720p.yuyv"
            )),
        )
        .unwrap();

        // G2D output at the reduced size, DMA-backed.
        let g2d_dst = TensorDyn::image(
            600,
            400,
            PixelFormat::Yuyv,
            DType::U8,
            Some(TensorMemory::Dma),
        )
        .unwrap();
        let mut g2d_converter = G2DProcessor::new().unwrap();

        let (result, src, g2d_dst) = convert_img(
            &mut g2d_converter,
            src,
            g2d_dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        // CPU reference resize of the same frame.
        let cpu_dst = TensorDyn::image(600, 400, PixelFormat::Yuyv, DType::U8, None).unwrap();
        let mut cpu_converter: CPUProcessor = CPUProcessor::new();

        let (result, _src, cpu_dst) = convert_img(
            &mut cpu_converter,
            src,
            cpu_dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        // TODO: compare PixelFormat::Yuyv and PixelFormat::Yuyv images without having to convert them to PixelFormat::Rgb
        compare_images_convert_to_rgb(&g2d_dst, &cpu_dst, 0.98, function!());
    }
5223
    /// Resizes a 720p YUYV frame to 960x540 RGBA on the CPU and compares the
    /// result against the RGBA reference frame resized to the same size.
    #[test]
    fn test_yuyv_to_rgba_resize_cpu() {
        let src = load_bytes_to_tensor(
            1280,
            720,
            PixelFormat::Yuyv,
            None,
            include_bytes!(concat!(
                env!("CARGO_MANIFEST_DIR"),
                "/../../testdata/camera720p.yuyv"
            )),
        )
        .unwrap();

        let (dst_width, dst_height) = (960, 540);

        // Combined format conversion (YUYV -> RGBA) and downscale in one pass.
        let dst =
            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
        let mut cpu_converter = CPUProcessor::new();

        let (result, _src, dst) = convert_img(
            &mut cpu_converter,
            src,
            dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        // Reference: downscale the already-RGBA version of the same frame.
        let dst_target =
            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
        let src_target = load_bytes_to_tensor(
            1280,
            720,
            PixelFormat::Rgba,
            None,
            include_bytes!(concat!(
                env!("CARGO_MANIFEST_DIR"),
                "/../../testdata/camera720p.rgba"
            )),
        )
        .unwrap();
        let (result, _src_target, dst_target) = convert_img(
            &mut cpu_converter,
            src_target,
            dst_target,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        compare_images(&dst, &dst_target, 0.98, function!());
    }
5279
    /// Converts YUYV -> RGBA with a source crop and a horizontal flip using
    /// G2D, and checks the output matches the CPU converter under identical
    /// crop/flip settings.
    ///
    /// Skips when the G2D library or DMA-BUF allocation is unavailable.
    #[test]
    #[cfg(target_os = "linux")]
    fn test_yuyv_to_rgba_crop_flip_g2d() {
        if !is_g2d_available() {
            eprintln!(
                "SKIPPED: test_yuyv_to_rgba_crop_flip_g2d - G2D library (libg2d.so.2) not available"
            );
            return;
        }
        if !is_dma_available() {
            eprintln!(
                "SKIPPED: test_yuyv_to_rgba_crop_flip_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
            );
            return;
        }

        let src = load_bytes_to_tensor(
            1280,
            720,
            PixelFormat::Yuyv,
            Some(TensorMemory::Dma),
            include_bytes!(concat!(
                env!("CARGO_MANIFEST_DIR"),
                "/../../testdata/camera720p.yuyv"
            )),
        )
        .unwrap();

        let (dst_width, dst_height) = (640, 640);

        let dst_g2d = TensorDyn::image(
            dst_width,
            dst_height,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Dma),
        )
        .unwrap();
        let mut g2d_converter = G2DProcessor::new().unwrap();
        // Crop a 400x300 region at (20, 15) out of the source; the result is
        // scaled to fill the whole 640x640 destination.
        let crop = Crop {
            src_rect: Some(Rect {
                left: 20,
                top: 15,
                width: 400,
                height: 300,
            }),
            dst_rect: None,
            dst_color: None,
        };

        let (result, src, dst_g2d) = convert_img(
            &mut g2d_converter,
            src,
            dst_g2d,
            Rotation::None,
            Flip::Horizontal,
            crop,
        );
        result.unwrap();

        // CPU reference with the same crop and flip.
        let dst_cpu = TensorDyn::image(
            dst_width,
            dst_height,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Dma),
        )
        .unwrap();
        let mut cpu_converter = CPUProcessor::new();

        let (result, _src, dst_cpu) = convert_img(
            &mut cpu_converter,
            src,
            dst_cpu,
            Rotation::None,
            Flip::Horizontal,
            crop,
        );
        result.unwrap();
        compare_images(&dst_g2d, &dst_cpu, 0.98, function!());
    }
5361
    /// Converts YUYV -> RGBA with a source crop and a horizontal flip using
    /// the OpenGL converter, and checks the output matches the CPU converter
    /// under identical crop/flip settings.
    ///
    /// Skips when OpenGL or DMA-BUF allocation is unavailable.
    #[test]
    #[cfg(target_os = "linux")]
    #[cfg(feature = "opengl")]
    fn test_yuyv_to_rgba_crop_flip_opengl() {
        if !is_opengl_available() {
            eprintln!("SKIPPED: {} - OpenGL not available", function!());
            return;
        }

        if !is_dma_available() {
            eprintln!(
                "SKIPPED: {} - DMA memory allocation not available (permission denied or no DMA-BUF support)",
                function!()
            );
            return;
        }

        let src = load_bytes_to_tensor(
            1280,
            720,
            PixelFormat::Yuyv,
            Some(TensorMemory::Dma),
            include_bytes!(concat!(
                env!("CARGO_MANIFEST_DIR"),
                "/../../testdata/camera720p.yuyv"
            )),
        )
        .unwrap();

        let (dst_width, dst_height) = (640, 640);

        let dst_gl = TensorDyn::image(
            dst_width,
            dst_height,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Dma),
        )
        .unwrap();
        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
        // Crop a 400x300 region at (20, 15) out of the source; the result is
        // scaled to fill the whole 640x640 destination.
        let crop = Crop {
            src_rect: Some(Rect {
                left: 20,
                top: 15,
                width: 400,
                height: 300,
            }),
            dst_rect: None,
            dst_color: None,
        };

        let (result, src, dst_gl) = convert_img(
            &mut gl_converter,
            src,
            dst_gl,
            Rotation::None,
            Flip::Horizontal,
            crop,
        );
        result.unwrap();

        // CPU reference with the same crop and flip.
        let dst_cpu = TensorDyn::image(
            dst_width,
            dst_height,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Dma),
        )
        .unwrap();
        let mut cpu_converter = CPUProcessor::new();

        let (result, _src, dst_cpu) = convert_img(
            &mut cpu_converter,
            src,
            dst_cpu,
            Rotation::None,
            Flip::Horizontal,
            crop,
        );
        result.unwrap();
        compare_images(&dst_gl, &dst_cpu, 0.98, function!());
    }
5444
5445    #[test]
5446    fn test_vyuy_to_rgba_cpu() {
5447        let file = include_bytes!(concat!(
5448            env!("CARGO_MANIFEST_DIR"),
5449            "/../../testdata/camera720p.vyuy"
5450        ))
5451        .to_vec();
5452        let src = TensorDyn::image(1280, 720, PixelFormat::Vyuy, DType::U8, None).unwrap();
5453        src.as_u8()
5454            .unwrap()
5455            .map()
5456            .unwrap()
5457            .as_mut_slice()
5458            .copy_from_slice(&file);
5459
5460        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
5461        let mut cpu_converter = CPUProcessor::new();
5462
5463        let (result, _src, dst) = convert_img(
5464            &mut cpu_converter,
5465            src,
5466            dst,
5467            Rotation::None,
5468            Flip::None,
5469            Crop::no_crop(),
5470        );
5471        result.unwrap();
5472
5473        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
5474        target_image
5475            .as_u8()
5476            .unwrap()
5477            .map()
5478            .unwrap()
5479            .as_mut_slice()
5480            .copy_from_slice(include_bytes!(concat!(
5481                env!("CARGO_MANIFEST_DIR"),
5482                "/../../testdata/camera720p.rgba"
5483            )));
5484
5485        compare_images(&dst, &target_image, 0.98, function!());
5486    }
5487
    /// Converts a 720p VYUY frame to RGB (no alpha) on the CPU and compares
    /// against the RGBA reference frame with its alpha channel stripped.
    #[test]
    fn test_vyuy_to_rgb_cpu() {
        let file = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../testdata/camera720p.vyuy"
        ))
        .to_vec();
        let src = TensorDyn::image(1280, 720, PixelFormat::Vyuy, DType::U8, None).unwrap();
        // Copy the raw VYUY bytes into the mapped tensor buffer.
        src.as_u8()
            .unwrap()
            .map()
            .unwrap()
            .as_mut_slice()
            .copy_from_slice(&file);

        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
        let mut cpu_converter = CPUProcessor::new();

        let (result, _src, dst) = convert_img(
            &mut cpu_converter,
            src,
            dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        // Build the expected RGB image by dropping the alpha byte from each
        // 4-byte RGBA pixel of the reference frame (RGB triple <- RGBA quad).
        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
        target_image
            .as_u8()
            .unwrap()
            .map()
            .unwrap()
            .as_mut_slice()
            .as_chunks_mut::<3>()
            .0
            .iter_mut()
            .zip(
                include_bytes!(concat!(
                    env!("CARGO_MANIFEST_DIR"),
                    "/../../testdata/camera720p.rgba"
                ))
                .as_chunks::<4>()
                .0,
            )
            .for_each(|(dst, src)| *dst = [src[0], src[1], src[2]]);

        compare_images(&dst, &target_image, 0.98, function!());
    }
5538
    /// Converts a 720p VYUY frame to RGBA with the G2D blitter and compares
    /// against the pre-converted RGBA reference frame.
    ///
    /// Marked `#[ignore]` because current G2D hardware does not support the
    /// VYUY source format; additionally, a runtime `Error::G2D` from the
    /// conversion is treated as a skip rather than a failure.
    #[test]
    #[cfg(target_os = "linux")]
    #[ignore = "G2D does not support VYUY; re-enable when hardware support is added"]
    fn test_vyuy_to_rgba_g2d() {
        if !is_g2d_available() {
            eprintln!("SKIPPED: test_vyuy_to_rgba_g2d - G2D library (libg2d.so.2) not available");
            return;
        }
        if !is_dma_available() {
            eprintln!(
                "SKIPPED: test_vyuy_to_rgba_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
            );
            return;
        }

        let src = load_bytes_to_tensor(
            1280,
            720,
            PixelFormat::Vyuy,
            None,
            include_bytes!(concat!(
                env!("CARGO_MANIFEST_DIR"),
                "/../../testdata/camera720p.vyuy"
            )),
        )
        .unwrap();

        // G2D output is written into DMA-backed memory.
        let dst = TensorDyn::image(
            1280,
            720,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Dma),
        )
        .unwrap();
        let mut g2d_converter = G2DProcessor::new().unwrap();

        let (result, _src, dst) = convert_img(
            &mut g2d_converter,
            src,
            dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        // A G2D error here means the hardware rejected the VYUY source format:
        // treat that as a skip; any other error fails the test.
        match result {
            Err(Error::G2D(_)) => {
                eprintln!("SKIPPED: test_vyuy_to_rgba_g2d - G2D does not support PixelFormat::Vyuy format");
                return;
            }
            r => r.unwrap(),
        }

        // Expected output: the same frame captured directly as RGBA.
        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
        target_image
            .as_u8()
            .unwrap()
            .map()
            .unwrap()
            .as_mut_slice()
            .copy_from_slice(include_bytes!(concat!(
                env!("CARGO_MANIFEST_DIR"),
                "/../../testdata/camera720p.rgba"
            )));

        compare_images(&dst, &target_image, 0.98, function!());
    }
5606
5607    #[test]
5608    #[cfg(target_os = "linux")]
5609    #[ignore = "G2D does not support VYUY; re-enable when hardware support is added"]
5610    fn test_vyuy_to_rgb_g2d() {
5611        if !is_g2d_available() {
5612            eprintln!("SKIPPED: test_vyuy_to_rgb_g2d - G2D library (libg2d.so.2) not available");
5613            return;
5614        }
5615        if !is_dma_available() {
5616            eprintln!(
5617                "SKIPPED: test_vyuy_to_rgb_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
5618            );
5619            return;
5620        }
5621
5622        let src = load_bytes_to_tensor(
5623            1280,
5624            720,
5625            PixelFormat::Vyuy,
5626            None,
5627            include_bytes!(concat!(
5628                env!("CARGO_MANIFEST_DIR"),
5629                "/../../testdata/camera720p.vyuy"
5630            )),
5631        )
5632        .unwrap();
5633
5634        let g2d_dst = TensorDyn::image(
5635            1280,
5636            720,
5637            PixelFormat::Rgb,
5638            DType::U8,
5639            Some(TensorMemory::Dma),
5640        )
5641        .unwrap();
5642        let mut g2d_converter = G2DProcessor::new().unwrap();
5643
5644        let (result, src, g2d_dst) = convert_img(
5645            &mut g2d_converter,
5646            src,
5647            g2d_dst,
5648            Rotation::None,
5649            Flip::None,
5650            Crop::no_crop(),
5651        );
5652        match result {
5653            Err(Error::G2D(_)) => {
5654                eprintln!(
5655                    "SKIPPED: test_vyuy_to_rgb_g2d - G2D does not support PixelFormat::Vyuy format"
5656                );
5657                return;
5658            }
5659            r => r.unwrap(),
5660        }
5661
5662        let cpu_dst = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
5663        let mut cpu_converter: CPUProcessor = CPUProcessor::new();
5664
5665        let (result, _src, cpu_dst) = convert_img(
5666            &mut cpu_converter,
5667            src,
5668            cpu_dst,
5669            Rotation::None,
5670            Flip::None,
5671            Crop::no_crop(),
5672        );
5673        result.unwrap();
5674
5675        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
5676    }
5677
5678    #[test]
5679    #[cfg(target_os = "linux")]
5680    #[cfg(feature = "opengl")]
5681    fn test_vyuy_to_rgba_opengl() {
5682        if !is_opengl_available() {
5683            eprintln!("SKIPPED: {} - OpenGL not available", function!());
5684            return;
5685        }
5686        if !is_dma_available() {
5687            eprintln!(
5688                "SKIPPED: {} - DMA memory allocation not available (permission denied or no DMA-BUF support)",
5689                function!()
5690            );
5691            return;
5692        }
5693
5694        let src = load_bytes_to_tensor(
5695            1280,
5696            720,
5697            PixelFormat::Vyuy,
5698            Some(TensorMemory::Dma),
5699            include_bytes!(concat!(
5700                env!("CARGO_MANIFEST_DIR"),
5701                "/../../testdata/camera720p.vyuy"
5702            )),
5703        )
5704        .unwrap();
5705
5706        let dst = TensorDyn::image(
5707            1280,
5708            720,
5709            PixelFormat::Rgba,
5710            DType::U8,
5711            Some(TensorMemory::Dma),
5712        )
5713        .unwrap();
5714        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
5715
5716        let (result, _src, dst) = convert_img(
5717            &mut gl_converter,
5718            src,
5719            dst,
5720            Rotation::None,
5721            Flip::None,
5722            Crop::no_crop(),
5723        );
5724        match result {
5725            Err(Error::NotSupported(_)) => {
5726                eprintln!(
5727                    "SKIPPED: {} - OpenGL does not support PixelFormat::Vyuy DMA format",
5728                    function!()
5729                );
5730                return;
5731            }
5732            r => r.unwrap(),
5733        }
5734
5735        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
5736        target_image
5737            .as_u8()
5738            .unwrap()
5739            .map()
5740            .unwrap()
5741            .as_mut_slice()
5742            .copy_from_slice(include_bytes!(concat!(
5743                env!("CARGO_MANIFEST_DIR"),
5744                "/../../testdata/camera720p.rgba"
5745            )));
5746
5747        compare_images(&dst, &target_image, 0.98, function!());
5748    }
5749
5750    #[test]
5751    fn test_nv12_to_rgba_cpu() {
5752        let file = include_bytes!(concat!(
5753            env!("CARGO_MANIFEST_DIR"),
5754            "/../../testdata/zidane.nv12"
5755        ))
5756        .to_vec();
5757        let src = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
5758        src.as_u8().unwrap().map().unwrap().as_mut_slice()[0..(1280 * 720 * 3 / 2)]
5759            .copy_from_slice(&file);
5760
5761        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
5762        let mut cpu_converter = CPUProcessor::new();
5763
5764        let (result, _src, dst) = convert_img(
5765            &mut cpu_converter,
5766            src,
5767            dst,
5768            Rotation::None,
5769            Flip::None,
5770            Crop::no_crop(),
5771        );
5772        result.unwrap();
5773
5774        let target_image = crate::load_image(
5775            include_bytes!(concat!(
5776                env!("CARGO_MANIFEST_DIR"),
5777                "/../../testdata/zidane.jpg"
5778            )),
5779            Some(PixelFormat::Rgba),
5780            None,
5781        )
5782        .unwrap();
5783
5784        compare_images(&dst, &target_image, 0.98, function!());
5785    }
5786
5787    #[test]
5788    fn test_nv12_to_rgb_cpu() {
5789        let file = include_bytes!(concat!(
5790            env!("CARGO_MANIFEST_DIR"),
5791            "/../../testdata/zidane.nv12"
5792        ))
5793        .to_vec();
5794        let src = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
5795        src.as_u8().unwrap().map().unwrap().as_mut_slice()[0..(1280 * 720 * 3 / 2)]
5796            .copy_from_slice(&file);
5797
5798        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
5799        let mut cpu_converter = CPUProcessor::new();
5800
5801        let (result, _src, dst) = convert_img(
5802            &mut cpu_converter,
5803            src,
5804            dst,
5805            Rotation::None,
5806            Flip::None,
5807            Crop::no_crop(),
5808        );
5809        result.unwrap();
5810
5811        let target_image = crate::load_image(
5812            include_bytes!(concat!(
5813                env!("CARGO_MANIFEST_DIR"),
5814                "/../../testdata/zidane.jpg"
5815            )),
5816            Some(PixelFormat::Rgb),
5817            None,
5818        )
5819        .unwrap();
5820
5821        compare_images(&dst, &target_image, 0.98, function!());
5822    }
5823
5824    #[test]
5825    fn test_nv12_to_grey_cpu() {
5826        let file = include_bytes!(concat!(
5827            env!("CARGO_MANIFEST_DIR"),
5828            "/../../testdata/zidane.nv12"
5829        ))
5830        .to_vec();
5831        let src = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
5832        src.as_u8().unwrap().map().unwrap().as_mut_slice()[0..(1280 * 720 * 3 / 2)]
5833            .copy_from_slice(&file);
5834
5835        let dst = TensorDyn::image(1280, 720, PixelFormat::Grey, DType::U8, None).unwrap();
5836        let mut cpu_converter = CPUProcessor::new();
5837
5838        let (result, _src, dst) = convert_img(
5839            &mut cpu_converter,
5840            src,
5841            dst,
5842            Rotation::None,
5843            Flip::None,
5844            Crop::no_crop(),
5845        );
5846        result.unwrap();
5847
5848        let target_image = crate::load_image(
5849            include_bytes!(concat!(
5850                env!("CARGO_MANIFEST_DIR"),
5851                "/../../testdata/zidane.jpg"
5852            )),
5853            Some(PixelFormat::Grey),
5854            None,
5855        )
5856        .unwrap();
5857
5858        compare_images(&dst, &target_image, 0.98, function!());
5859    }
5860
5861    #[test]
5862    fn test_nv12_to_yuyv_cpu() {
5863        let file = include_bytes!(concat!(
5864            env!("CARGO_MANIFEST_DIR"),
5865            "/../../testdata/zidane.nv12"
5866        ))
5867        .to_vec();
5868        let src = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
5869        src.as_u8().unwrap().map().unwrap().as_mut_slice()[0..(1280 * 720 * 3 / 2)]
5870            .copy_from_slice(&file);
5871
5872        let dst = TensorDyn::image(1280, 720, PixelFormat::Yuyv, DType::U8, None).unwrap();
5873        let mut cpu_converter = CPUProcessor::new();
5874
5875        let (result, _src, dst) = convert_img(
5876            &mut cpu_converter,
5877            src,
5878            dst,
5879            Rotation::None,
5880            Flip::None,
5881            Crop::no_crop(),
5882        );
5883        result.unwrap();
5884
5885        let target_image = crate::load_image(
5886            include_bytes!(concat!(
5887                env!("CARGO_MANIFEST_DIR"),
5888                "/../../testdata/zidane.jpg"
5889            )),
5890            Some(PixelFormat::Rgb),
5891            None,
5892        )
5893        .unwrap();
5894
5895        compare_images_convert_to_rgb(&dst, &target_image, 0.98, function!());
5896    }
5897
5898    #[test]
5899    fn test_cpu_resize_planar_rgb() {
5900        let src = TensorDyn::image(4, 4, PixelFormat::Rgba, DType::U8, None).unwrap();
5901        #[rustfmt::skip]
5902        let src_image = [
5903                    255, 0, 0, 255,     0, 255, 0, 255,     0, 0, 255, 255,     255, 255, 0, 255,
5904                    255, 0, 0, 0,       0, 0, 0, 255,       255,  0, 255, 0,    255, 0, 255, 255,
5905                    0, 0, 255, 0,       0, 255, 255, 255,   255, 255, 0, 0,     0, 0, 0, 255,
5906                    255, 0, 0, 0,       0, 0, 0, 255,       255,  0, 255, 0,    255, 0, 255, 255,
5907        ];
5908        src.as_u8()
5909            .unwrap()
5910            .map()
5911            .unwrap()
5912            .as_mut_slice()
5913            .copy_from_slice(&src_image);
5914
5915        let cpu_dst = TensorDyn::image(5, 5, PixelFormat::PlanarRgb, DType::U8, None).unwrap();
5916        let mut cpu_converter = CPUProcessor::new();
5917
5918        let (result, _src, cpu_dst) = convert_img(
5919            &mut cpu_converter,
5920            src,
5921            cpu_dst,
5922            Rotation::None,
5923            Flip::None,
5924            Crop::new()
5925                .with_dst_rect(Some(Rect {
5926                    left: 1,
5927                    top: 1,
5928                    width: 4,
5929                    height: 4,
5930                }))
5931                .with_dst_color(Some([114, 114, 114, 255])),
5932        );
5933        result.unwrap();
5934
5935        #[rustfmt::skip]
5936        let expected_dst = [
5937            114, 114, 114, 114, 114,    114, 255, 0, 0, 255,    114, 255, 0, 255, 255,      114, 0, 0, 255, 0,        114, 255, 0, 255, 255,
5938            114, 114, 114, 114, 114,    114, 0, 255, 0, 255,    114, 0, 0, 0, 0,            114, 0, 255, 255, 0,      114, 0, 0, 0, 0,
5939            114, 114, 114, 114, 114,    114, 0, 0, 255, 0,      114, 0, 0, 255, 255,        114, 255, 255, 0, 0,      114, 0, 0, 255, 255,
5940        ];
5941
5942        assert_eq!(
5943            cpu_dst.as_u8().unwrap().map().unwrap().as_slice(),
5944            &expected_dst
5945        );
5946    }
5947
5948    #[test]
5949    fn test_cpu_resize_planar_rgba() {
5950        let src = TensorDyn::image(4, 4, PixelFormat::Rgba, DType::U8, None).unwrap();
5951        #[rustfmt::skip]
5952        let src_image = [
5953                    255, 0, 0, 255,     0, 255, 0, 255,     0, 0, 255, 255,     255, 255, 0, 255,
5954                    255, 0, 0, 0,       0, 0, 0, 255,       255,  0, 255, 0,    255, 0, 255, 255,
5955                    0, 0, 255, 0,       0, 255, 255, 255,   255, 255, 0, 0,     0, 0, 0, 255,
5956                    255, 0, 0, 0,       0, 0, 0, 255,       255,  0, 255, 0,    255, 0, 255, 255,
5957        ];
5958        src.as_u8()
5959            .unwrap()
5960            .map()
5961            .unwrap()
5962            .as_mut_slice()
5963            .copy_from_slice(&src_image);
5964
5965        let cpu_dst = TensorDyn::image(5, 5, PixelFormat::PlanarRgba, DType::U8, None).unwrap();
5966        let mut cpu_converter = CPUProcessor::new();
5967
5968        let (result, _src, cpu_dst) = convert_img(
5969            &mut cpu_converter,
5970            src,
5971            cpu_dst,
5972            Rotation::None,
5973            Flip::None,
5974            Crop::new()
5975                .with_dst_rect(Some(Rect {
5976                    left: 1,
5977                    top: 1,
5978                    width: 4,
5979                    height: 4,
5980                }))
5981                .with_dst_color(Some([114, 114, 114, 255])),
5982        );
5983        result.unwrap();
5984
5985        #[rustfmt::skip]
5986        let expected_dst = [
5987            114, 114, 114, 114, 114,    114, 255, 0, 0, 255,        114, 255, 0, 255, 255,      114, 0, 0, 255, 0,        114, 255, 0, 255, 255,
5988            114, 114, 114, 114, 114,    114, 0, 255, 0, 255,        114, 0, 0, 0, 0,            114, 0, 255, 255, 0,      114, 0, 0, 0, 0,
5989            114, 114, 114, 114, 114,    114, 0, 0, 255, 0,          114, 0, 0, 255, 255,        114, 255, 255, 0, 0,      114, 0, 0, 255, 255,
5990            255, 255, 255, 255, 255,    255, 255, 255, 255, 255,    255, 0, 255, 0, 255,        255, 0, 255, 0, 255,      255, 0, 255, 0, 255,
5991        ];
5992
5993        assert_eq!(
5994            cpu_dst.as_u8().unwrap().map().unwrap().as_slice(),
5995            &expected_dst
5996        );
5997    }
5998
    #[test]
    #[cfg(target_os = "linux")]
    #[cfg(feature = "opengl")]
    fn test_opengl_resize_planar_rgb() {
        // Compares the OpenGL letterboxed planar-RGB resize against the CPU
        // converter's output on the same source image. Requires OpenGL and
        // DMA-BUF allocation; skipped otherwise.
        if !is_opengl_available() {
            eprintln!("SKIPPED: {} - OpenGL not available", function!());
            return;
        }

        if !is_dma_available() {
            eprintln!(
                "SKIPPED: {} - DMA memory allocation not available (permission denied or no DMA-BUF support)",
                function!()
            );
            return;
        }

        let dst_width = 640;
        let dst_height = 640;
        let file = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../testdata/test_image.jpg"
        ))
        .to_vec();
        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();

        let cpu_dst = TensorDyn::image(
            dst_width,
            dst_height,
            PixelFormat::PlanarRgb,
            DType::U8,
            None,
        )
        .unwrap();
        let mut cpu_converter = CPUProcessor::new();
        // NOTE(review): this first full-frame CPU convert appears to be fully
        // overwritten by the letterboxed convert below (dst rect plus border
        // fill) — confirm whether it is intentional, e.g. exercising reuse of
        // an already-written destination tensor.
        let (result, src, cpu_dst) = convert_img(
            &mut cpu_converter,
            src,
            cpu_dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();
        // Letterbox into a 440x440 region centred at (102, 102), padding the
        // border with grey 114.
        let crop_letterbox = Crop::new()
            .with_dst_rect(Some(Rect {
                left: 102,
                top: 102,
                width: 440,
                height: 440,
            }))
            .with_dst_color(Some([114, 114, 114, 114]));
        let (result, src, cpu_dst) = convert_img(
            &mut cpu_converter,
            src,
            cpu_dst,
            Rotation::None,
            Flip::None,
            crop_letterbox,
        );
        result.unwrap();

        // Same letterboxed conversion through the threaded GL processor.
        let gl_dst = TensorDyn::image(
            dst_width,
            dst_height,
            PixelFormat::PlanarRgb,
            DType::U8,
            None,
        )
        .unwrap();
        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();

        let (result, _src, gl_dst) = convert_img(
            &mut gl_converter,
            src,
            gl_dst,
            Rotation::None,
            Flip::None,
            crop_letterbox,
        );
        result.unwrap();
        compare_images(&gl_dst, &cpu_dst, 0.98, function!());
    }
6082
6083    #[test]
6084    fn test_cpu_resize_nv16() {
6085        let file = include_bytes!(concat!(
6086            env!("CARGO_MANIFEST_DIR"),
6087            "/../../testdata/zidane.jpg"
6088        ))
6089        .to_vec();
6090        let src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();
6091
6092        let cpu_nv16_dst = TensorDyn::image(640, 640, PixelFormat::Nv16, DType::U8, None).unwrap();
6093        let cpu_rgb_dst = TensorDyn::image(640, 640, PixelFormat::Rgb, DType::U8, None).unwrap();
6094        let mut cpu_converter = CPUProcessor::new();
6095        let crop = Crop::new()
6096            .with_dst_rect(Some(Rect {
6097                left: 20,
6098                top: 140,
6099                width: 600,
6100                height: 360,
6101            }))
6102            .with_dst_color(Some([255, 128, 0, 255]));
6103
6104        let (result, src, cpu_nv16_dst) = convert_img(
6105            &mut cpu_converter,
6106            src,
6107            cpu_nv16_dst,
6108            Rotation::None,
6109            Flip::None,
6110            crop,
6111        );
6112        result.unwrap();
6113
6114        let (result, _src, cpu_rgb_dst) = convert_img(
6115            &mut cpu_converter,
6116            src,
6117            cpu_rgb_dst,
6118            Rotation::None,
6119            Flip::None,
6120            crop,
6121        );
6122        result.unwrap();
6123        compare_images_convert_to_rgb(&cpu_nv16_dst, &cpu_rgb_dst, 0.99, function!());
6124    }
6125
6126    fn load_bytes_to_tensor(
6127        width: usize,
6128        height: usize,
6129        format: PixelFormat,
6130        memory: Option<TensorMemory>,
6131        bytes: &[u8],
6132    ) -> Result<TensorDyn, Error> {
6133        let src = TensorDyn::image(width, height, format, DType::U8, memory)?;
6134        src.as_u8()
6135            .unwrap()
6136            .map()?
6137            .as_mut_slice()
6138            .copy_from_slice(bytes);
6139        Ok(src)
6140    }
6141
6142    fn compare_images(img1: &TensorDyn, img2: &TensorDyn, threshold: f64, name: &str) {
6143        assert_eq!(img1.height(), img2.height(), "Heights differ");
6144        assert_eq!(img1.width(), img2.width(), "Widths differ");
6145        assert_eq!(
6146            img1.format().unwrap(),
6147            img2.format().unwrap(),
6148            "PixelFormat differ"
6149        );
6150        assert!(
6151            matches!(
6152                img1.format().unwrap(),
6153                PixelFormat::Rgb | PixelFormat::Rgba | PixelFormat::Grey | PixelFormat::PlanarRgb
6154            ),
6155            "format must be Rgb or Rgba for comparison"
6156        );
6157
6158        let image1 = match img1.format().unwrap() {
6159            PixelFormat::Rgb => image::RgbImage::from_vec(
6160                img1.width().unwrap() as u32,
6161                img1.height().unwrap() as u32,
6162                img1.as_u8().unwrap().map().unwrap().to_vec(),
6163            )
6164            .unwrap(),
6165            PixelFormat::Rgba => image::RgbaImage::from_vec(
6166                img1.width().unwrap() as u32,
6167                img1.height().unwrap() as u32,
6168                img1.as_u8().unwrap().map().unwrap().to_vec(),
6169            )
6170            .unwrap()
6171            .convert(),
6172            PixelFormat::Grey => image::GrayImage::from_vec(
6173                img1.width().unwrap() as u32,
6174                img1.height().unwrap() as u32,
6175                img1.as_u8().unwrap().map().unwrap().to_vec(),
6176            )
6177            .unwrap()
6178            .convert(),
6179            PixelFormat::PlanarRgb => image::GrayImage::from_vec(
6180                img1.width().unwrap() as u32,
6181                (img1.height().unwrap() * 3) as u32,
6182                img1.as_u8().unwrap().map().unwrap().to_vec(),
6183            )
6184            .unwrap()
6185            .convert(),
6186            _ => return,
6187        };
6188
6189        let image2 = match img2.format().unwrap() {
6190            PixelFormat::Rgb => image::RgbImage::from_vec(
6191                img2.width().unwrap() as u32,
6192                img2.height().unwrap() as u32,
6193                img2.as_u8().unwrap().map().unwrap().to_vec(),
6194            )
6195            .unwrap(),
6196            PixelFormat::Rgba => image::RgbaImage::from_vec(
6197                img2.width().unwrap() as u32,
6198                img2.height().unwrap() as u32,
6199                img2.as_u8().unwrap().map().unwrap().to_vec(),
6200            )
6201            .unwrap()
6202            .convert(),
6203            PixelFormat::Grey => image::GrayImage::from_vec(
6204                img2.width().unwrap() as u32,
6205                img2.height().unwrap() as u32,
6206                img2.as_u8().unwrap().map().unwrap().to_vec(),
6207            )
6208            .unwrap()
6209            .convert(),
6210            PixelFormat::PlanarRgb => image::GrayImage::from_vec(
6211                img2.width().unwrap() as u32,
6212                (img2.height().unwrap() * 3) as u32,
6213                img2.as_u8().unwrap().map().unwrap().to_vec(),
6214            )
6215            .unwrap()
6216            .convert(),
6217            _ => return,
6218        };
6219
6220        let similarity = image_compare::rgb_similarity_structure(
6221            &image_compare::Algorithm::RootMeanSquared,
6222            &image1,
6223            &image2,
6224        )
6225        .expect("Image Comparison failed");
6226        if similarity.score < threshold {
6227            // image1.save(format!("{name}_1.png"));
6228            // image2.save(format!("{name}_2.png"));
6229            similarity
6230                .image
6231                .to_color_map()
6232                .save(format!("{name}.png"))
6233                .unwrap();
6234            panic!(
6235                "{name}: converted image and target image have similarity score too low: {} < {}",
6236                similarity.score, threshold
6237            )
6238        }
6239    }
6240
    /// Compare two images that may be in arbitrary (possibly non-RGB) pixel
    /// formats by first converting both to packed RGB with the CPU converter,
    /// then computing an RMS structural-similarity score.
    ///
    /// Panics when the geometries differ or the similarity score falls below
    /// `threshold`; on score failure a color-mapped difference image is
    /// written to `{name}.png`.
    fn compare_images_convert_to_rgb(
        img1: &TensorDyn,
        img2: &TensorDyn,
        threshold: f64,
        name: &str,
    ) {
        assert_eq!(img1.height(), img2.height(), "Heights differ");
        assert_eq!(img1.width(), img2.width(), "Widths differ");

        // Both RGB scratch tensors use img1's geometry; the asserts above
        // guarantee img2 matches.
        let mut img_rgb1 = TensorDyn::image(
            img1.width().unwrap(),
            img1.height().unwrap(),
            PixelFormat::Rgb,
            DType::U8,
            Some(TensorMemory::Mem),
        )
        .unwrap();
        let mut img_rgb2 = TensorDyn::image(
            img1.width().unwrap(),
            img1.height().unwrap(),
            PixelFormat::Rgb,
            DType::U8,
            Some(TensorMemory::Mem),
        )
        .unwrap();
        let mut __cv = CPUProcessor::default();
        let r1 = __cv.convert(
            img1,
            &mut img_rgb1,
            crate::Rotation::None,
            crate::Flip::None,
            crate::Crop::default(),
        );
        let r2 = __cv.convert(
            img2,
            &mut img_rgb2,
            crate::Rotation::None,
            crate::Flip::None,
            crate::Crop::default(),
        );
        if r1.is_err() || r2.is_err() {
            // Fallback: compare raw bytes as greyscale strip
            // NOTE(review): this reinterprets the raw tensor bytes as a
            // width x (len/width) grey image. It assumes both tensors share
            // a comparable byte layout, and integer division silently drops
            // any trailing partial row — confirm this is acceptable for all
            // formats that can reach this path.
            let w = img1.width().unwrap() as u32;
            let data1 = img1.as_u8().unwrap().map().unwrap().to_vec();
            let data2 = img2.as_u8().unwrap().map().unwrap().to_vec();
            let h1 = (data1.len() as u32) / w;
            let h2 = (data2.len() as u32) / w;
            let g1 = image::GrayImage::from_vec(w, h1, data1).unwrap();
            let g2 = image::GrayImage::from_vec(w, h2, data2).unwrap();
            let similarity = image_compare::gray_similarity_structure(
                &image_compare::Algorithm::RootMeanSquared,
                &g1,
                &g2,
            )
            .expect("Image Comparison failed");
            if similarity.score < threshold {
                panic!(
                    "{name}: converted image and target image have similarity score too low: {} < {}",
                    similarity.score, threshold
                )
            }
            return;
        }

        // Both conversions succeeded: compare the RGB versions structurally.
        let image1 = image::RgbImage::from_vec(
            img_rgb1.width().unwrap() as u32,
            img_rgb1.height().unwrap() as u32,
            img_rgb1.as_u8().unwrap().map().unwrap().to_vec(),
        )
        .unwrap();

        let image2 = image::RgbImage::from_vec(
            img_rgb2.width().unwrap() as u32,
            img_rgb2.height().unwrap() as u32,
            img_rgb2.as_u8().unwrap().map().unwrap().to_vec(),
        )
        .unwrap();

        let similarity = image_compare::rgb_similarity_structure(
            &image_compare::Algorithm::RootMeanSquared,
            &image1,
            &image2,
        )
        .expect("Image Comparison failed");
        if similarity.score < threshold {
            // image1.save(format!("{name}_1.png"));
            // image2.save(format!("{name}_2.png"));
            similarity
                .image
                .to_color_map()
                .save(format!("{name}.png"))
                .unwrap();
            panic!(
                "{name}: converted image and target image have similarity score too low: {} < {}",
                similarity.score, threshold
            )
        }
    }
6339
6340    // =========================================================================
6341    // PixelFormat::Nv12 Format Tests
6342    // =========================================================================
6343
6344    #[test]
6345    fn test_nv12_image_creation() {
6346        let width = 640;
6347        let height = 480;
6348        let img = TensorDyn::image(width, height, PixelFormat::Nv12, DType::U8, None).unwrap();
6349
6350        assert_eq!(img.width(), Some(width));
6351        assert_eq!(img.height(), Some(height));
6352        assert_eq!(img.format().unwrap(), PixelFormat::Nv12);
6353        // PixelFormat::Nv12 uses shape [H*3/2, W] to store Y plane + UV plane
6354        assert_eq!(img.as_u8().unwrap().shape(), &[height * 3 / 2, width]);
6355    }
6356
6357    #[test]
6358    fn test_nv12_channels() {
6359        let img = TensorDyn::image(640, 480, PixelFormat::Nv12, DType::U8, None).unwrap();
6360        // PixelFormat::Nv12.channels() returns 1 (luma plane)
6361        assert_eq!(img.format().unwrap().channels(), 1);
6362    }
6363
6364    // =========================================================================
6365    // Tensor Format Metadata Tests
6366    // =========================================================================
6367
6368    #[test]
6369    fn test_tensor_set_format_planar() {
6370        let mut tensor = Tensor::<u8>::new(&[3, 480, 640], None, None).unwrap();
6371        tensor.set_format(PixelFormat::PlanarRgb).unwrap();
6372        assert_eq!(tensor.format(), Some(PixelFormat::PlanarRgb));
6373        assert_eq!(tensor.width(), Some(640));
6374        assert_eq!(tensor.height(), Some(480));
6375    }
6376
6377    #[test]
6378    fn test_tensor_set_format_interleaved() {
6379        let mut tensor = Tensor::<u8>::new(&[480, 640, 4], None, None).unwrap();
6380        tensor.set_format(PixelFormat::Rgba).unwrap();
6381        assert_eq!(tensor.format(), Some(PixelFormat::Rgba));
6382        assert_eq!(tensor.width(), Some(640));
6383        assert_eq!(tensor.height(), Some(480));
6384    }
6385
6386    #[test]
6387    fn test_tensordyn_image_rgb() {
6388        let img = TensorDyn::image(640, 480, PixelFormat::Rgb, DType::U8, None).unwrap();
6389        assert_eq!(img.width(), Some(640));
6390        assert_eq!(img.height(), Some(480));
6391        assert_eq!(img.format(), Some(PixelFormat::Rgb));
6392    }
6393
6394    #[test]
6395    fn test_tensordyn_image_planar_rgb() {
6396        let img = TensorDyn::image(640, 480, PixelFormat::PlanarRgb, DType::U8, None).unwrap();
6397        assert_eq!(img.width(), Some(640));
6398        assert_eq!(img.height(), Some(480));
6399        assert_eq!(img.format(), Some(PixelFormat::PlanarRgb));
6400    }
6401
6402    #[test]
6403    fn test_rgb_int8_format() {
6404        // Int8 variant: same PixelFormat::Rgb but with DType::I8
6405        let img = TensorDyn::image(
6406            1280,
6407            720,
6408            PixelFormat::Rgb,
6409            DType::I8,
6410            Some(TensorMemory::Mem),
6411        )
6412        .unwrap();
6413        assert_eq!(img.width(), Some(1280));
6414        assert_eq!(img.height(), Some(720));
6415        assert_eq!(img.format(), Some(PixelFormat::Rgb));
6416        assert_eq!(img.dtype(), DType::I8);
6417    }
6418
6419    #[test]
6420    fn test_planar_rgb_int8_format() {
6421        let img = TensorDyn::image(
6422            1280,
6423            720,
6424            PixelFormat::PlanarRgb,
6425            DType::I8,
6426            Some(TensorMemory::Mem),
6427        )
6428        .unwrap();
6429        assert_eq!(img.width(), Some(1280));
6430        assert_eq!(img.height(), Some(720));
6431        assert_eq!(img.format(), Some(PixelFormat::PlanarRgb));
6432        assert_eq!(img.dtype(), DType::I8);
6433    }
6434
6435    #[test]
6436    fn test_rgb_from_tensor() {
6437        let mut tensor = Tensor::<u8>::new(&[720, 1280, 3], None, None).unwrap();
6438        tensor.set_format(PixelFormat::Rgb).unwrap();
6439        let img = TensorDyn::from(tensor);
6440        assert_eq!(img.width(), Some(1280));
6441        assert_eq!(img.height(), Some(720));
6442        assert_eq!(img.format(), Some(PixelFormat::Rgb));
6443    }
6444
6445    #[test]
6446    fn test_planar_rgb_from_tensor() {
6447        let mut tensor = Tensor::<u8>::new(&[3, 720, 1280], None, None).unwrap();
6448        tensor.set_format(PixelFormat::PlanarRgb).unwrap();
6449        let img = TensorDyn::from(tensor);
6450        assert_eq!(img.width(), Some(1280));
6451        assert_eq!(img.height(), Some(720));
6452        assert_eq!(img.format(), Some(PixelFormat::PlanarRgb));
6453    }
6454
6455    #[test]
6456    fn test_dtype_determines_int8() {
6457        // DType::I8 indicates int8 data
6458        let u8_img = TensorDyn::image(64, 64, PixelFormat::Rgb, DType::U8, None).unwrap();
6459        let i8_img = TensorDyn::image(64, 64, PixelFormat::Rgb, DType::I8, None).unwrap();
6460        assert_eq!(u8_img.dtype(), DType::U8);
6461        assert_eq!(i8_img.dtype(), DType::I8);
6462    }
6463
6464    #[test]
6465    fn test_pixel_layout_packed_vs_planar() {
6466        // Packed vs planar layout classification
6467        assert_eq!(PixelFormat::Rgb.layout(), PixelLayout::Packed);
6468        assert_eq!(PixelFormat::Rgba.layout(), PixelLayout::Packed);
6469        assert_eq!(PixelFormat::PlanarRgb.layout(), PixelLayout::Planar);
6470        assert_eq!(PixelFormat::Nv12.layout(), PixelLayout::SemiPlanar);
6471    }
6472
    /// Integration test that exercises the PBO-to-PBO convert path.
    /// Uses ImageProcessor::create_image() to allocate PBO-backed tensors,
    /// then converts between them. Skipped when GL is unavailable or the
    /// backend is not PBO (e.g. DMA-buf systems).
    #[cfg(target_os = "linux")]
    #[cfg(feature = "opengl")]
    #[test]
    fn test_convert_pbo_to_pbo() {
        let mut converter = ImageProcessor::new().unwrap();

        // Skip if GL is not available or backend is not PBO
        let is_pbo = converter
            .opengl
            .as_ref()
            .is_some_and(|gl| gl.transfer_backend() == opengl_headless::TransferBackend::Pbo);
        if !is_pbo {
            eprintln!("Skipping test_convert_pbo_to_pbo: backend is not PBO");
            return;
        }

        let src_w = 640;
        let src_h = 480;
        let dst_w = 320;
        let dst_h = 240;

        // Create PBO-backed source image
        let pbo_src = converter
            .create_image(src_w, src_h, PixelFormat::Rgba, DType::U8, None)
            .unwrap();
        assert_eq!(
            pbo_src.as_u8().unwrap().memory(),
            TensorMemory::Pbo,
            "create_image should produce a PBO tensor"
        );

        // Fill source PBO with test pattern: load JPEG then convert Mem→PBO
        let file = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../testdata/zidane.jpg"
        ))
        .to_vec();
        let jpeg_src = crate::load_image(&file, Some(PixelFormat::Rgba), None).unwrap();

        // Resize JPEG into a Mem temp of the right size, then copy into PBO
        let mem_src = TensorDyn::image(
            src_w,
            src_h,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Mem),
        )
        .unwrap();
        // convert_img takes ownership of src/dst and hands them back
        // alongside the conversion Result.
        let (result, _jpeg_src, mem_src) = convert_img(
            &mut CPUProcessor::new(),
            jpeg_src,
            mem_src,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        // Copy pixel data into the PBO source by mapping it.
        // Inner scope so both maps are dropped (unmapped) before the GL
        // convert below touches the PBO.
        {
            let src_data = mem_src.as_u8().unwrap().map().unwrap();
            let mut pbo_map = pbo_src.as_u8().unwrap().map().unwrap();
            pbo_map.copy_from_slice(&src_data);
        }

        // Create PBO-backed destination image
        let pbo_dst = converter
            .create_image(dst_w, dst_h, PixelFormat::Rgba, DType::U8, None)
            .unwrap();
        assert_eq!(pbo_dst.as_u8().unwrap().memory(), TensorMemory::Pbo);

        // Convert PBO→PBO (this exercises convert_pbo_to_pbo)
        let mut pbo_dst = pbo_dst;
        let result = converter.convert(
            &pbo_src,
            &mut pbo_dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        // Verify: compare with CPU-only conversion of the same input
        let cpu_dst = TensorDyn::image(
            dst_w,
            dst_h,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Mem),
        )
        .unwrap();
        let (result, _mem_src, cpu_dst) = convert_img(
            &mut CPUProcessor::new(),
            mem_src,
            cpu_dst,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        );
        result.unwrap();

        // Re-tag the converted tensor with its pixel format so
        // compare_images sees a proper image.
        let pbo_dst_img = {
            let mut __t = pbo_dst.into_u8().unwrap();
            __t.set_format(PixelFormat::Rgba).unwrap();
            TensorDyn::from(__t)
        };
        // 0.95 similarity threshold — see compare_images for the metric.
        compare_images(&pbo_dst_img, &cpu_dst, 0.95, function!());
        log::info!("test_convert_pbo_to_pbo: PASS — PBO-to-PBO convert matches CPU reference");
    }
6586
6587    #[test]
6588    fn test_image_bgra() {
6589        let img = TensorDyn::image(
6590            640,
6591            480,
6592            PixelFormat::Bgra,
6593            DType::U8,
6594            Some(edgefirst_tensor::TensorMemory::Mem),
6595        )
6596        .unwrap();
6597        assert_eq!(img.width(), Some(640));
6598        assert_eq!(img.height(), Some(480));
6599        assert_eq!(img.format().unwrap().channels(), 4);
6600        assert_eq!(img.format().unwrap(), PixelFormat::Bgra);
6601    }
6602
6603    // ========================================================================
6604    // Tests for EDGEFIRST_FORCE_BACKEND env var
6605    // ========================================================================
6606
6607    #[test]
6608    fn test_force_backend_cpu() {
6609        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
6610        unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", "cpu") };
6611        let result = ImageProcessor::new();
6612        match original {
6613            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
6614            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6615        }
6616        let converter = result.unwrap();
6617        assert!(converter.cpu.is_some());
6618        assert_eq!(converter.forced_backend, Some(ForcedBackend::Cpu));
6619    }
6620
6621    #[test]
6622    fn test_force_backend_invalid() {
6623        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
6624        unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", "invalid") };
6625        let result = ImageProcessor::new();
6626        match original {
6627            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
6628            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6629        }
6630        assert!(
6631            matches!(&result, Err(Error::ForcedBackendUnavailable(s)) if s.contains("unknown")),
6632            "invalid backend value should return ForcedBackendUnavailable error: {result:?}"
6633        );
6634    }
6635
6636    #[test]
6637    fn test_force_backend_unset() {
6638        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
6639        unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") };
6640        let result = ImageProcessor::new();
6641        match original {
6642            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
6643            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6644        }
6645        let converter = result.unwrap();
6646        assert!(converter.forced_backend.is_none());
6647    }
6648
6649    // ========================================================================
6650    // Tests for hybrid mask path error handling
6651    // ========================================================================
6652
    #[test]
    fn test_draw_proto_masks_no_cpu_returns_error() {
        // Disable all three backends so ImageProcessor::new() succeeds but
        // has no CPU converter, which triggers the hybrid-mask error path.
        // NOTE(review): these EDGEFIRST_DISABLE_* variables are not guarded
        // by the FORCE_BACKEND lock used elsewhere in this module, so this
        // can race with parallel tests that construct an ImageProcessor —
        // confirm whether they should share the same serialization.
        let original_cpu = std::env::var("EDGEFIRST_DISABLE_CPU").ok();
        unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", "1") };
        let original_gl = std::env::var("EDGEFIRST_DISABLE_GL").ok();
        unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", "1") };
        let original_g2d = std::env::var("EDGEFIRST_DISABLE_G2D").ok();
        unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", "1") };

        let result = ImageProcessor::new();

        // Restore every variable to its prior state before asserting, so a
        // failed assertion does not leak the disabled state to later tests.
        match original_cpu {
            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", s) },
            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_CPU") },
        }
        match original_gl {
            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", s) },
            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_GL") },
        }
        match original_g2d {
            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", s) },
            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_G2D") },
        }

        let mut converter = result.unwrap();
        assert!(converter.cpu.is_none(), "CPU should be disabled");

        let dst = TensorDyn::image(
            640,
            480,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Mem),
        )
        .unwrap();
        let mut dst_dyn = dst;
        // One detection so draw_proto_masks actually reaches the mask path.
        let det = [DetectBox {
            bbox: edgefirst_decoder::BoundingBox {
                xmin: 0.1,
                ymin: 0.1,
                xmax: 0.5,
                ymax: 0.5,
            },
            score: 0.9,
            label: 0,
        }];
        // Minimal well-formed proto data: [1, 4] coefficients and an
        // 8x8x4 NHWC prototype tensor of zeros.
        let proto_data = {
            use edgefirst_tensor::{Tensor, TensorDyn};
            let coeff_t = Tensor::<f32>::from_slice(&[0.5_f32; 4], &[1, 4]).unwrap();
            let protos_t =
                Tensor::<f32>::from_slice(&vec![0.0_f32; 8 * 8 * 4], &[8, 8, 4]).unwrap();
            ProtoData {
                mask_coefficients: TensorDyn::F32(coeff_t),
                protos: TensorDyn::F32(protos_t),
                layout: ProtoLayout::Nhwc,
            }
        };
        let result =
            converter.draw_proto_masks(&mut dst_dyn, &det, &proto_data, Default::default());
        // Without a CPU backend the hybrid mask path must surface an
        // Internal error mentioning "CPU backend".
        assert!(
            matches!(&result, Err(Error::Internal(s)) if s.contains("CPU backend")),
            "draw_proto_masks without CPU should return Internal error: {result:?}"
        );
    }
6718
6719    #[test]
6720    fn test_draw_proto_masks_cpu_fallback_works() {
6721        // Force CPU-only backend to ensure the CPU fallback path executes
6722        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
6723        unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", "cpu") };
6724        let result = ImageProcessor::new();
6725        match original {
6726            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
6727            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6728        }
6729
6730        let mut converter = result.unwrap();
6731        assert!(converter.cpu.is_some());
6732
6733        let dst = TensorDyn::image(
6734            64,
6735            64,
6736            PixelFormat::Rgba,
6737            DType::U8,
6738            Some(TensorMemory::Mem),
6739        )
6740        .unwrap();
6741        let mut dst_dyn = dst;
6742        let det = [DetectBox {
6743            bbox: edgefirst_decoder::BoundingBox {
6744                xmin: 0.1,
6745                ymin: 0.1,
6746                xmax: 0.5,
6747                ymax: 0.5,
6748            },
6749            score: 0.9,
6750            label: 0,
6751        }];
6752        let proto_data = {
6753            use edgefirst_tensor::{Tensor, TensorDyn};
6754            let coeff_t = Tensor::<f32>::from_slice(&[0.5_f32; 4], &[1, 4]).unwrap();
6755            let protos_t =
6756                Tensor::<f32>::from_slice(&vec![0.0_f32; 8 * 8 * 4], &[8, 8, 4]).unwrap();
6757            ProtoData {
6758                mask_coefficients: TensorDyn::F32(coeff_t),
6759                protos: TensorDyn::F32(protos_t),
6760                layout: ProtoLayout::Nhwc,
6761            }
6762        };
6763        let result =
6764            converter.draw_proto_masks(&mut dst_dyn, &det, &proto_data, Default::default());
6765        assert!(result.is_ok(), "CPU fallback path should work: {result:?}");
6766    }
6767
6768    // ============================================================
6769    // draw_decoded_masks / draw_proto_masks — 4-scenario pixel-
6770    // verified tests. Exercises each backend against the full
6771    // output-contract matrix:
6772    //
6773    //   | detections | background | expected dst             |
6774    //   |------------|------------|--------------------------|
6775    //   | empty      | none       | fully cleared (0x00)     |
6776    //   | empty      | set        | fully equal to bg        |
6777    //   | set        | none       | cleared outside box +    |
6778    //   |            |            | mask-coloured inside     |
6779    //   | set        | set        | bg outside box + mask    |
6780    //   |            |            | blended inside           |
6781    //
6782    // Every test pre-fills dst with a non-zero "dirty" pattern so
6783    // that any silent `return Ok(())` leaks the pattern into the
6784    // asserted output and fails loudly.
6785    // ============================================================
6786
    /// Run `body` with `EDGEFIRST_FORCE_BACKEND` temporarily set (or
    /// removed), restoring the prior value afterward. All callers are
    /// serialized through a process-wide lock so parallel tests cannot
    /// observe each other's mutation of the shared environment.
    fn with_force_backend<R>(value: Option<&str>, body: impl FnOnce() -> R) -> R {
        use std::sync::{Mutex, MutexGuard, OnceLock};
        static LOCK: OnceLock<Mutex<()>> = OnceLock::new();
        // Recover from a poisoned lock: a panicking test must not wedge
        // every subsequent env-mutating test.
        let _guard: MutexGuard<()> = LOCK
            .get_or_init(|| Mutex::new(()))
            .lock()
            .unwrap_or_else(|e| e.into_inner());
        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
        match value {
            Some(v) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", v) },
            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
        }
        let r = body();
        // Restore the pre-call state (set vs. unset) before releasing.
        match original {
            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
        }
        r
    }
6809
6810    /// Allocate an RGBA image tensor and pre-fill every byte with a
6811    /// distinctive non-zero pattern. Any test that relies on the old
6812    /// "dst is already cleared" assumption will see this pattern leak
6813    /// through to the output and fail.
6814    fn make_dirty_dst(w: usize, h: usize, mem: Option<TensorMemory>) -> TensorDyn {
6815        let dst = TensorDyn::image(w, h, PixelFormat::Rgba, DType::U8, mem).unwrap();
6816        {
6817            use edgefirst_tensor::TensorMapTrait;
6818            let u8t = dst.as_u8().unwrap();
6819            let mut map = u8t.map().unwrap();
6820            for (i, b) in map.as_mut_slice().iter_mut().enumerate() {
6821                *b = 0xA0u8.wrapping_add((i as u8) & 0x3F);
6822            }
6823        }
6824        dst
6825    }
6826
6827    /// Allocate an RGBA background filled with a constant colour.
6828    fn make_bg(w: usize, h: usize, mem: Option<TensorMemory>, rgba: [u8; 4]) -> TensorDyn {
6829        let bg = TensorDyn::image(w, h, PixelFormat::Rgba, DType::U8, mem).unwrap();
6830        {
6831            use edgefirst_tensor::TensorMapTrait;
6832            let u8t = bg.as_u8().unwrap();
6833            let mut map = u8t.map().unwrap();
6834            for chunk in map.as_mut_slice().chunks_exact_mut(4) {
6835                chunk.copy_from_slice(&rgba);
6836            }
6837        }
6838        bg
6839    }
6840
6841    fn pixel_at(dst: &TensorDyn, x: usize, y: usize) -> [u8; 4] {
6842        use edgefirst_tensor::TensorMapTrait;
6843        let w = dst.width().unwrap();
6844        let off = (y * w + x) * 4;
6845        let u8t = dst.as_u8().unwrap();
6846        let map = u8t.map().unwrap();
6847        let s = map.as_slice();
6848        [s[off], s[off + 1], s[off + 2], s[off + 3]]
6849    }
6850
6851    fn assert_every_pixel_eq(dst: &TensorDyn, expected: [u8; 4], case: &str) {
6852        use edgefirst_tensor::TensorMapTrait;
6853        let u8t = dst.as_u8().unwrap();
6854        let map = u8t.map().unwrap();
6855        for (i, chunk) in map.as_slice().chunks_exact(4).enumerate() {
6856            assert_eq!(
6857                chunk, &expected,
6858                "{case}: pixel idx {i} = {chunk:?}, expected {expected:?}"
6859            );
6860        }
6861    }
6862
6863    /// Scenario 1: empty detections, empty segmentation, no background
6864    /// → dst must be fully cleared to 0x00000000.
6865    fn scenario_empty_no_bg(processor: &mut ImageProcessor, case: &str) {
6866        let mut dst = make_dirty_dst(64, 64, None);
6867        processor
6868            .draw_decoded_masks(&mut dst, &[], &[], MaskOverlay::default())
6869            .unwrap_or_else(|e| panic!("{case}/decoded_masks empty+no-bg failed: {e:?}"));
6870        assert_every_pixel_eq(&dst, [0, 0, 0, 0], &format!("{case}/decoded"));
6871
6872        let mut dst = make_dirty_dst(64, 64, None);
6873        let proto = {
6874            use edgefirst_tensor::{Tensor, TensorDyn};
6875            // Placeholder (no detections); shape [1, 4] to keep the tensor well-formed.
6876            let coeff_t = Tensor::<f32>::from_slice(&[0.0_f32; 4], &[1, 4]).unwrap();
6877            let protos_t =
6878                Tensor::<f32>::from_slice(&vec![0.0_f32; 8 * 8 * 4], &[8, 8, 4]).unwrap();
6879            ProtoData {
6880                mask_coefficients: TensorDyn::F32(coeff_t),
6881                protos: TensorDyn::F32(protos_t),
6882                layout: ProtoLayout::Nhwc,
6883            }
6884        };
6885        processor
6886            .draw_proto_masks(&mut dst, &[], &proto, MaskOverlay::default())
6887            .unwrap_or_else(|e| panic!("{case}/proto_masks empty+no-bg failed: {e:?}"));
6888        assert_every_pixel_eq(&dst, [0, 0, 0, 0], &format!("{case}/proto"));
6889    }
6890
6891    /// Scenario 2: empty detections, empty segmentation, background set
6892    /// → dst must be fully equal to bg.
6893    fn scenario_empty_with_bg(processor: &mut ImageProcessor, case: &str) {
6894        let bg_color = [42, 99, 200, 255];
6895        let bg = make_bg(64, 64, None, bg_color);
6896        let overlay = MaskOverlay::new().with_background(&bg);
6897
6898        let mut dst = make_dirty_dst(64, 64, None);
6899        processor
6900            .draw_decoded_masks(&mut dst, &[], &[], overlay)
6901            .unwrap_or_else(|e| panic!("{case}/decoded_masks empty+bg failed: {e:?}"));
6902        assert_every_pixel_eq(&dst, bg_color, &format!("{case}/decoded bg blit"));
6903
6904        let mut dst = make_dirty_dst(64, 64, None);
6905        let proto = {
6906            use edgefirst_tensor::{Tensor, TensorDyn};
6907            // Placeholder (no detections); shape [1, 4] to keep the tensor well-formed.
6908            let coeff_t = Tensor::<f32>::from_slice(&[0.0_f32; 4], &[1, 4]).unwrap();
6909            let protos_t =
6910                Tensor::<f32>::from_slice(&vec![0.0_f32; 8 * 8 * 4], &[8, 8, 4]).unwrap();
6911            ProtoData {
6912                mask_coefficients: TensorDyn::F32(coeff_t),
6913                protos: TensorDyn::F32(protos_t),
6914                layout: ProtoLayout::Nhwc,
6915            }
6916        };
6917        processor
6918            .draw_proto_masks(&mut dst, &[], &proto, overlay)
6919            .unwrap_or_else(|e| panic!("{case}/proto_masks empty+bg failed: {e:?}"));
6920        assert_every_pixel_eq(&dst, bg_color, &format!("{case}/proto bg blit"));
6921    }
6922
6923    /// Scenario 3: one detection with a fully-opaque segmentation fill,
6924    /// no background → outside the box dst must be 0x00, inside it must
6925    /// be a non-zero mask colour (the render_segmentation output).
6926    fn scenario_detect_no_bg(processor: &mut ImageProcessor, case: &str) {
6927        use edgefirst_decoder::Segmentation;
6928        use ndarray::Array3;
6929        processor
6930            .set_class_colors(&[[200, 80, 40, 255]])
6931            .expect("set_class_colors");
6932
6933        let detect = DetectBox {
6934            bbox: [0.25, 0.25, 0.75, 0.75].into(),
6935            score: 0.99,
6936            label: 0,
6937        };
6938        let seg_arr = Array3::from_shape_fn((4, 4, 1), |_| 255u8);
6939        let seg = Segmentation {
6940            segmentation: seg_arr,
6941            xmin: 0.25,
6942            ymin: 0.25,
6943            xmax: 0.75,
6944            ymax: 0.75,
6945        };
6946
6947        let mut dst = make_dirty_dst(64, 64, None);
6948        processor
6949            .draw_decoded_masks(&mut dst, &[detect], &[seg], MaskOverlay::default())
6950            .unwrap_or_else(|e| panic!("{case}/decoded_masks detect+no-bg failed: {e:?}"));
6951
6952        // Outside the bbox (corner): must be cleared black.
6953        let corner = pixel_at(&dst, 2, 2);
6954        assert_eq!(
6955            corner,
6956            [0, 0, 0, 0],
6957            "{case}/decoded: corner (2,2) leaked dirty pattern: {corner:?}"
6958        );
6959        // Inside the bbox (center): the mask colour must be visible.
6960        // Any non-zero pixel is acceptable — exact rendering varies
6961        // between backends (GL smoothstep, CPU nearest).
6962        let center = pixel_at(&dst, 32, 32);
6963        assert!(
6964            center != [0, 0, 0, 0],
6965            "{case}/decoded: center (32,32) was not coloured: {center:?}"
6966        );
6967    }
6968
6969    /// Scenario 4: detection + background. Outside the box must match
6970    /// bg; inside the box must NOT match bg (mask blended on top).
6971    fn scenario_detect_with_bg(processor: &mut ImageProcessor, case: &str) {
6972        use edgefirst_decoder::Segmentation;
6973        use ndarray::Array3;
6974        processor
6975            .set_class_colors(&[[200, 80, 40, 255]])
6976            .expect("set_class_colors");
6977        let bg_color = [10, 20, 30, 255];
6978        let bg = make_bg(64, 64, None, bg_color);
6979
6980        let detect = DetectBox {
6981            bbox: [0.25, 0.25, 0.75, 0.75].into(),
6982            score: 0.99,
6983            label: 0,
6984        };
6985        let seg_arr = Array3::from_shape_fn((4, 4, 1), |_| 255u8);
6986        let seg = Segmentation {
6987            segmentation: seg_arr,
6988            xmin: 0.25,
6989            ymin: 0.25,
6990            xmax: 0.75,
6991            ymax: 0.75,
6992        };
6993
6994        let overlay = MaskOverlay::new().with_background(&bg);
6995        let mut dst = make_dirty_dst(64, 64, None);
6996        processor
6997            .draw_decoded_masks(&mut dst, &[detect], &[seg], overlay)
6998            .unwrap_or_else(|e| panic!("{case}/decoded_masks detect+bg failed: {e:?}"));
6999
7000        // Outside the bbox (corner): bg colour.
7001        let corner = pixel_at(&dst, 2, 2);
7002        assert_eq!(
7003            corner, bg_color,
7004            "{case}/decoded: corner (2,2) should show bg {bg_color:?} got {corner:?}"
7005        );
7006        // Inside the bbox (center): mask blended on bg, must differ from
7007        // pure bg (alpha-blend with mask colour produces a distinct shade).
7008        let center = pixel_at(&dst, 32, 32);
7009        assert!(
7010            center != bg_color,
7011            "{case}/decoded: center (32,32) should differ from bg {bg_color:?}, got {center:?}"
7012        );
7013    }
7014
7015    /// Run all 4 scenarios against the processor. Skip gracefully if
7016    /// construction fails (backend unavailable on this host).
7017    fn run_all_scenarios(
7018        force_backend: Option<&'static str>,
7019        case: &'static str,
7020        require_dma_for_bg: bool,
7021    ) {
7022        if require_dma_for_bg && !edgefirst_tensor::is_dma_available() {
7023            eprintln!("SKIPPED: {case} — DMA not available on this host");
7024            return;
7025        }
7026        let processor_result = with_force_backend(force_backend, ImageProcessor::new);
7027        let mut processor = match processor_result {
7028            Ok(p) => p,
7029            Err(e) => {
7030                eprintln!("SKIPPED: {case} — backend init failed: {e:?}");
7031                return;
7032            }
7033        };
7034        scenario_empty_no_bg(&mut processor, case);
7035        scenario_empty_with_bg(&mut processor, case);
7036        scenario_detect_no_bg(&mut processor, case);
7037        scenario_detect_with_bg(&mut processor, case);
7038    }
7039
    // Forced CPU backend: the full scenario matrix must pass on any host.
    #[test]
    fn test_draw_masks_4_scenarios_cpu() {
        run_all_scenarios(Some("cpu"), "cpu", false);
    }
7044
    // Automatic backend selection: whichever backend wins must honour the
    // same output contract as the forced-CPU run.
    #[test]
    fn test_draw_masks_4_scenarios_auto() {
        run_all_scenarios(None, "auto", false);
    }
7049
    // Forced OpenGL backend (Linux + "opengl" feature only). Skips inside
    // run_all_scenarios when GL initialization fails on this host.
    #[cfg(target_os = "linux")]
    #[cfg(feature = "opengl")]
    #[test]
    fn test_draw_masks_4_scenarios_opengl() {
        run_all_scenarios(Some("opengl"), "opengl", false);
    }
7056
    /// G2D forced backend: exercises the zero-detection empty-frame
    /// paths via `g2d_clear` and `g2d_blit`. Scenarios 3 and 4 (with
    /// detections) expect `NotImplemented` since G2D has no rasterizer
    /// for boxes / masks.
    #[cfg(target_os = "linux")]
    #[test]
    fn test_draw_masks_zero_detection_g2d_forced() {
        if !edgefirst_tensor::is_dma_available() {
            eprintln!("SKIPPED: g2d forced — DMA not available on this host");
            return;
        }
        let processor_result = with_force_backend(Some("g2d"), ImageProcessor::new);
        let mut processor = match processor_result {
            Ok(p) => p,
            Err(e) => {
                eprintln!("SKIPPED: g2d forced — init failed: {e:?}");
                return;
            }
        };

        // Case 1: empty + no bg. G2D requires DMA-backed dst.
        let mut dst = TensorDyn::image(
            64,
            64,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Dma),
        )
        .unwrap();
        // Pre-fill with a dirty pattern so a silent no-op would fail the
        // cleared-pixel assertion below.
        {
            use edgefirst_tensor::TensorMapTrait;
            let u8t = dst.as_u8_mut().unwrap();
            let mut map = u8t.map().unwrap();
            map.as_mut_slice().fill(0xBB);
        }
        processor
            .draw_decoded_masks(&mut dst, &[], &[], MaskOverlay::default())
            .expect("g2d empty+no-bg");
        assert_every_pixel_eq(&dst, [0, 0, 0, 0], "g2d/case1 cleared");

        // Case 2: empty + bg. Both surfaces DMA-backed for g2d_blit.
        let bg_color = [7, 11, 13, 255];
        let bg = {
            let t = TensorDyn::image(
                64,
                64,
                PixelFormat::Rgba,
                DType::U8,
                Some(TensorMemory::Dma),
            )
            .unwrap();
            {
                use edgefirst_tensor::TensorMapTrait;
                let u8t = t.as_u8().unwrap();
                let mut map = u8t.map().unwrap();
                for chunk in map.as_mut_slice().chunks_exact_mut(4) {
                    chunk.copy_from_slice(&bg_color);
                }
            }
            t
        };
        let mut dst = TensorDyn::image(
            64,
            64,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Dma),
        )
        .unwrap();
        // Different dirty byte from case 1 so the two cases cannot mask
        // each other.
        {
            use edgefirst_tensor::TensorMapTrait;
            let u8t = dst.as_u8_mut().unwrap();
            let mut map = u8t.map().unwrap();
            map.as_mut_slice().fill(0x55);
        }
        processor
            .draw_decoded_masks(&mut dst, &[], &[], MaskOverlay::new().with_background(&bg))
            .expect("g2d empty+bg");
        assert_every_pixel_eq(&dst, bg_color, "g2d/case2 bg blit");

        // Case 3 and 4: detect present — must return NotImplemented.
        let detect = DetectBox {
            bbox: [0.25, 0.25, 0.75, 0.75].into(),
            score: 0.9,
            label: 0,
        };
        let mut dst = TensorDyn::image(
            64,
            64,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Dma),
        )
        .unwrap();
        let err = processor
            .draw_decoded_masks(&mut dst, &[detect], &[], MaskOverlay::default())
            .expect_err("g2d must reject detect-present draw_decoded_masks");
        assert!(
            matches!(err, Error::NotImplemented(_)),
            "g2d case3 wrong error: {err:?}"
        );
    }
7159
7160    #[test]
7161    fn test_set_format_then_cpu_convert() {
7162        // Force CPU backend (save/restore to avoid leaking into other tests)
7163        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
7164        unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", "cpu") };
7165        let mut processor = ImageProcessor::new().unwrap();
7166        match original {
7167            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
7168            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
7169        }
7170
7171        // Load a source image
7172        let image = include_bytes!(concat!(
7173            env!("CARGO_MANIFEST_DIR"),
7174            "/../../testdata/zidane.jpg"
7175        ));
7176        let src = load_image(image, Some(PixelFormat::Rgba), None).unwrap();
7177
7178        // Create a raw tensor, then attach format — simulating the from_fd workflow
7179        let mut dst =
7180            TensorDyn::new(&[640, 640, 3], DType::U8, Some(TensorMemory::Mem), None).unwrap();
7181        dst.set_format(PixelFormat::Rgb).unwrap();
7182
7183        // Convert should work with the set_format-annotated tensor
7184        processor
7185            .convert(&src, &mut dst, Rotation::None, Flip::None, Crop::default())
7186            .unwrap();
7187
7188        // Verify format survived conversion
7189        assert_eq!(dst.format(), Some(PixelFormat::Rgb));
7190        assert_eq!(dst.width(), Some(640));
7191        assert_eq!(dst.height(), Some(640));
7192    }
7193
7194    /// Verify that creating multiple ImageProcessors on the same thread and
7195    /// performing a resize on each does not deadlock or error.
7196    ///
7197    /// Uses automatic memory allocation (DMA → PBO → Mem fallback) so that
7198    /// hardware backends (OpenGL, G2D) are exercised on capable targets.
7199    #[test]
7200    fn test_multiple_image_processors_same_thread() {
7201        let mut processors: Vec<ImageProcessor> = (0..4)
7202            .map(|_| ImageProcessor::new().expect("ImageProcessor::new() failed"))
7203            .collect();
7204
7205        for proc in &mut processors {
7206            let src = proc
7207                .create_image(128, 128, PixelFormat::Rgb, DType::U8, None)
7208                .expect("create src failed");
7209            let mut dst = proc
7210                .create_image(64, 64, PixelFormat::Rgb, DType::U8, None)
7211                .expect("create dst failed");
7212            proc.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::default())
7213                .expect("convert failed");
7214            assert_eq!(dst.width(), Some(64));
7215            assert_eq!(dst.height(), Some(64));
7216        }
7217    }
7218
7219    /// Verify that creating ImageProcessors on separate threads and performing
7220    /// a resize on each does not deadlock or error.
7221    ///
7222    /// Uses automatic memory allocation (DMA → PBO → Mem fallback) so that
7223    /// hardware backends (OpenGL, G2D) are exercised on capable targets.
7224    /// A 60-second timeout prevents CI from hanging on deadlock regressions.
7225    #[test]
7226    fn test_multiple_image_processors_separate_threads() {
7227        use std::sync::mpsc;
7228        use std::time::Duration;
7229
7230        const TIMEOUT: Duration = Duration::from_secs(60);
7231
7232        let (tx, rx) = mpsc::channel::<()>();
7233
7234        std::thread::spawn(move || {
7235            let handles: Vec<_> = (0..4)
7236                .map(|i| {
7237                    std::thread::spawn(move || {
7238                        let mut proc = ImageProcessor::new().unwrap_or_else(|e| {
7239                            panic!("ImageProcessor::new() failed on thread {i}: {e}")
7240                        });
7241                        let src = proc
7242                            .create_image(128, 128, PixelFormat::Rgb, DType::U8, None)
7243                            .unwrap_or_else(|e| panic!("create src failed on thread {i}: {e}"));
7244                        let mut dst = proc
7245                            .create_image(64, 64, PixelFormat::Rgb, DType::U8, None)
7246                            .unwrap_or_else(|e| panic!("create dst failed on thread {i}: {e}"));
7247                        proc.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::default())
7248                            .unwrap_or_else(|e| panic!("convert failed on thread {i}: {e}"));
7249                        assert_eq!(dst.width(), Some(64));
7250                        assert_eq!(dst.height(), Some(64));
7251                    })
7252                })
7253                .collect();
7254
7255            for (i, h) in handles.into_iter().enumerate() {
7256                h.join()
7257                    .unwrap_or_else(|e| panic!("thread {i} panicked: {e:?}"));
7258            }
7259
7260            let _ = tx.send(());
7261        });
7262
7263        rx.recv_timeout(TIMEOUT).unwrap_or_else(|_| {
7264            panic!("test_multiple_image_processors_separate_threads timed out after {TIMEOUT:?}")
7265        });
7266    }
7267
7268    /// Verify that 4 fully-initialized ImageProcessors on separate threads can
7269    /// all operate concurrently without deadlocking each other.
7270    ///
7271    /// All processors are created first, then a barrier synchronizes them so
7272    /// they all start converting at the same instant — maximizing contention.
7273    /// A 60-second timeout prevents CI from hanging on deadlock regressions.
7274    #[test]
7275    fn test_image_processors_concurrent_operations() {
7276        use std::sync::{mpsc, Arc, Barrier};
7277        use std::time::Duration;
7278
7279        const N: usize = 4;
7280        const ROUNDS: usize = 10;
7281        const TIMEOUT: Duration = Duration::from_secs(60);
7282
7283        let (tx, rx) = mpsc::channel::<()>();
7284
7285        std::thread::spawn(move || {
7286            let barrier = Arc::new(Barrier::new(N));
7287
7288            let handles: Vec<_> = (0..N)
7289                .map(|i| {
7290                    let barrier = Arc::clone(&barrier);
7291                    std::thread::spawn(move || {
7292                        let mut proc = ImageProcessor::new().unwrap_or_else(|e| {
7293                            panic!("ImageProcessor::new() failed on thread {i}: {e}")
7294                        });
7295
7296                        // All threads wait here until every processor is initialized.
7297                        barrier.wait();
7298
7299                        // Now all 4 hammer the GPU concurrently.
7300                        for round in 0..ROUNDS {
7301                            let src = proc
7302                                .create_image(128, 128, PixelFormat::Rgb, DType::U8, None)
7303                                .unwrap_or_else(|e| {
7304                                    panic!("create src failed on thread {i} round {round}: {e}")
7305                                });
7306                            let mut dst = proc
7307                                .create_image(64, 64, PixelFormat::Rgb, DType::U8, None)
7308                                .unwrap_or_else(|e| {
7309                                    panic!("create dst failed on thread {i} round {round}: {e}")
7310                                });
7311                            proc.convert(
7312                                &src,
7313                                &mut dst,
7314                                Rotation::None,
7315                                Flip::None,
7316                                Crop::default(),
7317                            )
7318                            .unwrap_or_else(|e| {
7319                                panic!("convert failed on thread {i} round {round}: {e}")
7320                            });
7321                            assert_eq!(dst.width(), Some(64));
7322                            assert_eq!(dst.height(), Some(64));
7323                        }
7324                    })
7325                })
7326                .collect();
7327
7328            for (i, h) in handles.into_iter().enumerate() {
7329                h.join()
7330                    .unwrap_or_else(|e| panic!("thread {i} panicked: {e:?}"));
7331            }
7332
7333            let _ = tx.send(());
7334        });
7335
7336        rx.recv_timeout(TIMEOUT).unwrap_or_else(|_| {
7337            panic!("test_image_processors_concurrent_operations timed out after {TIMEOUT:?}")
7338        });
7339    }
7340}