Skip to main content

edgefirst_image/
lib.rs

1// SPDX-FileCopyrightText: Copyright 2025 Au-Zone Technologies
2// SPDX-License-Identifier: Apache-2.0
3
4/*!
5
6## EdgeFirst HAL - Image Converter
7
8The `edgefirst_image` crate is part of the EdgeFirst Hardware Abstraction
9Layer (HAL) and provides functionality for converting images between
10different formats and sizes.  The crate is designed to work with hardware
11acceleration when available, but also provides a CPU-based fallback for
12environments where hardware acceleration is not present or not suitable.
13
14The main features of the `edgefirst_image` crate include:
15- Support for various image formats, including YUYV, RGB, RGBA, and GREY.
16- Support for source crop, destination crop, rotation, and flipping.
17- Image conversion using hardware acceleration (G2D, OpenGL) when available.
18- CPU-based image conversion as a fallback option.
19
20The crate uses [`TensorDyn`] from `edgefirst_tensor` to represent images,
21with [`PixelFormat`] metadata describing the pixel layout. The
22[`ImageProcessor`] struct manages the conversion process, selecting
23the appropriate conversion method based on the available hardware.
24
25## Examples
26
27```rust
28# use edgefirst_image::{ImageProcessor, Rotation, Flip, Crop, ImageProcessorTrait};
29# use edgefirst_codec::{peek_info, ImageDecoder, ImageLoad, DecodeOptions};
30# use edgefirst_tensor::{PixelFormat, DType, Tensor, TensorMemory};
31# fn main() -> Result<(), edgefirst_image::Error> {
32let image = edgefirst_bench::testdata::read("zidane.jpg");
33let opts = DecodeOptions::default().with_format(PixelFormat::Rgba);
34let info = peek_info(&image, &opts).expect("peek");
35let mut src = Tensor::<u8>::image(info.width, info.height, info.format,
36                                   Some(TensorMemory::Mem))?;
37let mut decoder = ImageDecoder::new();
38src.load_image(&mut decoder, &image, &opts).expect("decode");
39let mut converter = ImageProcessor::new()?;
40let mut dst = converter.create_image(640, 480, PixelFormat::Rgb, DType::U8, None)?;
41converter.convert(&src.into(), &mut dst, Rotation::None, Flip::None, Crop::default())?;
42# Ok(())
43# }
44```
45
46## Environment Variables
47The behavior of the `edgefirst_image::ImageProcessor` struct can be influenced by the
48following environment variables:
49- `EDGEFIRST_FORCE_BACKEND`: When set to `cpu`, `g2d`, or `opengl` (case-insensitive),
50  only that single backend is initialized and no fallback chain is used. If the
51  forced backend fails to initialize, an error is returned immediately. This is
52  useful for benchmarking individual backends in isolation. When this variable is
53  set, the `EDGEFIRST_DISABLE_*` variables are ignored.
54- `EDGEFIRST_DISABLE_GL`: If set to `1`, disables the use of OpenGL for image
55  conversion, forcing the use of CPU or other available hardware methods.
56- `EDGEFIRST_DISABLE_G2D`: If set to `1`, disables the use of G2D for image
57  conversion, forcing the use of CPU or other available hardware methods.
58- `EDGEFIRST_DISABLE_CPU`: If set to `1`, disables the use of CPU for image
59  conversion, forcing the use of hardware acceleration methods. If no hardware
60  acceleration methods are available, an error will be returned when attempting
61  to create an `ImageProcessor`.
62
63Additionally the TensorMemory used by default allocations can be controlled using the
64`EDGEFIRST_TENSOR_FORCE_MEM` environment variable. If set to `1`, default tensor memory
65uses system memory. This will disable the use of specialized memory regions for tensors
66and hardware acceleration. However, this will increase the performance of the CPU converter.
67*/
68#![cfg_attr(coverage_nightly, feature(coverage_attribute))]
69
/// Row-pitch alignment, in bytes, required for DMA-BUF tensors that may be
/// imported as EGLImages by the GL backend.
///
/// Mali Valhall (i.MX 95 / G310) rejects `eglCreateImageKHR` with
/// `EGL_BAD_ALLOC` for any DMA-BUF whose row pitch is not a multiple of 64
/// bytes; Vivante GC7000UL (i.MX 8MP) accepts any pitch so the constant is
/// harmless on that path. 64 is the smallest alignment that satisfies every
/// embedded ARM GPU we ship to.
///
/// Applied automatically inside [`ImageProcessor::create_image`] when the
/// allocation lands on `TensorMemory::Dma`. External callers that allocate
/// their own DMA-BUF tensors (e.g. GStreamer plugins, video pipelines) can
/// use [`align_width_for_gpu_pitch`] to compute a width whose resulting row
/// stride satisfies this requirement.
pub const GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES: usize = 64;
83
84/// Round `width` (in pixels) up so the resulting row stride
85/// `width * bpp` is a multiple of [`GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES`]
86/// AND a multiple of `bpp` (so the rounded width is an integer pixel count).
87///
88/// `bpp` must be the per-pixel byte count for the image's primary plane
89/// (e.g. 4 for RGBA8/BGRA8, 3 for RGB888, 1 for Grey/NV12-luma).
90///
91/// External callers — GStreamer plugins, video pipelines, anyone wrapping a
92/// foreign DMA-BUF — should call this when sizing the destination so that
93/// `eglCreateImageKHR` doesn't reject the import on Mali. Pre-aligned widths
94/// (640, 1280, 1920, 3008, 3840 …) round-trip unchanged; misaligned widths
95/// are bumped up to the next valid value.
96///
97/// # Overflow behaviour
98///
99/// All arithmetic is checked. If the alignment computation or the rounded
100/// width would overflow `usize`, the function logs a warning and returns the
101/// original `width` unchanged rather than wrapping or producing a smaller
102/// value. Callers can rely on the returned width being **at least** the
103/// requested width.
104///
105/// `bpp == 0` and `width == 0` short-circuit to return the input unchanged.
106///
107/// # Examples
108///
109/// ```
110/// use edgefirst_image::align_width_for_gpu_pitch;
111///
112/// // RGBA8 (bpp=4): width must round to a multiple of 16 pixels (64-byte stride).
113/// assert_eq!(align_width_for_gpu_pitch(1920, 4), 1920); // already aligned
114/// assert_eq!(align_width_for_gpu_pitch(3004, 4), 3008); // crowd.png case: +4 px
115/// assert_eq!(align_width_for_gpu_pitch(1281, 4), 1296); // +15 px
116///
117/// // RGB888 (bpp=3): width must round to a multiple of 64 pixels (192-byte stride).
118/// assert_eq!(align_width_for_gpu_pitch(640, 3), 640);
119/// assert_eq!(align_width_for_gpu_pitch(641, 3), 704);
120/// ```
121pub fn align_width_for_gpu_pitch(width: usize, bpp: usize) -> usize {
122    if bpp == 0 || width == 0 {
123        return width;
124    }
125
126    // The minimum aligned stride must be a common multiple of both the
127    // GPU's pitch alignment and the per-pixel byte count. Using the LCM
128    // guarantees the rounded stride is an integer multiple of `bpp`, so
129    // converting back to a pixel count is exact.
130    //
131    // Compute the alignment in pixels (`width_alignment`) so we never need
132    // to multiply `width * bpp`, which is the only operation that could
133    // realistically overflow for large caller-supplied widths.
134    let Some(lcm_alignment) = checked_num_integer_lcm(GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES, bpp)
135    else {
136        log::warn!(
137            "align_width_for_gpu_pitch: lcm({GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES}, {bpp}) \
138             overflows usize, returning unaligned width {width}"
139        );
140        return width;
141    };
142    if lcm_alignment == 0 {
143        return width;
144    }
145
146    debug_assert_eq!(lcm_alignment % bpp, 0);
147    let width_alignment = lcm_alignment / bpp;
148    if width_alignment == 0 {
149        return width;
150    }
151
152    let remainder = width % width_alignment;
153    if remainder == 0 {
154        return width;
155    }
156
157    let pad = width_alignment - remainder;
158    match width.checked_add(pad) {
159        Some(aligned) => aligned,
160        None => {
161            log::warn!(
162                "align_width_for_gpu_pitch: width {width} + pad {pad} overflows usize, \
163                 returning unaligned (caller should use a smaller width or pre-aligned size)"
164            );
165            width
166        }
167    }
168}
169
170/// Round `min_pitch_bytes` up to the next multiple of
171/// [`GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES`]. Returns `None` if the rounded
172/// value would overflow `usize`. Returns `Some(0)` for input 0.
173///
174/// Used internally by [`ImageProcessor::create_image`] to compute the
175/// padded row stride for DMA-backed image allocations. External callers
176/// that need pixel-counted alignment (instead of raw byte pitch) should
177/// use [`align_width_for_gpu_pitch`] instead.
178#[cfg(target_os = "linux")]
179pub(crate) fn align_pitch_bytes_to_gpu_alignment(min_pitch_bytes: usize) -> Option<usize> {
180    let alignment = GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES;
181    if min_pitch_bytes == 0 {
182        return Some(0);
183    }
184    let remainder = min_pitch_bytes % alignment;
185    if remainder == 0 {
186        return Some(min_pitch_bytes);
187    }
188    min_pitch_bytes.checked_add(alignment - remainder)
189}
190
191/// Overflow-safe least common multiple. Returns `None` when `(a / gcd) * b`
192/// would wrap.
193fn checked_num_integer_lcm(a: usize, b: usize) -> Option<usize> {
194    if a == 0 || b == 0 {
195        return Some(0);
196    }
197    let g = num_integer_gcd(a, b);
198    // a / g is exact (g divides a by definition) and at most a, so this
199    // division never panics. Only the subsequent multiply can overflow.
200    (a / g).checked_mul(b)
201}
202
/// Greatest common divisor of `a` and `b` via the Euclidean algorithm.
///
/// `gcd(x, 0)` and `gcd(0, x)` are both `x`, so `gcd(0, 0)` is 0.
fn num_integer_gcd(a: usize, b: usize) -> usize {
    let (mut dividend, mut divisor) = (a, b);
    while divisor != 0 {
        let rest = dividend % divisor;
        dividend = divisor;
        divisor = rest;
    }
    dividend
}
210
211/// Bytes-per-pixel for the primary plane of `format` at element size `elem`.
212/// Returns `None` for formats that don't have a single packed BPP (semi-planar
213/// chroma is handled separately, returning the luma-plane bpp).
214///
215/// External callers can use this together with [`align_width_for_gpu_pitch`]
216/// to size their own DMA-BUFs without having to remember per-format BPPs:
217///
218/// ```
219/// use edgefirst_image::{align_width_for_gpu_pitch, primary_plane_bpp};
220/// use edgefirst_tensor::PixelFormat;
221///
222/// let bpp = primary_plane_bpp(PixelFormat::Rgba, 1).unwrap();
223/// let aligned = align_width_for_gpu_pitch(3004, bpp);
224/// assert_eq!(aligned, 3008);
225/// ```
226pub fn primary_plane_bpp(format: PixelFormat, elem: usize) -> Option<usize> {
227    use edgefirst_tensor::PixelLayout;
228    match format.layout() {
229        PixelLayout::Packed => Some(format.channels() * elem),
230        PixelLayout::Planar => Some(elem),
231        // For NV12/NV16 the luma plane is single-channel so the pitch
232        // matches `elem`; the chroma plane uses the same pitch in bytes
233        // (UV is half-width but two interleaved channels = same pitch).
234        PixelLayout::SemiPlanar => Some(elem),
235        // `PixelLayout` is non-exhaustive — fall through unaligned for
236        // any future variant we don't yet recognise.
237        _ => None,
238    }
239}
240
/// Compute the GPU-aligned row pitch, in bytes, for a DMA-backed image of
/// `width × fmt` — but only when padding is actually required.
///
/// Returns `None` when the natural pitch already satisfies
/// `GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES`, when the target memory is not DMA,
/// when `fmt` is not a packed layout, or when the pitch arithmetic would
/// overflow.
///
/// Mali G310 (i.MX 95) rejects `eglCreateImage` from DMA-BUFs whose
/// `PLANE0_PITCH_EXT` is not a multiple of 64 bytes, surfacing as
/// `EGL_BAD_ALLOC`. The `load_image_test_helper` test-only helper
/// in this crate uses this to decide whether to allocate a tensor
/// with padded row stride before invoking the decode path; production
/// callers do the equivalent peek → allocate → decode dance themselves
/// (see crate-level docs).
#[cfg(all(target_os = "linux", test))]
pub(crate) fn padded_dma_pitch_for(
    fmt: PixelFormat,
    width: usize,
    memory: &Option<TensorMemory>,
) -> Option<usize> {
    // Pad only for an explicit DMA request, or for "allocator's choice"
    // when DMA is really available. `Tensor::image_with_stride(..., None)`
    // always goes through DMA allocation, so blindly treating `None` as
    // "DMA wanted" would turn a working image load into a hard failure
    // wherever DMA is missing (sandboxed CI, no `/dev/dma_heap`,
    // permission-denied containers) — `Tensor::image(..., None)` would
    // have fallen back to SHM/Mem there.
    let dma_targeted = match memory {
        Some(TensorMemory::Dma) => true,
        Some(_) => false,
        None => edgefirst_tensor::is_dma_available(),
    };

    // Only packed layouts are padded: `Tensor::image_with_stride` rejects
    // planar / semi-planar formats, which carry their own per-plane pitches
    // on import anyway.
    if !dma_targeted || fmt.layout() != PixelLayout::Packed {
        return None;
    }

    let natural_pitch = width.checked_mul(primary_plane_bpp(fmt, 1)?)?;
    let padded_pitch = align_pitch_bytes_to_gpu_alignment(natural_pitch)?;
    (padded_pitch > natural_pitch).then_some(padded_pitch)
}
288
289pub use cpu::CPUProcessor;
290pub use edgefirst_codec as codec;
291
292#[cfg(test)]
293use edgefirst_decoder::ProtoLayout;
294use edgefirst_decoder::{DetectBox, ProtoData, Segmentation};
295#[cfg(any(test, all(target_os = "linux", feature = "opengl")))]
296use edgefirst_tensor::Tensor;
297use edgefirst_tensor::{
298    DType, PixelFormat, PixelLayout, TensorDyn, TensorMemory, TensorTrait as _,
299};
300use enum_dispatch::enum_dispatch;
301pub use error::{Error, Result};
302#[cfg(target_os = "linux")]
303pub use g2d::G2DProcessor;
304#[cfg(target_os = "linux")]
305#[cfg(feature = "opengl")]
306pub use opengl_headless::GLProcessorThreaded;
307#[cfg(target_os = "linux")]
308#[cfg(feature = "opengl")]
309pub use opengl_headless::Int8InterpolationMode;
310#[cfg(target_os = "macos")]
311#[cfg(feature = "opengl")]
312pub use opengl_headless::MacosGlProcessor;
313#[cfg(target_os = "linux")]
314#[cfg(feature = "opengl")]
315pub use opengl_headless::{probe_egl_displays, EglDisplayInfo, EglDisplayKind};
316use std::{fmt::Display, time::Instant};
317
318mod cpu;
319mod error;
320mod g2d;
321#[path = "gl/mod.rs"]
322mod opengl_headless;
323
324// Use `edgefirst_tensor::PixelFormat` variants (Rgb, Rgba, Grey, etc.) and
325// `TensorDyn` / `Tensor<u8>` with `.format()` metadata instead.
326
/// Rotation applied to an image, in quarter-turn steps.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Rotation {
    /// No rotation.
    None = 0,
    /// Quarter turn clockwise.
    Clockwise90 = 1,
    /// Half turn.
    Rotate180 = 2,
    /// Quarter turn counter-clockwise (equivalently 270° clockwise).
    CounterClockwise90 = 3,
}
impl Rotation {
    /// Builds a [`Rotation`] from a clockwise angle given in degrees.
    ///
    /// The angle is reduced modulo 360 first, so `450` is treated as `90`.
    ///
    /// # Panics
    /// Panics if the angle is not a multiple of 90.
    ///
    /// # Examples
    /// ```rust
    /// # use edgefirst_image::Rotation;
    /// let rotation = Rotation::from_degrees_clockwise(270);
    /// assert_eq!(rotation, Rotation::CounterClockwise90);
    /// ```
    pub fn from_degrees_clockwise(angle: usize) -> Rotation {
        let wrapped = angle % 360;
        if wrapped % 90 != 0 {
            panic!("rotation angle is not a multiple of 90");
        }
        // `wrapped` is one of 0, 90, 180, 270 here, so the quotient is 0..=3.
        match wrapped / 90 {
            0 => Rotation::None,
            1 => Rotation::Clockwise90,
            2 => Rotation::Rotate180,
            _ => Rotation::CounterClockwise90,
        }
    }
}
357
/// Mirroring applied to an image during conversion.
///
/// Passed as the `flip` argument of [`ImageProcessorTrait::convert`].
// NOTE(review): the exact axis each variant mirrors about is defined by the
// backends, not here — presumably `Vertical` swaps top/bottom and
// `Horizontal` swaps left/right; confirm against the backend code.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Flip {
    /// No mirroring.
    None = 0,
    /// Vertical flip.
    Vertical = 1,
    /// Horizontal flip.
    Horizontal = 2,
}
364
/// Selects which value picks the palette colour for each detected object.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum ColorMode {
    /// Use the object's class label (`det.label`). This is the default.
    ///
    /// Keeps backward compatibility and is the right choice for semantic
    /// segmentation, where a colour stands for a class.
    #[default]
    Class,
    /// Use the object's position in the detection list (zero-based loop
    /// index).
    ///
    /// Every detection gets its own colour regardless of class, which suits
    /// instance segmentation.
    Instance,
    /// Use the track ID (reserved for future use; currently identical to
    /// [`Instance`](Self::Instance)).
    Track,
}

impl ColorMode {
    /// Palette index for a detection, given its loop index `idx` and its
    /// class `label`.
    #[inline]
    pub fn index(self, idx: usize, label: usize) -> usize {
        match self {
            ColorMode::Class => label,
            ColorMode::Instance => idx,
            // Track IDs are not wired through yet; fall back to the loop index.
            ColorMode::Track => idx,
        }
    }
}
394
/// Controls the resolution and coordinate frame of masks produced by
/// [`ImageProcessor::materialize_masks`].
///
/// # Variants
///
/// - [`Proto`](Self::Proto) returns per-detection tiles at proto-plane
///   resolution (e.g. 48×32 u8 for a typical COCO bbox on a 160×160 proto
///   plane). This is the historical behavior of `materialize_masks` and the
///   fastest path because no upsample runs inside HAL. Mask values are
///   continuous sigmoid output quantized to `uint8 [0, 255]`.
/// - [`Scaled`](Self::Scaled) returns per-detection tiles at caller-specified
///   pixel resolution by upsampling the full proto plane once and cropping by
///   bbox after sigmoid. The upsample uses bilinear interpolation with
///   edge-clamp sampling — semantically equivalent to Ultralytics'
///   `process_masks_retina` reference. When a `letterbox` is also passed to
///   [`materialize_masks`], the inverse letterbox transform is applied during
///   the upsample so mask pixels land in original-content coordinates
///   (drop-in for overlay on the original image). Mask values are binary
///   `uint8 {0, 255}` after thresholding sigmoid > 0.5 — interchangeable
///   with `Proto` output via the same `> 127` test.
///
/// [`materialize_masks`]: ImageProcessor::materialize_masks
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum MaskResolution {
    /// Per-detection tile at proto-plane resolution (default).
    #[default]
    Proto,
    /// Per-detection tile at `(width, height)` pixel resolution in the
    /// coordinate frame determined by the `letterbox` parameter of
    /// [`ImageProcessor::materialize_masks`].
    Scaled {
        /// Target pixel width of the output coordinate frame.
        width: u32,
        /// Target pixel height of the output coordinate frame.
        height: u32,
    },
}
430
431/// Options for mask overlay rendering.
432///
433/// Controls how segmentation masks are composited onto the destination image:
434/// - `background`: when set, the background image is drawn first and masks
435///   are composited over it (result written to `dst`). When `None`, `dst` is
436///   cleared to `0x00000000` (fully transparent) before masks are drawn.
437///   **`dst` is always fully overwritten — its prior contents are never
438///   preserved.** Callers who used to pre-load an image into `dst` before
439///   calling `draw_decoded_masks` / `draw_proto_masks` must now supply that
440///   image via `background` instead (behaviour changed in v0.16.4).
441/// - `opacity`: scales the alpha of rendered mask colors. `1.0` (default)
442///   preserves the class color's alpha unchanged; `0.5` makes masks
443///   semi-transparent.
444/// - `color_mode`: controls whether colors are assigned by class label,
445///   instance index, or track ID. Defaults to [`ColorMode::Class`].
446#[derive(Debug, Clone, Copy)]
447pub struct MaskOverlay<'a> {
448    /// Compositing source image. Must have the same dimensions and pixel
449    /// format as `dst`. When `Some`, the output is `background + masks`.
450    /// When `None`, `dst` is cleared to `0x00000000` before masks are drawn.
451    pub background: Option<&'a TensorDyn>,
452    pub opacity: f32,
453    /// Normalized letterbox region `[xmin, ymin, xmax, ymax]` in model-input
454    /// space that contains actual image content (the rest is padding).
455    ///
456    /// When set, bounding boxes and mask coordinates from the decoder (which
457    /// are in model-input normalized space) are mapped back to the original
458    /// image coordinate space before rendering.
459    ///
460    /// Use [`with_letterbox_crop`](Self::with_letterbox_crop) to compute this
461    /// from the [`Crop`] that was used in the model input [`convert`](crate::ImageProcessorTrait::convert) call.
462    pub letterbox: Option<[f32; 4]>,
463    pub color_mode: ColorMode,
464}
465
466impl Default for MaskOverlay<'_> {
467    fn default() -> Self {
468        Self {
469            background: None,
470            opacity: 1.0,
471            letterbox: None,
472            color_mode: ColorMode::Class,
473        }
474    }
475}
476
477impl<'a> MaskOverlay<'a> {
478    pub fn new() -> Self {
479        Self::default()
480    }
481
482    /// Set the compositing source image.
483    ///
484    /// `bg` must have the same dimensions and pixel format as the `dst` passed
485    /// to [`draw_decoded_masks`](crate::ImageProcessorTrait::draw_decoded_masks) /
486    /// [`draw_proto_masks`](crate::ImageProcessorTrait::draw_proto_masks).
487    /// The output will be `bg + masks`. Without a background, `dst` is cleared
488    /// to `0x00000000`.
489    pub fn with_background(mut self, bg: &'a TensorDyn) -> Self {
490        self.background = Some(bg);
491        self
492    }
493
494    pub fn with_opacity(mut self, opacity: f32) -> Self {
495        self.opacity = opacity.clamp(0.0, 1.0);
496        self
497    }
498
499    pub fn with_color_mode(mut self, mode: ColorMode) -> Self {
500        self.color_mode = mode;
501        self
502    }
503
504    /// Set the letterbox transform from the [`Crop`] used when preparing the
505    /// model input, so that bounding boxes and masks are correctly mapped back
506    /// to the original image coordinate space during rendering.
507    ///
508    /// Pass the same `crop` that was given to
509    /// [`convert`](crate::ImageProcessorTrait::convert) along with the model
510    /// input dimensions (`model_w` × `model_h`).
511    ///
512    /// Has no effect when `crop.dst_rect` is `None` (no letterbox applied).
513    pub fn with_letterbox_crop(mut self, crop: &Crop, model_w: usize, model_h: usize) -> Self {
514        if let Some(r) = crop.dst_rect {
515            self.letterbox = Some([
516                r.left as f32 / model_w as f32,
517                r.top as f32 / model_h as f32,
518                (r.left + r.width) as f32 / model_w as f32,
519                (r.top + r.height) as f32 / model_h as f32,
520            ]);
521        }
522        self
523    }
524}
525
526/// Apply the inverse letterbox transform to a bounding box.
527///
528/// `letterbox` is `[lx0, ly0, lx1, ly1]` — the normalized region of the model
529/// input that contains actual image content (output of
530/// [`MaskOverlay::with_letterbox_crop`]).
531///
532/// Converts model-input-normalized coords to output-image-normalized coords,
533/// clamped to `[0.0, 1.0]`. Also canonicalises the bbox (ensures xmin ≤ xmax).
534#[inline]
535fn unletter_bbox(bbox: DetectBox, lb: [f32; 4]) -> DetectBox {
536    let b = bbox.bbox.to_canonical();
537    let [lx0, ly0, lx1, ly1] = lb;
538    let inv_w = if lx1 > lx0 { 1.0 / (lx1 - lx0) } else { 1.0 };
539    let inv_h = if ly1 > ly0 { 1.0 / (ly1 - ly0) } else { 1.0 };
540    DetectBox {
541        bbox: edgefirst_decoder::BoundingBox {
542            xmin: ((b.xmin - lx0) * inv_w).clamp(0.0, 1.0),
543            ymin: ((b.ymin - ly0) * inv_h).clamp(0.0, 1.0),
544            xmax: ((b.xmax - lx0) * inv_w).clamp(0.0, 1.0),
545            ymax: ((b.ymax - ly0) * inv_h).clamp(0.0, 1.0),
546        },
547        ..bbox
548    }
549}
550
551#[derive(Debug, Clone, Copy, PartialEq, Eq)]
552pub struct Crop {
553    pub src_rect: Option<Rect>,
554    pub dst_rect: Option<Rect>,
555    pub dst_color: Option<[u8; 4]>,
556}
557
558impl Default for Crop {
559    fn default() -> Self {
560        Crop::new()
561    }
562}
563impl Crop {
564    // Creates a new Crop with default values (no cropping).
565    pub fn new() -> Self {
566        Crop {
567            src_rect: None,
568            dst_rect: None,
569            dst_color: None,
570        }
571    }
572
573    // Sets the source rectangle for cropping.
574    pub fn with_src_rect(mut self, src_rect: Option<Rect>) -> Self {
575        self.src_rect = src_rect;
576        self
577    }
578
579    // Sets the destination rectangle for cropping.
580    pub fn with_dst_rect(mut self, dst_rect: Option<Rect>) -> Self {
581        self.dst_rect = dst_rect;
582        self
583    }
584
585    // Sets the destination color for areas outside the cropped region.
586    pub fn with_dst_color(mut self, dst_color: Option<[u8; 4]>) -> Self {
587        self.dst_color = dst_color;
588        self
589    }
590
591    // Creates a new Crop with no cropping.
592    pub fn no_crop() -> Self {
593        Crop::new()
594    }
595
596    /// Validate crop rectangles against explicit dimensions.
597    pub(crate) fn check_crop_dims(
598        &self,
599        src_w: usize,
600        src_h: usize,
601        dst_w: usize,
602        dst_h: usize,
603    ) -> Result<(), Error> {
604        let src_ok = self
605            .src_rect
606            .is_none_or(|r| r.left + r.width <= src_w && r.top + r.height <= src_h);
607        let dst_ok = self
608            .dst_rect
609            .is_none_or(|r| r.left + r.width <= dst_w && r.top + r.height <= dst_h);
610        match (src_ok, dst_ok) {
611            (true, true) => Ok(()),
612            (true, false) => Err(Error::CropInvalid(format!(
613                "Dest crop invalid: {:?}",
614                self.dst_rect
615            ))),
616            (false, true) => Err(Error::CropInvalid(format!(
617                "Src crop invalid: {:?}",
618                self.src_rect
619            ))),
620            (false, false) => Err(Error::CropInvalid(format!(
621                "Dest and Src crop invalid: {:?} {:?}",
622                self.dst_rect, self.src_rect
623            ))),
624        }
625    }
626
627    /// Validate crop rectangles against TensorDyn source and destination.
628    pub fn check_crop_dyn(
629        &self,
630        src: &edgefirst_tensor::TensorDyn,
631        dst: &edgefirst_tensor::TensorDyn,
632    ) -> Result<(), Error> {
633        self.check_crop_dims(
634            src.width().unwrap_or(0),
635            src.height().unwrap_or(0),
636            dst.width().unwrap_or(0),
637            dst.height().unwrap_or(0),
638        )
639    }
640}
641
642#[derive(Debug, Clone, Copy, PartialEq, Eq)]
643pub struct Rect {
644    pub left: usize,
645    pub top: usize,
646    pub width: usize,
647    pub height: usize,
648}
649
650impl Rect {
651    // Creates a new Rect with the specified left, top, width, and height.
652    pub fn new(left: usize, top: usize, width: usize, height: usize) -> Self {
653        Self {
654            left,
655            top,
656            width,
657            height,
658        }
659    }
660
661    // Checks if the rectangle is valid for the given TensorDyn image.
662    pub fn check_rect_dyn(&self, image: &TensorDyn) -> bool {
663        let w = image.width().unwrap_or(0);
664        let h = image.height().unwrap_or(0);
665        self.left + self.width <= w && self.top + self.height <= h
666    }
667}
668
669#[enum_dispatch(ImageProcessor)]
670pub trait ImageProcessorTrait {
671    /// Converts the source image to the destination image format and size. The
672    /// image is cropped first, then flipped, then rotated
673    ///
674    /// # Arguments
675    ///
676    /// * `dst` - The destination image to be converted to.
677    /// * `src` - The source image to convert from.
678    /// * `rotation` - The rotation to apply to the destination image.
679    /// * `flip` - Flips the image
680    /// * `crop` - An optional rectangle specifying the area to crop from the
681    ///   source image
682    ///
683    /// # Returns
684    ///
685    /// A `Result` indicating success or failure of the conversion.
686    fn convert(
687        &mut self,
688        src: &TensorDyn,
689        dst: &mut TensorDyn,
690        rotation: Rotation,
691        flip: Flip,
692        crop: Crop,
693    ) -> Result<()>;
694
695    /// Draw pre-decoded detection boxes and segmentation masks onto `dst`.
696    ///
697    /// Supports two segmentation modes based on the mask channel count:
698    /// - **Instance segmentation** (`C=1`): one `Segmentation` per detection,
699    ///   `segmentation` and `detect` are zipped.
700    /// - **Semantic segmentation** (`C>1`): a single `Segmentation` covering
701    ///   all classes; only the first element is used.
702    ///
703    /// # Format requirements
704    ///
705    /// - CPU backend: `dst` must be `RGBA` or `RGB`.
706    /// - OpenGL backend: `dst` must be `RGBA`, `BGRA`, or `RGB`.
707    /// - G2D backend: only produces the base frame (empty detections);
708    ///   returns `NotImplemented` when any detection or segmentation is
709    ///   supplied.
710    ///
711    /// # Output contract
712    ///
713    /// This function always fully writes `dst` — it never relies on the
714    /// caller having pre-cleared the destination. The four cases are:
715    ///
716    /// | detections | background | output                              |
717    /// |------------|------------|-------------------------------------|
718    /// | none       | none       | dst cleared to `0x00000000`         |
719    /// | none       | set        | dst ← background                    |
720    /// | set        | none       | masks drawn over cleared dst        |
721    /// | set        | set        | masks drawn over background         |
722    ///
723    /// Each backend implements this with its native primitives: G2D uses
724    /// `g2d_clear` / `g2d_blit`, OpenGL uses `glClear` / DMA-BUF GPU blit
725    /// plus the mask program, and CPU uses direct buffer fill / memcpy as
726    /// the terminal fallback. CPU-memcpy of DMA buffers is avoided on the
727    /// accelerated paths.
728    ///
729    /// An empty `segmentation` slice is valid — only bounding boxes are drawn.
730    ///
731    /// `overlay` controls compositing: `background` is the compositing source
732    /// (must match `dst` in size and format); `opacity` scales mask alpha.
733    ///
734    /// # Buffer aliasing
735    ///
736    /// `dst` and `overlay.background` must reference **distinct underlying
737    /// buffers**. An aliased pair returns [`Error::AliasedBuffers`] without
738    /// dispatching to any backend — the GL path would otherwise read and
739    /// write the same texture in a single draw, which is undefined behaviour
740    /// on most drivers. Aliasing is detected via
741    /// [`TensorDyn::aliases`](edgefirst_tensor::TensorDyn::aliases), which
742    /// catches both shared-allocation clones and separate imports over the
743    /// same dmabuf fd.
744    ///
745    /// # Migration from v0.16.3 and earlier
746    ///
747    /// Prior to v0.16.4 the call silently preserved `dst`'s contents on empty
748    /// detections. That invariant no longer holds — `dst` is always fully
749    /// written. Callers who pre-loaded an image into `dst` before calling this
750    /// function must now pass that image via `overlay.background` instead.
751    fn draw_decoded_masks(
752        &mut self,
753        dst: &mut TensorDyn,
754        detect: &[DetectBox],
755        segmentation: &[Segmentation],
756        overlay: MaskOverlay<'_>,
757    ) -> Result<()>;
758
    /// Draw masks from proto data onto image (fused decode+draw).
    ///
    /// For YOLO segmentation models, this avoids materializing intermediate
    /// `Array3<u8>` masks. The `ProtoData` contains mask coefficients and the
    /// prototype tensor; the renderer computes `mask_coeff @ protos` directly
    /// at the output resolution using bilinear sampling.
    ///
    /// `detect` and `proto_data.mask_coefficients` are paired element-wise
    /// (zip), so they should have the same length. A length mismatch is not
    /// reported as an error — the excess entries of the longer input are
    /// silently ignored. An empty `detect` slice is valid and produces the
    /// base frame — cleared or background-blitted — via the selected
    /// backend's native primitive.
    ///
    /// # Format requirements and output contract
    ///
    /// Same as [`draw_decoded_masks`](Self::draw_decoded_masks), including
    /// the "always fully writes dst" guarantee across all four
    /// detection/background combinations.
    ///
    /// `overlay` controls compositing — see [`draw_decoded_masks`](Self::draw_decoded_masks).
    fn draw_proto_masks(
        &mut self,
        dst: &mut TensorDyn,
        detect: &[DetectBox],
        proto_data: &ProtoData,
        overlay: MaskOverlay<'_>,
    ) -> Result<()>;
785
    /// Sets the colors used for rendering segmentation masks. Up to 20 colors
    /// can be set.
    ///
    /// Each entry of `colors` is a 4-byte color (presumably RGBA — confirm
    /// against the backend implementations).
    ///
    /// NOTE(review): what happens when more than 20 colors are passed
    /// (error vs. truncation) is decided by each backend and is not visible
    /// from this declaration.
    fn set_class_colors(&mut self, colors: &[[u8; 4]]) -> Result<()>;
789}
790
/// Configuration for [`ImageProcessor`] construction.
///
/// Use with [`ImageProcessor::with_config`] to override the default EGL
/// display auto-detection and backend selection. The default configuration
/// preserves the existing auto-detection behaviour.
#[derive(Debug, Clone, Default)]
pub struct ImageProcessorConfig {
    /// Force OpenGL to use this EGL display type instead of auto-detecting.
    ///
    /// When `None`, the processor probes displays in priority order: GBM,
    /// PlatformDevice, Default. Use [`probe_egl_displays`] to discover
    /// which displays are available on the current system.
    ///
    /// Ignored when the OpenGL backend is not initialized, e.g. when
    /// `EDGEFIRST_DISABLE_GL=1` is set and `backend` is
    /// [`ComputeBackend::Auto`].
    #[cfg(target_os = "linux")]
    #[cfg(feature = "opengl")]
    pub egl_display: Option<EglDisplayKind>,

    /// Preferred compute backend.
    ///
    /// When set to a specific backend (not [`ComputeBackend::Auto`]), the
    /// processor initializes that backend plus CPU as a fallback. If the
    /// requested backend fails to initialize, a warning is logged and the
    /// processor is still created with CPU only.
    /// This takes precedence over `EDGEFIRST_FORCE_BACKEND` and the
    /// `EDGEFIRST_DISABLE_*` environment variables.
    ///
    /// - [`ComputeBackend::OpenGl`]: init OpenGL + CPU, skip G2D
    /// - [`ComputeBackend::G2d`]: init G2D + CPU, skip OpenGL
    /// - [`ComputeBackend::Cpu`]: init CPU only
    /// - [`ComputeBackend::Auto`]: existing env-var-driven selection
    pub backend: ComputeBackend,
}
822
/// Compute backend selection for [`ImageProcessor`].
///
/// Use with [`ImageProcessorConfig::backend`] to select which backend the
/// processor should prefer. When a specific backend is selected, the
/// processor initializes that backend plus CPU as a fallback. When `Auto`
/// is used, the existing environment-variable-driven selection applies.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum ComputeBackend {
    /// Auto-detect based on available hardware and environment variables
    /// (`EDGEFIRST_FORCE_BACKEND`, `EDGEFIRST_DISABLE_*`).
    #[default]
    Auto,
    /// CPU-only processing (no hardware acceleration).
    Cpu,
    /// Prefer G2D hardware blitter (+ CPU fallback). On non-Linux targets,
    /// or when G2D fails to initialize, a warning is logged and only the
    /// CPU backend is used.
    G2d,
    /// Prefer OpenGL ES (+ CPU fallback). When OpenGL is unavailable on the
    /// platform or fails to initialize, a warning is logged and only the
    /// CPU backend is used.
    OpenGl,
}
841
/// Backend forced via the `EDGEFIRST_FORCE_BACKEND` environment variable
/// or [`ImageProcessorConfig::backend`].
///
/// When set, the [`ImageProcessor`] only initializes and dispatches to the
/// selected backend — no fallback chain is used.
///
/// NOTE(review): in `ImageProcessor::with_config` only the environment
/// variable produces a `ForcedBackend`; the config-driven path stores
/// `forced_backend: None` and keeps the CPU fallback. Confirm whether the
/// reference to `ImageProcessorConfig::backend` above is still accurate.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ForcedBackend {
    /// Selected by `EDGEFIRST_FORCE_BACKEND=cpu` (case-insensitive).
    Cpu,
    /// Selected by `EDGEFIRST_FORCE_BACKEND=g2d` (case-insensitive).
    G2d,
    /// Selected by `EDGEFIRST_FORCE_BACKEND=opengl` (case-insensitive).
    OpenGl,
}
853
/// Image converter that uses available hardware acceleration or CPU as a
/// fallback.
#[derive(Debug)]
pub struct ImageProcessor {
    /// CPU-based image converter as a fallback. This is `None` when the
    /// EDGEFIRST_DISABLE_CPU environment variable is set, or when
    /// EDGEFIRST_FORCE_BACKEND selects a non-CPU backend (`g2d` / `opengl`),
    /// in which case no fallback is initialized.
    pub cpu: Option<CPUProcessor>,

    #[cfg(target_os = "linux")]
    /// G2D-based image converter for Linux systems. This is only available if
    /// the EDGEFIRST_DISABLE_G2D environment variable is not set and libg2d.so
    /// is available.
    pub g2d: Option<G2DProcessor>,
    #[cfg(target_os = "linux")]
    #[cfg(feature = "opengl")]
    /// OpenGL-based image converter for Linux systems. This is only available
    /// if the EDGEFIRST_DISABLE_GL environment variable is not set and OpenGL
    /// ES is available.
    pub opengl: Option<GLProcessorThreaded>,
    #[cfg(target_os = "macos")]
    #[cfg(feature = "opengl")]
    /// OpenGL-based image converter for macOS via ANGLE + IOSurface.
    /// Available when ANGLE's libEGL.dylib can be loaded (see
    /// README.md § macOS GPU Acceleration). Same field name as the
    /// Linux variant so call sites can be written once.
    pub opengl: Option<MacosGlProcessor>,

    /// When set, only the specified backend is used — no fallback chain.
    /// Populated only by the `EDGEFIRST_FORCE_BACKEND` path of
    /// `with_config`; every other construction path leaves it `None`.
    pub(crate) forced_backend: Option<ForcedBackend>,
}
884
// SAFETY: NOTE(review) — no justification is recorded for these impls. They
// assert that every backend handle stored in `ImageProcessor` (CPU, G2D and,
// when enabled, the GL processor) may be moved to and shared between threads.
// Confirm that against each backend type before relying on it.
unsafe impl Send for ImageProcessor {}
unsafe impl Sync for ImageProcessor {}
887
888impl ImageProcessor {
889    /// Creates a new `ImageProcessor` instance, initializing available
890    /// hardware converters based on the system capabilities and environment
891    /// variables.
892    ///
893    /// # Examples
894    /// ```rust,no_run
895    /// # use edgefirst_image::{ImageProcessor, Rotation, Flip, Crop, ImageProcessorTrait};
896    /// # use edgefirst_codec::{peek_info, ImageDecoder, ImageLoad, DecodeOptions};
897    /// # use edgefirst_tensor::{PixelFormat, DType, Tensor, TensorMemory};
898    /// # fn main() -> Result<(), edgefirst_image::Error> {
899    /// let image = std::fs::read("zidane.jpg")?;
900    /// let opts = DecodeOptions::default().with_format(PixelFormat::Rgba);
901    /// let info = peek_info(&image, &opts).expect("peek");
902    /// let mut src = Tensor::<u8>::image(info.width, info.height, info.format,
903    ///                                    Some(TensorMemory::Mem))?;
904    /// let mut decoder = ImageDecoder::new();
905    /// src.load_image(&mut decoder, &image, &opts).expect("decode");
906    /// let mut converter = ImageProcessor::new()?;
907    /// let mut dst = converter.create_image(640, 480, PixelFormat::Rgb, DType::U8, None)?;
908    /// converter.convert(&src.into(), &mut dst, Rotation::None, Flip::None, Crop::default())?;
909    /// # Ok(())
910    /// # }
911    /// ```
912    pub fn new() -> Result<Self> {
913        Self::with_config(ImageProcessorConfig::default())
914    }
915
916    /// Creates a new `ImageProcessor` with the given configuration.
917    ///
918    /// When [`ImageProcessorConfig::backend`] is set to a specific backend,
919    /// environment variables are ignored and the processor initializes the
920    /// requested backend plus CPU as a fallback.
921    ///
922    /// When `Auto`, the existing `EDGEFIRST_FORCE_BACKEND` and
923    /// `EDGEFIRST_DISABLE_*` environment variables apply.
924    #[allow(unused_variables)]
925    pub fn with_config(config: ImageProcessorConfig) -> Result<Self> {
926        // ── Config-driven backend selection ──────────────────────────
927        // When the caller explicitly requests a backend via the config,
928        // skip all environment variable logic.
929        match config.backend {
930            ComputeBackend::Cpu => {
931                log::info!("ComputeBackend::Cpu — CPU only");
932                return Ok(Self {
933                    cpu: Some(CPUProcessor::new()),
934                    #[cfg(target_os = "linux")]
935                    g2d: None,
936                    #[cfg(target_os = "linux")]
937                    #[cfg(feature = "opengl")]
938                    opengl: None,
939                    #[cfg(target_os = "macos")]
940                    #[cfg(feature = "opengl")]
941                    opengl: None,
942                    forced_backend: None,
943                });
944            }
945            ComputeBackend::G2d => {
946                log::info!("ComputeBackend::G2d — G2D + CPU fallback");
947                #[cfg(target_os = "linux")]
948                {
949                    let g2d = match G2DProcessor::new() {
950                        Ok(g) => Some(g),
951                        Err(e) => {
952                            log::warn!("G2D requested but failed to initialize: {e:?}");
953                            None
954                        }
955                    };
956                    return Ok(Self {
957                        cpu: Some(CPUProcessor::new()),
958                        g2d,
959                        #[cfg(feature = "opengl")]
960                        opengl: None,
961                        forced_backend: None,
962                    });
963                }
964                #[cfg(not(target_os = "linux"))]
965                {
966                    log::warn!("G2D requested but not available on this platform, using CPU");
967                    return Ok(Self {
968                        cpu: Some(CPUProcessor::new()),
969                        #[cfg(target_os = "macos")]
970                        #[cfg(feature = "opengl")]
971                        opengl: None,
972                        forced_backend: None,
973                    });
974                }
975            }
976            ComputeBackend::OpenGl => {
977                log::info!("ComputeBackend::OpenGl — OpenGL + CPU fallback");
978                #[cfg(target_os = "linux")]
979                {
980                    #[cfg(feature = "opengl")]
981                    let opengl = match GLProcessorThreaded::new(config.egl_display) {
982                        Ok(gl) => Some(gl),
983                        Err(e) => {
984                            log::warn!("OpenGL requested but failed to initialize: {e:?}");
985                            None
986                        }
987                    };
988                    return Ok(Self {
989                        cpu: Some(CPUProcessor::new()),
990                        g2d: None,
991                        #[cfg(feature = "opengl")]
992                        opengl,
993                        forced_backend: None,
994                    });
995                }
996                #[cfg(target_os = "macos")]
997                {
998                    #[cfg(feature = "opengl")]
999                    let opengl = match MacosGlProcessor::new() {
1000                        Ok(gl) => Some(gl),
1001                        Err(e) => {
1002                            log::warn!(
1003                                "OpenGL requested on macOS but ANGLE init failed: {e:?} \
1004                                 (install ANGLE via `brew install startergo/angle/angle` \
1005                                 and re-sign the dylibs — see README.md § macOS GPU \
1006                                 Acceleration). Falling back to CPU."
1007                            );
1008                            None
1009                        }
1010                    };
1011                    return Ok(Self {
1012                        cpu: Some(CPUProcessor::new()),
1013                        #[cfg(feature = "opengl")]
1014                        opengl,
1015                        forced_backend: None,
1016                    });
1017                }
1018                #[cfg(not(any(target_os = "linux", target_os = "macos")))]
1019                {
1020                    log::warn!("OpenGL requested but not available on this platform, using CPU");
1021                    return Ok(Self {
1022                        cpu: Some(CPUProcessor::new()),
1023                        forced_backend: None,
1024                    });
1025                }
1026            }
1027            ComputeBackend::Auto => { /* fall through to env-var logic below */ }
1028        }
1029
1030        // ── EDGEFIRST_FORCE_BACKEND ──────────────────────────────────
1031        // When set, only the requested backend is initialised and no
1032        // fallback chain is used. Accepted values (case-insensitive):
1033        //   "cpu", "g2d", "opengl"
1034        if let Ok(val) = std::env::var("EDGEFIRST_FORCE_BACKEND") {
1035            let val_lower = val.to_lowercase();
1036            let forced = match val_lower.as_str() {
1037                "cpu" => ForcedBackend::Cpu,
1038                "g2d" => ForcedBackend::G2d,
1039                "opengl" => ForcedBackend::OpenGl,
1040                other => {
1041                    return Err(Error::ForcedBackendUnavailable(format!(
1042                        "unknown EDGEFIRST_FORCE_BACKEND value: {other:?} (expected cpu, g2d, or opengl)"
1043                    )));
1044                }
1045            };
1046
1047            log::info!("EDGEFIRST_FORCE_BACKEND={val} — only initializing {val_lower} backend");
1048
1049            return match forced {
1050                ForcedBackend::Cpu => Ok(Self {
1051                    cpu: Some(CPUProcessor::new()),
1052                    #[cfg(target_os = "linux")]
1053                    g2d: None,
1054                    #[cfg(target_os = "linux")]
1055                    #[cfg(feature = "opengl")]
1056                    opengl: None,
1057                    #[cfg(target_os = "macos")]
1058                    #[cfg(feature = "opengl")]
1059                    opengl: None,
1060                    forced_backend: Some(ForcedBackend::Cpu),
1061                }),
1062                ForcedBackend::G2d => {
1063                    #[cfg(target_os = "linux")]
1064                    {
1065                        let g2d = G2DProcessor::new().map_err(|e| {
1066                            Error::ForcedBackendUnavailable(format!(
1067                                "g2d forced but failed to initialize: {e:?}"
1068                            ))
1069                        })?;
1070                        Ok(Self {
1071                            cpu: None,
1072                            g2d: Some(g2d),
1073                            #[cfg(feature = "opengl")]
1074                            opengl: None,
1075                            forced_backend: Some(ForcedBackend::G2d),
1076                        })
1077                    }
1078                    #[cfg(not(target_os = "linux"))]
1079                    {
1080                        Err(Error::ForcedBackendUnavailable(
1081                            "g2d backend is only available on Linux".into(),
1082                        ))
1083                    }
1084                }
1085                ForcedBackend::OpenGl => {
1086                    #[cfg(target_os = "linux")]
1087                    #[cfg(feature = "opengl")]
1088                    {
1089                        let opengl = GLProcessorThreaded::new(config.egl_display).map_err(|e| {
1090                            Error::ForcedBackendUnavailable(format!(
1091                                "opengl forced but failed to initialize: {e:?}"
1092                            ))
1093                        })?;
1094                        Ok(Self {
1095                            cpu: None,
1096                            g2d: None,
1097                            opengl: Some(opengl),
1098                            forced_backend: Some(ForcedBackend::OpenGl),
1099                        })
1100                    }
1101                    #[cfg(target_os = "macos")]
1102                    #[cfg(feature = "opengl")]
1103                    {
1104                        let opengl = MacosGlProcessor::new().map_err(|e| {
1105                            Error::ForcedBackendUnavailable(format!(
1106                                "opengl forced on macOS but ANGLE init failed: {e:?}"
1107                            ))
1108                        })?;
1109                        Ok(Self {
1110                            cpu: None,
1111                            opengl: Some(opengl),
1112                            forced_backend: Some(ForcedBackend::OpenGl),
1113                        })
1114                    }
1115                    #[cfg(not(all(
1116                        any(target_os = "linux", target_os = "macos"),
1117                        feature = "opengl"
1118                    )))]
1119                    {
1120                        Err(Error::ForcedBackendUnavailable(
1121                            "opengl backend requires Linux or macOS with the 'opengl' feature \
1122                             enabled"
1123                                .into(),
1124                        ))
1125                    }
1126                }
1127            };
1128        }
1129
1130        // ── Existing DISABLE logic (unchanged) ──────────────────────
1131        #[cfg(target_os = "linux")]
1132        let g2d = if std::env::var("EDGEFIRST_DISABLE_G2D")
1133            .map(|x| x != "0" && x.to_lowercase() != "false")
1134            .unwrap_or(false)
1135        {
1136            log::debug!("EDGEFIRST_DISABLE_G2D is set");
1137            None
1138        } else {
1139            match G2DProcessor::new() {
1140                Ok(g2d_converter) => Some(g2d_converter),
1141                Err(err) => {
1142                    log::warn!("Failed to initialize G2D converter: {err:?}");
1143                    None
1144                }
1145            }
1146        };
1147
1148        #[cfg(target_os = "linux")]
1149        #[cfg(feature = "opengl")]
1150        let opengl = if std::env::var("EDGEFIRST_DISABLE_GL")
1151            .map(|x| x != "0" && x.to_lowercase() != "false")
1152            .unwrap_or(false)
1153        {
1154            log::debug!("EDGEFIRST_DISABLE_GL is set");
1155            None
1156        } else {
1157            match GLProcessorThreaded::new(config.egl_display) {
1158                Ok(gl_converter) => Some(gl_converter),
1159                Err(err) => {
1160                    log::warn!("Failed to initialize GL converter: {err:?}");
1161                    None
1162                }
1163            }
1164        };
1165
1166        #[cfg(target_os = "macos")]
1167        #[cfg(feature = "opengl")]
1168        let opengl = if std::env::var("EDGEFIRST_DISABLE_GL")
1169            .map(|x| x != "0" && x.to_lowercase() != "false")
1170            .unwrap_or(false)
1171        {
1172            log::debug!("EDGEFIRST_DISABLE_GL is set");
1173            None
1174        } else {
1175            match MacosGlProcessor::new() {
1176                Ok(gl_converter) => Some(gl_converter),
1177                Err(err) => {
1178                    log::debug!(
1179                        "macOS GL backend unavailable: {err:?} \
1180                         (CPU fallback will be used)"
1181                    );
1182                    None
1183                }
1184            }
1185        };
1186
1187        let cpu = if std::env::var("EDGEFIRST_DISABLE_CPU")
1188            .map(|x| x != "0" && x.to_lowercase() != "false")
1189            .unwrap_or(false)
1190        {
1191            log::debug!("EDGEFIRST_DISABLE_CPU is set");
1192            None
1193        } else {
1194            Some(CPUProcessor::new())
1195        };
1196        Ok(Self {
1197            cpu,
1198            #[cfg(target_os = "linux")]
1199            g2d,
1200            #[cfg(target_os = "linux")]
1201            #[cfg(feature = "opengl")]
1202            opengl,
1203            #[cfg(target_os = "macos")]
1204            #[cfg(feature = "opengl")]
1205            opengl,
1206            forced_backend: None,
1207        })
1208    }
1209
1210    /// Sets the interpolation mode for int8 proto textures on the OpenGL
1211    /// backend. No-op if OpenGL is not available.
1212    #[cfg(target_os = "linux")]
1213    #[cfg(feature = "opengl")]
1214    pub fn set_int8_interpolation_mode(&mut self, mode: Int8InterpolationMode) -> Result<()> {
1215        if let Some(ref mut gl) = self.opengl {
1216            gl.set_int8_interpolation_mode(mode)?;
1217        }
1218        Ok(())
1219    }
1220
1221    /// Create a [`TensorDyn`] image with the best available memory backend.
1222    ///
1223    /// Priority: DMA-buf → PBO (byte-sized types: u8, i8) → system memory.
1224    ///
1225    /// Use this method instead of [`TensorDyn::image()`] when the tensor will
1226    /// be used with [`ImageProcessor::convert()`]. It selects the optimal
1227    /// memory backing (including PBO for GPU zero-copy) which direct
1228    /// allocation cannot achieve.
1229    ///
1230    /// This method is on [`ImageProcessor`] rather than [`ImageProcessorTrait`]
1231    /// because optimal allocation requires knowledge of the active compute
1232    /// backends (e.g. the GL context handle for PBO allocation). Individual
1233    /// backend implementations ([`CPUProcessor`], etc.) do not have this
1234    /// cross-backend visibility.
1235    ///
1236    /// # Arguments
1237    ///
1238    /// * `width` - Image width in pixels
1239    /// * `height` - Image height in pixels
1240    /// * `format` - Pixel format
1241    /// * `dtype` - Element data type (e.g. `DType::U8`, `DType::I8`)
1242    /// * `memory` - Optional memory type override; when `None`, the best
1243    ///   available backend is selected automatically.
1244    ///
1245    /// # Returns
1246    ///
1247    /// A [`TensorDyn`] backed by the highest-performance memory type
1248    /// available on this system.
1249    ///
1250    /// # Pitch alignment for DMA-backed allocations
1251    ///
1252    /// DMA-BUF imports into the GL backend (Mali Valhall on i.MX 95
1253    /// specifically) require every row pitch to be a multiple of
1254    /// [`GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES`] (currently 64). When this
1255    /// method lands on `TensorMemory::Dma`, the underlying allocation is
1256    /// silently padded so the row stride satisfies that requirement.
1257    ///
1258    /// **The user-requested `width` is preserved** — `tensor.width()`
1259    /// returns the same value you passed in. The padding is carried by
1260    /// [`TensorDyn::row_stride`] / `effective_row_stride()`, which the
1261    /// GL backend reads when importing the buffer as an EGLImage.
1262    /// Callers that compute byte offsets from the tensor must use the
1263    /// stride, not `width × bytes_per_pixel`; the CPU mapping spans the
1264    /// full `stride × height` bytes.
1265    ///
1266    /// Pre-aligned widths (640, 1280, 1920, 3008, 3840 …) allocate
1267    /// exactly `width × bpp × height` bytes with no padding. PBO and
1268    /// Mem fallbacks never pad — they don't go through EGLImage import.
1269    ///
1270    /// See also [`align_width_for_gpu_pitch`] for an advisory helper
1271    /// that external callers (GStreamer plugins, video pipelines) can
1272    /// use to size their own DMA-BUFs for GL compatibility.
1273    ///
1274    /// # Errors
1275    ///
1276    /// Returns an error if all allocation strategies fail.
1277    pub fn create_image(
1278        &self,
1279        width: usize,
1280        height: usize,
1281        format: PixelFormat,
1282        dtype: DType,
1283        memory: Option<TensorMemory>,
1284    ) -> Result<TensorDyn> {
1285        // Compute the GPU-aligned row stride in bytes for this image.
1286        // `None` means either the format has no defined primary-plane bpp
1287        // (unknown future layout) or the stride calculation would overflow
1288        // — in both cases we fall back to the natural layout via the plain
1289        // `TensorDyn::image` constructor, and the slow-path warning inside
1290        // `draw_*_masks` will fire if the subsequent GL import fails.
1291        //
1292        // DMA allocation is Linux-only (see `TensorMemory::Dma` cfg gate),
1293        // so both the stride computation and the helper closure are gated
1294        // accordingly — the callers below are already Linux-only.
1295        #[cfg(target_os = "linux")]
1296        let dma_stride_bytes: Option<usize> = primary_plane_bpp(format, dtype.size())
1297            .and_then(|bpp| width.checked_mul(bpp))
1298            .and_then(align_pitch_bytes_to_gpu_alignment);
1299
1300        // Helper: allocate a DMA image, using the padded-stride constructor
1301        // when the computed stride exceeds the natural pitch, otherwise the
1302        // plain constructor (byte-identical result in the common case).
1303        #[cfg(target_os = "linux")]
1304        let try_dma = || -> Result<TensorDyn> {
1305            // Stride padding is only meaningful for packed pixel layouts
1306            // (RGBA8, BGRA8, RGB888, Grey) — the formats the GL backend
1307            // renders into. Semi-planar (NV12, NV16) and planar (PlanarRgb,
1308            // PlanarRgba) tensors go through `TensorDyn::image(...)` with
1309            // their natural layout; they're imported from camera capture
1310            // via `from_fd` far more often than allocated here, and
1311            // `Tensor::image_with_stride` explicitly rejects them.
1312            let packed = format.layout() == edgefirst_tensor::PixelLayout::Packed;
1313            match dma_stride_bytes {
1314                Some(stride)
1315                    if packed
1316                        && primary_plane_bpp(format, dtype.size())
1317                            .and_then(|bpp| width.checked_mul(bpp))
1318                            .is_some_and(|natural| stride > natural) =>
1319                {
1320                    log::debug!(
1321                        "create_image: padding row stride for {format:?} {width}x{height} \
1322                         from natural pitch to {stride} bytes for GPU alignment"
1323                    );
1324                    Ok(TensorDyn::image_with_stride(
1325                        width,
1326                        height,
1327                        format,
1328                        dtype,
1329                        stride,
1330                        Some(edgefirst_tensor::TensorMemory::Dma),
1331                    )?)
1332                }
1333                _ => Ok(TensorDyn::image(
1334                    width,
1335                    height,
1336                    format,
1337                    dtype,
1338                    Some(edgefirst_tensor::TensorMemory::Dma),
1339                )?),
1340            }
1341        };
1342
1343        // If an explicit memory type is requested, honour it directly.
1344        // On Linux, `TensorMemory::Dma` gets the padded-stride treatment;
1345        // other memory types take the user-requested width verbatim.
1346        // On macOS, `TensorMemory::Dma` dispatches through `TensorDyn::image`
1347        // which selects the IOSurface allocation path (FourCC-formatted)
1348        // for image-mappable formats, or falls back to SHM/Mem otherwise.
1349        match memory {
1350            #[cfg(target_os = "linux")]
1351            Some(TensorMemory::Dma) => {
1352                return try_dma();
1353            }
1354            Some(mem) => {
1355                return Ok(TensorDyn::image(width, height, format, dtype, Some(mem))?);
1356            }
1357            None => {}
1358        }
1359
1360        // macOS: when the GL backend is active with the IOSurface
1361        // transfer path, prefer Dma (IOSurface) for zero-copy import.
1362        // The Tensor allocator falls through to SHM/Mem automatically
1363        // for formats without an IOSurface mapping (NV12, planar, etc.).
1364        #[cfg(target_os = "macos")]
1365        #[cfg(feature = "opengl")]
1366        if let Some(gl) = self.opengl.as_ref() {
1367            let _ = gl; // probe_transfer_backend lives behind the platform trait
1368            if let Ok(img) = TensorDyn::image(
1369                width,
1370                height,
1371                format,
1372                dtype,
1373                Some(edgefirst_tensor::TensorMemory::Dma),
1374            ) {
1375                return Ok(img);
1376            }
1377        }
1378
1379        // Try DMA first on Linux — skip only when GL has explicitly selected PBO
1380        // as the preferred transfer path (PBO is better than DMA in that case).
1381        #[cfg(target_os = "linux")]
1382        {
1383            #[cfg(feature = "opengl")]
1384            let gl_uses_pbo = self
1385                .opengl
1386                .as_ref()
1387                .is_some_and(|gl| gl.transfer_backend() == opengl_headless::TransferBackend::Pbo);
1388            #[cfg(not(feature = "opengl"))]
1389            let gl_uses_pbo = false;
1390
1391            if !gl_uses_pbo {
1392                if let Ok(img) = try_dma() {
1393                    return Ok(img);
1394                }
1395            }
1396        }
1397
1398        // Try PBO (if GL available).
1399        // PBO buffers are u8-sized; the int8 shader emulates i8 output via
1400        // XOR 0x80 on the same underlying buffer, so both U8 and I8 work.
1401        #[cfg(target_os = "linux")]
1402        #[cfg(feature = "opengl")]
1403        if dtype.size() == 1 {
1404            if let Some(gl) = &self.opengl {
1405                match gl.create_pbo_image(width, height, format) {
1406                    Ok(t) => {
1407                        if dtype == DType::I8 {
1408                            // SAFETY: Tensor<u8> and Tensor<i8> are layout-
1409                            // identical (same element size, no T-dependent
1410                            // drop glue). The int8 shader applies XOR 0x80
1411                            // on the same PBO buffer. Same rationale as
1412                            // gl::processor::tensor_i8_as_u8_mut.
1413                            // Invariant: PBO tensors never have chroma
1414                            // (create_pbo_image → Tensor::wrap sets it None).
1415                            debug_assert!(
1416                                t.chroma().is_none(),
1417                                "PBO i8 transmute requires chroma == None"
1418                            );
1419                            let t_i8: Tensor<i8> = unsafe { std::mem::transmute(t) };
1420                            return Ok(TensorDyn::from(t_i8));
1421                        }
1422                        return Ok(TensorDyn::from(t));
1423                    }
1424                    Err(e) => log::debug!("PBO image creation failed, falling back to Mem: {e:?}"),
1425                }
1426            }
1427        }
1428
1429        // Fallback to Mem
1430        Ok(TensorDyn::image(
1431            width,
1432            height,
1433            format,
1434            dtype,
1435            Some(edgefirst_tensor::TensorMemory::Mem),
1436        )?)
1437    }
1438
1439    /// Import an external DMA-BUF image.
1440    ///
1441    /// Each [`PlaneDescriptor`] owns an already-duped fd; this method
1442    /// consumes the descriptors and takes ownership of those fds (whether
1443    /// the call succeeds or fails).
1444    ///
1445    /// The caller must ensure the DMA-BUF allocation is large enough for the
1446    /// specified width, height, format, and any stride/offset on the plane
1447    /// descriptors. No buffer-size validation is performed; an undersized
1448    /// buffer may cause GPU faults or EGL import failure.
1449    ///
1450    /// # Arguments
1451    ///
1452    /// * `image` - Plane descriptor for the primary (or only) plane
1453    /// * `chroma` - Optional plane descriptor for the UV chroma plane
1454    ///   (required for multiplane NV12)
1455    /// * `width` - Image width in pixels
1456    /// * `height` - Image height in pixels
1457    /// * `format` - Pixel format of the buffer
1458    /// * `dtype` - Element data type (e.g. `DType::U8`)
1459    ///
1460    /// # Returns
1461    ///
1462    /// A `TensorDyn` configured as an image.
1463    ///
1464    /// # Errors
1465    ///
1466    /// * [`Error::NotSupported`] if `chroma` is `Some` for a non-semi-planar
1467    ///   format, or multiplane NV16 (not yet supported), or the fd is not
1468    ///   DMA-backed
1469    /// * [`Error::InvalidShape`] if NV12 height is odd
1470    ///
1471    /// # Platform
1472    ///
1473    /// Linux only.
1474    ///
1475    /// # Examples
1476    ///
1477    /// ```rust,ignore
1478    /// use edgefirst_tensor::PlaneDescriptor;
1479    ///
1480    /// // Single-plane RGBA
1481    /// let pd = PlaneDescriptor::new(fd.as_fd())?;
1482    /// let src = proc.import_image(pd, None, 1920, 1080, PixelFormat::Rgba, DType::U8)?;
1483    ///
1484    /// // Multi-plane NV12 with stride
1485    /// let y_pd = PlaneDescriptor::new(y_fd.as_fd())?.with_stride(2048);
1486    /// let uv_pd = PlaneDescriptor::new(uv_fd.as_fd())?.with_stride(2048);
1487    /// let src = proc.import_image(y_pd, Some(uv_pd), 1920, 1080,
1488    ///                             PixelFormat::Nv12, DType::U8)?;
1489    /// ```
1490    #[cfg(target_os = "linux")]
1491    pub fn import_image(
1492        &self,
1493        image: edgefirst_tensor::PlaneDescriptor,
1494        chroma: Option<edgefirst_tensor::PlaneDescriptor>,
1495        width: usize,
1496        height: usize,
1497        format: PixelFormat,
1498        dtype: DType,
1499    ) -> Result<TensorDyn> {
1500        use edgefirst_tensor::{Tensor, TensorMemory};
1501
1502        // Capture stride/offset from descriptors before consuming them
1503        let image_stride = image.stride();
1504        let image_offset = image.offset();
1505        let chroma_stride = chroma.as_ref().and_then(|c| c.stride());
1506        let chroma_offset = chroma.as_ref().and_then(|c| c.offset());
1507
1508        if let Some(chroma_pd) = chroma {
1509            // ── Multiplane path ──────────────────────────────────────
1510            // Multiplane tensors are backed by Tensor<u8> (or transmuted to
1511            // Tensor<i8>). Reject other dtypes to avoid silently returning a
1512            // tensor with the wrong element type.
1513            if dtype != DType::U8 && dtype != DType::I8 {
1514                return Err(Error::NotSupported(format!(
1515                    "multiplane import only supports U8/I8, got {dtype:?}"
1516                )));
1517            }
1518            if format.layout() != PixelLayout::SemiPlanar {
1519                return Err(Error::NotSupported(format!(
1520                    "import_image with chroma requires a semi-planar format, got {format:?}"
1521                )));
1522            }
1523
1524            let chroma_h = match format {
1525                PixelFormat::Nv12 => {
1526                    if !height.is_multiple_of(2) {
1527                        return Err(Error::InvalidShape(format!(
1528                            "NV12 requires even height, got {height}"
1529                        )));
1530                    }
1531                    height / 2
1532                }
1533                // NV16 multiplane will be supported in a future release;
1534                // the GL backend currently only handles NV12 plane1 attributes.
1535                PixelFormat::Nv16 => {
1536                    return Err(Error::NotSupported(
1537                        "multiplane NV16 is not yet supported; use contiguous NV16 instead".into(),
1538                    ))
1539                }
1540                _ => {
1541                    return Err(Error::NotSupported(format!(
1542                        "unsupported semi-planar format: {format:?}"
1543                    )))
1544                }
1545            };
1546
1547            let luma = Tensor::<u8>::from_fd(image.into_fd(), &[height, width], Some("luma"))?;
1548            if luma.memory() != TensorMemory::Dma {
1549                return Err(Error::NotSupported(format!(
1550                    "luma fd must be DMA-backed, got {:?}",
1551                    luma.memory()
1552                )));
1553            }
1554
1555            let chroma_tensor =
1556                Tensor::<u8>::from_fd(chroma_pd.into_fd(), &[chroma_h, width], Some("chroma"))?;
1557            if chroma_tensor.memory() != TensorMemory::Dma {
1558                return Err(Error::NotSupported(format!(
1559                    "chroma fd must be DMA-backed, got {:?}",
1560                    chroma_tensor.memory()
1561                )));
1562            }
1563
1564            // from_planes creates the combined tensor with format set,
1565            // preserving luma's row_stride (currently None since luma was raw).
1566            let mut tensor = Tensor::<u8>::from_planes(luma, chroma_tensor, format)?;
1567
1568            // Apply stride/offset to the combined tensor (luma plane)
1569            if let Some(s) = image_stride {
1570                tensor.set_row_stride(s)?;
1571            }
1572            if let Some(o) = image_offset {
1573                tensor.set_plane_offset(o);
1574            }
1575
1576            // Apply stride/offset to the chroma sub-tensor.
1577            // The chroma tensor is a raw 2D [chroma_h, width] tensor without
1578            // format metadata, so we validate stride manually rather than
1579            // using set_row_stride (which requires format).
1580            if let Some(chroma_ref) = tensor.chroma_mut() {
1581                if let Some(s) = chroma_stride {
1582                    if s < width {
1583                        return Err(Error::InvalidShape(format!(
1584                            "chroma stride {s} < minimum {width} for {format:?}"
1585                        )));
1586                    }
1587                    chroma_ref.set_row_stride_unchecked(s);
1588                }
1589                if let Some(o) = chroma_offset {
1590                    chroma_ref.set_plane_offset(o);
1591                }
1592            }
1593
1594            if dtype == DType::I8 {
1595                // SAFETY: Tensor<u8> and Tensor<i8> have identical layout because
1596                // the struct contains only type-erased storage (OwnedFd, shape, name),
1597                // no inline T values. This assertion catches layout drift at compile time.
1598                const {
1599                    assert!(std::mem::size_of::<Tensor<u8>>() == std::mem::size_of::<Tensor<i8>>());
1600                    assert!(
1601                        std::mem::align_of::<Tensor<u8>>() == std::mem::align_of::<Tensor<i8>>()
1602                    );
1603                }
1604                let tensor_i8: Tensor<i8> = unsafe { std::mem::transmute(tensor) };
1605                return Ok(TensorDyn::from(tensor_i8));
1606            }
1607            Ok(TensorDyn::from(tensor))
1608        } else {
1609            // ── Single-plane path ────────────────────────────────────
1610            let shape = match format.layout() {
1611                PixelLayout::Packed => vec![height, width, format.channels()],
1612                PixelLayout::Planar => vec![format.channels(), height, width],
1613                PixelLayout::SemiPlanar => {
1614                    let total_h = match format {
1615                        PixelFormat::Nv12 => {
1616                            if !height.is_multiple_of(2) {
1617                                return Err(Error::InvalidShape(format!(
1618                                    "NV12 requires even height, got {height}"
1619                                )));
1620                            }
1621                            height * 3 / 2
1622                        }
1623                        PixelFormat::Nv16 => height * 2,
1624                        _ => {
1625                            return Err(Error::InvalidShape(format!(
1626                                "unknown semi-planar height multiplier for {format:?}"
1627                            )))
1628                        }
1629                    };
1630                    vec![total_h, width]
1631                }
1632                _ => {
1633                    return Err(Error::NotSupported(format!(
1634                        "unsupported pixel layout for import_image: {:?}",
1635                        format.layout()
1636                    )));
1637                }
1638            };
1639            let tensor = TensorDyn::from_fd(image.into_fd(), &shape, dtype, None)?;
1640            if tensor.memory() != TensorMemory::Dma {
1641                return Err(Error::NotSupported(format!(
1642                    "import_image requires DMA-backed fd, got {:?}",
1643                    tensor.memory()
1644                )));
1645            }
1646            let mut tensor = tensor.with_format(format)?;
1647            if let Some(s) = image_stride {
1648                tensor.set_row_stride(s)?;
1649            }
1650            if let Some(o) = image_offset {
1651                tensor.set_plane_offset(o);
1652            }
1653            Ok(tensor)
1654        }
1655    }
1656
1657    /// Decode model outputs and draw segmentation masks onto `dst`.
1658    ///
1659    /// This is the primary mask rendering API. The processor decodes via the
1660    /// provided [`Decoder`], selects the optimal rendering path (hybrid
1661    /// CPU+GL or fused GPU), and composites masks onto `dst`.
1662    ///
1663    /// Returns the detected bounding boxes.
1664    pub fn draw_masks(
1665        &mut self,
1666        decoder: &edgefirst_decoder::Decoder,
1667        outputs: &[&TensorDyn],
1668        dst: &mut TensorDyn,
1669        overlay: MaskOverlay<'_>,
1670    ) -> Result<Vec<DetectBox>> {
1671        let mut output_boxes = Vec::with_capacity(100);
1672
1673        // Try proto path first (fused rendering without materializing masks)
1674        let proto_result = decoder
1675            .decode_proto(outputs, &mut output_boxes)
1676            .map_err(|e| Error::Internal(format!("decode_proto: {e:#?}")))?;
1677
1678        if let Some(proto_data) = proto_result {
1679            self.draw_proto_masks(dst, &output_boxes, &proto_data, overlay)?;
1680        } else {
1681            // Detection-only or unsupported model: full decode + render
1682            let mut output_masks = Vec::with_capacity(100);
1683            decoder
1684                .decode(outputs, &mut output_boxes, &mut output_masks)
1685                .map_err(|e| Error::Internal(format!("decode: {e:#?}")))?;
1686            self.draw_decoded_masks(dst, &output_boxes, &output_masks, overlay)?;
1687        }
1688        Ok(output_boxes)
1689    }
1690
1691    /// Decode tracked model outputs and draw segmentation masks onto `dst`.
1692    ///
1693    /// Like [`draw_masks`](Self::draw_masks) but integrates a tracker for
1694    /// maintaining object identities across frames. The tracker runs after
1695    /// NMS but before mask extraction.
1696    ///
1697    /// Returns detected boxes and track info.
1698    #[cfg(feature = "tracker")]
1699    pub fn draw_masks_tracked<TR: edgefirst_tracker::Tracker<DetectBox>>(
1700        &mut self,
1701        decoder: &edgefirst_decoder::Decoder,
1702        tracker: &mut TR,
1703        timestamp: u64,
1704        outputs: &[&TensorDyn],
1705        dst: &mut TensorDyn,
1706        overlay: MaskOverlay<'_>,
1707    ) -> Result<(Vec<DetectBox>, Vec<edgefirst_tracker::TrackInfo>)> {
1708        let mut output_boxes = Vec::with_capacity(100);
1709        let mut output_tracks = Vec::new();
1710
1711        let proto_result = decoder
1712            .decode_proto_tracked(
1713                tracker,
1714                timestamp,
1715                outputs,
1716                &mut output_boxes,
1717                &mut output_tracks,
1718            )
1719            .map_err(|e| Error::Internal(format!("decode_proto_tracked: {e:#?}")))?;
1720
1721        if let Some(proto_data) = proto_result {
1722            self.draw_proto_masks(dst, &output_boxes, &proto_data, overlay)?;
1723        } else {
1724            // Note: decode_proto_tracked returns None for detection-only/ModelPack
1725            // models WITHOUT calling the tracker. The else branch below is the
1726            // first (and only) tracker call for those model types.
1727            let mut output_masks = Vec::with_capacity(100);
1728            decoder
1729                .decode_tracked(
1730                    tracker,
1731                    timestamp,
1732                    outputs,
1733                    &mut output_boxes,
1734                    &mut output_masks,
1735                    &mut output_tracks,
1736                )
1737                .map_err(|e| Error::Internal(format!("decode_tracked: {e:#?}")))?;
1738            self.draw_decoded_masks(dst, &output_boxes, &output_masks, overlay)?;
1739        }
1740        Ok((output_boxes, output_tracks))
1741    }
1742
1743    /// Materialize per-instance segmentation masks from raw prototype data.
1744    ///
1745    /// Computes `mask_coeff @ protos` with sigmoid activation for each detection,
1746    /// producing compact masks at prototype resolution (e.g., 160×160 crops).
1747    /// Mask values are continuous sigmoid confidence outputs quantized to u8
1748    /// (0 = background, 255 = full confidence), NOT binary thresholded.
1749    ///
1750    /// The returned [`Vec<Segmentation>`] can be:
1751    /// - Inspected or exported for analytics, IoU computation, etc.
1752    /// - Passed directly to [`ImageProcessorTrait::draw_decoded_masks`] for
1753    ///   GPU-interpolated rendering.
1754    ///
1755    /// # Performance Note
1756    ///
1757    /// Calling `materialize_masks` + `draw_decoded_masks` separately prevents
1758    /// the HAL from using its internal fused optimization path. For render-only
1759    /// use cases, prefer [`ImageProcessorTrait::draw_proto_masks`] which selects
1760    /// the fastest path automatically (currently 1.6×–27× faster on tested
1761    /// platforms). Use this method when you need access to the intermediate masks.
1762    ///
1763    /// # Errors
1764    ///
1765    /// Returns [`Error::NoConverter`] if the CPU backend is not available.
1766    pub fn materialize_masks(
1767        &mut self,
1768        detect: &[DetectBox],
1769        proto_data: &ProtoData,
1770        letterbox: Option<[f32; 4]>,
1771        resolution: MaskResolution,
1772    ) -> Result<Vec<Segmentation>> {
1773        let cpu = self.cpu.as_mut().ok_or(Error::NoConverter)?;
1774        match resolution {
1775            MaskResolution::Proto => cpu.materialize_segmentations(detect, proto_data, letterbox),
1776            MaskResolution::Scaled { width, height } => {
1777                cpu.materialize_scaled_segmentations(detect, proto_data, letterbox, width, height)
1778            }
1779        }
1780    }
1781}
1782
1783impl ImageProcessorTrait for ImageProcessor {
1784    /// Converts the source image to the destination image format and size. The
1785    /// image is cropped first, then flipped, then rotated
1786    ///
1787    /// Prefer hardware accelerators when available, falling back to CPU if
1788    /// necessary.
1789    fn convert(
1790        &mut self,
1791        src: &TensorDyn,
1792        dst: &mut TensorDyn,
1793        rotation: Rotation,
1794        flip: Flip,
1795        crop: Crop,
1796    ) -> Result<()> {
1797        let start = Instant::now();
1798        let src_fmt = src.format();
1799        let dst_fmt = dst.format();
1800        let _span = tracing::trace_span!(
1801            "image.convert",
1802            ?src_fmt,
1803            ?dst_fmt,
1804            src_memory = ?src.memory(),
1805            dst_memory = ?dst.memory(),
1806            ?rotation,
1807            ?flip,
1808        )
1809        .entered();
1810        log::trace!(
1811            "convert: {src_fmt:?}({:?}/{:?}) → {dst_fmt:?}({:?}/{:?}), \
1812             rotation={rotation:?}, flip={flip:?}, backend={:?}",
1813            src.dtype(),
1814            src.memory(),
1815            dst.dtype(),
1816            dst.memory(),
1817            self.forced_backend,
1818        );
1819
1820        // ── Forced backend: no fallback chain ────────────────────────
1821        if let Some(forced) = self.forced_backend {
1822            return match forced {
1823                ForcedBackend::Cpu => {
1824                    if let Some(cpu) = self.cpu.as_mut() {
1825                        let r = cpu.convert(src, dst, rotation, flip, crop);
1826                        log::trace!(
1827                            "convert: forced=cpu result={} ({:?})",
1828                            if r.is_ok() { "ok" } else { "err" },
1829                            start.elapsed()
1830                        );
1831                        return r;
1832                    }
1833                    Err(Error::ForcedBackendUnavailable("cpu".into()))
1834                }
1835                ForcedBackend::G2d => {
1836                    #[cfg(target_os = "linux")]
1837                    if let Some(g2d) = self.g2d.as_mut() {
1838                        let r = g2d.convert(src, dst, rotation, flip, crop);
1839                        log::trace!(
1840                            "convert: forced=g2d result={} ({:?})",
1841                            if r.is_ok() { "ok" } else { "err" },
1842                            start.elapsed()
1843                        );
1844                        return r;
1845                    }
1846                    Err(Error::ForcedBackendUnavailable("g2d".into()))
1847                }
1848                ForcedBackend::OpenGl => {
1849                    #[cfg(any(target_os = "linux", target_os = "macos"))]
1850                    #[cfg(feature = "opengl")]
1851                    if let Some(opengl) = self.opengl.as_mut() {
1852                        let r = opengl.convert(src, dst, rotation, flip, crop);
1853                        log::trace!(
1854                            "convert: forced=opengl result={} ({:?})",
1855                            if r.is_ok() { "ok" } else { "err" },
1856                            start.elapsed()
1857                        );
1858                        return r;
1859                    }
1860                    Err(Error::ForcedBackendUnavailable("opengl".into()))
1861                }
1862            };
1863        }
1864
1865        // ── Auto fallback chain: OpenGL → G2D → CPU ──────────────────
1866        #[cfg(any(target_os = "linux", target_os = "macos"))]
1867        #[cfg(feature = "opengl")]
1868        if let Some(opengl) = self.opengl.as_mut() {
1869            match opengl.convert(src, dst, rotation, flip, crop) {
1870                Ok(_) => {
1871                    log::trace!(
1872                        "convert: auto selected=opengl for {src_fmt:?}→{dst_fmt:?} ({:?})",
1873                        start.elapsed()
1874                    );
1875                    return Ok(());
1876                }
1877                Err(e) => {
1878                    log::trace!("convert: auto opengl declined {src_fmt:?}→{dst_fmt:?}: {e}");
1879                }
1880            }
1881        }
1882
1883        #[cfg(target_os = "linux")]
1884        if let Some(g2d) = self.g2d.as_mut() {
1885            match g2d.convert(src, dst, rotation, flip, crop) {
1886                Ok(_) => {
1887                    log::trace!(
1888                        "convert: auto selected=g2d for {src_fmt:?}→{dst_fmt:?} ({:?})",
1889                        start.elapsed()
1890                    );
1891                    return Ok(());
1892                }
1893                Err(e) => {
1894                    log::trace!("convert: auto g2d declined {src_fmt:?}→{dst_fmt:?}: {e}");
1895                }
1896            }
1897        }
1898
1899        if let Some(cpu) = self.cpu.as_mut() {
1900            match cpu.convert(src, dst, rotation, flip, crop) {
1901                Ok(_) => {
1902                    log::trace!(
1903                        "convert: auto selected=cpu for {src_fmt:?}→{dst_fmt:?} ({:?})",
1904                        start.elapsed()
1905                    );
1906                    return Ok(());
1907                }
1908                Err(e) => {
1909                    log::trace!("convert: auto cpu failed {src_fmt:?}→{dst_fmt:?}: {e}");
1910                    return Err(e);
1911                }
1912            }
1913        }
1914        Err(Error::NoConverter)
1915    }
1916
1917    fn draw_decoded_masks(
1918        &mut self,
1919        dst: &mut TensorDyn,
1920        detect: &[DetectBox],
1921        segmentation: &[Segmentation],
1922        overlay: MaskOverlay<'_>,
1923    ) -> Result<()> {
1924        let _span = tracing::trace_span!(
1925            "image.draw_decoded_masks",
1926            n_detections = detect.len(),
1927            n_segmentations = segmentation.len(),
1928        )
1929        .entered();
1930        let start = Instant::now();
1931
1932        if let Some(bg) = overlay.background {
1933            if bg.aliases(dst) {
1934                return Err(Error::AliasedBuffers(
1935                    "background must not reference the same buffer as dst".to_string(),
1936                ));
1937            }
1938        }
1939
1940        // Un-letterbox detect boxes and segmentation bboxes for rendering when
1941        // a letterbox was applied to prepare the model input.
1942        let lb_boxes: Vec<DetectBox>;
1943        let lb_segs: Vec<Segmentation>;
1944        let (detect, segmentation) = if let Some(lb) = overlay.letterbox {
1945            lb_boxes = detect.iter().map(|&d| unletter_bbox(d, lb)).collect();
1946            // Keep segmentation bboxes in sync with the transformed detect boxes
1947            // when we have a 1:1 correspondence (instance segmentation).
1948            lb_segs = if segmentation.len() == lb_boxes.len() {
1949                segmentation
1950                    .iter()
1951                    .zip(lb_boxes.iter())
1952                    .map(|(s, d)| Segmentation {
1953                        xmin: d.bbox.xmin,
1954                        ymin: d.bbox.ymin,
1955                        xmax: d.bbox.xmax,
1956                        ymax: d.bbox.ymax,
1957                        segmentation: s.segmentation.clone(),
1958                    })
1959                    .collect()
1960            } else {
1961                segmentation.to_vec()
1962            };
1963            (lb_boxes.as_slice(), lb_segs.as_slice())
1964        } else {
1965            (detect, segmentation)
1966        };
1967        #[cfg(target_os = "linux")]
1968        let is_empty_frame = detect.is_empty() && segmentation.is_empty();
1969
1970        // ── Forced backend: no fallback chain ────────────────────────
1971        if let Some(forced) = self.forced_backend {
1972            return match forced {
1973                ForcedBackend::Cpu => {
1974                    if let Some(cpu) = self.cpu.as_mut() {
1975                        return cpu.draw_decoded_masks(dst, detect, segmentation, overlay);
1976                    }
1977                    Err(Error::ForcedBackendUnavailable("cpu".into()))
1978                }
1979                ForcedBackend::G2d => {
1980                    // G2D can only produce empty frames (clear / bg blit).
1981                    // For populated frames it has no rasterizer — fail loudly.
1982                    #[cfg(target_os = "linux")]
1983                    if let Some(g2d) = self.g2d.as_mut() {
1984                        return g2d.draw_decoded_masks(dst, detect, segmentation, overlay);
1985                    }
1986                    Err(Error::ForcedBackendUnavailable("g2d".into()))
1987                }
1988                ForcedBackend::OpenGl => {
1989                    // GL handles background natively via GPU blit, and now
1990                    // actively clears when there is no background.
1991                    #[cfg(target_os = "linux")]
1992                    #[cfg(feature = "opengl")]
1993                    if let Some(opengl) = self.opengl.as_mut() {
1994                        return opengl.draw_decoded_masks(dst, detect, segmentation, overlay);
1995                    }
1996                    Err(Error::ForcedBackendUnavailable("opengl".into()))
1997                }
1998            };
1999        }
2000
2001        // ── Auto dispatch ──────────────────────────────────────────
2002        // Empty frames prefer G2D when available — a single g2d_clear or
2003        // g2d_blit is the cheapest HW path to produce the correct output
2004        // and avoids spinning up the GL pipeline every zero-detection
2005        // frame in a triple-buffered display loop.
2006        #[cfg(target_os = "linux")]
2007        if is_empty_frame {
2008            if let Some(g2d) = self.g2d.as_mut() {
2009                match g2d.draw_decoded_masks(dst, detect, segmentation, overlay) {
2010                    Ok(_) => {
2011                        log::trace!(
2012                            "draw_decoded_masks empty frame via g2d in {:?}",
2013                            start.elapsed()
2014                        );
2015                        return Ok(());
2016                    }
2017                    Err(e) => log::trace!("g2d empty-frame path unavailable: {e:?}"),
2018                }
2019            }
2020        }
2021
2022        // Populated frames (or G2D unavailable): GL first, CPU fallback.
2023        // Both backends now own their own base-layer handling (bg blit
2024        // or clear), so we hand the overlay through untouched.
2025        #[cfg(target_os = "linux")]
2026        #[cfg(feature = "opengl")]
2027        if let Some(opengl) = self.opengl.as_mut() {
2028            log::trace!(
2029                "draw_decoded_masks started with opengl in {:?}",
2030                start.elapsed()
2031            );
2032            match opengl.draw_decoded_masks(dst, detect, segmentation, overlay) {
2033                Ok(_) => {
2034                    log::trace!("draw_decoded_masks with opengl in {:?}", start.elapsed());
2035                    return Ok(());
2036                }
2037                Err(e) => {
2038                    log::trace!("draw_decoded_masks didn't work with opengl: {e:?}")
2039                }
2040            }
2041        }
2042
2043        log::trace!(
2044            "draw_decoded_masks started with cpu in {:?}",
2045            start.elapsed()
2046        );
2047        if let Some(cpu) = self.cpu.as_mut() {
2048            match cpu.draw_decoded_masks(dst, detect, segmentation, overlay) {
2049                Ok(_) => {
2050                    log::trace!("draw_decoded_masks with cpu in {:?}", start.elapsed());
2051                    return Ok(());
2052                }
2053                Err(e) => {
2054                    log::trace!("draw_decoded_masks didn't work with cpu: {e:?}");
2055                    return Err(e);
2056                }
2057            }
2058        }
2059        Err(Error::NoConverter)
2060    }
2061
2062    fn draw_proto_masks(
2063        &mut self,
2064        dst: &mut TensorDyn,
2065        detect: &[DetectBox],
2066        proto_data: &ProtoData,
2067        overlay: MaskOverlay<'_>,
2068    ) -> Result<()> {
2069        let start = Instant::now();
2070
2071        if let Some(bg) = overlay.background {
2072            if bg.aliases(dst) {
2073                return Err(Error::AliasedBuffers(
2074                    "background must not reference the same buffer as dst".to_string(),
2075                ));
2076            }
2077        }
2078
2079        // Un-letterbox detect boxes for rendering when a letterbox was applied
2080        // to prepare the model input.  The original `detect` coords are still
2081        // passed to `materialize_segmentations` (which needs model-space coords
2082        // to correctly crop the proto tensor) alongside `overlay.letterbox` so
2083        // it can emit `Segmentation` structs in output-image space.
2084        let lb_boxes: Vec<DetectBox>;
2085        let render_detect = if let Some(lb) = overlay.letterbox {
2086            lb_boxes = detect.iter().map(|&d| unletter_bbox(d, lb)).collect();
2087            lb_boxes.as_slice()
2088        } else {
2089            detect
2090        };
2091        #[cfg(target_os = "linux")]
2092        let is_empty_frame = detect.is_empty();
2093
2094        // ── Forced backend: no fallback chain ────────────────────────
2095        if let Some(forced) = self.forced_backend {
2096            return match forced {
2097                ForcedBackend::Cpu => {
2098                    if let Some(cpu) = self.cpu.as_mut() {
2099                        return cpu.draw_proto_masks(dst, render_detect, proto_data, overlay);
2100                    }
2101                    Err(Error::ForcedBackendUnavailable("cpu".into()))
2102                }
2103                ForcedBackend::G2d => {
2104                    #[cfg(target_os = "linux")]
2105                    if let Some(g2d) = self.g2d.as_mut() {
2106                        return g2d.draw_proto_masks(dst, render_detect, proto_data, overlay);
2107                    }
2108                    Err(Error::ForcedBackendUnavailable("g2d".into()))
2109                }
2110                ForcedBackend::OpenGl => {
2111                    #[cfg(target_os = "linux")]
2112                    #[cfg(feature = "opengl")]
2113                    if let Some(opengl) = self.opengl.as_mut() {
2114                        return opengl.draw_proto_masks(dst, render_detect, proto_data, overlay);
2115                    }
2116                    Err(Error::ForcedBackendUnavailable("opengl".into()))
2117                }
2118            };
2119        }
2120
2121        // ── Auto dispatch ──────────────────────────────────────────
2122        // Empty frames: prefer G2D — cheapest HW path (clear or bg blit).
2123        #[cfg(target_os = "linux")]
2124        if is_empty_frame {
2125            if let Some(g2d) = self.g2d.as_mut() {
2126                match g2d.draw_proto_masks(dst, render_detect, proto_data, overlay) {
2127                    Ok(_) => {
2128                        log::trace!(
2129                            "draw_proto_masks empty frame via g2d in {:?}",
2130                            start.elapsed()
2131                        );
2132                        return Ok(());
2133                    }
2134                    Err(e) => log::trace!("g2d empty-frame path unavailable: {e:?}"),
2135                }
2136            }
2137        }
2138
2139        // Hybrid path: CPU materialize + GL overlay (benchmarked faster than
2140        // full-GPU draw_proto_masks on all tested platforms: 27× on imx8mp,
2141        // 4× on imx95, 2.5× on rpi5, 1.6× on x86).
2142        // GL owns its own bg-blit / glClear — we pass the overlay through.
2143        //
2144        // CPU materialize needs `&mut` for its MaskScratch buffers; GL also
2145        // needs `&mut`. The CPU borrow is scoped to its block so the
2146        // subsequent GL borrow is free to take over `self`.
2147        #[cfg(target_os = "linux")]
2148        #[cfg(feature = "opengl")]
2149        if let (Some(_), Some(_)) = (self.cpu.as_ref(), self.opengl.as_ref()) {
2150            let segmentation = match self.cpu.as_mut() {
2151                Some(cpu) => {
2152                    log::trace!(
2153                        "draw_proto_masks started with hybrid (cpu+opengl) in {:?}",
2154                        start.elapsed()
2155                    );
2156                    cpu.materialize_segmentations(detect, proto_data, overlay.letterbox)?
2157                }
2158                None => unreachable!("cpu presence checked above"),
2159            };
2160            if let Some(opengl) = self.opengl.as_mut() {
2161                match opengl.draw_decoded_masks(dst, render_detect, &segmentation, overlay) {
2162                    Ok(_) => {
2163                        log::trace!(
2164                            "draw_proto_masks with hybrid (cpu+opengl) in {:?}",
2165                            start.elapsed()
2166                        );
2167                        return Ok(());
2168                    }
2169                    Err(e) => {
2170                        log::trace!(
2171                            "draw_proto_masks hybrid path failed, falling back to cpu: {e:?}"
2172                        );
2173                    }
2174                }
2175            }
2176        }
2177
2178        let Some(cpu) = self.cpu.as_mut() else {
2179            return Err(Error::Internal(
2180                "draw_proto_masks requires CPU backend for fallback path".into(),
2181            ));
2182        };
2183        log::trace!("draw_proto_masks started with cpu in {:?}", start.elapsed());
2184        cpu.draw_proto_masks(dst, render_detect, proto_data, overlay)
2185    }
2186
2187    fn set_class_colors(&mut self, colors: &[[u8; 4]]) -> Result<()> {
2188        let start = Instant::now();
2189
2190        // ── Forced backend: no fallback chain ────────────────────────
2191        if let Some(forced) = self.forced_backend {
2192            return match forced {
2193                ForcedBackend::Cpu => {
2194                    if let Some(cpu) = self.cpu.as_mut() {
2195                        return cpu.set_class_colors(colors);
2196                    }
2197                    Err(Error::ForcedBackendUnavailable("cpu".into()))
2198                }
2199                ForcedBackend::G2d => Err(Error::NotSupported(
2200                    "g2d does not support set_class_colors".into(),
2201                )),
2202                ForcedBackend::OpenGl => {
2203                    #[cfg(target_os = "linux")]
2204                    #[cfg(feature = "opengl")]
2205                    if let Some(opengl) = self.opengl.as_mut() {
2206                        return opengl.set_class_colors(colors);
2207                    }
2208                    Err(Error::ForcedBackendUnavailable("opengl".into()))
2209                }
2210            };
2211        }
2212
2213        // skip G2D as it doesn't support rendering to image
2214
2215        #[cfg(target_os = "linux")]
2216        #[cfg(feature = "opengl")]
2217        if let Some(opengl) = self.opengl.as_mut() {
2218            log::trace!("image started with opengl in {:?}", start.elapsed());
2219            match opengl.set_class_colors(colors) {
2220                Ok(_) => {
2221                    log::trace!("colors set with opengl in {:?}", start.elapsed());
2222                    return Ok(());
2223                }
2224                Err(e) => {
2225                    log::trace!("colors didn't set with opengl: {e:?}")
2226                }
2227            }
2228        }
2229        log::trace!("image started with cpu in {:?}", start.elapsed());
2230        if let Some(cpu) = self.cpu.as_mut() {
2231            match cpu.set_class_colors(colors) {
2232                Ok(_) => {
2233                    log::trace!("colors set with cpu in {:?}", start.elapsed());
2234                    return Ok(());
2235                }
2236                Err(e) => {
2237                    log::trace!("colors didn't set with cpu: {e:?}");
2238                    return Err(e);
2239                }
2240            }
2241        }
2242        Err(Error::NoConverter)
2243    }
2244}
2245
2246// ---------------------------------------------------------------------------
2247// Image loading / saving helpers
2248// ---------------------------------------------------------------------------
2249
/// Test-only shortcut for the peek → allocate → decode sequence.
///
/// Peeks the header of `image` (requesting `format` as the decode target when
/// one is given), allocates a `u8` tensor matching the reported width, height
/// and format in `memory`, and decodes into it via [`edgefirst_codec`]. On
/// Linux, when `padded_dma_pitch_for` yields an aligned pitch for the request,
/// the tensor is instead allocated as a strided DMA image with that pitch.
///
/// Stands in for the removed public `load_image` API at test sites; production
/// callers should spell out the peek → allocate → decode steps themselves.
#[cfg(test)]
pub(crate) fn load_image_test_helper(
    image: &[u8],
    format: Option<PixelFormat>,
    memory: Option<TensorMemory>,
) -> Result<TensorDyn> {
    use edgefirst_codec::{peek_info, DecodeOptions, ImageDecoder, ImageLoad};

    let opts = format.map_or_else(DecodeOptions::default, |target| {
        DecodeOptions::default().with_format(target)
    });
    let info = peek_info(image, &opts)?;
    let (width, height, pixel_format) = (info.width, info.height, info.format);

    let mut decoder = ImageDecoder::new();

    // Pitch-padded DMA allocation takes priority when it applies.
    #[cfg(target_os = "linux")]
    if let Some(pitch) = padded_dma_pitch_for(pixel_format, width, &memory) {
        let mut strided = Tensor::<u8>::image_with_stride(
            width,
            height,
            pixel_format,
            pitch,
            Some(TensorMemory::Dma),
        )?;
        strided.load_image(&mut decoder, image, &opts)?;
        return Ok(TensorDyn::from(strided));
    }

    let mut plain = Tensor::<u8>::image(width, height, pixel_format, memory)?;
    plain.load_image(&mut decoder, image, &opts)?;
    Ok(TensorDyn::from(plain))
}
2291
2292/// Save a [`TensorDyn`] image as a JPEG file.
2293///
2294/// Only packed RGB and RGBA formats are supported.
2295pub fn save_jpeg(tensor: &TensorDyn, path: impl AsRef<std::path::Path>, quality: u8) -> Result<()> {
2296    let t = tensor.as_u8().ok_or(Error::UnsupportedFormat(
2297        "save_jpeg requires u8 tensor".to_string(),
2298    ))?;
2299    let fmt = t.format().ok_or(Error::NotAnImage)?;
2300    if fmt.layout() != PixelLayout::Packed {
2301        return Err(Error::NotImplemented(
2302            "Saving planar images is not supported".to_string(),
2303        ));
2304    }
2305
2306    let colour = match fmt {
2307        PixelFormat::Rgb => jpeg_encoder::ColorType::Rgb,
2308        PixelFormat::Rgba => jpeg_encoder::ColorType::Rgba,
2309        _ => {
2310            return Err(Error::NotImplemented(
2311                "Unsupported image format for saving".to_string(),
2312            ));
2313        }
2314    };
2315
2316    let w = t.width().ok_or(Error::NotAnImage)?;
2317    let h = t.height().ok_or(Error::NotAnImage)?;
2318    let encoder = jpeg_encoder::Encoder::new_file(path, quality)?;
2319    let tensor_map = t.map()?;
2320
2321    encoder.encode(&tensor_map, w as u16, h as u16, colour)?;
2322
2323    Ok(())
2324}
2325
2326pub(crate) struct FunctionTimer<T: Display> {
2327    name: T,
2328    start: std::time::Instant,
2329}
2330
2331impl<T: Display> FunctionTimer<T> {
2332    pub fn new(name: T) -> Self {
2333        Self {
2334            name,
2335            start: std::time::Instant::now(),
2336        }
2337    }
2338}
2339
2340impl<T: Display> Drop for FunctionTimer<T> {
2341    fn drop(&mut self) {
2342        log::trace!("{} elapsed: {:?}", self.name, self.start.elapsed())
2343    }
2344}
2345
2346const DEFAULT_COLORS: [[f32; 4]; 20] = [
2347    [0., 1., 0., 0.7],
2348    [1., 0.5568628, 0., 0.7],
2349    [0.25882353, 0.15294118, 0.13333333, 0.7],
2350    [0.8, 0.7647059, 0.78039216, 0.7],
2351    [0.3137255, 0.3137255, 0.3137255, 0.7],
2352    [0.1411765, 0.3098039, 0.1215686, 0.7],
2353    [1., 0.95686275, 0.5137255, 0.7],
2354    [0.3529412, 0.32156863, 0., 0.7],
2355    [0.4235294, 0.6235294, 0.6509804, 0.7],
2356    [0.5098039, 0.5098039, 0.7294118, 0.7],
2357    [0.00784314, 0.18823529, 0.29411765, 0.7],
2358    [0.0, 0.2706, 1.0, 0.7],
2359    [0.0, 0.0, 0.0, 0.7],
2360    [0.0, 0.5, 0.0, 0.7],
2361    [1.0, 0.0, 0.0, 0.7],
2362    [0.0, 0.0, 1.0, 0.7],
2363    [1.0, 0.5, 0.5, 0.7],
2364    [0.1333, 0.5451, 0.1333, 0.7],
2365    [0.1176, 0.4118, 0.8235, 0.7],
2366    [1., 1., 1., 0.7],
2367];
2368
2369const fn denorm<const M: usize, const N: usize>(a: [[f32; M]; N]) -> [[u8; M]; N] {
2370    let mut result = [[0; M]; N];
2371    let mut i = 0;
2372    while i < N {
2373        let mut j = 0;
2374        while j < M {
2375            result[i][j] = (a[i][j] * 255.0).round() as u8;
2376            j += 1;
2377        }
2378        i += 1;
2379    }
2380    result
2381}
2382
2383const DEFAULT_COLORS_U8: [[u8; 4]; 20] = denorm(DEFAULT_COLORS);
2384
/// Unit tests for the GPU pitch-alignment helpers
/// (`align_width_for_gpu_pitch`, `checked_num_integer_lcm`,
/// `primary_plane_bpp`).
#[cfg(test)]
#[cfg_attr(coverage_nightly, coverage(off))]
mod alignment_tests {
    use super::*;

    #[test]
    fn align_width_rgba8_common_widths() {
        // RGBA8 (bpp=4, lcm(64,4)=64, so width must round to multiple of 16 px).
        assert_eq!(align_width_for_gpu_pitch(640, 4), 640); // 2560 byte pitch — already aligned
        assert_eq!(align_width_for_gpu_pitch(1280, 4), 1280); // 5120
        assert_eq!(align_width_for_gpu_pitch(1920, 4), 1920); // 7680
        assert_eq!(align_width_for_gpu_pitch(3840, 4), 3840); // 15360

        // crowd.png case from the imx95 investigation:
        assert_eq!(align_width_for_gpu_pitch(3004, 4), 3008); // 12016 → 12032
        assert_eq!(align_width_for_gpu_pitch(3000, 4), 3008); // 12000 → 12032
        assert_eq!(align_width_for_gpu_pitch(17, 4), 32); // 68 → 128
        assert_eq!(align_width_for_gpu_pitch(1, 4), 16); // 4 → 64
    }

    #[test]
    fn align_width_rgb888_packed() {
        // RGB888 (bpp=3, lcm(64,3)=192, so width must round to multiple of 64 px).
        assert_eq!(align_width_for_gpu_pitch(64, 3), 64); // 192 byte pitch
        assert_eq!(align_width_for_gpu_pitch(640, 3), 640); // 1920
        assert_eq!(align_width_for_gpu_pitch(1, 3), 64); // 3 → 192
        assert_eq!(align_width_for_gpu_pitch(65, 3), 128); // 195 → 384

        // Verify the rounded width × bpp is a clean multiple of the LCM.
        for w in [3004usize, 1281, 100, 17] {
            let padded = align_width_for_gpu_pitch(w, 3);
            assert!(padded >= w);
            assert_eq!((padded * 3) % 64, 0);
            // `padded * 3` is always divisible by 3, so check against the
            // actual LCM (192) rather than the bpp.
            assert_eq!((padded * 3) % 192, 0);
        }
    }

    #[test]
    fn align_width_grey_u8() {
        // Grey (bpp=1, lcm(64,1)=64, so width must round to multiple of 64 px).
        assert_eq!(align_width_for_gpu_pitch(64, 1), 64);
        assert_eq!(align_width_for_gpu_pitch(640, 1), 640);
        assert_eq!(align_width_for_gpu_pitch(1, 1), 64);
        assert_eq!(align_width_for_gpu_pitch(65, 1), 128);
    }

    #[test]
    fn align_width_zero_inputs() {
        // Zero width stays zero; zero bpp leaves the width untouched.
        assert_eq!(align_width_for_gpu_pitch(0, 4), 0);
        assert_eq!(align_width_for_gpu_pitch(640, 0), 640);
    }

    #[test]
    fn align_width_never_returns_smaller_than_input() {
        // Spot-check the "returned width >= input width" contract across a
        // range of values that would previously have hit `width * bpp`
        // overflow paths.
        for &bpp in &[1usize, 2, 3, 4, 8] {
            for &w in &[
                1usize,
                17,
                64,
                65,
                100,
                1280,
                1281,
                1920,
                3004,
                3072,
                3840,
                usize::MAX / 8,
                usize::MAX / 4,
                usize::MAX / 2,
                usize::MAX - 1,
                usize::MAX,
            ] {
                let aligned = align_width_for_gpu_pitch(w, bpp);
                assert!(
                    aligned >= w,
                    "align_width_for_gpu_pitch({w}, {bpp}) = {aligned} < {w}"
                );
            }
        }
    }

    #[test]
    fn align_width_overflow_returns_unaligned_not_smaller() {
        // For width values close to usize::MAX, padding up would wrap. The
        // function must return the original width rather than wrapping or
        // panicking. A pre-aligned width round-trips unchanged even at the
        // extreme.
        let aligned_extreme = usize::MAX - 15; // 16-pixel boundary for RGBA8
        assert_eq!(
            align_width_for_gpu_pitch(aligned_extreme, 4),
            aligned_extreme
        );
        // A misaligned extreme value cannot be rounded up — the result must
        // not fall below the input.
        let misaligned_extreme = usize::MAX - 1;
        let result = align_width_for_gpu_pitch(misaligned_extreme, 4);
        assert!(
            result >= misaligned_extreme,
            "extreme misaligned width must not be rounded down to {result}"
        );
    }

    #[test]
    fn checked_lcm_basic_and_overflow() {
        assert_eq!(checked_num_integer_lcm(64, 4), Some(64));
        assert_eq!(checked_num_integer_lcm(64, 3), Some(192));
        assert_eq!(checked_num_integer_lcm(64, 1), Some(64));
        assert_eq!(checked_num_integer_lcm(0, 4), Some(0));
        assert_eq!(checked_num_integer_lcm(64, 0), Some(0));
        // Coprime values whose product exceeds usize::MAX must return None.
        assert_eq!(
            checked_num_integer_lcm(usize::MAX, usize::MAX - 1),
            None,
            "coprime extreme values must overflow detect, not panic"
        );
    }

    #[test]
    fn primary_plane_bpp_known_formats() {
        // Packed formats use channels × elem_size.
        assert_eq!(primary_plane_bpp(PixelFormat::Rgba, 1), Some(4));
        assert_eq!(primary_plane_bpp(PixelFormat::Bgra, 1), Some(4));
        assert_eq!(primary_plane_bpp(PixelFormat::Rgb, 1), Some(3));
        assert_eq!(primary_plane_bpp(PixelFormat::Grey, 1), Some(1));
        // Semi-planar (NV12) reports the luma plane's bpp.
        assert_eq!(primary_plane_bpp(PixelFormat::Nv12, 1), Some(1));
    }
}
2515
2516#[cfg(test)]
2517#[cfg_attr(coverage_nightly, coverage(off))]
2518#[allow(deprecated)]
2519mod image_tests {
2520    use super::*;
2521    use crate::{CPUProcessor, Rotation};
2522    #[cfg(target_os = "linux")]
2523    use edgefirst_tensor::is_dma_available;
2524    use edgefirst_tensor::{TensorMapTrait, TensorMemory, TensorTrait};
2525    use image::buffer::ConvertBuffer;
2526
2527    /// Test helper: call `ImageProcessorTrait::convert()` on two `TensorDyn`s
2528    /// by going through the `TensorDyn` API.
2529    ///
2530    /// Returns the `(src_image, dst_image)` reconstructed from the TensorDyn
2531    /// round-trip so the caller can feed them to `compare_images` etc.
2532    fn convert_img(
2533        proc: &mut dyn ImageProcessorTrait,
2534        src: TensorDyn,
2535        dst: TensorDyn,
2536        rotation: Rotation,
2537        flip: Flip,
2538        crop: Crop,
2539    ) -> (Result<()>, TensorDyn, TensorDyn) {
2540        let src_fourcc = src.format().unwrap();
2541        let dst_fourcc = dst.format().unwrap();
2542        let src_dyn = src;
2543        let mut dst_dyn = dst;
2544        let result = proc.convert(&src_dyn, &mut dst_dyn, rotation, flip, crop);
2545        let src_back = {
2546            let mut __t = src_dyn.into_u8().unwrap();
2547            __t.set_format(src_fourcc).unwrap();
2548            TensorDyn::from(__t)
2549        };
2550        let dst_back = {
2551            let mut __t = dst_dyn.into_u8().unwrap();
2552            __t.set_format(dst_fourcc).unwrap();
2553            TensorDyn::from(__t)
2554        };
2555        (result, src_back, dst_back)
2556    }
2557
2558    #[ctor::ctor]
2559    fn init() {
2560        env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
2561    }
2562
2563    macro_rules! function {
2564        () => {{
2565            fn f() {}
2566            fn type_name_of<T>(_: T) -> &'static str {
2567                std::any::type_name::<T>()
2568            }
2569            let name = type_name_of(f);
2570
2571            // Find and cut the rest of the path
2572            match &name[..name.len() - 3].rfind(':') {
2573                Some(pos) => &name[pos + 1..name.len() - 3],
2574                None => &name[..name.len() - 3],
2575            }
2576        }};
2577    }
2578
2579    #[test]
2580    fn test_invalid_crop() {
2581        let src = TensorDyn::image(100, 100, PixelFormat::Rgb, DType::U8, None).unwrap();
2582        let dst = TensorDyn::image(100, 100, PixelFormat::Rgb, DType::U8, None).unwrap();
2583
2584        let crop = Crop::new()
2585            .with_src_rect(Some(Rect::new(50, 50, 60, 60)))
2586            .with_dst_rect(Some(Rect::new(0, 0, 150, 150)));
2587
2588        let result = crop.check_crop_dyn(&src, &dst);
2589        assert!(matches!(
2590            result,
2591            Err(Error::CropInvalid(e)) if e.starts_with("Dest and Src crop invalid")
2592        ));
2593
2594        let crop = crop.with_src_rect(Some(Rect::new(0, 0, 10, 10)));
2595        let result = crop.check_crop_dyn(&src, &dst);
2596        assert!(matches!(
2597            result,
2598            Err(Error::CropInvalid(e)) if e.starts_with("Dest crop invalid")
2599        ));
2600
2601        let crop = crop
2602            .with_src_rect(Some(Rect::new(50, 50, 60, 60)))
2603            .with_dst_rect(Some(Rect::new(0, 0, 50, 50)));
2604        let result = crop.check_crop_dyn(&src, &dst);
2605        assert!(matches!(
2606            result,
2607            Err(Error::CropInvalid(e)) if e.starts_with("Src crop invalid")
2608        ));
2609
2610        let crop = crop.with_src_rect(Some(Rect::new(50, 50, 50, 50)));
2611
2612        let result = crop.check_crop_dyn(&src, &dst);
2613        assert!(result.is_ok());
2614    }
2615
2616    #[test]
2617    fn test_invalid_tensor_format() -> Result<(), Error> {
2618        // 4D tensor cannot be set to a 3-channel pixel format
2619        let mut tensor = Tensor::<u8>::new(&[720, 1280, 4, 1], None, None)?;
2620        let result = tensor.set_format(PixelFormat::Rgb);
2621        assert!(result.is_err(), "4D tensor should reject set_format");
2622
2623        // Tensor with wrong channel count for the format
2624        let mut tensor = Tensor::<u8>::new(&[720, 1280, 4], None, None)?;
2625        let result = tensor.set_format(PixelFormat::Rgb);
2626        assert!(result.is_err(), "4-channel tensor should reject RGB format");
2627
2628        Ok(())
2629    }
2630
2631    #[test]
2632    fn test_invalid_image_file() -> Result<(), Error> {
2633        let result = crate::load_image_test_helper(&[123; 5000], None, None);
2634        assert!(
2635            matches!(result, Err(Error::Codec(_))),
2636            "unrecognised bytes should surface as Error::Codec, got {result:?}"
2637        );
2638        Ok(())
2639    }
2640
2641    #[test]
2642    fn test_invalid_jpeg_format() -> Result<(), Error> {
2643        let result = crate::load_image_test_helper(&[123; 5000], Some(PixelFormat::Yuyv), None);
2644        // YUYV is not a valid decode target; peek_info fails before the magic-
2645        // bytes check, so the precise variant depends on which error fires first.
2646        assert!(
2647            matches!(result, Err(Error::Codec(_))),
2648            "Yuyv target with garbage bytes should surface as Error::Codec, got {result:?}"
2649        );
2650        Ok(())
2651    }
2652
2653    #[test]
2654    fn test_load_resize_save() {
2655        let file = edgefirst_bench::testdata::read("zidane.jpg");
2656        let img = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
2657        assert_eq!(img.width(), Some(1280));
2658        assert_eq!(img.height(), Some(720));
2659
2660        let dst = TensorDyn::image(640, 360, PixelFormat::Rgba, DType::U8, None).unwrap();
2661        let mut converter = CPUProcessor::new();
2662        let (result, _img, dst) = convert_img(
2663            &mut converter,
2664            img,
2665            dst,
2666            Rotation::None,
2667            Flip::None,
2668            Crop::no_crop(),
2669        );
2670        result.unwrap();
2671        assert_eq!(dst.width(), Some(640));
2672        assert_eq!(dst.height(), Some(360));
2673
2674        crate::save_jpeg(&dst, "zidane_resized.jpg", 80).unwrap();
2675
2676        let file = std::fs::read("zidane_resized.jpg").unwrap();
2677        let img = crate::load_image_test_helper(&file, None, None).unwrap();
2678        assert_eq!(img.width(), Some(640));
2679        assert_eq!(img.height(), Some(360));
2680        assert_eq!(img.format().unwrap(), PixelFormat::Rgb);
2681    }
2682
2683    #[test]
2684    fn test_from_tensor_planar() -> Result<(), Error> {
2685        let mut tensor = Tensor::new(&[3, 720, 1280], None, None)?;
2686        tensor
2687            .map()?
2688            .copy_from_slice(&edgefirst_bench::testdata::read("camera720p.8bps"));
2689        let planar = {
2690            tensor
2691                .set_format(PixelFormat::PlanarRgb)
2692                .map_err(|e| crate::Error::Internal(e.to_string()))?;
2693            TensorDyn::from(tensor)
2694        };
2695
2696        let rbga = load_bytes_to_tensor(
2697            1280,
2698            720,
2699            PixelFormat::Rgba,
2700            None,
2701            &edgefirst_bench::testdata::read("camera720p.rgba"),
2702        )?;
2703        compare_images_convert_to_rgb(&planar, &rbga, 0.98, function!());
2704
2705        Ok(())
2706    }
2707
2708    #[test]
2709    fn test_from_tensor_invalid_format() {
2710        // PixelFormat::from_fourcc_str returns None for unknown FourCC codes.
2711        // Since there's no "TEST" pixel format, this validates graceful handling.
2712        assert!(PixelFormat::from_fourcc(u32::from_le_bytes(*b"TEST")).is_none());
2713    }
2714
2715    #[test]
2716    #[should_panic(expected = "Failed to save planar RGB image")]
2717    fn test_save_planar() {
2718        let planar_img = load_bytes_to_tensor(
2719            1280,
2720            720,
2721            PixelFormat::PlanarRgb,
2722            None,
2723            &edgefirst_bench::testdata::read("camera720p.8bps"),
2724        )
2725        .unwrap();
2726
2727        let save_path = "/tmp/planar_rgb.jpg";
2728        crate::save_jpeg(&planar_img, save_path, 90).expect("Failed to save planar RGB image");
2729    }
2730
2731    #[test]
2732    #[should_panic(expected = "Failed to save YUYV image")]
2733    fn test_save_yuyv() {
2734        let planar_img = load_bytes_to_tensor(
2735            1280,
2736            720,
2737            PixelFormat::Yuyv,
2738            None,
2739            &edgefirst_bench::testdata::read("camera720p.yuyv"),
2740        )
2741        .unwrap();
2742
2743        let save_path = "/tmp/yuyv.jpg";
2744        crate::save_jpeg(&planar_img, save_path, 90).expect("Failed to save YUYV image");
2745    }
2746
2747    #[test]
2748    fn test_rotation_angle() {
2749        assert_eq!(Rotation::from_degrees_clockwise(0), Rotation::None);
2750        assert_eq!(Rotation::from_degrees_clockwise(90), Rotation::Clockwise90);
2751        assert_eq!(Rotation::from_degrees_clockwise(180), Rotation::Rotate180);
2752        assert_eq!(
2753            Rotation::from_degrees_clockwise(270),
2754            Rotation::CounterClockwise90
2755        );
2756        assert_eq!(Rotation::from_degrees_clockwise(360), Rotation::None);
2757        assert_eq!(Rotation::from_degrees_clockwise(450), Rotation::Clockwise90);
2758        assert_eq!(Rotation::from_degrees_clockwise(540), Rotation::Rotate180);
2759        assert_eq!(
2760            Rotation::from_degrees_clockwise(630),
2761            Rotation::CounterClockwise90
2762        );
2763    }
2764
2765    #[test]
2766    #[should_panic(expected = "rotation angle is not a multiple of 90")]
2767    fn test_rotation_angle_panic() {
2768        Rotation::from_degrees_clockwise(361);
2769    }
2770
2771    #[test]
2772    fn test_disable_env_var() -> Result<(), Error> {
2773        // EDGEFIRST_FORCE_BACKEND takes precedence over EDGEFIRST_DISABLE_*,
2774        // so clear it for the duration of this test to avoid races with
2775        // test_force_backend_cpu running in parallel.
2776        let saved_force = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
2777        unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") };
2778
2779        #[cfg(target_os = "linux")]
2780        {
2781            let original = std::env::var("EDGEFIRST_DISABLE_G2D").ok();
2782            unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", "1") };
2783            let converter = ImageProcessor::new()?;
2784            match original {
2785                Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", s) },
2786                None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_G2D") },
2787            }
2788            assert!(converter.g2d.is_none());
2789        }
2790
2791        #[cfg(target_os = "linux")]
2792        #[cfg(feature = "opengl")]
2793        {
2794            let original = std::env::var("EDGEFIRST_DISABLE_GL").ok();
2795            unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", "1") };
2796            let converter = ImageProcessor::new()?;
2797            match original {
2798                Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", s) },
2799                None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_GL") },
2800            }
2801            assert!(converter.opengl.is_none());
2802        }
2803
2804        let original = std::env::var("EDGEFIRST_DISABLE_CPU").ok();
2805        unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", "1") };
2806        let converter = ImageProcessor::new()?;
2807        match original {
2808            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", s) },
2809            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_CPU") },
2810        }
2811        assert!(converter.cpu.is_none());
2812
2813        let original_cpu = std::env::var("EDGEFIRST_DISABLE_CPU").ok();
2814        unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", "1") };
2815        let original_gl = std::env::var("EDGEFIRST_DISABLE_GL").ok();
2816        unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", "1") };
2817        let original_g2d = std::env::var("EDGEFIRST_DISABLE_G2D").ok();
2818        unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", "1") };
2819        let mut converter = ImageProcessor::new()?;
2820
2821        let src = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None)?;
2822        let dst = TensorDyn::image(640, 360, PixelFormat::Rgba, DType::U8, None)?;
2823        let (result, _src, _dst) = convert_img(
2824            &mut converter,
2825            src,
2826            dst,
2827            Rotation::None,
2828            Flip::None,
2829            Crop::no_crop(),
2830        );
2831        assert!(matches!(result, Err(Error::NoConverter)));
2832
2833        match original_cpu {
2834            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", s) },
2835            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_CPU") },
2836        }
2837        match original_gl {
2838            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", s) },
2839            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_GL") },
2840        }
2841        match original_g2d {
2842            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", s) },
2843            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_G2D") },
2844        }
2845        match saved_force {
2846            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
2847            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
2848        }
2849
2850        Ok(())
2851    }
2852
2853    #[test]
2854    fn test_unsupported_conversion() {
2855        let src = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
2856        let dst = TensorDyn::image(640, 360, PixelFormat::Nv12, DType::U8, None).unwrap();
2857        let mut converter = ImageProcessor::new().unwrap();
2858        let (result, _src, _dst) = convert_img(
2859            &mut converter,
2860            src,
2861            dst,
2862            Rotation::None,
2863            Flip::None,
2864            Crop::no_crop(),
2865        );
2866        log::debug!("result: {:?}", result);
2867        assert!(matches!(
2868            result,
2869            Err(Error::NotSupported(e)) if e.starts_with("Conversion from NV12 to NV12")
2870        ));
2871    }
2872
2873    #[test]
2874    fn test_load_grey() {
2875        let grey_img = crate::load_image_test_helper(
2876            &edgefirst_bench::testdata::read("grey.jpg"),
2877            Some(PixelFormat::Rgba),
2878            None,
2879        )
2880        .unwrap();
2881
2882        let grey_but_rgb_img = crate::load_image_test_helper(
2883            &edgefirst_bench::testdata::read("grey-rgb.jpg"),
2884            Some(PixelFormat::Rgba),
2885            None,
2886        )
2887        .unwrap();
2888
2889        compare_images(&grey_img, &grey_but_rgb_img, 0.99, function!());
2890    }
2891
2892    #[test]
2893    fn test_new_nv12() {
2894        let nv12 = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
2895        assert_eq!(nv12.height(), Some(720));
2896        assert_eq!(nv12.width(), Some(1280));
2897        assert_eq!(nv12.format().unwrap(), PixelFormat::Nv12);
2898        // PixelFormat::Nv12.channels() returns 1 (luma plane channel count)
2899        assert_eq!(nv12.format().unwrap().channels(), 1);
2900        assert!(nv12.format().is_some_and(
2901            |f| f.layout() == PixelLayout::Planar || f.layout() == PixelLayout::SemiPlanar
2902        ))
2903    }
2904
2905    #[test]
2906    #[cfg(target_os = "linux")]
2907    fn test_new_image_converter() {
2908        let dst_width = 640;
2909        let dst_height = 360;
2910        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
2911        let src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
2912
2913        let mut converter = ImageProcessor::new().unwrap();
2914        let converter_dst = converter
2915            .create_image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None)
2916            .unwrap();
2917        let (result, src, converter_dst) = convert_img(
2918            &mut converter,
2919            src,
2920            converter_dst,
2921            Rotation::None,
2922            Flip::None,
2923            Crop::no_crop(),
2924        );
2925        result.unwrap();
2926
2927        let cpu_dst =
2928            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
2929        let mut cpu_converter = CPUProcessor::new();
2930        let (result, _src, cpu_dst) = convert_img(
2931            &mut cpu_converter,
2932            src,
2933            cpu_dst,
2934            Rotation::None,
2935            Flip::None,
2936            Crop::no_crop(),
2937        );
2938        result.unwrap();
2939
2940        compare_images(&converter_dst, &cpu_dst, 0.98, function!());
2941    }
2942
2943    #[test]
2944    #[cfg(target_os = "linux")]
2945    fn test_create_image_dtype_i8() {
2946        let mut converter = ImageProcessor::new().unwrap();
2947
2948        // I8 image should allocate successfully via create_image
2949        let dst = converter
2950            .create_image(320, 240, PixelFormat::Rgb, DType::I8, None)
2951            .unwrap();
2952        assert_eq!(dst.dtype(), DType::I8);
2953        assert!(dst.width() == Some(320));
2954        assert!(dst.height() == Some(240));
2955        assert_eq!(dst.format(), Some(PixelFormat::Rgb));
2956
2957        // U8 for comparison
2958        let dst_u8 = converter
2959            .create_image(320, 240, PixelFormat::Rgb, DType::U8, None)
2960            .unwrap();
2961        assert_eq!(dst_u8.dtype(), DType::U8);
2962
2963        // Convert into I8 dst should succeed
2964        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
2965        let src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
2966        let mut dst_i8 = converter
2967            .create_image(320, 240, PixelFormat::Rgb, DType::I8, None)
2968            .unwrap();
2969        converter
2970            .convert(
2971                &src,
2972                &mut dst_i8,
2973                Rotation::None,
2974                Flip::None,
2975                Crop::no_crop(),
2976            )
2977            .unwrap();
2978    }
2979
2980    #[test]
2981    #[cfg(target_os = "linux")]
2982    fn test_create_image_nv12_dma_non_aligned_width() {
2983        // Regression for C2: create_image must not apply stride padding to
2984        // non-packed formats. NV12 is semi-planar (PixelLayout::SemiPlanar),
2985        // so the try_dma path should fall through to the plain
2986        // TensorDyn::image allocation for any width, regardless of the
2987        // 64-byte GPU pitch alignment.
2988        let converter = ImageProcessor::new().unwrap();
2989
2990        // 100 is intentionally not a multiple of 64 (the Mali pitch
2991        // alignment) to prove that non-packed layouts do not take the
2992        // stride-padded branch.
2993        let result = converter.create_image(
2994            100,
2995            64,
2996            PixelFormat::Nv12,
2997            DType::U8,
2998            Some(TensorMemory::Dma),
2999        );
3000
3001        match result {
3002            Ok(img) => {
3003                assert_eq!(img.width(), Some(100));
3004                assert_eq!(img.height(), Some(64));
3005                assert_eq!(img.format(), Some(PixelFormat::Nv12));
3006                // Non-packed formats must never carry a row_stride override.
3007                assert!(
3008                    img.row_stride().is_none(),
3009                    "NV12 must not be stride-padded by create_image",
3010                );
3011            }
3012            Err(e) => {
3013                // Accept skip on hosts without a dma-heap, but never the
3014                // "NotImplemented" we used to return for non-packed layouts.
3015                let msg = format!("{e}");
3016                assert!(
3017                    !msg.contains("image_with_stride"),
3018                    "NV12 should not hit the stride-padded path: {msg}",
3019                );
3020            }
3021        }
3022    }
3023
3024    #[test]
3025    #[ignore] // Hangs on desktop platforms where DMA-buf is unavailable and PBO
3026              // fallback triggers a GPU driver hang during SHM→texture upload (e.g.,
3027              // NVIDIA without /dev/dma_heap permissions). Works on embedded targets.
3028    fn test_crop_skip() {
3029        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
3030        let src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
3031
3032        let mut converter = ImageProcessor::new().unwrap();
3033        let converter_dst = converter
3034            .create_image(1280, 720, PixelFormat::Rgba, DType::U8, None)
3035            .unwrap();
3036        let crop = Crop::new()
3037            .with_src_rect(Some(Rect::new(0, 0, 640, 640)))
3038            .with_dst_rect(Some(Rect::new(0, 0, 640, 640)));
3039        let (result, src, converter_dst) = convert_img(
3040            &mut converter,
3041            src,
3042            converter_dst,
3043            Rotation::None,
3044            Flip::None,
3045            crop,
3046        );
3047        result.unwrap();
3048
3049        let cpu_dst = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
3050        let mut cpu_converter = CPUProcessor::new();
3051        let (result, _src, cpu_dst) = convert_img(
3052            &mut cpu_converter,
3053            src,
3054            cpu_dst,
3055            Rotation::None,
3056            Flip::None,
3057            crop,
3058        );
3059        result.unwrap();
3060
3061        compare_images(&converter_dst, &cpu_dst, 0.99999, function!());
3062    }
3063
3064    #[test]
3065    fn test_invalid_pixel_format() {
3066        // PixelFormat::from_fourcc returns None for unknown formats,
3067        // so TensorDyn::image cannot be called with an invalid format.
3068        assert!(PixelFormat::from_fourcc(u32::from_le_bytes(*b"TEST")).is_none());
3069    }
3070
3071    // Helper function to check if G2D library is available (Linux/i.MX8 only)
3072    #[cfg(target_os = "linux")]
3073    static G2D_AVAILABLE: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
3074
3075    #[cfg(target_os = "linux")]
3076    fn is_g2d_available() -> bool {
3077        *G2D_AVAILABLE.get_or_init(|| G2DProcessor::new().is_ok())
3078    }
3079
3080    #[cfg(target_os = "linux")]
3081    #[cfg(feature = "opengl")]
3082    static GL_AVAILABLE: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
3083
3084    #[cfg(target_os = "linux")]
3085    #[cfg(feature = "opengl")]
3086    // Helper function to check if OpenGL is available
3087    fn is_opengl_available() -> bool {
3088        #[cfg(all(target_os = "linux", feature = "opengl"))]
3089        {
3090            *GL_AVAILABLE.get_or_init(|| GLProcessorThreaded::new(None).is_ok())
3091        }
3092
3093        #[cfg(not(all(target_os = "linux", feature = "opengl")))]
3094        {
3095            false
3096        }
3097    }
3098
3099    #[test]
3100    fn test_load_jpeg_with_exif() {
3101        let file = edgefirst_bench::testdata::read("zidane_rotated_exif.jpg").to_vec();
3102        let loaded = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
3103
3104        assert_eq!(loaded.height(), Some(1280));
3105        assert_eq!(loaded.width(), Some(720));
3106
3107        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
3108        let cpu_src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
3109
3110        let (dst_width, dst_height) = (cpu_src.height().unwrap(), cpu_src.width().unwrap());
3111
3112        let cpu_dst =
3113            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3114        let mut cpu_converter = CPUProcessor::new();
3115
3116        let (result, _cpu_src, cpu_dst) = convert_img(
3117            &mut cpu_converter,
3118            cpu_src,
3119            cpu_dst,
3120            Rotation::Clockwise90,
3121            Flip::None,
3122            Crop::no_crop(),
3123        );
3124        result.unwrap();
3125
3126        compare_images(&loaded, &cpu_dst, 0.98, function!());
3127    }
3128
3129    #[test]
3130    fn test_load_png_with_exif() {
3131        let file = edgefirst_bench::testdata::read("zidane_rotated_exif_180.png").to_vec();
3132        let loaded = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
3133
3134        assert_eq!(loaded.height(), Some(720));
3135        assert_eq!(loaded.width(), Some(1280));
3136
3137        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
3138        let cpu_src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
3139
3140        let cpu_dst = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
3141        let mut cpu_converter = CPUProcessor::new();
3142
3143        let (result, _cpu_src, cpu_dst) = convert_img(
3144            &mut cpu_converter,
3145            cpu_src,
3146            cpu_dst,
3147            Rotation::Rotate180,
3148            Flip::None,
3149            Crop::no_crop(),
3150        );
3151        result.unwrap();
3152
3153        compare_images(&loaded, &cpu_dst, 0.98, function!());
3154    }
3155
3156    /// Synthesise an RGB JPEG with a deterministic pattern at `(width, height)`
3157    /// using the workspace's `jpeg-encoder` crate (the `image` crate is
3158    /// compiled without its JPEG feature). Used to exercise the decoder /
3159    /// pitch-padding paths for arbitrary dimensions without having to bundle
3160    /// a fixture file per test size.
3161    #[cfg(target_os = "linux")]
3162    fn make_rgb_jpeg(width: u32, height: u32) -> Vec<u8> {
3163        let mut bytes = Vec::with_capacity((width * height * 3) as usize);
3164        for y in 0..height {
3165            for x in 0..width {
3166                bytes.push(((x + y) & 0xFF) as u8);
3167                bytes.push(((x.wrapping_mul(3)) & 0xFF) as u8);
3168                bytes.push(((y.wrapping_mul(5)) & 0xFF) as u8);
3169            }
3170        }
3171        let mut out = Vec::new();
3172        let encoder = jpeg_encoder::Encoder::new(&mut out, 85);
3173        encoder
3174            .encode(
3175                &bytes,
3176                width as u16,
3177                height as u16,
3178                jpeg_encoder::ColorType::Rgb,
3179            )
3180            .expect("jpeg-encoder must succeed on trivial input");
3181        out
3182    }
3183
3184    /// End-to-end: a 375×333 RGBA JPEG (width NOT divisible by 4) loaded
3185    /// via the pitch-padded DMA path and letterboxed through the GL
3186    /// backend must produce correct output. Before the Rgba/Bgra
3187    /// width%4 relaxation in `DmaImportAttrs::from_tensor`, this case
3188    /// failed the pre-check and forced a CPU texture upload fallback;
3189    /// with the relaxation, EGL import succeeds at the driver level and
3190    /// the GL fast path runs. Output correctness is checked against a
3191    /// CPU reference (convert ran with `EDGEFIRST_FORCE_BACKEND=cpu`).
3192    #[test]
3193    #[cfg(target_os = "linux")]
3194    #[cfg(feature = "opengl")]
3195    fn test_convert_rgba_non_4_aligned_width_end_to_end() {
3196        use edgefirst_tensor::is_dma_available;
3197        if !is_dma_available() {
3198            eprintln!(
3199                "SKIPPED: test_convert_rgba_non_4_aligned_width_end_to_end — DMA not available"
3200            );
3201            return;
3202        }
3203        // 375 is the canonical failure width from dataset loaders —
3204        // 375 * 4 = 1500 bytes/row, pitch-padded to 1536. Width%4 = 3,
3205        // so the old pre-check rejected it; new code accepts it.
3206        let jpeg = make_rgb_jpeg(375, 333);
3207        let src_gl = crate::load_image_test_helper(&jpeg, Some(PixelFormat::Rgba), None).unwrap();
3208        assert_eq!(src_gl.width(), Some(375));
3209        // Row stride must still be pitch-padded (separate concern from width).
3210        let stride = src_gl.row_stride().unwrap();
3211        assert_eq!(stride, 1536, "expected padded pitch 1536, got {stride}");
3212
3213        // GL-backed convert into a pitch-aligned 640×640 Rgba dest.
3214        let mut gl_proc = ImageProcessor::new().unwrap();
3215        let gl_dst = gl_proc
3216            .create_image(640, 640, PixelFormat::Rgba, DType::U8, None)
3217            .unwrap();
3218        let (r_gl, _src_gl, gl_dst) = convert_img(
3219            &mut gl_proc,
3220            src_gl,
3221            gl_dst,
3222            Rotation::None,
3223            Flip::None,
3224            Crop::no_crop(),
3225        );
3226        r_gl.expect("GL-backed convert must succeed for 375x333 Rgba src");
3227
3228        // CPU reference via a fresh load so the two paths start from
3229        // byte-identical inputs. `with_config(backend=Cpu)` forces the
3230        // CPU-only processor regardless of which backends the host has
3231        // available.
3232        let src_cpu =
3233            crate::load_image_test_helper(&jpeg, Some(PixelFormat::Rgba), Some(TensorMemory::Mem))
3234                .unwrap();
3235        let mut cpu_proc = ImageProcessor::with_config(ImageProcessorConfig {
3236            backend: ComputeBackend::Cpu,
3237            ..Default::default()
3238        })
3239        .unwrap();
3240        let cpu_dst = TensorDyn::image(
3241            640,
3242            640,
3243            PixelFormat::Rgba,
3244            DType::U8,
3245            Some(TensorMemory::Mem),
3246        )
3247        .unwrap();
3248        let (r_cpu, _src_cpu, cpu_dst) = convert_img(
3249            &mut cpu_proc,
3250            src_cpu,
3251            cpu_dst,
3252            Rotation::None,
3253            Flip::None,
3254            Crop::no_crop(),
3255        );
3256        r_cpu.unwrap();
3257
3258        // Structural similarity: the GL path may have gone through EGL
3259        // import OR fallen back to CPU texture upload — either way, the
3260        // output must match the CPU reference closely.
3261        compare_images(&gl_dst, &cpu_dst, 0.95, function!());
3262    }
3263
3264    /// Regression lock: loading a JPEG at a non-64-aligned RGBA pitch (e.g.
3265    /// 500×333 → natural pitch 2000, needs to be padded to 2048) must go
3266    /// through `image_with_stride` and set `row_stride()` / `effective_row_stride()`
3267    /// to the padded value. The earlier pitch-padding commit fixed this in
3268    /// `load_jpeg`; a regression would surface as `row_stride == None` or
3269    /// `effective_row_stride == 2000`.
3270    #[test]
3271    #[cfg(target_os = "linux")]
3272    fn test_load_jpeg_rgba_non_aligned_pitch_padded_dma() {
3273        use edgefirst_tensor::is_dma_available;
3274        if !is_dma_available() {
3275            eprintln!(
3276                "SKIPPED: test_load_jpeg_rgba_non_aligned_pitch_padded_dma — DMA not available"
3277            );
3278            return;
3279        }
3280        // Widths that force a non-64-aligned natural RGBA pitch. All three
3281        // are divisible by 4 so the EGL width-alignment pre-check passes.
3282        // The pitch-padding fix is what makes these importable at all.
3283        for &w in &[500u32, 612, 428] {
3284            let jpeg = make_rgb_jpeg(w, 333);
3285            let loaded =
3286                crate::load_image_test_helper(&jpeg, Some(PixelFormat::Rgba), None).unwrap();
3287            let natural = (w as usize) * 4;
3288            let aligned = crate::align_pitch_bytes_to_gpu_alignment(natural).unwrap();
3289            assert!(
3290                aligned > natural,
3291                "test sanity: width {w} should be unaligned"
3292            );
3293            let stride = loaded
3294                .row_stride()
3295                .expect("padded DMA path must set an explicit row_stride — regression if None");
3296            assert_eq!(
3297                stride, aligned,
3298                "width {w}: expected padded stride {aligned}, got {stride} \
3299                 (regression: pitch-padding branch skipped?)"
3300            );
3301            let eff = loaded.effective_row_stride().unwrap();
3302            assert_eq!(
3303                eff, aligned,
3304                "effective_row_stride must match stored stride"
3305            );
3306            assert_eq!(loaded.width(), Some(w as usize));
3307            assert_eq!(loaded.height(), Some(333));
3308        }
3309    }
3310
3311    /// `padded_dma_pitch_for` must respect the caller's memory choice and
3312    /// must NOT route into the pitch-padded DMA path when the caller left
3313    /// the choice to the allocator (`None`) but DMA is unavailable on the
3314    /// host. The padded path requires `image_with_stride`, which always
3315    /// allocates DMA — taking it on a system without `/dev/dma_heap`
3316    /// would convert a normally-working image load into a hard failure
3317    /// (since `Tensor::image(..., None)` would have fallen back to
3318    /// SHM/Mem).
3319    #[test]
3320    #[cfg(target_os = "linux")]
3321    fn test_padded_dma_pitch_for_respects_memory_choice() {
3322        use edgefirst_tensor::{is_dma_available, TensorMemory};
3323
3324        // 500×4 = 2000 → padded to 2048 by GPU alignment. Use it for
3325        // every case so any "no padding" answer is unambiguous.
3326        let unaligned_w = 500;
3327
3328        // Caller asks for Mem / Shm: never pad, regardless of DMA.
3329        assert_eq!(
3330            crate::padded_dma_pitch_for(PixelFormat::Rgba, unaligned_w, &Some(TensorMemory::Mem),),
3331            None,
3332            "Mem must never trigger DMA padding"
3333        );
3334        assert_eq!(
3335            crate::padded_dma_pitch_for(PixelFormat::Rgba, unaligned_w, &Some(TensorMemory::Shm),),
3336            None,
3337            "Shm must never trigger DMA padding"
3338        );
3339
3340        // Caller explicitly asks for DMA: always pad if width needs it.
3341        // Even if the runtime can't actually allocate DMA, the caller
3342        // owns that decision and the resulting allocation error is
3343        // their problem, not ours.
3344        assert_eq!(
3345            crate::padded_dma_pitch_for(PixelFormat::Rgba, unaligned_w, &Some(TensorMemory::Dma),),
3346            Some(2048),
3347            "explicit Dma must pad regardless of runtime DMA availability"
3348        );
3349
3350        // Caller leaves it to the allocator: behaviour depends on
3351        // host-runtime DMA availability. This is the case the fix
3352        // guards against.
3353        let none_result = crate::padded_dma_pitch_for(PixelFormat::Rgba, unaligned_w, &None);
3354        if is_dma_available() {
3355            assert_eq!(
3356                none_result,
3357                Some(2048),
3358                "memory=None + DMA available → pad (will route through DMA)"
3359            );
3360        } else {
3361            assert_eq!(
3362                none_result, None,
3363                "memory=None + DMA unavailable → must NOT pad (would force \
3364                 image_with_stride into a DMA-only allocation that fails). \
3365                 Regression: padded_dma_pitch_for ignored is_dma_available()."
3366            );
3367        }
3368    }
3369
3370    // Synthesise a small greyscale PNG in memory at `(width, height)` with a
3371    // deterministic ramp pattern so multiple tests can cross-check output
3372    // without bundling an extra fixture file.
3373    fn make_grey_png(width: u32, height: u32) -> Vec<u8> {
3374        let mut bytes = Vec::with_capacity((width * height) as usize);
3375        for y in 0..height {
3376            for x in 0..width {
3377                bytes.push(((x + y) & 0xFF) as u8);
3378            }
3379        }
3380        let img = image::GrayImage::from_vec(width, height, bytes).unwrap();
3381        let mut buf = Vec::new();
3382        img.write_to(&mut std::io::Cursor::new(&mut buf), image::ImageFormat::Png)
3383            .unwrap();
3384        buf
3385    }
3386
3387    /// Greyscale PNG with a width that forces a pitch-misaligned natural
3388    /// row stride (612 bytes is not a multiple of the 64-byte GPU pitch
3389    /// alignment) must still load via the pitch-padded DMA path. Gated on
3390    /// DMA availability because `image_with_stride` is DMA-only.
3391    #[test]
3392    #[cfg(target_os = "linux")]
3393    fn test_load_png_grey_misaligned_width_dma() {
3394        use edgefirst_tensor::is_dma_available;
3395        if !is_dma_available() {
3396            eprintln!("SKIPPED: test_load_png_grey_misaligned_width_dma — DMA not available");
3397            return;
3398        }
3399        let png = make_grey_png(612, 388);
3400        let loaded = crate::load_image_test_helper(&png, Some(PixelFormat::Grey), None).unwrap();
3401        assert_eq!(loaded.width(), Some(612));
3402        assert_eq!(loaded.height(), Some(388));
3403        assert_eq!(loaded.format(), Some(PixelFormat::Grey));
3404
3405        // Round-trip pixels — natural-pitch DMA-BUFs pad the stride so we
3406        // must indirect through row_stride() rather than assume width.
3407        let map = loaded.as_u8().unwrap().map().unwrap();
3408        let stride = loaded.row_stride().unwrap_or(612);
3409        assert!(stride >= 612);
3410        let bytes: &[u8] = &map;
3411        for y in 0..388usize {
3412            for x in 0..612usize {
3413                let expected = ((x + y) & 0xFF) as u8;
3414                let got = bytes[y * stride + x];
3415                assert_eq!(
3416                    got, expected,
3417                    "grey png mismatch at ({x},{y}): got {got} expected {expected}"
3418                );
3419            }
3420        }
3421    }
3422
3423    /// Greyscale PNG loaded with explicit Mem backing — runs on any
3424    /// platform (no DMA permission requirement) and covers the
3425    /// decoder-native Luma → Grey no-conversion path.
3426    #[test]
3427    fn test_load_png_grey_mem() {
3428        use edgefirst_tensor::TensorMemory;
3429        let png = make_grey_png(612, 100);
3430        let loaded =
3431            crate::load_image_test_helper(&png, Some(PixelFormat::Grey), Some(TensorMemory::Mem))
3432                .unwrap();
3433        assert_eq!(loaded.width(), Some(612));
3434        assert_eq!(loaded.height(), Some(100));
3435        assert_eq!(loaded.format(), Some(PixelFormat::Grey));
3436        let map = loaded.as_u8().unwrap().map().unwrap();
3437        let bytes: &[u8] = &map;
3438        // Mem allocation uses the natural pitch — 612 bytes per row, exact.
3439        assert_eq!(bytes.len(), 612 * 100);
3440        for y in 0..100 {
3441            for x in 0..612 {
3442                assert_eq!(bytes[y * 612 + x], ((x + y) & 0xFF) as u8);
3443            }
3444        }
3445    }
3446
3447    /// Greyscale PNG decoded into RGB — exercises the decoder-colorspace
3448    /// mismatch path (Luma → Rgb via CPU converter). Uses Mem memory to
3449    /// stay portable to host-side test environments.
3450    #[test]
3451    fn test_load_png_grey_to_rgb_mem() {
3452        use edgefirst_tensor::TensorMemory;
3453        let png = make_grey_png(620, 240);
3454        let loaded =
3455            crate::load_image_test_helper(&png, Some(PixelFormat::Rgb), Some(TensorMemory::Mem))
3456                .unwrap();
3457        assert_eq!(loaded.width(), Some(620));
3458        assert_eq!(loaded.height(), Some(240));
3459        assert_eq!(loaded.format(), Some(PixelFormat::Rgb));
3460
3461        // Greyscale promoted to RGB replicates luma into each channel.
3462        let map = loaded.as_u8().unwrap().map().unwrap();
3463        let bytes: &[u8] = &map;
3464        for (x, y) in [(0usize, 0usize), (100, 50), (619, 239)] {
3465            let expected = ((x + y) & 0xFF) as u8;
3466            let off = (y * 620 + x) * 3;
3467            assert_eq!(bytes[off], expected, "R@{x},{y}");
3468            assert_eq!(bytes[off + 1], expected, "G@{x},{y}");
3469            assert_eq!(bytes[off + 2], expected, "B@{x},{y}");
3470        }
3471    }
3472
3473    #[test]
3474    #[cfg(target_os = "linux")]
3475    fn test_g2d_resize() {
3476        if !is_g2d_available() {
3477            eprintln!("SKIPPED: test_g2d_resize - G2D library (libg2d.so.2) not available");
3478            return;
3479        }
3480        if !is_dma_available() {
3481            eprintln!(
3482                "SKIPPED: test_g2d_resize - DMA memory allocation not available (permission denied or no DMA-BUF support)"
3483            );
3484            return;
3485        }
3486
3487        let dst_width = 640;
3488        let dst_height = 360;
3489        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
3490        let src =
3491            crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), Some(TensorMemory::Dma))
3492                .unwrap();
3493
3494        let g2d_dst = TensorDyn::image(
3495            dst_width,
3496            dst_height,
3497            PixelFormat::Rgba,
3498            DType::U8,
3499            Some(TensorMemory::Dma),
3500        )
3501        .unwrap();
3502        let mut g2d_converter = G2DProcessor::new().unwrap();
3503        let (result, src, g2d_dst) = convert_img(
3504            &mut g2d_converter,
3505            src,
3506            g2d_dst,
3507            Rotation::None,
3508            Flip::None,
3509            Crop::no_crop(),
3510        );
3511        result.unwrap();
3512
3513        let cpu_dst =
3514            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3515        let mut cpu_converter = CPUProcessor::new();
3516        let (result, _src, cpu_dst) = convert_img(
3517            &mut cpu_converter,
3518            src,
3519            cpu_dst,
3520            Rotation::None,
3521            Flip::None,
3522            Crop::no_crop(),
3523        );
3524        result.unwrap();
3525
3526        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
3527    }
3528
3529    #[test]
3530    #[cfg(target_os = "linux")]
3531    #[cfg(feature = "opengl")]
3532    fn test_opengl_resize() {
3533        if !is_opengl_available() {
3534            eprintln!("SKIPPED: {} - OpenGL not available", function!());
3535            return;
3536        }
3537
3538        let dst_width = 640;
3539        let dst_height = 360;
3540        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
3541        let src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
3542
3543        let cpu_dst =
3544            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3545        let mut cpu_converter = CPUProcessor::new();
3546        let (result, src, cpu_dst) = convert_img(
3547            &mut cpu_converter,
3548            src,
3549            cpu_dst,
3550            Rotation::None,
3551            Flip::None,
3552            Crop::no_crop(),
3553        );
3554        result.unwrap();
3555
3556        let mut src = src;
3557        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
3558
3559        for _ in 0..5 {
3560            let gl_dst =
3561                TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None)
3562                    .unwrap();
3563            let (result, src_back, gl_dst) = convert_img(
3564                &mut gl_converter,
3565                src,
3566                gl_dst,
3567                Rotation::None,
3568                Flip::None,
3569                Crop::no_crop(),
3570            );
3571            result.unwrap();
3572            src = src_back;
3573
3574            compare_images(&gl_dst, &cpu_dst, 0.98, function!());
3575        }
3576    }
3577
3578    #[test]
3579    #[cfg(target_os = "linux")]
3580    #[cfg(feature = "opengl")]
3581    fn test_opengl_10_threads() {
3582        if !is_opengl_available() {
3583            eprintln!("SKIPPED: {} - OpenGL not available", function!());
3584            return;
3585        }
3586
3587        let handles: Vec<_> = (0..10)
3588            .map(|i| {
3589                std::thread::Builder::new()
3590                    .name(format!("Thread {i}"))
3591                    .spawn(test_opengl_resize)
3592                    .unwrap()
3593            })
3594            .collect();
3595        handles.into_iter().for_each(|h| {
3596            if let Err(e) = h.join() {
3597                std::panic::resume_unwind(e)
3598            }
3599        });
3600    }
3601
3602    #[test]
3603    #[cfg(target_os = "linux")]
3604    #[cfg(feature = "opengl")]
3605    fn test_opengl_grey() {
3606        if !is_opengl_available() {
3607            eprintln!("SKIPPED: {} - OpenGL not available", function!());
3608            return;
3609        }
3610
3611        let img = crate::load_image_test_helper(
3612            &edgefirst_bench::testdata::read("grey.jpg"),
3613            Some(PixelFormat::Grey),
3614            None,
3615        )
3616        .unwrap();
3617
3618        let gl_dst = TensorDyn::image(640, 640, PixelFormat::Grey, DType::U8, None).unwrap();
3619        let cpu_dst = TensorDyn::image(640, 640, PixelFormat::Grey, DType::U8, None).unwrap();
3620
3621        let mut converter = CPUProcessor::new();
3622
3623        let (result, img, cpu_dst) = convert_img(
3624            &mut converter,
3625            img,
3626            cpu_dst,
3627            Rotation::None,
3628            Flip::None,
3629            Crop::no_crop(),
3630        );
3631        result.unwrap();
3632
3633        let mut gl = GLProcessorThreaded::new(None).unwrap();
3634        let (result, _img, gl_dst) = convert_img(
3635            &mut gl,
3636            img,
3637            gl_dst,
3638            Rotation::None,
3639            Flip::None,
3640            Crop::no_crop(),
3641        );
3642        result.unwrap();
3643
3644        compare_images(&gl_dst, &cpu_dst, 0.98, function!());
3645    }
3646
3647    #[test]
3648    #[cfg(target_os = "linux")]
3649    fn test_g2d_src_crop() {
3650        if !is_g2d_available() {
3651            eprintln!("SKIPPED: test_g2d_src_crop - G2D library (libg2d.so.2) not available");
3652            return;
3653        }
3654        if !is_dma_available() {
3655            eprintln!(
3656                "SKIPPED: test_g2d_src_crop - DMA memory allocation not available (permission denied or no DMA-BUF support)"
3657            );
3658            return;
3659        }
3660
3661        let dst_width = 640;
3662        let dst_height = 640;
3663        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
3664        let src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
3665
3666        let cpu_dst =
3667            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3668        let mut cpu_converter = CPUProcessor::new();
3669        let crop = Crop {
3670            src_rect: Some(Rect {
3671                left: 0,
3672                top: 0,
3673                width: 640,
3674                height: 360,
3675            }),
3676            dst_rect: None,
3677            dst_color: None,
3678        };
3679        let (result, src, cpu_dst) = convert_img(
3680            &mut cpu_converter,
3681            src,
3682            cpu_dst,
3683            Rotation::None,
3684            Flip::None,
3685            crop,
3686        );
3687        result.unwrap();
3688
3689        let g2d_dst =
3690            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3691        let mut g2d_converter = G2DProcessor::new().unwrap();
3692        let (result, _src, g2d_dst) = convert_img(
3693            &mut g2d_converter,
3694            src,
3695            g2d_dst,
3696            Rotation::None,
3697            Flip::None,
3698            crop,
3699        );
3700        result.unwrap();
3701
3702        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
3703    }
3704
3705    #[test]
3706    #[cfg(target_os = "linux")]
3707    fn test_g2d_dst_crop() {
3708        if !is_g2d_available() {
3709            eprintln!("SKIPPED: test_g2d_dst_crop - G2D library (libg2d.so.2) not available");
3710            return;
3711        }
3712        if !is_dma_available() {
3713            eprintln!(
3714                "SKIPPED: test_g2d_dst_crop - DMA memory allocation not available (permission denied or no DMA-BUF support)"
3715            );
3716            return;
3717        }
3718
3719        let dst_width = 640;
3720        let dst_height = 640;
3721        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
3722        let src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
3723
3724        let cpu_dst =
3725            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3726        let mut cpu_converter = CPUProcessor::new();
3727        let crop = Crop {
3728            src_rect: None,
3729            dst_rect: Some(Rect::new(100, 100, 512, 288)),
3730            dst_color: None,
3731        };
3732        let (result, src, cpu_dst) = convert_img(
3733            &mut cpu_converter,
3734            src,
3735            cpu_dst,
3736            Rotation::None,
3737            Flip::None,
3738            crop,
3739        );
3740        result.unwrap();
3741
3742        let g2d_dst =
3743            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3744        let mut g2d_converter = G2DProcessor::new().unwrap();
3745        let (result, _src, g2d_dst) = convert_img(
3746            &mut g2d_converter,
3747            src,
3748            g2d_dst,
3749            Rotation::None,
3750            Flip::None,
3751            crop,
3752        );
3753        result.unwrap();
3754
3755        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
3756    }
3757
3758    #[test]
3759    #[cfg(target_os = "linux")]
3760    fn test_g2d_all_rgba() {
3761        if !is_g2d_available() {
3762            eprintln!("SKIPPED: test_g2d_all_rgba - G2D library (libg2d.so.2) not available");
3763            return;
3764        }
3765        if !is_dma_available() {
3766            eprintln!(
3767                "SKIPPED: test_g2d_all_rgba - DMA memory allocation not available (permission denied or no DMA-BUF support)"
3768            );
3769            return;
3770        }
3771
3772        let dst_width = 640;
3773        let dst_height = 640;
3774        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
3775        let src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
3776        let src_dyn = src;
3777
3778        let mut cpu_dst =
3779            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3780        let mut cpu_converter = CPUProcessor::new();
3781        let mut g2d_dst =
3782            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3783        let mut g2d_converter = G2DProcessor::new().unwrap();
3784
3785        let crop = Crop {
3786            src_rect: Some(Rect::new(50, 120, 1024, 576)),
3787            dst_rect: Some(Rect::new(100, 100, 512, 288)),
3788            dst_color: None,
3789        };
3790
3791        for rot in [
3792            Rotation::None,
3793            Rotation::Clockwise90,
3794            Rotation::Rotate180,
3795            Rotation::CounterClockwise90,
3796        ] {
3797            cpu_dst
3798                .as_u8()
3799                .unwrap()
3800                .map()
3801                .unwrap()
3802                .as_mut_slice()
3803                .fill(114);
3804            g2d_dst
3805                .as_u8()
3806                .unwrap()
3807                .map()
3808                .unwrap()
3809                .as_mut_slice()
3810                .fill(114);
3811            for flip in [Flip::None, Flip::Horizontal, Flip::Vertical] {
3812                let mut cpu_dst_dyn = cpu_dst;
3813                cpu_converter
3814                    .convert(&src_dyn, &mut cpu_dst_dyn, Rotation::None, Flip::None, crop)
3815                    .unwrap();
3816                cpu_dst = {
3817                    let mut __t = cpu_dst_dyn.into_u8().unwrap();
3818                    __t.set_format(PixelFormat::Rgba).unwrap();
3819                    TensorDyn::from(__t)
3820                };
3821
3822                let mut g2d_dst_dyn = g2d_dst;
3823                g2d_converter
3824                    .convert(&src_dyn, &mut g2d_dst_dyn, Rotation::None, Flip::None, crop)
3825                    .unwrap();
3826                g2d_dst = {
3827                    let mut __t = g2d_dst_dyn.into_u8().unwrap();
3828                    __t.set_format(PixelFormat::Rgba).unwrap();
3829                    TensorDyn::from(__t)
3830                };
3831
3832                compare_images(
3833                    &g2d_dst,
3834                    &cpu_dst,
3835                    0.98,
3836                    &format!("{} {:?} {:?}", function!(), rot, flip),
3837                );
3838            }
3839        }
3840    }
3841
3842    #[test]
3843    #[cfg(target_os = "linux")]
3844    #[cfg(feature = "opengl")]
3845    fn test_opengl_src_crop() {
3846        if !is_opengl_available() {
3847            eprintln!("SKIPPED: {} - OpenGL not available", function!());
3848            return;
3849        }
3850
3851        let dst_width = 640;
3852        let dst_height = 360;
3853        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
3854        let src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
3855        let crop = Crop {
3856            src_rect: Some(Rect {
3857                left: 320,
3858                top: 180,
3859                width: 1280 - 320,
3860                height: 720 - 180,
3861            }),
3862            dst_rect: None,
3863            dst_color: None,
3864        };
3865
3866        let cpu_dst =
3867            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3868        let mut cpu_converter = CPUProcessor::new();
3869        let (result, src, cpu_dst) = convert_img(
3870            &mut cpu_converter,
3871            src,
3872            cpu_dst,
3873            Rotation::None,
3874            Flip::None,
3875            crop,
3876        );
3877        result.unwrap();
3878
3879        let gl_dst =
3880            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3881        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
3882        let (result, _src, gl_dst) = convert_img(
3883            &mut gl_converter,
3884            src,
3885            gl_dst,
3886            Rotation::None,
3887            Flip::None,
3888            crop,
3889        );
3890        result.unwrap();
3891
3892        compare_images(&gl_dst, &cpu_dst, 0.98, function!());
3893    }
3894
3895    #[test]
3896    #[cfg(target_os = "linux")]
3897    #[cfg(feature = "opengl")]
3898    fn test_opengl_dst_crop() {
3899        if !is_opengl_available() {
3900            eprintln!("SKIPPED: {} - OpenGL not available", function!());
3901            return;
3902        }
3903
3904        let dst_width = 640;
3905        let dst_height = 640;
3906        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
3907        let src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
3908
3909        let cpu_dst =
3910            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3911        let mut cpu_converter = CPUProcessor::new();
3912        let crop = Crop {
3913            src_rect: None,
3914            dst_rect: Some(Rect::new(100, 100, 512, 288)),
3915            dst_color: None,
3916        };
3917        let (result, src, cpu_dst) = convert_img(
3918            &mut cpu_converter,
3919            src,
3920            cpu_dst,
3921            Rotation::None,
3922            Flip::None,
3923            crop,
3924        );
3925        result.unwrap();
3926
3927        let gl_dst =
3928            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3929        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
3930        let (result, _src, gl_dst) = convert_img(
3931            &mut gl_converter,
3932            src,
3933            gl_dst,
3934            Rotation::None,
3935            Flip::None,
3936            crop,
3937        );
3938        result.unwrap();
3939
3940        compare_images(&gl_dst, &cpu_dst, 0.98, function!());
3941    }
3942
3943    #[test]
3944    #[cfg(target_os = "linux")]
3945    #[cfg(feature = "opengl")]
3946    fn test_opengl_all_rgba() {
3947        if !is_opengl_available() {
3948            eprintln!("SKIPPED: {} - OpenGL not available", function!());
3949            return;
3950        }
3951
3952        let dst_width = 640;
3953        let dst_height = 640;
3954        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
3955
3956        let mut cpu_converter = CPUProcessor::new();
3957
3958        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
3959
3960        let mut mem = vec![None, Some(TensorMemory::Mem), Some(TensorMemory::Shm)];
3961        if is_dma_available() {
3962            mem.push(Some(TensorMemory::Dma));
3963        }
3964        let crop = Crop {
3965            src_rect: Some(Rect::new(50, 120, 1024, 576)),
3966            dst_rect: Some(Rect::new(100, 100, 512, 288)),
3967            dst_color: None,
3968        };
3969        for m in mem {
3970            let src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), m).unwrap();
3971            let src_dyn = src;
3972
3973            for rot in [
3974                Rotation::None,
3975                Rotation::Clockwise90,
3976                Rotation::Rotate180,
3977                Rotation::CounterClockwise90,
3978            ] {
3979                for flip in [Flip::None, Flip::Horizontal, Flip::Vertical] {
3980                    let cpu_dst =
3981                        TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, m)
3982                            .unwrap();
3983                    let gl_dst =
3984                        TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, m)
3985                            .unwrap();
3986                    cpu_dst
3987                        .as_u8()
3988                        .unwrap()
3989                        .map()
3990                        .unwrap()
3991                        .as_mut_slice()
3992                        .fill(114);
3993                    gl_dst
3994                        .as_u8()
3995                        .unwrap()
3996                        .map()
3997                        .unwrap()
3998                        .as_mut_slice()
3999                        .fill(114);
4000
4001                    let mut cpu_dst_dyn = cpu_dst;
4002                    cpu_converter
4003                        .convert(&src_dyn, &mut cpu_dst_dyn, Rotation::None, Flip::None, crop)
4004                        .unwrap();
4005                    let cpu_dst = {
4006                        let mut __t = cpu_dst_dyn.into_u8().unwrap();
4007                        __t.set_format(PixelFormat::Rgba).unwrap();
4008                        TensorDyn::from(__t)
4009                    };
4010
4011                    let mut gl_dst_dyn = gl_dst;
4012                    gl_converter
4013                        .convert(&src_dyn, &mut gl_dst_dyn, Rotation::None, Flip::None, crop)
4014                        .map_err(|e| {
4015                            log::error!("error mem {m:?} rot {rot:?} error: {e:?}");
4016                            e
4017                        })
4018                        .unwrap();
4019                    let gl_dst = {
4020                        let mut __t = gl_dst_dyn.into_u8().unwrap();
4021                        __t.set_format(PixelFormat::Rgba).unwrap();
4022                        TensorDyn::from(__t)
4023                    };
4024
4025                    compare_images(
4026                        &gl_dst,
4027                        &cpu_dst,
4028                        0.98,
4029                        &format!("{} {:?} {:?}", function!(), rot, flip),
4030                    );
4031                }
4032            }
4033        }
4034    }
4035
4036    #[test]
4037    #[cfg(target_os = "linux")]
4038    fn test_cpu_rotate() {
4039        for rot in [
4040            Rotation::Clockwise90,
4041            Rotation::Rotate180,
4042            Rotation::CounterClockwise90,
4043        ] {
4044            test_cpu_rotate_(rot);
4045        }
4046    }
4047
4048    #[cfg(target_os = "linux")]
4049    fn test_cpu_rotate_(rot: Rotation) {
4050        // This test rotates the image 4 times and checks that the image was returned to
4051        // be the same Currently doesn't check if rotations actually rotated in
4052        // right direction
4053        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
4054
4055        let unchanged_src =
4056            crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
4057        let src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
4058
4059        let (dst_width, dst_height) = match rot {
4060            Rotation::None | Rotation::Rotate180 => (src.width().unwrap(), src.height().unwrap()),
4061            Rotation::Clockwise90 | Rotation::CounterClockwise90 => {
4062                (src.height().unwrap(), src.width().unwrap())
4063            }
4064        };
4065
4066        let cpu_dst =
4067            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4068        let mut cpu_converter = CPUProcessor::new();
4069
4070        // After rotating 4 times, the image should be the same as the original
4071
4072        let (result, src, cpu_dst) = convert_img(
4073            &mut cpu_converter,
4074            src,
4075            cpu_dst,
4076            rot,
4077            Flip::None,
4078            Crop::no_crop(),
4079        );
4080        result.unwrap();
4081
4082        let (result, cpu_dst, src) = convert_img(
4083            &mut cpu_converter,
4084            cpu_dst,
4085            src,
4086            rot,
4087            Flip::None,
4088            Crop::no_crop(),
4089        );
4090        result.unwrap();
4091
4092        let (result, src, cpu_dst) = convert_img(
4093            &mut cpu_converter,
4094            src,
4095            cpu_dst,
4096            rot,
4097            Flip::None,
4098            Crop::no_crop(),
4099        );
4100        result.unwrap();
4101
4102        let (result, _cpu_dst, src) = convert_img(
4103            &mut cpu_converter,
4104            cpu_dst,
4105            src,
4106            rot,
4107            Flip::None,
4108            Crop::no_crop(),
4109        );
4110        result.unwrap();
4111
4112        compare_images(&src, &unchanged_src, 0.98, function!());
4113    }
4114
4115    #[test]
4116    #[cfg(target_os = "linux")]
4117    #[cfg(feature = "opengl")]
4118    fn test_opengl_rotate() {
4119        if !is_opengl_available() {
4120            eprintln!("SKIPPED: {} - OpenGL not available", function!());
4121            return;
4122        }
4123
4124        let size = (1280, 720);
4125        let mut mem = vec![None, Some(TensorMemory::Shm), Some(TensorMemory::Mem)];
4126
4127        if is_dma_available() {
4128            mem.push(Some(TensorMemory::Dma));
4129        }
4130        for m in mem {
4131            for rot in [
4132                Rotation::Clockwise90,
4133                Rotation::Rotate180,
4134                Rotation::CounterClockwise90,
4135            ] {
4136                test_opengl_rotate_(size, rot, m);
4137            }
4138        }
4139    }
4140
4141    #[cfg(target_os = "linux")]
4142    #[cfg(feature = "opengl")]
4143    fn test_opengl_rotate_(
4144        size: (usize, usize),
4145        rot: Rotation,
4146        tensor_memory: Option<TensorMemory>,
4147    ) {
4148        let (dst_width, dst_height) = match rot {
4149            Rotation::None | Rotation::Rotate180 => size,
4150            Rotation::Clockwise90 | Rotation::CounterClockwise90 => (size.1, size.0),
4151        };
4152
4153        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
4154        let src =
4155            crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), tensor_memory).unwrap();
4156
4157        let cpu_dst =
4158            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4159        let mut cpu_converter = CPUProcessor::new();
4160
4161        let (result, mut src, cpu_dst) = convert_img(
4162            &mut cpu_converter,
4163            src,
4164            cpu_dst,
4165            rot,
4166            Flip::None,
4167            Crop::no_crop(),
4168        );
4169        result.unwrap();
4170
4171        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
4172
4173        for _ in 0..5 {
4174            let gl_dst = TensorDyn::image(
4175                dst_width,
4176                dst_height,
4177                PixelFormat::Rgba,
4178                DType::U8,
4179                tensor_memory,
4180            )
4181            .unwrap();
4182            let (result, src_back, gl_dst) = convert_img(
4183                &mut gl_converter,
4184                src,
4185                gl_dst,
4186                rot,
4187                Flip::None,
4188                Crop::no_crop(),
4189            );
4190            result.unwrap();
4191            src = src_back;
4192            compare_images(&gl_dst, &cpu_dst, 0.98, function!());
4193        }
4194    }
4195
4196    #[test]
4197    #[cfg(target_os = "linux")]
4198    fn test_g2d_rotate() {
4199        if !is_g2d_available() {
4200            eprintln!("SKIPPED: test_g2d_rotate - G2D library (libg2d.so.2) not available");
4201            return;
4202        }
4203        if !is_dma_available() {
4204            eprintln!(
4205                "SKIPPED: test_g2d_rotate - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4206            );
4207            return;
4208        }
4209
4210        let size = (1280, 720);
4211        for rot in [
4212            Rotation::Clockwise90,
4213            Rotation::Rotate180,
4214            Rotation::CounterClockwise90,
4215        ] {
4216            test_g2d_rotate_(size, rot);
4217        }
4218    }
4219
4220    #[cfg(target_os = "linux")]
4221    fn test_g2d_rotate_(size: (usize, usize), rot: Rotation) {
4222        let (dst_width, dst_height) = match rot {
4223            Rotation::None | Rotation::Rotate180 => size,
4224            Rotation::Clockwise90 | Rotation::CounterClockwise90 => (size.1, size.0),
4225        };
4226
4227        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
4228        let src =
4229            crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), Some(TensorMemory::Dma))
4230                .unwrap();
4231
4232        let cpu_dst =
4233            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4234        let mut cpu_converter = CPUProcessor::new();
4235
4236        let (result, src, cpu_dst) = convert_img(
4237            &mut cpu_converter,
4238            src,
4239            cpu_dst,
4240            rot,
4241            Flip::None,
4242            Crop::no_crop(),
4243        );
4244        result.unwrap();
4245
4246        let g2d_dst = TensorDyn::image(
4247            dst_width,
4248            dst_height,
4249            PixelFormat::Rgba,
4250            DType::U8,
4251            Some(TensorMemory::Dma),
4252        )
4253        .unwrap();
4254        let mut g2d_converter = G2DProcessor::new().unwrap();
4255
4256        let (result, _src, g2d_dst) = convert_img(
4257            &mut g2d_converter,
4258            src,
4259            g2d_dst,
4260            rot,
4261            Flip::None,
4262            Crop::no_crop(),
4263        );
4264        result.unwrap();
4265
4266        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
4267    }
4268
4269    #[test]
4270    fn test_rgba_to_yuyv_resize_cpu() {
4271        let src = load_bytes_to_tensor(
4272            1280,
4273            720,
4274            PixelFormat::Rgba,
4275            None,
4276            &edgefirst_bench::testdata::read("camera720p.rgba"),
4277        )
4278        .unwrap();
4279
4280        let (dst_width, dst_height) = (640, 360);
4281
4282        let dst =
4283            TensorDyn::image(dst_width, dst_height, PixelFormat::Yuyv, DType::U8, None).unwrap();
4284
4285        let dst_through_yuyv =
4286            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4287        let dst_direct =
4288            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4289
4290        let mut cpu_converter = CPUProcessor::new();
4291
4292        let (result, src, dst) = convert_img(
4293            &mut cpu_converter,
4294            src,
4295            dst,
4296            Rotation::None,
4297            Flip::None,
4298            Crop::no_crop(),
4299        );
4300        result.unwrap();
4301
4302        let (result, _dst, dst_through_yuyv) = convert_img(
4303            &mut cpu_converter,
4304            dst,
4305            dst_through_yuyv,
4306            Rotation::None,
4307            Flip::None,
4308            Crop::no_crop(),
4309        );
4310        result.unwrap();
4311
4312        let (result, _src, dst_direct) = convert_img(
4313            &mut cpu_converter,
4314            src,
4315            dst_direct,
4316            Rotation::None,
4317            Flip::None,
4318            Crop::no_crop(),
4319        );
4320        result.unwrap();
4321
4322        compare_images(&dst_through_yuyv, &dst_direct, 0.98, function!());
4323    }
4324
4325    #[test]
4326    #[cfg(target_os = "linux")]
4327    #[cfg(feature = "opengl")]
4328    #[ignore = "opengl doesn't support rendering to PixelFormat::Yuyv texture"]
4329    fn test_rgba_to_yuyv_resize_opengl() {
4330        if !is_opengl_available() {
4331            eprintln!("SKIPPED: {} - OpenGL not available", function!());
4332            return;
4333        }
4334
4335        if !is_dma_available() {
4336            eprintln!(
4337                "SKIPPED: {} - DMA memory allocation not available (permission denied or no DMA-BUF support)",
4338                function!()
4339            );
4340            return;
4341        }
4342
4343        let src = load_bytes_to_tensor(
4344            1280,
4345            720,
4346            PixelFormat::Rgba,
4347            None,
4348            &edgefirst_bench::testdata::read("camera720p.rgba"),
4349        )
4350        .unwrap();
4351
4352        let (dst_width, dst_height) = (640, 360);
4353
4354        let dst = TensorDyn::image(
4355            dst_width,
4356            dst_height,
4357            PixelFormat::Yuyv,
4358            DType::U8,
4359            Some(TensorMemory::Dma),
4360        )
4361        .unwrap();
4362
4363        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
4364
4365        let (result, src, dst) = convert_img(
4366            &mut gl_converter,
4367            src,
4368            dst,
4369            Rotation::None,
4370            Flip::None,
4371            Crop::new()
4372                .with_dst_rect(Some(Rect::new(100, 100, 100, 100)))
4373                .with_dst_color(Some([255, 255, 255, 255])),
4374        );
4375        result.unwrap();
4376
4377        std::fs::write(
4378            "rgba_to_yuyv_opengl.yuyv",
4379            dst.as_u8().unwrap().map().unwrap().as_slice(),
4380        )
4381        .unwrap();
4382        let cpu_dst = TensorDyn::image(
4383            dst_width,
4384            dst_height,
4385            PixelFormat::Yuyv,
4386            DType::U8,
4387            Some(TensorMemory::Dma),
4388        )
4389        .unwrap();
4390        let (result, _src, cpu_dst) = convert_img(
4391            &mut CPUProcessor::new(),
4392            src,
4393            cpu_dst,
4394            Rotation::None,
4395            Flip::None,
4396            Crop::no_crop(),
4397        );
4398        result.unwrap();
4399
4400        compare_images_convert_to_rgb(&dst, &cpu_dst, 0.98, function!());
4401    }
4402
4403    #[test]
4404    #[cfg(target_os = "linux")]
4405    fn test_rgba_to_yuyv_resize_g2d() {
4406        if !is_g2d_available() {
4407            eprintln!(
4408                "SKIPPED: test_rgba_to_yuyv_resize_g2d - G2D library (libg2d.so.2) not available"
4409            );
4410            return;
4411        }
4412        if !is_dma_available() {
4413            eprintln!(
4414                "SKIPPED: test_rgba_to_yuyv_resize_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4415            );
4416            return;
4417        }
4418
4419        let src = load_bytes_to_tensor(
4420            1280,
4421            720,
4422            PixelFormat::Rgba,
4423            Some(TensorMemory::Dma),
4424            &edgefirst_bench::testdata::read("camera720p.rgba"),
4425        )
4426        .unwrap();
4427
4428        let (dst_width, dst_height) = (1280, 720);
4429
4430        let cpu_dst = TensorDyn::image(
4431            dst_width,
4432            dst_height,
4433            PixelFormat::Yuyv,
4434            DType::U8,
4435            Some(TensorMemory::Dma),
4436        )
4437        .unwrap();
4438
4439        let g2d_dst = TensorDyn::image(
4440            dst_width,
4441            dst_height,
4442            PixelFormat::Yuyv,
4443            DType::U8,
4444            Some(TensorMemory::Dma),
4445        )
4446        .unwrap();
4447
4448        let mut g2d_converter = G2DProcessor::new().unwrap();
4449        let crop = Crop {
4450            src_rect: None,
4451            dst_rect: Some(Rect::new(100, 100, 2, 2)),
4452            dst_color: None,
4453        };
4454
4455        g2d_dst
4456            .as_u8()
4457            .unwrap()
4458            .map()
4459            .unwrap()
4460            .as_mut_slice()
4461            .fill(128);
4462        let (result, src, g2d_dst) = convert_img(
4463            &mut g2d_converter,
4464            src,
4465            g2d_dst,
4466            Rotation::None,
4467            Flip::None,
4468            crop,
4469        );
4470        result.unwrap();
4471
4472        let cpu_dst_img = cpu_dst;
4473        cpu_dst_img
4474            .as_u8()
4475            .unwrap()
4476            .map()
4477            .unwrap()
4478            .as_mut_slice()
4479            .fill(128);
4480        let (result, _src, cpu_dst) = convert_img(
4481            &mut CPUProcessor::new(),
4482            src,
4483            cpu_dst_img,
4484            Rotation::None,
4485            Flip::None,
4486            crop,
4487        );
4488        result.unwrap();
4489
4490        compare_images_convert_to_rgb(&cpu_dst, &g2d_dst, 0.98, function!());
4491    }
4492
4493    #[test]
4494    fn test_yuyv_to_rgba_cpu() {
4495        let file = edgefirst_bench::testdata::read("camera720p.yuyv").to_vec();
4496        let src = TensorDyn::image(1280, 720, PixelFormat::Yuyv, DType::U8, None).unwrap();
4497        src.as_u8()
4498            .unwrap()
4499            .map()
4500            .unwrap()
4501            .as_mut_slice()
4502            .copy_from_slice(&file);
4503
4504        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
4505        let mut cpu_converter = CPUProcessor::new();
4506
4507        let (result, _src, dst) = convert_img(
4508            &mut cpu_converter,
4509            src,
4510            dst,
4511            Rotation::None,
4512            Flip::None,
4513            Crop::no_crop(),
4514        );
4515        result.unwrap();
4516
4517        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
4518        target_image
4519            .as_u8()
4520            .unwrap()
4521            .map()
4522            .unwrap()
4523            .as_mut_slice()
4524            .copy_from_slice(&edgefirst_bench::testdata::read("camera720p.rgba"));
4525
4526        compare_images(&dst, &target_image, 0.98, function!());
4527    }
4528
4529    #[test]
4530    fn test_yuyv_to_rgb_cpu() {
4531        let file = edgefirst_bench::testdata::read("camera720p.yuyv").to_vec();
4532        let src = TensorDyn::image(1280, 720, PixelFormat::Yuyv, DType::U8, None).unwrap();
4533        src.as_u8()
4534            .unwrap()
4535            .map()
4536            .unwrap()
4537            .as_mut_slice()
4538            .copy_from_slice(&file);
4539
4540        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
4541        let mut cpu_converter = CPUProcessor::new();
4542
4543        let (result, _src, dst) = convert_img(
4544            &mut cpu_converter,
4545            src,
4546            dst,
4547            Rotation::None,
4548            Flip::None,
4549            Crop::no_crop(),
4550        );
4551        result.unwrap();
4552
4553        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
4554        target_image
4555            .as_u8()
4556            .unwrap()
4557            .map()
4558            .unwrap()
4559            .as_mut_slice()
4560            .as_chunks_mut::<3>()
4561            .0
4562            .iter_mut()
4563            .zip(
4564                edgefirst_bench::testdata::read("camera720p.rgba")
4565                    .as_chunks::<4>()
4566                    .0,
4567            )
4568            .for_each(|(dst, src)| *dst = [src[0], src[1], src[2]]);
4569
4570        compare_images(&dst, &target_image, 0.98, function!());
4571    }
4572
4573    #[test]
4574    #[cfg(target_os = "linux")]
4575    fn test_yuyv_to_rgba_g2d() {
4576        if !is_g2d_available() {
4577            eprintln!("SKIPPED: test_yuyv_to_rgba_g2d - G2D library (libg2d.so.2) not available");
4578            return;
4579        }
4580        if !is_dma_available() {
4581            eprintln!(
4582                "SKIPPED: test_yuyv_to_rgba_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4583            );
4584            return;
4585        }
4586
4587        let src = load_bytes_to_tensor(
4588            1280,
4589            720,
4590            PixelFormat::Yuyv,
4591            None,
4592            &edgefirst_bench::testdata::read("camera720p.yuyv"),
4593        )
4594        .unwrap();
4595
4596        let dst = TensorDyn::image(
4597            1280,
4598            720,
4599            PixelFormat::Rgba,
4600            DType::U8,
4601            Some(TensorMemory::Dma),
4602        )
4603        .unwrap();
4604        let mut g2d_converter = G2DProcessor::new().unwrap();
4605
4606        let (result, _src, dst) = convert_img(
4607            &mut g2d_converter,
4608            src,
4609            dst,
4610            Rotation::None,
4611            Flip::None,
4612            Crop::no_crop(),
4613        );
4614        result.unwrap();
4615
4616        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
4617        target_image
4618            .as_u8()
4619            .unwrap()
4620            .map()
4621            .unwrap()
4622            .as_mut_slice()
4623            .copy_from_slice(&edgefirst_bench::testdata::read("camera720p.rgba"));
4624
4625        compare_images(&dst, &target_image, 0.98, function!());
4626    }
4627
4628    #[test]
4629    #[cfg(target_os = "linux")]
4630    #[cfg(feature = "opengl")]
4631    fn test_yuyv_to_rgba_opengl() {
4632        if !is_opengl_available() {
4633            eprintln!("SKIPPED: {} - OpenGL not available", function!());
4634            return;
4635        }
4636        if !is_dma_available() {
4637            eprintln!(
4638                "SKIPPED: {} - DMA memory allocation not available (permission denied or no DMA-BUF support)",
4639                function!()
4640            );
4641            return;
4642        }
4643
4644        let src = load_bytes_to_tensor(
4645            1280,
4646            720,
4647            PixelFormat::Yuyv,
4648            Some(TensorMemory::Dma),
4649            &edgefirst_bench::testdata::read("camera720p.yuyv"),
4650        )
4651        .unwrap();
4652
4653        let dst = TensorDyn::image(
4654            1280,
4655            720,
4656            PixelFormat::Rgba,
4657            DType::U8,
4658            Some(TensorMemory::Dma),
4659        )
4660        .unwrap();
4661        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
4662
4663        let (result, _src, dst) = convert_img(
4664            &mut gl_converter,
4665            src,
4666            dst,
4667            Rotation::None,
4668            Flip::None,
4669            Crop::no_crop(),
4670        );
4671        result.unwrap();
4672
4673        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
4674        target_image
4675            .as_u8()
4676            .unwrap()
4677            .map()
4678            .unwrap()
4679            .as_mut_slice()
4680            .copy_from_slice(&edgefirst_bench::testdata::read("camera720p.rgba"));
4681
4682        compare_images(&dst, &target_image, 0.98, function!());
4683    }
4684
4685    /// macOS analog of `test_yuyv_to_rgba_opengl` — drives the ANGLE +
4686    /// IOSurface backend end-to-end and compares against the same
4687    /// reference image. Skips silently if ANGLE isn't installed so the
4688    /// test suite still passes on CI hosts without the Homebrew tap.
4689    #[test]
4690    #[cfg(target_os = "macos")]
4691    #[cfg(feature = "opengl")]
4692    fn test_yuyv_to_rgba_opengl_macos() {
4693        let mut proc = match MacosGlProcessor::new() {
4694            Ok(p) => p,
4695            Err(e) => {
4696                eprintln!(
4697                    "SKIPPED: {} — MacosGlProcessor init failed ({e:?}). \
4698                     Install ANGLE via `brew install startergo/angle/angle` \
4699                     and re-sign per README.md § macOS GPU Acceleration to \
4700                     run this test.",
4701                    function!()
4702                );
4703                return;
4704            }
4705        };
4706
4707        let src = load_bytes_to_tensor(
4708            1280,
4709            720,
4710            PixelFormat::Yuyv,
4711            Some(TensorMemory::Dma),
4712            &edgefirst_bench::testdata::read("camera720p.yuyv"),
4713        )
4714        .unwrap();
4715
4716        let dst = TensorDyn::image(
4717            1280,
4718            720,
4719            PixelFormat::Rgba,
4720            DType::U8,
4721            Some(TensorMemory::Dma),
4722        )
4723        .unwrap();
4724
4725        let (result, _src, dst) = convert_img(
4726            &mut proc,
4727            src,
4728            dst,
4729            Rotation::None,
4730            Flip::None,
4731            Crop::no_crop(),
4732        );
4733        result.unwrap();
4734
4735        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
4736        target_image
4737            .as_u8()
4738            .unwrap()
4739            .map()
4740            .unwrap()
4741            .as_mut_slice()
4742            .copy_from_slice(&edgefirst_bench::testdata::read("camera720p.rgba"));
4743
4744        compare_images(&dst, &target_image, 0.98, function!());
4745    }
4746
4747    /// Multi-resolution smoke test: convert YUYV→RGBA via the GL
4748    /// backend at a small (64×32) frame and a 4K (3840×2160) frame,
4749    /// both filled with a synthetic mid-grey pattern. Validates the
4750    /// shader math at the chroma-pairing boundary on small textures
4751    /// and exercises the IOSurface bytes-per-row alignment path at 4K
4752    /// (3840 pixels × 2 bytes/pixel = 7680 bytes, naturally 64-aligned).
4753    ///
4754    /// Resolutions below 32 pixels wide aren't tested because the
4755    /// IOSurface allocator pads bpr to 64 bytes — for a 4-px-wide
4756    /// YUYV surface that's 8 bytes data + 56 bytes padding per row,
4757    /// which exercises a sampling pattern that's ANGLE-version
4758    /// dependent rather than HAL-correctness dependent.
4759    ///
4760    /// This complements `test_yuyv_to_rgba_opengl_macos` (which checks
4761    /// pixel-exact correctness against a reference image at 720p) by
4762    /// ensuring the pipeline does not crash or produce gross errors at
4763    /// resolution extremes. Pixel-exact validation at 4K would require
4764    /// a 30 MB reference file we don't want to bundle.
4765    #[test]
4766    #[cfg(target_os = "macos")]
4767    #[cfg(feature = "opengl")]
4768    fn test_yuyv_to_rgba_opengl_macos_multi_resolution() {
4769        let mut proc = match MacosGlProcessor::new() {
4770            Ok(p) => p,
4771            Err(e) => {
4772                eprintln!(
4773                    "SKIPPED: {} — MacosGlProcessor init failed ({e:?})",
4774                    function!()
4775                );
4776                return;
4777            }
4778        };
4779
4780        for (w, h) in [(64usize, 32usize), (3840, 2160)] {
4781            // Synthetic YUYV: Y=128 (mid-grey luma), U=V=128 (neutral
4782            // chroma) → RGB grey at the output.
4783            let bytes_per_row = w * 2;
4784            let mut yuyv = vec![0u8; bytes_per_row * h];
4785            for chunk in yuyv.chunks_exact_mut(4) {
4786                chunk[0] = 128; // Y0
4787                chunk[1] = 128; // U
4788                chunk[2] = 128; // Y1
4789                chunk[3] = 128; // V
4790            }
4791
4792            let src = load_bytes_to_tensor(w, h, PixelFormat::Yuyv, Some(TensorMemory::Dma), &yuyv)
4793                .unwrap();
4794
4795            let dst = TensorDyn::image(w, h, PixelFormat::Rgba, DType::U8, Some(TensorMemory::Dma))
4796                .unwrap();
4797
4798            let (result, _src, dst) = convert_img(
4799                &mut proc,
4800                src,
4801                dst,
4802                Rotation::None,
4803                Flip::None,
4804                Crop::no_crop(),
4805            );
4806            result.expect("GL convert should succeed at this resolution");
4807
4808            // The neutral-chroma input must produce a near-grey output;
4809            // BT.709 limited-range maps Y=128/UV=128 → roughly
4810            // (130, 130, 130). Allow ±4 LSB for `mediump float` shader
4811            // rounding.
4812            let dst_u8 = dst.as_u8().unwrap();
4813            let dst_map = dst_u8.map().unwrap();
4814            let dst_bytes = dst_map.as_slice();
4815            assert_eq!(dst_bytes.len(), w * h * 4, "RGBA byte count");
4816            for px in dst_bytes.chunks_exact(4) {
4817                for (i, &c) in px[..3].iter().enumerate() {
4818                    assert!(
4819                        (120..=140).contains(&c),
4820                        "{}: channel {i} = {c} (expected ~128 ±12) at {w}×{h}",
4821                        function!(),
4822                    );
4823                }
4824                assert_eq!(px[3], 255, "alpha must be 1.0");
4825            }
4826        }
4827    }
4828
4829    /// Verify that two consecutive convert() calls on the same source
4830    /// tensor reuse the cached EGL pbuffer. Tests the cache hit path
4831    /// added with the macOS GL backend hardening — without it, each
4832    /// frame would pay `eglCreatePbufferFromClientBuffer` + destroy.
4833    ///
4834    /// This is a behaviour test rather than a perf test (the timing
4835    /// difference is 100-200µs which is too noisy to assert on); we
4836    /// check that the second call succeeds and produces a result
4837    /// identical to the first.
4838    #[test]
4839    #[cfg(target_os = "macos")]
4840    #[cfg(feature = "opengl")]
4841    fn test_macos_gl_pbuffer_cache_reuses_surfaces() {
4842        let mut proc = match MacosGlProcessor::new() {
4843            Ok(p) => p,
4844            Err(e) => {
4845                eprintln!(
4846                    "SKIPPED: {} — MacosGlProcessor init failed ({e:?})",
4847                    function!()
4848                );
4849                return;
4850            }
4851        };
4852
4853        // Allocate one source + one destination, run convert twice.
4854        let mut yuyv = vec![0u8; 64 * 32 * 2];
4855        for chunk in yuyv.chunks_exact_mut(4) {
4856            chunk[0] = 200;
4857            chunk[1] = 100;
4858            chunk[2] = 200;
4859            chunk[3] = 156;
4860        }
4861        let src = load_bytes_to_tensor(64, 32, PixelFormat::Yuyv, Some(TensorMemory::Dma), &yuyv)
4862            .unwrap();
4863        let dst = TensorDyn::image(
4864            64,
4865            32,
4866            PixelFormat::Rgba,
4867            DType::U8,
4868            Some(TensorMemory::Dma),
4869        )
4870        .unwrap();
4871
4872        let (r1, src, dst) = convert_img(
4873            &mut proc,
4874            src,
4875            dst,
4876            Rotation::None,
4877            Flip::None,
4878            Crop::no_crop(),
4879        );
4880        r1.unwrap();
4881        let first: Vec<u8> = dst.as_u8().unwrap().map().unwrap().as_slice().to_vec();
4882
4883        let (r2, _src, dst) = convert_img(
4884            &mut proc,
4885            src,
4886            dst,
4887            Rotation::None,
4888            Flip::None,
4889            Crop::no_crop(),
4890        );
4891        r2.unwrap();
4892        let second: Vec<u8> = dst.as_u8().unwrap().map().unwrap().as_slice().to_vec();
4893
4894        assert_eq!(first, second, "cache-hit conversion must be deterministic");
4895    }
4896
4897    #[test]
4898    #[cfg(target_os = "linux")]
4899    fn test_yuyv_to_rgb_g2d() {
4900        if !is_g2d_available() {
4901            eprintln!("SKIPPED: test_yuyv_to_rgb_g2d - G2D library (libg2d.so.2) not available");
4902            return;
4903        }
4904        if !is_dma_available() {
4905            eprintln!(
4906                "SKIPPED: test_yuyv_to_rgb_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4907            );
4908            return;
4909        }
4910
4911        let src = load_bytes_to_tensor(
4912            1280,
4913            720,
4914            PixelFormat::Yuyv,
4915            None,
4916            &edgefirst_bench::testdata::read("camera720p.yuyv"),
4917        )
4918        .unwrap();
4919
4920        let g2d_dst = TensorDyn::image(
4921            1280,
4922            720,
4923            PixelFormat::Rgb,
4924            DType::U8,
4925            Some(TensorMemory::Dma),
4926        )
4927        .unwrap();
4928        let mut g2d_converter = G2DProcessor::new().unwrap();
4929
4930        let (result, src, g2d_dst) = convert_img(
4931            &mut g2d_converter,
4932            src,
4933            g2d_dst,
4934            Rotation::None,
4935            Flip::None,
4936            Crop::no_crop(),
4937        );
4938        result.unwrap();
4939
4940        let cpu_dst = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
4941        let mut cpu_converter: CPUProcessor = CPUProcessor::new();
4942
4943        let (result, _src, cpu_dst) = convert_img(
4944            &mut cpu_converter,
4945            src,
4946            cpu_dst,
4947            Rotation::None,
4948            Flip::None,
4949            Crop::no_crop(),
4950        );
4951        result.unwrap();
4952
4953        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
4954    }
4955
4956    #[test]
4957    #[cfg(target_os = "linux")]
4958    fn test_yuyv_to_yuyv_resize_g2d() {
4959        if !is_g2d_available() {
4960            eprintln!(
4961                "SKIPPED: test_yuyv_to_yuyv_resize_g2d - G2D library (libg2d.so.2) not available"
4962            );
4963            return;
4964        }
4965        if !is_dma_available() {
4966            eprintln!(
4967                "SKIPPED: test_yuyv_to_yuyv_resize_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4968            );
4969            return;
4970        }
4971
4972        let src = load_bytes_to_tensor(
4973            1280,
4974            720,
4975            PixelFormat::Yuyv,
4976            None,
4977            &edgefirst_bench::testdata::read("camera720p.yuyv"),
4978        )
4979        .unwrap();
4980
4981        let g2d_dst = TensorDyn::image(
4982            600,
4983            400,
4984            PixelFormat::Yuyv,
4985            DType::U8,
4986            Some(TensorMemory::Dma),
4987        )
4988        .unwrap();
4989        let mut g2d_converter = G2DProcessor::new().unwrap();
4990
4991        let (result, src, g2d_dst) = convert_img(
4992            &mut g2d_converter,
4993            src,
4994            g2d_dst,
4995            Rotation::None,
4996            Flip::None,
4997            Crop::no_crop(),
4998        );
4999        result.unwrap();
5000
5001        let cpu_dst = TensorDyn::image(600, 400, PixelFormat::Yuyv, DType::U8, None).unwrap();
5002        let mut cpu_converter: CPUProcessor = CPUProcessor::new();
5003
5004        let (result, _src, cpu_dst) = convert_img(
5005            &mut cpu_converter,
5006            src,
5007            cpu_dst,
5008            Rotation::None,
5009            Flip::None,
5010            Crop::no_crop(),
5011        );
5012        result.unwrap();
5013
5014        // TODO: compare PixelFormat::Yuyv and PixelFormat::Yuyv images without having to convert them to PixelFormat::Rgb
5015        compare_images_convert_to_rgb(&g2d_dst, &cpu_dst, 0.98, function!());
5016    }
5017
5018    #[test]
5019    fn test_yuyv_to_rgba_resize_cpu() {
5020        let src = load_bytes_to_tensor(
5021            1280,
5022            720,
5023            PixelFormat::Yuyv,
5024            None,
5025            &edgefirst_bench::testdata::read("camera720p.yuyv"),
5026        )
5027        .unwrap();
5028
5029        let (dst_width, dst_height) = (960, 540);
5030
5031        let dst =
5032            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
5033        let mut cpu_converter = CPUProcessor::new();
5034
5035        let (result, _src, dst) = convert_img(
5036            &mut cpu_converter,
5037            src,
5038            dst,
5039            Rotation::None,
5040            Flip::None,
5041            Crop::no_crop(),
5042        );
5043        result.unwrap();
5044
5045        let dst_target =
5046            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
5047        let src_target = load_bytes_to_tensor(
5048            1280,
5049            720,
5050            PixelFormat::Rgba,
5051            None,
5052            &edgefirst_bench::testdata::read("camera720p.rgba"),
5053        )
5054        .unwrap();
5055        let (result, _src_target, dst_target) = convert_img(
5056            &mut cpu_converter,
5057            src_target,
5058            dst_target,
5059            Rotation::None,
5060            Flip::None,
5061            Crop::no_crop(),
5062        );
5063        result.unwrap();
5064
5065        compare_images(&dst, &dst_target, 0.98, function!());
5066    }
5067
5068    #[test]
5069    #[cfg(target_os = "linux")]
5070    fn test_yuyv_to_rgba_crop_flip_g2d() {
5071        if !is_g2d_available() {
5072            eprintln!(
5073                "SKIPPED: test_yuyv_to_rgba_crop_flip_g2d - G2D library (libg2d.so.2) not available"
5074            );
5075            return;
5076        }
5077        if !is_dma_available() {
5078            eprintln!(
5079                "SKIPPED: test_yuyv_to_rgba_crop_flip_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
5080            );
5081            return;
5082        }
5083
5084        let src = load_bytes_to_tensor(
5085            1280,
5086            720,
5087            PixelFormat::Yuyv,
5088            Some(TensorMemory::Dma),
5089            &edgefirst_bench::testdata::read("camera720p.yuyv"),
5090        )
5091        .unwrap();
5092
5093        let (dst_width, dst_height) = (640, 640);
5094
5095        let dst_g2d = TensorDyn::image(
5096            dst_width,
5097            dst_height,
5098            PixelFormat::Rgba,
5099            DType::U8,
5100            Some(TensorMemory::Dma),
5101        )
5102        .unwrap();
5103        let mut g2d_converter = G2DProcessor::new().unwrap();
5104        let crop = Crop {
5105            src_rect: Some(Rect {
5106                left: 20,
5107                top: 15,
5108                width: 400,
5109                height: 300,
5110            }),
5111            dst_rect: None,
5112            dst_color: None,
5113        };
5114
5115        let (result, src, dst_g2d) = convert_img(
5116            &mut g2d_converter,
5117            src,
5118            dst_g2d,
5119            Rotation::None,
5120            Flip::Horizontal,
5121            crop,
5122        );
5123        result.unwrap();
5124
5125        let dst_cpu = TensorDyn::image(
5126            dst_width,
5127            dst_height,
5128            PixelFormat::Rgba,
5129            DType::U8,
5130            Some(TensorMemory::Dma),
5131        )
5132        .unwrap();
5133        let mut cpu_converter = CPUProcessor::new();
5134
5135        let (result, _src, dst_cpu) = convert_img(
5136            &mut cpu_converter,
5137            src,
5138            dst_cpu,
5139            Rotation::None,
5140            Flip::Horizontal,
5141            crop,
5142        );
5143        result.unwrap();
5144        compare_images(&dst_g2d, &dst_cpu, 0.98, function!());
5145    }
5146
5147    #[test]
5148    #[cfg(target_os = "linux")]
5149    #[cfg(feature = "opengl")]
5150    fn test_yuyv_to_rgba_crop_flip_opengl() {
5151        if !is_opengl_available() {
5152            eprintln!("SKIPPED: {} - OpenGL not available", function!());
5153            return;
5154        }
5155
5156        if !is_dma_available() {
5157            eprintln!(
5158                "SKIPPED: {} - DMA memory allocation not available (permission denied or no DMA-BUF support)",
5159                function!()
5160            );
5161            return;
5162        }
5163
5164        let src = load_bytes_to_tensor(
5165            1280,
5166            720,
5167            PixelFormat::Yuyv,
5168            Some(TensorMemory::Dma),
5169            &edgefirst_bench::testdata::read("camera720p.yuyv"),
5170        )
5171        .unwrap();
5172
5173        let (dst_width, dst_height) = (640, 640);
5174
5175        let dst_gl = TensorDyn::image(
5176            dst_width,
5177            dst_height,
5178            PixelFormat::Rgba,
5179            DType::U8,
5180            Some(TensorMemory::Dma),
5181        )
5182        .unwrap();
5183        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
5184        let crop = Crop {
5185            src_rect: Some(Rect {
5186                left: 20,
5187                top: 15,
5188                width: 400,
5189                height: 300,
5190            }),
5191            dst_rect: None,
5192            dst_color: None,
5193        };
5194
5195        let (result, src, dst_gl) = convert_img(
5196            &mut gl_converter,
5197            src,
5198            dst_gl,
5199            Rotation::None,
5200            Flip::Horizontal,
5201            crop,
5202        );
5203        result.unwrap();
5204
5205        let dst_cpu = TensorDyn::image(
5206            dst_width,
5207            dst_height,
5208            PixelFormat::Rgba,
5209            DType::U8,
5210            Some(TensorMemory::Dma),
5211        )
5212        .unwrap();
5213        let mut cpu_converter = CPUProcessor::new();
5214
5215        let (result, _src, dst_cpu) = convert_img(
5216            &mut cpu_converter,
5217            src,
5218            dst_cpu,
5219            Rotation::None,
5220            Flip::Horizontal,
5221            crop,
5222        );
5223        result.unwrap();
5224        compare_images(&dst_gl, &dst_cpu, 0.98, function!());
5225    }
5226
5227    #[test]
5228    fn test_vyuy_to_rgba_cpu() {
5229        let file = edgefirst_bench::testdata::read("camera720p.vyuy").to_vec();
5230        let src = TensorDyn::image(1280, 720, PixelFormat::Vyuy, DType::U8, None).unwrap();
5231        src.as_u8()
5232            .unwrap()
5233            .map()
5234            .unwrap()
5235            .as_mut_slice()
5236            .copy_from_slice(&file);
5237
5238        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
5239        let mut cpu_converter = CPUProcessor::new();
5240
5241        let (result, _src, dst) = convert_img(
5242            &mut cpu_converter,
5243            src,
5244            dst,
5245            Rotation::None,
5246            Flip::None,
5247            Crop::no_crop(),
5248        );
5249        result.unwrap();
5250
5251        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
5252        target_image
5253            .as_u8()
5254            .unwrap()
5255            .map()
5256            .unwrap()
5257            .as_mut_slice()
5258            .copy_from_slice(&edgefirst_bench::testdata::read("camera720p.rgba"));
5259
5260        compare_images(&dst, &target_image, 0.98, function!());
5261    }
5262
5263    #[test]
5264    fn test_vyuy_to_rgb_cpu() {
5265        let file = edgefirst_bench::testdata::read("camera720p.vyuy").to_vec();
5266        let src = TensorDyn::image(1280, 720, PixelFormat::Vyuy, DType::U8, None).unwrap();
5267        src.as_u8()
5268            .unwrap()
5269            .map()
5270            .unwrap()
5271            .as_mut_slice()
5272            .copy_from_slice(&file);
5273
5274        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
5275        let mut cpu_converter = CPUProcessor::new();
5276
5277        let (result, _src, dst) = convert_img(
5278            &mut cpu_converter,
5279            src,
5280            dst,
5281            Rotation::None,
5282            Flip::None,
5283            Crop::no_crop(),
5284        );
5285        result.unwrap();
5286
5287        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
5288        target_image
5289            .as_u8()
5290            .unwrap()
5291            .map()
5292            .unwrap()
5293            .as_mut_slice()
5294            .as_chunks_mut::<3>()
5295            .0
5296            .iter_mut()
5297            .zip(
5298                edgefirst_bench::testdata::read("camera720p.rgba")
5299                    .as_chunks::<4>()
5300                    .0,
5301            )
5302            .for_each(|(dst, src)| *dst = [src[0], src[1], src[2]]);
5303
5304        compare_images(&dst, &target_image, 0.98, function!());
5305    }
5306
5307    #[test]
5308    #[cfg(target_os = "linux")]
5309    #[ignore = "G2D does not support VYUY; re-enable when hardware support is added"]
5310    fn test_vyuy_to_rgba_g2d() {
5311        if !is_g2d_available() {
5312            eprintln!("SKIPPED: test_vyuy_to_rgba_g2d - G2D library (libg2d.so.2) not available");
5313            return;
5314        }
5315        if !is_dma_available() {
5316            eprintln!(
5317                "SKIPPED: test_vyuy_to_rgba_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
5318            );
5319            return;
5320        }
5321
5322        let src = load_bytes_to_tensor(
5323            1280,
5324            720,
5325            PixelFormat::Vyuy,
5326            None,
5327            &edgefirst_bench::testdata::read("camera720p.vyuy"),
5328        )
5329        .unwrap();
5330
5331        let dst = TensorDyn::image(
5332            1280,
5333            720,
5334            PixelFormat::Rgba,
5335            DType::U8,
5336            Some(TensorMemory::Dma),
5337        )
5338        .unwrap();
5339        let mut g2d_converter = G2DProcessor::new().unwrap();
5340
5341        let (result, _src, dst) = convert_img(
5342            &mut g2d_converter,
5343            src,
5344            dst,
5345            Rotation::None,
5346            Flip::None,
5347            Crop::no_crop(),
5348        );
5349        match result {
5350            Err(Error::G2D(_)) => {
5351                eprintln!("SKIPPED: test_vyuy_to_rgba_g2d - G2D does not support PixelFormat::Vyuy format");
5352                return;
5353            }
5354            r => r.unwrap(),
5355        }
5356
5357        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
5358        target_image
5359            .as_u8()
5360            .unwrap()
5361            .map()
5362            .unwrap()
5363            .as_mut_slice()
5364            .copy_from_slice(&edgefirst_bench::testdata::read("camera720p.rgba"));
5365
5366        compare_images(&dst, &target_image, 0.98, function!());
5367    }
5368
5369    #[test]
5370    #[cfg(target_os = "linux")]
5371    #[ignore = "G2D does not support VYUY; re-enable when hardware support is added"]
5372    fn test_vyuy_to_rgb_g2d() {
5373        if !is_g2d_available() {
5374            eprintln!("SKIPPED: test_vyuy_to_rgb_g2d - G2D library (libg2d.so.2) not available");
5375            return;
5376        }
5377        if !is_dma_available() {
5378            eprintln!(
5379                "SKIPPED: test_vyuy_to_rgb_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
5380            );
5381            return;
5382        }
5383
5384        let src = load_bytes_to_tensor(
5385            1280,
5386            720,
5387            PixelFormat::Vyuy,
5388            None,
5389            &edgefirst_bench::testdata::read("camera720p.vyuy"),
5390        )
5391        .unwrap();
5392
5393        let g2d_dst = TensorDyn::image(
5394            1280,
5395            720,
5396            PixelFormat::Rgb,
5397            DType::U8,
5398            Some(TensorMemory::Dma),
5399        )
5400        .unwrap();
5401        let mut g2d_converter = G2DProcessor::new().unwrap();
5402
5403        let (result, src, g2d_dst) = convert_img(
5404            &mut g2d_converter,
5405            src,
5406            g2d_dst,
5407            Rotation::None,
5408            Flip::None,
5409            Crop::no_crop(),
5410        );
5411        match result {
5412            Err(Error::G2D(_)) => {
5413                eprintln!(
5414                    "SKIPPED: test_vyuy_to_rgb_g2d - G2D does not support PixelFormat::Vyuy format"
5415                );
5416                return;
5417            }
5418            r => r.unwrap(),
5419        }
5420
5421        let cpu_dst = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
5422        let mut cpu_converter: CPUProcessor = CPUProcessor::new();
5423
5424        let (result, _src, cpu_dst) = convert_img(
5425            &mut cpu_converter,
5426            src,
5427            cpu_dst,
5428            Rotation::None,
5429            Flip::None,
5430            Crop::no_crop(),
5431        );
5432        result.unwrap();
5433
5434        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
5435    }
5436
5437    #[test]
5438    #[cfg(target_os = "linux")]
5439    #[cfg(feature = "opengl")]
5440    fn test_vyuy_to_rgba_opengl() {
5441        if !is_opengl_available() {
5442            eprintln!("SKIPPED: {} - OpenGL not available", function!());
5443            return;
5444        }
5445        if !is_dma_available() {
5446            eprintln!(
5447                "SKIPPED: {} - DMA memory allocation not available (permission denied or no DMA-BUF support)",
5448                function!()
5449            );
5450            return;
5451        }
5452
5453        let src = load_bytes_to_tensor(
5454            1280,
5455            720,
5456            PixelFormat::Vyuy,
5457            Some(TensorMemory::Dma),
5458            &edgefirst_bench::testdata::read("camera720p.vyuy"),
5459        )
5460        .unwrap();
5461
5462        let dst = TensorDyn::image(
5463            1280,
5464            720,
5465            PixelFormat::Rgba,
5466            DType::U8,
5467            Some(TensorMemory::Dma),
5468        )
5469        .unwrap();
5470        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
5471
5472        let (result, _src, dst) = convert_img(
5473            &mut gl_converter,
5474            src,
5475            dst,
5476            Rotation::None,
5477            Flip::None,
5478            Crop::no_crop(),
5479        );
5480        match result {
5481            Err(Error::NotSupported(_)) => {
5482                eprintln!(
5483                    "SKIPPED: {} - OpenGL does not support PixelFormat::Vyuy DMA format",
5484                    function!()
5485                );
5486                return;
5487            }
5488            r => r.unwrap(),
5489        }
5490
5491        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
5492        target_image
5493            .as_u8()
5494            .unwrap()
5495            .map()
5496            .unwrap()
5497            .as_mut_slice()
5498            .copy_from_slice(&edgefirst_bench::testdata::read("camera720p.rgba"));
5499
5500        compare_images(&dst, &target_image, 0.98, function!());
5501    }
5502
5503    #[test]
5504    fn test_nv12_to_rgba_cpu() {
5505        let file = edgefirst_bench::testdata::read("zidane.nv12").to_vec();
5506        let src = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
5507        src.as_u8().unwrap().map().unwrap().as_mut_slice()[0..(1280 * 720 * 3 / 2)]
5508            .copy_from_slice(&file);
5509
5510        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
5511        let mut cpu_converter = CPUProcessor::new();
5512
5513        let (result, _src, dst) = convert_img(
5514            &mut cpu_converter,
5515            src,
5516            dst,
5517            Rotation::None,
5518            Flip::None,
5519            Crop::no_crop(),
5520        );
5521        result.unwrap();
5522
5523        let target_image = crate::load_image_test_helper(
5524            &edgefirst_bench::testdata::read("zidane.jpg"),
5525            Some(PixelFormat::Rgba),
5526            None,
5527        )
5528        .unwrap();
5529
5530        compare_images(&dst, &target_image, 0.98, function!());
5531    }
5532
5533    #[test]
5534    fn test_nv12_to_rgb_cpu() {
5535        let file = edgefirst_bench::testdata::read("zidane.nv12").to_vec();
5536        let src = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
5537        src.as_u8().unwrap().map().unwrap().as_mut_slice()[0..(1280 * 720 * 3 / 2)]
5538            .copy_from_slice(&file);
5539
5540        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
5541        let mut cpu_converter = CPUProcessor::new();
5542
5543        let (result, _src, dst) = convert_img(
5544            &mut cpu_converter,
5545            src,
5546            dst,
5547            Rotation::None,
5548            Flip::None,
5549            Crop::no_crop(),
5550        );
5551        result.unwrap();
5552
5553        let target_image = crate::load_image_test_helper(
5554            &edgefirst_bench::testdata::read("zidane.jpg"),
5555            Some(PixelFormat::Rgb),
5556            None,
5557        )
5558        .unwrap();
5559
5560        compare_images(&dst, &target_image, 0.98, function!());
5561    }
5562
5563    #[test]
5564    fn test_nv12_to_grey_cpu() {
5565        let file = edgefirst_bench::testdata::read("zidane.nv12").to_vec();
5566        let src = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
5567        src.as_u8().unwrap().map().unwrap().as_mut_slice()[0..(1280 * 720 * 3 / 2)]
5568            .copy_from_slice(&file);
5569
5570        let dst = TensorDyn::image(1280, 720, PixelFormat::Grey, DType::U8, None).unwrap();
5571        let mut cpu_converter = CPUProcessor::new();
5572
5573        let (result, _src, dst) = convert_img(
5574            &mut cpu_converter,
5575            src,
5576            dst,
5577            Rotation::None,
5578            Flip::None,
5579            Crop::no_crop(),
5580        );
5581        result.unwrap();
5582
5583        let target_image = crate::load_image_test_helper(
5584            &edgefirst_bench::testdata::read("zidane.jpg"),
5585            Some(PixelFormat::Grey),
5586            None,
5587        )
5588        .unwrap();
5589
5590        compare_images(&dst, &target_image, 0.98, function!());
5591    }
5592
5593    #[test]
5594    fn test_nv12_to_yuyv_cpu() {
5595        let file = edgefirst_bench::testdata::read("zidane.nv12").to_vec();
5596        let src = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
5597        src.as_u8().unwrap().map().unwrap().as_mut_slice()[0..(1280 * 720 * 3 / 2)]
5598            .copy_from_slice(&file);
5599
5600        let dst = TensorDyn::image(1280, 720, PixelFormat::Yuyv, DType::U8, None).unwrap();
5601        let mut cpu_converter = CPUProcessor::new();
5602
5603        let (result, _src, dst) = convert_img(
5604            &mut cpu_converter,
5605            src,
5606            dst,
5607            Rotation::None,
5608            Flip::None,
5609            Crop::no_crop(),
5610        );
5611        result.unwrap();
5612
5613        let target_image = crate::load_image_test_helper(
5614            &edgefirst_bench::testdata::read("zidane.jpg"),
5615            Some(PixelFormat::Rgb),
5616            None,
5617        )
5618        .unwrap();
5619
5620        compare_images_convert_to_rgb(&dst, &target_image, 0.98, function!());
5621    }
5622
5623    #[test]
5624    fn test_cpu_resize_planar_rgb() {
5625        let src = TensorDyn::image(4, 4, PixelFormat::Rgba, DType::U8, None).unwrap();
5626        #[rustfmt::skip]
5627        let src_image = [
5628                    255, 0, 0, 255,     0, 255, 0, 255,     0, 0, 255, 255,     255, 255, 0, 255,
5629                    255, 0, 0, 0,       0, 0, 0, 255,       255,  0, 255, 0,    255, 0, 255, 255,
5630                    0, 0, 255, 0,       0, 255, 255, 255,   255, 255, 0, 0,     0, 0, 0, 255,
5631                    255, 0, 0, 0,       0, 0, 0, 255,       255,  0, 255, 0,    255, 0, 255, 255,
5632        ];
5633        src.as_u8()
5634            .unwrap()
5635            .map()
5636            .unwrap()
5637            .as_mut_slice()
5638            .copy_from_slice(&src_image);
5639
5640        let cpu_dst = TensorDyn::image(5, 5, PixelFormat::PlanarRgb, DType::U8, None).unwrap();
5641        let mut cpu_converter = CPUProcessor::new();
5642
5643        let (result, _src, cpu_dst) = convert_img(
5644            &mut cpu_converter,
5645            src,
5646            cpu_dst,
5647            Rotation::None,
5648            Flip::None,
5649            Crop::new()
5650                .with_dst_rect(Some(Rect {
5651                    left: 1,
5652                    top: 1,
5653                    width: 4,
5654                    height: 4,
5655                }))
5656                .with_dst_color(Some([114, 114, 114, 255])),
5657        );
5658        result.unwrap();
5659
5660        #[rustfmt::skip]
5661        let expected_dst = [
5662            114, 114, 114, 114, 114,    114, 255, 0, 0, 255,    114, 255, 0, 255, 255,      114, 0, 0, 255, 0,        114, 255, 0, 255, 255,
5663            114, 114, 114, 114, 114,    114, 0, 255, 0, 255,    114, 0, 0, 0, 0,            114, 0, 255, 255, 0,      114, 0, 0, 0, 0,
5664            114, 114, 114, 114, 114,    114, 0, 0, 255, 0,      114, 0, 0, 255, 255,        114, 255, 255, 0, 0,      114, 0, 0, 255, 255,
5665        ];
5666
5667        assert_eq!(
5668            cpu_dst.as_u8().unwrap().map().unwrap().as_slice(),
5669            &expected_dst
5670        );
5671    }
5672
5673    #[test]
5674    fn test_cpu_resize_planar_rgba() {
5675        let src = TensorDyn::image(4, 4, PixelFormat::Rgba, DType::U8, None).unwrap();
5676        #[rustfmt::skip]
5677        let src_image = [
5678                    255, 0, 0, 255,     0, 255, 0, 255,     0, 0, 255, 255,     255, 255, 0, 255,
5679                    255, 0, 0, 0,       0, 0, 0, 255,       255,  0, 255, 0,    255, 0, 255, 255,
5680                    0, 0, 255, 0,       0, 255, 255, 255,   255, 255, 0, 0,     0, 0, 0, 255,
5681                    255, 0, 0, 0,       0, 0, 0, 255,       255,  0, 255, 0,    255, 0, 255, 255,
5682        ];
5683        src.as_u8()
5684            .unwrap()
5685            .map()
5686            .unwrap()
5687            .as_mut_slice()
5688            .copy_from_slice(&src_image);
5689
5690        let cpu_dst = TensorDyn::image(5, 5, PixelFormat::PlanarRgba, DType::U8, None).unwrap();
5691        let mut cpu_converter = CPUProcessor::new();
5692
5693        let (result, _src, cpu_dst) = convert_img(
5694            &mut cpu_converter,
5695            src,
5696            cpu_dst,
5697            Rotation::None,
5698            Flip::None,
5699            Crop::new()
5700                .with_dst_rect(Some(Rect {
5701                    left: 1,
5702                    top: 1,
5703                    width: 4,
5704                    height: 4,
5705                }))
5706                .with_dst_color(Some([114, 114, 114, 255])),
5707        );
5708        result.unwrap();
5709
5710        #[rustfmt::skip]
5711        let expected_dst = [
5712            114, 114, 114, 114, 114,    114, 255, 0, 0, 255,        114, 255, 0, 255, 255,      114, 0, 0, 255, 0,        114, 255, 0, 255, 255,
5713            114, 114, 114, 114, 114,    114, 0, 255, 0, 255,        114, 0, 0, 0, 0,            114, 0, 255, 255, 0,      114, 0, 0, 0, 0,
5714            114, 114, 114, 114, 114,    114, 0, 0, 255, 0,          114, 0, 0, 255, 255,        114, 255, 255, 0, 0,      114, 0, 0, 255, 255,
5715            255, 255, 255, 255, 255,    255, 255, 255, 255, 255,    255, 0, 255, 0, 255,        255, 0, 255, 0, 255,      255, 0, 255, 0, 255,
5716        ];
5717
5718        assert_eq!(
5719            cpu_dst.as_u8().unwrap().map().unwrap().as_slice(),
5720            &expected_dst
5721        );
5722    }
5723
5724    #[test]
5725    #[cfg(target_os = "linux")]
5726    #[cfg(feature = "opengl")]
5727    fn test_opengl_resize_planar_rgb() {
5728        if !is_opengl_available() {
5729            eprintln!("SKIPPED: {} - OpenGL not available", function!());
5730            return;
5731        }
5732
5733        if !is_dma_available() {
5734            eprintln!(
5735                "SKIPPED: {} - DMA memory allocation not available (permission denied or no DMA-BUF support)",
5736                function!()
5737            );
5738            return;
5739        }
5740
5741        let dst_width = 640;
5742        let dst_height = 640;
5743        let file = edgefirst_bench::testdata::read("test_image.jpg").to_vec();
5744        let src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
5745
5746        let cpu_dst = TensorDyn::image(
5747            dst_width,
5748            dst_height,
5749            PixelFormat::PlanarRgb,
5750            DType::U8,
5751            None,
5752        )
5753        .unwrap();
5754        let mut cpu_converter = CPUProcessor::new();
5755        let (result, src, cpu_dst) = convert_img(
5756            &mut cpu_converter,
5757            src,
5758            cpu_dst,
5759            Rotation::None,
5760            Flip::None,
5761            Crop::no_crop(),
5762        );
5763        result.unwrap();
5764        let crop_letterbox = Crop::new()
5765            .with_dst_rect(Some(Rect {
5766                left: 102,
5767                top: 102,
5768                width: 440,
5769                height: 440,
5770            }))
5771            .with_dst_color(Some([114, 114, 114, 114]));
5772        let (result, src, cpu_dst) = convert_img(
5773            &mut cpu_converter,
5774            src,
5775            cpu_dst,
5776            Rotation::None,
5777            Flip::None,
5778            crop_letterbox,
5779        );
5780        result.unwrap();
5781
5782        let gl_dst = TensorDyn::image(
5783            dst_width,
5784            dst_height,
5785            PixelFormat::PlanarRgb,
5786            DType::U8,
5787            None,
5788        )
5789        .unwrap();
5790        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
5791
5792        let (result, _src, gl_dst) = convert_img(
5793            &mut gl_converter,
5794            src,
5795            gl_dst,
5796            Rotation::None,
5797            Flip::None,
5798            crop_letterbox,
5799        );
5800        result.unwrap();
5801        compare_images(&gl_dst, &cpu_dst, 0.98, function!());
5802    }
5803
5804    #[test]
5805    fn test_cpu_resize_nv16() {
5806        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
5807        let src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
5808
5809        let cpu_nv16_dst = TensorDyn::image(640, 640, PixelFormat::Nv16, DType::U8, None).unwrap();
5810        let cpu_rgb_dst = TensorDyn::image(640, 640, PixelFormat::Rgb, DType::U8, None).unwrap();
5811        let mut cpu_converter = CPUProcessor::new();
5812        let crop = Crop::new()
5813            .with_dst_rect(Some(Rect {
5814                left: 20,
5815                top: 140,
5816                width: 600,
5817                height: 360,
5818            }))
5819            .with_dst_color(Some([255, 128, 0, 255]));
5820
5821        let (result, src, cpu_nv16_dst) = convert_img(
5822            &mut cpu_converter,
5823            src,
5824            cpu_nv16_dst,
5825            Rotation::None,
5826            Flip::None,
5827            crop,
5828        );
5829        result.unwrap();
5830
5831        let (result, _src, cpu_rgb_dst) = convert_img(
5832            &mut cpu_converter,
5833            src,
5834            cpu_rgb_dst,
5835            Rotation::None,
5836            Flip::None,
5837            crop,
5838        );
5839        result.unwrap();
5840        compare_images_convert_to_rgb(&cpu_nv16_dst, &cpu_rgb_dst, 0.99, function!());
5841    }
5842
5843    fn load_bytes_to_tensor(
5844        width: usize,
5845        height: usize,
5846        format: PixelFormat,
5847        memory: Option<TensorMemory>,
5848        bytes: &[u8],
5849    ) -> Result<TensorDyn, Error> {
5850        let src = TensorDyn::image(width, height, format, DType::U8, memory)?;
5851        src.as_u8()
5852            .unwrap()
5853            .map()?
5854            .as_mut_slice()
5855            .copy_from_slice(bytes);
5856        Ok(src)
5857    }
5858
5859    fn compare_images(img1: &TensorDyn, img2: &TensorDyn, threshold: f64, name: &str) {
5860        assert_eq!(img1.height(), img2.height(), "Heights differ");
5861        assert_eq!(img1.width(), img2.width(), "Widths differ");
5862        assert_eq!(
5863            img1.format().unwrap(),
5864            img2.format().unwrap(),
5865            "PixelFormat differ"
5866        );
5867        assert!(
5868            matches!(
5869                img1.format().unwrap(),
5870                PixelFormat::Rgb | PixelFormat::Rgba | PixelFormat::Grey | PixelFormat::PlanarRgb
5871            ),
5872            "format must be Rgb or Rgba for comparison"
5873        );
5874
5875        let image1 = match img1.format().unwrap() {
5876            PixelFormat::Rgb => image::RgbImage::from_vec(
5877                img1.width().unwrap() as u32,
5878                img1.height().unwrap() as u32,
5879                img1.as_u8().unwrap().map().unwrap().to_vec(),
5880            )
5881            .unwrap(),
5882            PixelFormat::Rgba => image::RgbaImage::from_vec(
5883                img1.width().unwrap() as u32,
5884                img1.height().unwrap() as u32,
5885                img1.as_u8().unwrap().map().unwrap().to_vec(),
5886            )
5887            .unwrap()
5888            .convert(),
5889            PixelFormat::Grey => image::GrayImage::from_vec(
5890                img1.width().unwrap() as u32,
5891                img1.height().unwrap() as u32,
5892                img1.as_u8().unwrap().map().unwrap().to_vec(),
5893            )
5894            .unwrap()
5895            .convert(),
5896            PixelFormat::PlanarRgb => image::GrayImage::from_vec(
5897                img1.width().unwrap() as u32,
5898                (img1.height().unwrap() * 3) as u32,
5899                img1.as_u8().unwrap().map().unwrap().to_vec(),
5900            )
5901            .unwrap()
5902            .convert(),
5903            _ => return,
5904        };
5905
5906        let image2 = match img2.format().unwrap() {
5907            PixelFormat::Rgb => image::RgbImage::from_vec(
5908                img2.width().unwrap() as u32,
5909                img2.height().unwrap() as u32,
5910                img2.as_u8().unwrap().map().unwrap().to_vec(),
5911            )
5912            .unwrap(),
5913            PixelFormat::Rgba => image::RgbaImage::from_vec(
5914                img2.width().unwrap() as u32,
5915                img2.height().unwrap() as u32,
5916                img2.as_u8().unwrap().map().unwrap().to_vec(),
5917            )
5918            .unwrap()
5919            .convert(),
5920            PixelFormat::Grey => image::GrayImage::from_vec(
5921                img2.width().unwrap() as u32,
5922                img2.height().unwrap() as u32,
5923                img2.as_u8().unwrap().map().unwrap().to_vec(),
5924            )
5925            .unwrap()
5926            .convert(),
5927            PixelFormat::PlanarRgb => image::GrayImage::from_vec(
5928                img2.width().unwrap() as u32,
5929                (img2.height().unwrap() * 3) as u32,
5930                img2.as_u8().unwrap().map().unwrap().to_vec(),
5931            )
5932            .unwrap()
5933            .convert(),
5934            _ => return,
5935        };
5936
5937        let similarity = image_compare::rgb_similarity_structure(
5938            &image_compare::Algorithm::RootMeanSquared,
5939            &image1,
5940            &image2,
5941        )
5942        .expect("Image Comparison failed");
5943        if similarity.score < threshold {
5944            // image1.save(format!("{name}_1.png"));
5945            // image2.save(format!("{name}_2.png"));
5946            similarity
5947                .image
5948                .to_color_map()
5949                .save(format!("{name}.png"))
5950                .unwrap();
5951            panic!(
5952                "{name}: converted image and target image have similarity score too low: {} < {}",
5953                similarity.score, threshold
5954            )
5955        }
5956    }
5957
5958    fn compare_images_convert_to_rgb(
5959        img1: &TensorDyn,
5960        img2: &TensorDyn,
5961        threshold: f64,
5962        name: &str,
5963    ) {
5964        assert_eq!(img1.height(), img2.height(), "Heights differ");
5965        assert_eq!(img1.width(), img2.width(), "Widths differ");
5966
5967        let mut img_rgb1 = TensorDyn::image(
5968            img1.width().unwrap(),
5969            img1.height().unwrap(),
5970            PixelFormat::Rgb,
5971            DType::U8,
5972            Some(TensorMemory::Mem),
5973        )
5974        .unwrap();
5975        let mut img_rgb2 = TensorDyn::image(
5976            img1.width().unwrap(),
5977            img1.height().unwrap(),
5978            PixelFormat::Rgb,
5979            DType::U8,
5980            Some(TensorMemory::Mem),
5981        )
5982        .unwrap();
5983        let mut __cv = CPUProcessor::default();
5984        let r1 = __cv.convert(
5985            img1,
5986            &mut img_rgb1,
5987            crate::Rotation::None,
5988            crate::Flip::None,
5989            crate::Crop::default(),
5990        );
5991        let r2 = __cv.convert(
5992            img2,
5993            &mut img_rgb2,
5994            crate::Rotation::None,
5995            crate::Flip::None,
5996            crate::Crop::default(),
5997        );
5998        if r1.is_err() || r2.is_err() {
5999            // Fallback: compare raw bytes as greyscale strip
6000            let w = img1.width().unwrap() as u32;
6001            let data1 = img1.as_u8().unwrap().map().unwrap().to_vec();
6002            let data2 = img2.as_u8().unwrap().map().unwrap().to_vec();
6003            let h1 = (data1.len() as u32) / w;
6004            let h2 = (data2.len() as u32) / w;
6005            let g1 = image::GrayImage::from_vec(w, h1, data1).unwrap();
6006            let g2 = image::GrayImage::from_vec(w, h2, data2).unwrap();
6007            let similarity = image_compare::gray_similarity_structure(
6008                &image_compare::Algorithm::RootMeanSquared,
6009                &g1,
6010                &g2,
6011            )
6012            .expect("Image Comparison failed");
6013            if similarity.score < threshold {
6014                panic!(
6015                    "{name}: converted image and target image have similarity score too low: {} < {}",
6016                    similarity.score, threshold
6017                )
6018            }
6019            return;
6020        }
6021
6022        let image1 = image::RgbImage::from_vec(
6023            img_rgb1.width().unwrap() as u32,
6024            img_rgb1.height().unwrap() as u32,
6025            img_rgb1.as_u8().unwrap().map().unwrap().to_vec(),
6026        )
6027        .unwrap();
6028
6029        let image2 = image::RgbImage::from_vec(
6030            img_rgb2.width().unwrap() as u32,
6031            img_rgb2.height().unwrap() as u32,
6032            img_rgb2.as_u8().unwrap().map().unwrap().to_vec(),
6033        )
6034        .unwrap();
6035
6036        let similarity = image_compare::rgb_similarity_structure(
6037            &image_compare::Algorithm::RootMeanSquared,
6038            &image1,
6039            &image2,
6040        )
6041        .expect("Image Comparison failed");
6042        if similarity.score < threshold {
6043            // image1.save(format!("{name}_1.png"));
6044            // image2.save(format!("{name}_2.png"));
6045            similarity
6046                .image
6047                .to_color_map()
6048                .save(format!("{name}.png"))
6049                .unwrap();
6050            panic!(
6051                "{name}: converted image and target image have similarity score too low: {} < {}",
6052                similarity.score, threshold
6053            )
6054        }
6055    }
6056
6057    // =========================================================================
6058    // PixelFormat::Nv12 Format Tests
6059    // =========================================================================
6060
6061    #[test]
6062    fn test_nv12_image_creation() {
6063        let width = 640;
6064        let height = 480;
6065        let img = TensorDyn::image(width, height, PixelFormat::Nv12, DType::U8, None).unwrap();
6066
6067        assert_eq!(img.width(), Some(width));
6068        assert_eq!(img.height(), Some(height));
6069        assert_eq!(img.format().unwrap(), PixelFormat::Nv12);
6070        // PixelFormat::Nv12 uses shape [H*3/2, W] to store Y plane + UV plane
6071        assert_eq!(img.as_u8().unwrap().shape(), &[height * 3 / 2, width]);
6072    }
6073
6074    #[test]
6075    fn test_nv12_channels() {
6076        let img = TensorDyn::image(640, 480, PixelFormat::Nv12, DType::U8, None).unwrap();
6077        // PixelFormat::Nv12.channels() returns 1 (luma plane)
6078        assert_eq!(img.format().unwrap().channels(), 1);
6079    }
6080
6081    // =========================================================================
6082    // Tensor Format Metadata Tests
6083    // =========================================================================
6084
6085    #[test]
6086    fn test_tensor_set_format_planar() {
6087        let mut tensor = Tensor::<u8>::new(&[3, 480, 640], None, None).unwrap();
6088        tensor.set_format(PixelFormat::PlanarRgb).unwrap();
6089        assert_eq!(tensor.format(), Some(PixelFormat::PlanarRgb));
6090        assert_eq!(tensor.width(), Some(640));
6091        assert_eq!(tensor.height(), Some(480));
6092    }
6093
6094    #[test]
6095    fn test_tensor_set_format_interleaved() {
6096        let mut tensor = Tensor::<u8>::new(&[480, 640, 4], None, None).unwrap();
6097        tensor.set_format(PixelFormat::Rgba).unwrap();
6098        assert_eq!(tensor.format(), Some(PixelFormat::Rgba));
6099        assert_eq!(tensor.width(), Some(640));
6100        assert_eq!(tensor.height(), Some(480));
6101    }
6102
6103    #[test]
6104    fn test_tensordyn_image_rgb() {
6105        let img = TensorDyn::image(640, 480, PixelFormat::Rgb, DType::U8, None).unwrap();
6106        assert_eq!(img.width(), Some(640));
6107        assert_eq!(img.height(), Some(480));
6108        assert_eq!(img.format(), Some(PixelFormat::Rgb));
6109    }
6110
6111    #[test]
6112    fn test_tensordyn_image_planar_rgb() {
6113        let img = TensorDyn::image(640, 480, PixelFormat::PlanarRgb, DType::U8, None).unwrap();
6114        assert_eq!(img.width(), Some(640));
6115        assert_eq!(img.height(), Some(480));
6116        assert_eq!(img.format(), Some(PixelFormat::PlanarRgb));
6117    }
6118
6119    #[test]
6120    fn test_rgb_int8_format() {
6121        // Int8 variant: same PixelFormat::Rgb but with DType::I8
6122        let img = TensorDyn::image(
6123            1280,
6124            720,
6125            PixelFormat::Rgb,
6126            DType::I8,
6127            Some(TensorMemory::Mem),
6128        )
6129        .unwrap();
6130        assert_eq!(img.width(), Some(1280));
6131        assert_eq!(img.height(), Some(720));
6132        assert_eq!(img.format(), Some(PixelFormat::Rgb));
6133        assert_eq!(img.dtype(), DType::I8);
6134    }
6135
6136    #[test]
6137    fn test_planar_rgb_int8_format() {
6138        let img = TensorDyn::image(
6139            1280,
6140            720,
6141            PixelFormat::PlanarRgb,
6142            DType::I8,
6143            Some(TensorMemory::Mem),
6144        )
6145        .unwrap();
6146        assert_eq!(img.width(), Some(1280));
6147        assert_eq!(img.height(), Some(720));
6148        assert_eq!(img.format(), Some(PixelFormat::PlanarRgb));
6149        assert_eq!(img.dtype(), DType::I8);
6150    }
6151
6152    #[test]
6153    fn test_rgb_from_tensor() {
6154        let mut tensor = Tensor::<u8>::new(&[720, 1280, 3], None, None).unwrap();
6155        tensor.set_format(PixelFormat::Rgb).unwrap();
6156        let img = TensorDyn::from(tensor);
6157        assert_eq!(img.width(), Some(1280));
6158        assert_eq!(img.height(), Some(720));
6159        assert_eq!(img.format(), Some(PixelFormat::Rgb));
6160    }
6161
6162    #[test]
6163    fn test_planar_rgb_from_tensor() {
6164        let mut tensor = Tensor::<u8>::new(&[3, 720, 1280], None, None).unwrap();
6165        tensor.set_format(PixelFormat::PlanarRgb).unwrap();
6166        let img = TensorDyn::from(tensor);
6167        assert_eq!(img.width(), Some(1280));
6168        assert_eq!(img.height(), Some(720));
6169        assert_eq!(img.format(), Some(PixelFormat::PlanarRgb));
6170    }
6171
6172    #[test]
6173    fn test_dtype_determines_int8() {
6174        // DType::I8 indicates int8 data
6175        let u8_img = TensorDyn::image(64, 64, PixelFormat::Rgb, DType::U8, None).unwrap();
6176        let i8_img = TensorDyn::image(64, 64, PixelFormat::Rgb, DType::I8, None).unwrap();
6177        assert_eq!(u8_img.dtype(), DType::U8);
6178        assert_eq!(i8_img.dtype(), DType::I8);
6179    }
6180
6181    #[test]
6182    fn test_pixel_layout_packed_vs_planar() {
6183        // Packed vs planar layout classification
6184        assert_eq!(PixelFormat::Rgb.layout(), PixelLayout::Packed);
6185        assert_eq!(PixelFormat::Rgba.layout(), PixelLayout::Packed);
6186        assert_eq!(PixelFormat::PlanarRgb.layout(), PixelLayout::Planar);
6187        assert_eq!(PixelFormat::Nv12.layout(), PixelLayout::SemiPlanar);
6188    }
6189
6190    /// Integration test that exercises the PBO-to-PBO convert path.
6191    /// Uses ImageProcessor::create_image() to allocate PBO-backed tensors,
6192    /// then converts between them. Skipped when GL is unavailable or the
6193    /// backend is not PBO (e.g. DMA-buf systems).
6194    #[cfg(target_os = "linux")]
6195    #[cfg(feature = "opengl")]
6196    #[test]
6197    fn test_convert_pbo_to_pbo() {
6198        let mut converter = ImageProcessor::new().unwrap();
6199
6200        // Skip if GL is not available or backend is not PBO
6201        let is_pbo = converter
6202            .opengl
6203            .as_ref()
6204            .is_some_and(|gl| gl.transfer_backend() == opengl_headless::TransferBackend::Pbo);
6205        if !is_pbo {
6206            eprintln!("Skipping test_convert_pbo_to_pbo: backend is not PBO");
6207            return;
6208        }
6209
6210        let src_w = 640;
6211        let src_h = 480;
6212        let dst_w = 320;
6213        let dst_h = 240;
6214
6215        // Create PBO-backed source image
6216        let pbo_src = converter
6217            .create_image(src_w, src_h, PixelFormat::Rgba, DType::U8, None)
6218            .unwrap();
6219        assert_eq!(
6220            pbo_src.as_u8().unwrap().memory(),
6221            TensorMemory::Pbo,
6222            "create_image should produce a PBO tensor"
6223        );
6224
6225        // Fill source PBO with test pattern: load JPEG then convert Mem→PBO
6226        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
6227        let jpeg_src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
6228
6229        // Resize JPEG into a Mem temp of the right size, then copy into PBO
6230        let mem_src = TensorDyn::image(
6231            src_w,
6232            src_h,
6233            PixelFormat::Rgba,
6234            DType::U8,
6235            Some(TensorMemory::Mem),
6236        )
6237        .unwrap();
6238        let (result, _jpeg_src, mem_src) = convert_img(
6239            &mut CPUProcessor::new(),
6240            jpeg_src,
6241            mem_src,
6242            Rotation::None,
6243            Flip::None,
6244            Crop::no_crop(),
6245        );
6246        result.unwrap();
6247
6248        // Copy pixel data into the PBO source by mapping it
6249        {
6250            let src_data = mem_src.as_u8().unwrap().map().unwrap();
6251            let mut pbo_map = pbo_src.as_u8().unwrap().map().unwrap();
6252            pbo_map.copy_from_slice(&src_data);
6253        }
6254
6255        // Create PBO-backed destination image
6256        let pbo_dst = converter
6257            .create_image(dst_w, dst_h, PixelFormat::Rgba, DType::U8, None)
6258            .unwrap();
6259        assert_eq!(pbo_dst.as_u8().unwrap().memory(), TensorMemory::Pbo);
6260
6261        // Convert PBO→PBO (this exercises convert_pbo_to_pbo)
6262        let mut pbo_dst = pbo_dst;
6263        let result = converter.convert(
6264            &pbo_src,
6265            &mut pbo_dst,
6266            Rotation::None,
6267            Flip::None,
6268            Crop::no_crop(),
6269        );
6270        result.unwrap();
6271
6272        // Verify: compare with CPU-only conversion of the same input
6273        let cpu_dst = TensorDyn::image(
6274            dst_w,
6275            dst_h,
6276            PixelFormat::Rgba,
6277            DType::U8,
6278            Some(TensorMemory::Mem),
6279        )
6280        .unwrap();
6281        let (result, _mem_src, cpu_dst) = convert_img(
6282            &mut CPUProcessor::new(),
6283            mem_src,
6284            cpu_dst,
6285            Rotation::None,
6286            Flip::None,
6287            Crop::no_crop(),
6288        );
6289        result.unwrap();
6290
6291        let pbo_dst_img = {
6292            let mut __t = pbo_dst.into_u8().unwrap();
6293            __t.set_format(PixelFormat::Rgba).unwrap();
6294            TensorDyn::from(__t)
6295        };
6296        compare_images(&pbo_dst_img, &cpu_dst, 0.95, function!());
6297        log::info!("test_convert_pbo_to_pbo: PASS — PBO-to-PBO convert matches CPU reference");
6298    }
6299
6300    #[test]
6301    fn test_image_bgra() {
6302        let img = TensorDyn::image(
6303            640,
6304            480,
6305            PixelFormat::Bgra,
6306            DType::U8,
6307            Some(edgefirst_tensor::TensorMemory::Mem),
6308        )
6309        .unwrap();
6310        assert_eq!(img.width(), Some(640));
6311        assert_eq!(img.height(), Some(480));
6312        assert_eq!(img.format().unwrap().channels(), 4);
6313        assert_eq!(img.format().unwrap(), PixelFormat::Bgra);
6314    }
6315
6316    // ========================================================================
6317    // Tests for EDGEFIRST_FORCE_BACKEND env var
6318    // ========================================================================
6319
6320    #[test]
6321    fn test_force_backend_cpu() {
6322        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
6323        unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", "cpu") };
6324        let result = ImageProcessor::new();
6325        match original {
6326            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
6327            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6328        }
6329        let converter = result.unwrap();
6330        assert!(converter.cpu.is_some());
6331        assert_eq!(converter.forced_backend, Some(ForcedBackend::Cpu));
6332    }
6333
6334    #[test]
6335    fn test_force_backend_invalid() {
6336        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
6337        unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", "invalid") };
6338        let result = ImageProcessor::new();
6339        match original {
6340            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
6341            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6342        }
6343        assert!(
6344            matches!(&result, Err(Error::ForcedBackendUnavailable(s)) if s.contains("unknown")),
6345            "invalid backend value should return ForcedBackendUnavailable error: {result:?}"
6346        );
6347    }
6348
6349    #[test]
6350    fn test_force_backend_unset() {
6351        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
6352        unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") };
6353        let result = ImageProcessor::new();
6354        match original {
6355            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
6356            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6357        }
6358        let converter = result.unwrap();
6359        assert!(converter.forced_backend.is_none());
6360    }
6361
6362    // ========================================================================
6363    // Tests for hybrid mask path error handling
6364    // ========================================================================
6365
/// Construct a processor while `EDGEFIRST_DISABLE_CPU`, `EDGEFIRST_DISABLE_GL`
/// and `EDGEFIRST_DISABLE_G2D` are all set to `"1"`, then check that
/// `draw_proto_masks` reports an `Error::Internal` mentioning "CPU backend".
#[test]
fn test_draw_proto_masks_no_cpu_returns_error() {
    const DISABLE_VARS: [&str; 3] = [
        "EDGEFIRST_DISABLE_CPU",
        "EDGEFIRST_DISABLE_GL",
        "EDGEFIRST_DISABLE_G2D",
    ];

    // Remember each variable's current value, then switch the backend off.
    let saved = DISABLE_VARS.map(|name| {
        let previous = std::env::var(name).ok();
        unsafe { std::env::set_var(name, "1") };
        previous
    });

    let built = ImageProcessor::new();

    // Put the environment back before any assertion below can panic.
    for (name, previous) in DISABLE_VARS.iter().copied().zip(saved) {
        match previous {
            Some(s) => unsafe { std::env::set_var(name, s) },
            None => unsafe { std::env::remove_var(name) },
        }
    }

    let mut converter = built.unwrap();
    assert!(converter.cpu.is_none(), "CPU should be disabled");

    let mut dst_dyn = TensorDyn::image(
        640,
        480,
        PixelFormat::Rgba,
        DType::U8,
        Some(TensorMemory::Mem),
    )
    .unwrap();

    let bbox = edgefirst_decoder::BoundingBox {
        xmin: 0.1,
        ymin: 0.1,
        xmax: 0.5,
        ymax: 0.5,
    };
    let det = [DetectBox {
        bbox,
        score: 0.9,
        label: 0,
    }];

    // One detection's worth of coefficients ([1, 4]) and an all-zero
    // [8, 8, 4] prototype tensor tagged as NHWC.
    let proto_data = {
        use edgefirst_tensor::{Tensor, TensorDyn};
        let coefficients = Tensor::<f32>::from_slice(&[0.5_f32; 4], &[1, 4]).unwrap();
        let prototypes =
            Tensor::<f32>::from_slice(&vec![0.0_f32; 8 * 8 * 4], &[8, 8, 4]).unwrap();
        ProtoData {
            mask_coefficients: TensorDyn::F32(coefficients),
            protos: TensorDyn::F32(prototypes),
            layout: ProtoLayout::Nhwc,
        }
    };

    let result = converter.draw_proto_masks(&mut dst_dyn, &det, &proto_data, Default::default());
    assert!(
        matches!(&result, Err(Error::Internal(s)) if s.contains("CPU backend")),
        "draw_proto_masks without CPU should return Internal error: {result:?}"
    );
}
6431
/// With the backend forced to `cpu`, `draw_proto_masks` must succeed for a
/// single detection drawn into a 64x64 RGBA destination.
#[test]
fn test_draw_proto_masks_cpu_fallback_works() {
    // Build the processor through `with_force_backend` so the temporary
    // `EDGEFIRST_FORCE_BACKEND=cpu` override is serialized with the other
    // tests that use the helper, and is undone before the `unwrap` below.
    let mut converter = with_force_backend(Some("cpu"), ImageProcessor::new).unwrap();
    assert!(converter.cpu.is_some());

    let mut dst_dyn = TensorDyn::image(
        64,
        64,
        PixelFormat::Rgba,
        DType::U8,
        Some(TensorMemory::Mem),
    )
    .unwrap();
    let det = [DetectBox {
        bbox: edgefirst_decoder::BoundingBox {
            xmin: 0.1,
            ymin: 0.1,
            xmax: 0.5,
            ymax: 0.5,
        },
        score: 0.9,
        label: 0,
    }];
    // One detection's worth of coefficients ([1, 4]) and an all-zero
    // [8, 8, 4] prototype tensor tagged as NHWC.
    let proto_data = {
        use edgefirst_tensor::{Tensor, TensorDyn};
        let coeff_t = Tensor::<f32>::from_slice(&[0.5_f32; 4], &[1, 4]).unwrap();
        let protos_t =
            Tensor::<f32>::from_slice(&vec![0.0_f32; 8 * 8 * 4], &[8, 8, 4]).unwrap();
        ProtoData {
            mask_coefficients: TensorDyn::F32(coeff_t),
            protos: TensorDyn::F32(protos_t),
            layout: ProtoLayout::Nhwc,
        }
    };
    let result =
        converter.draw_proto_masks(&mut dst_dyn, &det, &proto_data, Default::default());
    assert!(result.is_ok(), "CPU fallback path should work: {result:?}");
}
6480
6481    // ============================================================
6482    // draw_decoded_masks / draw_proto_masks — 4-scenario pixel-
6483    // verified tests. Exercises each backend against the full
6484    // output-contract matrix:
6485    //
6486    //   | detections | background | expected dst             |
6487    //   |------------|------------|--------------------------|
6488    //   | empty      | none       | fully cleared (0x00)     |
6489    //   | empty      | set        | fully equal to bg        |
6490    //   | set        | none       | cleared outside box +    |
6491    //   |            |            | mask-coloured inside     |
6492    //   | set        | set        | bg outside box + mask    |
6493    //   |            |            | blended inside           |
6494    //
6495    // Every test pre-fills dst with a non-zero "dirty" pattern so
6496    // that any silent `return Ok(())` leaks the pattern into the
6497    // asserted output and fails loudly.
6498    // ============================================================
6499
/// Run `body` with `EDGEFIRST_FORCE_BACKEND` temporarily set to `value`
/// (or removed when `value` is `None`) and return whatever `body` returns.
///
/// Invocations are serialized through a function-local, process-wide mutex,
/// so two calls to this helper never overlap. The variable's previous value
/// is restored when the call ends — including when `body` panics — and a
/// mutex poisoned by such a panic is recovered instead of propagated.
///
/// Code that reads or writes the variable without going through this helper
/// is not covered by the lock.
fn with_force_backend<R>(value: Option<&str>, body: impl FnOnce() -> R) -> R {
    use std::ffi::OsString;
    use std::sync::{Mutex, OnceLock};

    const VAR: &str = "EDGEFIRST_FORCE_BACKEND";

    /// Puts the saved value of `VAR` back when dropped, so restoration also
    /// happens while unwinding out of a panicking `body`.
    struct Restore(Option<OsString>);

    impl Drop for Restore {
        fn drop(&mut self) {
            // SAFETY: runs while the caller still holds `LOCK`, so no other
            // user of this helper is mutating the environment concurrently.
            match self.0.take() {
                Some(previous) => unsafe { std::env::set_var(VAR, previous) },
                None => unsafe { std::env::remove_var(VAR) },
            }
        }
    }

    static LOCK: OnceLock<Mutex<()>> = OnceLock::new();
    // A poisoned lock only means an earlier `body` panicked; the guarded
    // state is `()`, so it is always safe to keep going.
    let _guard = LOCK
        .get_or_init(|| Mutex::new(()))
        .lock()
        .unwrap_or_else(|e| e.into_inner());

    // Declared after `_guard` so it is dropped first: the environment is
    // restored before the lock is released. `var_os` keeps a non-UTF-8
    // previous value intact instead of treating it as unset.
    let _restore = Restore(std::env::var_os(VAR));

    // SAFETY: serialized with other users of this helper by `_guard`.
    match value {
        Some(v) => unsafe { std::env::set_var(VAR, v) },
        None => unsafe { std::env::remove_var(VAR) },
    }

    body()
}
6522
6523    /// Allocate an RGBA image tensor and pre-fill every byte with a
6524    /// distinctive non-zero pattern. Any test that relies on the old
6525    /// "dst is already cleared" assumption will see this pattern leak
6526    /// through to the output and fail.
6527    fn make_dirty_dst(w: usize, h: usize, mem: Option<TensorMemory>) -> TensorDyn {
6528        let dst = TensorDyn::image(w, h, PixelFormat::Rgba, DType::U8, mem).unwrap();
6529        {
6530            use edgefirst_tensor::TensorMapTrait;
6531            let u8t = dst.as_u8().unwrap();
6532            let mut map = u8t.map().unwrap();
6533            for (i, b) in map.as_mut_slice().iter_mut().enumerate() {
6534                *b = 0xA0u8.wrapping_add((i as u8) & 0x3F);
6535            }
6536        }
6537        dst
6538    }
6539
6540    /// Allocate an RGBA background filled with a constant colour.
6541    fn make_bg(w: usize, h: usize, mem: Option<TensorMemory>, rgba: [u8; 4]) -> TensorDyn {
6542        let bg = TensorDyn::image(w, h, PixelFormat::Rgba, DType::U8, mem).unwrap();
6543        {
6544            use edgefirst_tensor::TensorMapTrait;
6545            let u8t = bg.as_u8().unwrap();
6546            let mut map = u8t.map().unwrap();
6547            for chunk in map.as_mut_slice().chunks_exact_mut(4) {
6548                chunk.copy_from_slice(&rgba);
6549            }
6550        }
6551        bg
6552    }
6553
6554    fn pixel_at(dst: &TensorDyn, x: usize, y: usize) -> [u8; 4] {
6555        use edgefirst_tensor::TensorMapTrait;
6556        let w = dst.width().unwrap();
6557        let off = (y * w + x) * 4;
6558        let u8t = dst.as_u8().unwrap();
6559        let map = u8t.map().unwrap();
6560        let s = map.as_slice();
6561        [s[off], s[off + 1], s[off + 2], s[off + 3]]
6562    }
6563
6564    fn assert_every_pixel_eq(dst: &TensorDyn, expected: [u8; 4], case: &str) {
6565        use edgefirst_tensor::TensorMapTrait;
6566        let u8t = dst.as_u8().unwrap();
6567        let map = u8t.map().unwrap();
6568        for (i, chunk) in map.as_slice().chunks_exact(4).enumerate() {
6569            assert_eq!(
6570                chunk, &expected,
6571                "{case}: pixel idx {i} = {chunk:?}, expected {expected:?}"
6572            );
6573        }
6574    }
6575
6576    /// Scenario 1: empty detections, empty segmentation, no background
6577    /// → dst must be fully cleared to 0x00000000.
6578    fn scenario_empty_no_bg(processor: &mut ImageProcessor, case: &str) {
6579        let mut dst = make_dirty_dst(64, 64, None);
6580        processor
6581            .draw_decoded_masks(&mut dst, &[], &[], MaskOverlay::default())
6582            .unwrap_or_else(|e| panic!("{case}/decoded_masks empty+no-bg failed: {e:?}"));
6583        assert_every_pixel_eq(&dst, [0, 0, 0, 0], &format!("{case}/decoded"));
6584
6585        let mut dst = make_dirty_dst(64, 64, None);
6586        let proto = {
6587            use edgefirst_tensor::{Tensor, TensorDyn};
6588            // Placeholder (no detections); shape [1, 4] to keep the tensor well-formed.
6589            let coeff_t = Tensor::<f32>::from_slice(&[0.0_f32; 4], &[1, 4]).unwrap();
6590            let protos_t =
6591                Tensor::<f32>::from_slice(&vec![0.0_f32; 8 * 8 * 4], &[8, 8, 4]).unwrap();
6592            ProtoData {
6593                mask_coefficients: TensorDyn::F32(coeff_t),
6594                protos: TensorDyn::F32(protos_t),
6595                layout: ProtoLayout::Nhwc,
6596            }
6597        };
6598        processor
6599            .draw_proto_masks(&mut dst, &[], &proto, MaskOverlay::default())
6600            .unwrap_or_else(|e| panic!("{case}/proto_masks empty+no-bg failed: {e:?}"));
6601        assert_every_pixel_eq(&dst, [0, 0, 0, 0], &format!("{case}/proto"));
6602    }
6603
6604    /// Scenario 2: empty detections, empty segmentation, background set
6605    /// → dst must be fully equal to bg.
6606    fn scenario_empty_with_bg(processor: &mut ImageProcessor, case: &str) {
6607        let bg_color = [42, 99, 200, 255];
6608        let bg = make_bg(64, 64, None, bg_color);
6609        let overlay = MaskOverlay::new().with_background(&bg);
6610
6611        let mut dst = make_dirty_dst(64, 64, None);
6612        processor
6613            .draw_decoded_masks(&mut dst, &[], &[], overlay)
6614            .unwrap_or_else(|e| panic!("{case}/decoded_masks empty+bg failed: {e:?}"));
6615        assert_every_pixel_eq(&dst, bg_color, &format!("{case}/decoded bg blit"));
6616
6617        let mut dst = make_dirty_dst(64, 64, None);
6618        let proto = {
6619            use edgefirst_tensor::{Tensor, TensorDyn};
6620            // Placeholder (no detections); shape [1, 4] to keep the tensor well-formed.
6621            let coeff_t = Tensor::<f32>::from_slice(&[0.0_f32; 4], &[1, 4]).unwrap();
6622            let protos_t =
6623                Tensor::<f32>::from_slice(&vec![0.0_f32; 8 * 8 * 4], &[8, 8, 4]).unwrap();
6624            ProtoData {
6625                mask_coefficients: TensorDyn::F32(coeff_t),
6626                protos: TensorDyn::F32(protos_t),
6627                layout: ProtoLayout::Nhwc,
6628            }
6629        };
6630        processor
6631            .draw_proto_masks(&mut dst, &[], &proto, overlay)
6632            .unwrap_or_else(|e| panic!("{case}/proto_masks empty+bg failed: {e:?}"));
6633        assert_every_pixel_eq(&dst, bg_color, &format!("{case}/proto bg blit"));
6634    }
6635
6636    /// Scenario 3: one detection with a fully-opaque segmentation fill,
6637    /// no background → outside the box dst must be 0x00, inside it must
6638    /// be a non-zero mask colour (the render_segmentation output).
6639    fn scenario_detect_no_bg(processor: &mut ImageProcessor, case: &str) {
6640        use edgefirst_decoder::Segmentation;
6641        use ndarray::Array3;
6642        processor
6643            .set_class_colors(&[[200, 80, 40, 255]])
6644            .expect("set_class_colors");
6645
6646        let detect = DetectBox {
6647            bbox: [0.25, 0.25, 0.75, 0.75].into(),
6648            score: 0.99,
6649            label: 0,
6650        };
6651        let seg_arr = Array3::from_shape_fn((4, 4, 1), |_| 255u8);
6652        let seg = Segmentation {
6653            segmentation: seg_arr,
6654            xmin: 0.25,
6655            ymin: 0.25,
6656            xmax: 0.75,
6657            ymax: 0.75,
6658        };
6659
6660        let mut dst = make_dirty_dst(64, 64, None);
6661        processor
6662            .draw_decoded_masks(&mut dst, &[detect], &[seg], MaskOverlay::default())
6663            .unwrap_or_else(|e| panic!("{case}/decoded_masks detect+no-bg failed: {e:?}"));
6664
6665        // Outside the bbox (corner): must be cleared black.
6666        let corner = pixel_at(&dst, 2, 2);
6667        assert_eq!(
6668            corner,
6669            [0, 0, 0, 0],
6670            "{case}/decoded: corner (2,2) leaked dirty pattern: {corner:?}"
6671        );
6672        // Inside the bbox (center): the mask colour must be visible.
6673        // Any non-zero pixel is acceptable — exact rendering varies
6674        // between backends (GL smoothstep, CPU nearest).
6675        let center = pixel_at(&dst, 32, 32);
6676        assert!(
6677            center != [0, 0, 0, 0],
6678            "{case}/decoded: center (32,32) was not coloured: {center:?}"
6679        );
6680    }
6681
6682    /// Scenario 4: detection + background. Outside the box must match
6683    /// bg; inside the box must NOT match bg (mask blended on top).
6684    fn scenario_detect_with_bg(processor: &mut ImageProcessor, case: &str) {
6685        use edgefirst_decoder::Segmentation;
6686        use ndarray::Array3;
6687        processor
6688            .set_class_colors(&[[200, 80, 40, 255]])
6689            .expect("set_class_colors");
6690        let bg_color = [10, 20, 30, 255];
6691        let bg = make_bg(64, 64, None, bg_color);
6692
6693        let detect = DetectBox {
6694            bbox: [0.25, 0.25, 0.75, 0.75].into(),
6695            score: 0.99,
6696            label: 0,
6697        };
6698        let seg_arr = Array3::from_shape_fn((4, 4, 1), |_| 255u8);
6699        let seg = Segmentation {
6700            segmentation: seg_arr,
6701            xmin: 0.25,
6702            ymin: 0.25,
6703            xmax: 0.75,
6704            ymax: 0.75,
6705        };
6706
6707        let overlay = MaskOverlay::new().with_background(&bg);
6708        let mut dst = make_dirty_dst(64, 64, None);
6709        processor
6710            .draw_decoded_masks(&mut dst, &[detect], &[seg], overlay)
6711            .unwrap_or_else(|e| panic!("{case}/decoded_masks detect+bg failed: {e:?}"));
6712
6713        // Outside the bbox (corner): bg colour.
6714        let corner = pixel_at(&dst, 2, 2);
6715        assert_eq!(
6716            corner, bg_color,
6717            "{case}/decoded: corner (2,2) should show bg {bg_color:?} got {corner:?}"
6718        );
6719        // Inside the bbox (center): mask blended on bg, must differ from
6720        // pure bg (alpha-blend with mask colour produces a distinct shade).
6721        let center = pixel_at(&dst, 32, 32);
6722        assert!(
6723            center != bg_color,
6724            "{case}/decoded: center (32,32) should differ from bg {bg_color:?}, got {center:?}"
6725        );
6726    }
6727
6728    /// Run all 4 scenarios against the processor. Skip gracefully if
6729    /// construction fails (backend unavailable on this host).
6730    fn run_all_scenarios(
6731        force_backend: Option<&'static str>,
6732        case: &'static str,
6733        require_dma_for_bg: bool,
6734    ) {
6735        if require_dma_for_bg && !edgefirst_tensor::is_dma_available() {
6736            eprintln!("SKIPPED: {case} — DMA not available on this host");
6737            return;
6738        }
6739        let processor_result = with_force_backend(force_backend, ImageProcessor::new);
6740        let mut processor = match processor_result {
6741            Ok(p) => p,
6742            Err(e) => {
6743                eprintln!("SKIPPED: {case} — backend init failed: {e:?}");
6744                return;
6745            }
6746        };
6747        scenario_empty_no_bg(&mut processor, case);
6748        scenario_empty_with_bg(&mut processor, case);
6749        scenario_detect_no_bg(&mut processor, case);
6750        scenario_detect_with_bg(&mut processor, case);
6751    }
6752
/// Run the four mask scenarios with the backend forced to `cpu`; DMA is
/// not required.
#[test]
fn test_draw_masks_4_scenarios_cpu() {
    let forced = Some("cpu");
    run_all_scenarios(forced, "cpu", false);
}
6757
/// Run the four mask scenarios with `EDGEFIRST_FORCE_BACKEND` removed, so
/// the processor is built without a forced backend; DMA is not required.
#[test]
fn test_draw_masks_4_scenarios_auto() {
    let forced = None;
    run_all_scenarios(forced, "auto", false);
}
6762
/// Run the four mask scenarios with the backend forced to `opengl`; DMA is
/// not required. Only built on Linux with the `opengl` feature enabled.
#[cfg(all(target_os = "linux", feature = "opengl"))]
#[test]
fn test_draw_masks_4_scenarios_opengl() {
    let forced = Some("opengl");
    run_all_scenarios(forced, "opengl", false);
}
6769
/// G2D forced backend, using DMA-backed 64x64 RGBA surfaces throughout.
///
/// * Case 1 — no detections, no background: destination must end up all
///   zero.
/// * Case 2 — no detections, background set: destination must equal the
///   background colour everywhere.
/// * Case 3 — one detection, no background: the call must fail with
///   `Error::NotImplemented`.
///
/// The detection-plus-background combination is not exercised here.
/// The test is skipped when DMA is unavailable or the G2D processor cannot
/// be constructed.
#[cfg(target_os = "linux")]
#[test]
fn test_draw_masks_zero_detection_g2d_forced() {
    if !edgefirst_tensor::is_dma_available() {
        eprintln!("SKIPPED: g2d forced — DMA not available on this host");
        return;
    }
    let mut processor = match with_force_backend(Some("g2d"), ImageProcessor::new) {
        Ok(built) => built,
        Err(e) => {
            eprintln!("SKIPPED: g2d forced — init failed: {e:?}");
            return;
        }
    };

    // Every surface in this test is a 64x64 RGBA U8 tensor in DMA memory.
    let new_dma_rgba = || {
        TensorDyn::image(
            64,
            64,
            PixelFormat::Rgba,
            DType::U8,
            Some(TensorMemory::Dma),
        )
        .unwrap()
    };
    // Overwrite every byte of a destination so stale content is detectable.
    let fill_bytes = |target: &mut TensorDyn, byte: u8| {
        use edgefirst_tensor::TensorMapTrait;
        let u8t = target.as_u8_mut().unwrap();
        let mut map = u8t.map().unwrap();
        map.as_mut_slice().fill(byte);
    };

    // Case 1: empty + no bg.
    let mut dst = new_dma_rgba();
    fill_bytes(&mut dst, 0xBB);
    processor
        .draw_decoded_masks(&mut dst, &[], &[], MaskOverlay::default())
        .expect("g2d empty+no-bg");
    assert_every_pixel_eq(&dst, [0, 0, 0, 0], "g2d/case1 cleared");

    // Case 2: empty + bg, with source and destination both in DMA memory.
    let bg_color = [7, 11, 13, 255];
    let bg = new_dma_rgba();
    {
        use edgefirst_tensor::TensorMapTrait;
        let u8t = bg.as_u8().unwrap();
        let mut map = u8t.map().unwrap();
        map.as_mut_slice()
            .chunks_exact_mut(4)
            .for_each(|pixel| pixel.copy_from_slice(&bg_color));
    }
    let mut dst = new_dma_rgba();
    fill_bytes(&mut dst, 0x55);
    processor
        .draw_decoded_masks(&mut dst, &[], &[], MaskOverlay::new().with_background(&bg))
        .expect("g2d empty+bg");
    assert_every_pixel_eq(&dst, bg_color, "g2d/case2 bg blit");

    // Case 3: a detection is present — the call must be rejected.
    let detect = DetectBox {
        bbox: [0.25, 0.25, 0.75, 0.75].into(),
        score: 0.9,
        label: 0,
    };
    let mut dst = new_dma_rgba();
    let err = processor
        .draw_decoded_masks(&mut dst, &[detect], &[], MaskOverlay::default())
        .expect_err("g2d must reject detect-present draw_decoded_masks");
    assert!(
        matches!(err, Error::NotImplemented(_)),
        "g2d case3 wrong error: {err:?}"
    );
}
6872
/// A raw `[640, 640, 3]` `U8` tensor that is given its pixel format
/// afterwards via `set_format` must be accepted as a `convert` destination
/// by a CPU-forced processor, and must still report the same format, width
/// and height after the conversion.
#[test]
fn test_set_format_then_cpu_convert() {
    // Force the CPU backend through `with_force_backend`: the override is
    // serialized with other users of the helper and is already restored by
    // the time `unwrap` runs, so a construction failure cannot leak
    // `EDGEFIRST_FORCE_BACKEND=cpu` into other tests.
    let mut processor = with_force_backend(Some("cpu"), ImageProcessor::new).unwrap();

    // Load a source image
    let image = edgefirst_bench::testdata::read("zidane.jpg");
    let src = load_image_test_helper(&image, Some(PixelFormat::Rgba), None).unwrap();

    // Create a raw tensor, then attach format — simulating the from_fd workflow
    let mut dst =
        TensorDyn::new(&[640, 640, 3], DType::U8, Some(TensorMemory::Mem), None).unwrap();
    dst.set_format(PixelFormat::Rgb).unwrap();

    // Convert should work with the set_format-annotated tensor
    processor
        .convert(&src, &mut dst, Rotation::None, Flip::None, Crop::default())
        .unwrap();

    // Verify format survived conversion
    assert_eq!(dst.format(), Some(PixelFormat::Rgb));
    assert_eq!(dst.width(), Some(640));
    assert_eq!(dst.height(), Some(640));
}
6903
/// Create four `ImageProcessor`s up front on the current thread, then run a
/// 128x128 → 64x64 RGB conversion on each in turn; every step must succeed.
///
/// Images are requested with no explicit memory type (`None`), leaving the
/// choice of allocation to the processor.
#[test]
fn test_multiple_image_processors_same_thread() {
    let mut processors = Vec::with_capacity(4);
    for _ in 0..4 {
        processors.push(ImageProcessor::new().expect("ImageProcessor::new() failed"));
    }

    processors.iter_mut().for_each(|proc| {
        let src = proc
            .create_image(128, 128, PixelFormat::Rgb, DType::U8, None)
            .expect("create src failed");
        let mut dst = proc
            .create_image(64, 64, PixelFormat::Rgb, DType::U8, None)
            .expect("create dst failed");

        proc.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::default())
            .expect("convert failed");

        assert_eq!(dst.width(), Some(64));
        assert_eq!(dst.height(), Some(64));
    });
}
6928
/// Verify that creating ImageProcessors on separate threads and performing
/// a resize on each does not deadlock or error.
///
/// Four worker threads each build their own processor and run a
/// 128x128 → 64x64 RGB conversion, with images requested with no explicit
/// memory type. A coordinator thread joins the workers and signals
/// completion over a channel; the test thread waits on that channel for at
/// most 60 seconds so a deadlock fails the test instead of hanging CI.
///
/// A worker panic and a timeout are reported as distinct failures.
#[test]
fn test_multiple_image_processors_separate_threads() {
    use std::sync::mpsc::{self, RecvTimeoutError};
    use std::time::Duration;

    const TIMEOUT: Duration = Duration::from_secs(60);

    let (tx, rx) = mpsc::channel::<()>();

    std::thread::spawn(move || {
        let handles: Vec<_> = (0..4)
            .map(|i| {
                std::thread::spawn(move || {
                    let mut proc = ImageProcessor::new().unwrap_or_else(|e| {
                        panic!("ImageProcessor::new() failed on thread {i}: {e}")
                    });
                    let src = proc
                        .create_image(128, 128, PixelFormat::Rgb, DType::U8, None)
                        .unwrap_or_else(|e| panic!("create src failed on thread {i}: {e}"));
                    let mut dst = proc
                        .create_image(64, 64, PixelFormat::Rgb, DType::U8, None)
                        .unwrap_or_else(|e| panic!("create dst failed on thread {i}: {e}"));
                    proc.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::default())
                        .unwrap_or_else(|e| panic!("convert failed on thread {i}: {e}"));
                    assert_eq!(dst.width(), Some(64));
                    assert_eq!(dst.height(), Some(64));
                })
            })
            .collect();

        for (i, h) in handles.into_iter().enumerate() {
            if let Err(payload) = h.join() {
                // The payload is `Box<dyn Any>`; its `Debug` output carries
                // no information, so recover the panic message when it is a
                // string.
                let msg = payload
                    .downcast_ref::<String>()
                    .map(String::as_str)
                    .or_else(|| payload.downcast_ref::<&str>().copied())
                    .unwrap_or("<non-string panic payload>");
                panic!("thread {i} panicked: {msg}");
            }
        }

        let _ = tx.send(());
    });

    match rx.recv_timeout(TIMEOUT) {
        Ok(()) => {}
        Err(RecvTimeoutError::Timeout) => {
            panic!("test_multiple_image_processors_separate_threads timed out after {TIMEOUT:?}")
        }
        // The sender was dropped without sending: the coordinator unwound
        // because a worker panicked, which is not a timeout.
        Err(RecvTimeoutError::Disconnected) => panic!(
            "test_multiple_image_processors_separate_threads failed: a worker thread panicked \
             (see the panic output above)"
        ),
    }
}
6977
/// Verify that 4 fully-initialized ImageProcessors on separate threads can
/// all operate concurrently without deadlocking each other.
///
/// Each worker builds its processor, waits on a shared barrier until all
/// `N` workers are ready, then runs `ROUNDS` 128x128 → 64x64 RGB
/// conversions. A coordinator thread joins the workers and signals
/// completion over a channel; the test thread waits on that channel for at
/// most 60 seconds so a deadlock fails the test instead of hanging CI.
///
/// A worker panic and a timeout are reported as distinct failures. Note
/// that a worker that panics before reaching the barrier leaves the others
/// waiting on it, so that failure still surfaces as a timeout.
#[test]
fn test_image_processors_concurrent_operations() {
    use std::sync::mpsc::{self, RecvTimeoutError};
    use std::sync::{Arc, Barrier};
    use std::time::Duration;

    const N: usize = 4;
    const ROUNDS: usize = 10;
    const TIMEOUT: Duration = Duration::from_secs(60);

    let (tx, rx) = mpsc::channel::<()>();

    std::thread::spawn(move || {
        let barrier = Arc::new(Barrier::new(N));

        let handles: Vec<_> = (0..N)
            .map(|i| {
                let barrier = Arc::clone(&barrier);
                std::thread::spawn(move || {
                    let mut proc = ImageProcessor::new().unwrap_or_else(|e| {
                        panic!("ImageProcessor::new() failed on thread {i}: {e}")
                    });

                    // All threads wait here until every processor is initialized.
                    barrier.wait();

                    // From here on all workers convert at the same time.
                    for round in 0..ROUNDS {
                        let src = proc
                            .create_image(128, 128, PixelFormat::Rgb, DType::U8, None)
                            .unwrap_or_else(|e| {
                                panic!("create src failed on thread {i} round {round}: {e}")
                            });
                        let mut dst = proc
                            .create_image(64, 64, PixelFormat::Rgb, DType::U8, None)
                            .unwrap_or_else(|e| {
                                panic!("create dst failed on thread {i} round {round}: {e}")
                            });
                        proc.convert(
                            &src,
                            &mut dst,
                            Rotation::None,
                            Flip::None,
                            Crop::default(),
                        )
                        .unwrap_or_else(|e| {
                            panic!("convert failed on thread {i} round {round}: {e}")
                        });
                        assert_eq!(dst.width(), Some(64));
                        assert_eq!(dst.height(), Some(64));
                    }
                })
            })
            .collect();

        for (i, h) in handles.into_iter().enumerate() {
            if let Err(payload) = h.join() {
                // The payload is `Box<dyn Any>`; its `Debug` output carries
                // no information, so recover the panic message when it is a
                // string.
                let msg = payload
                    .downcast_ref::<String>()
                    .map(String::as_str)
                    .or_else(|| payload.downcast_ref::<&str>().copied())
                    .unwrap_or("<non-string panic payload>");
                panic!("thread {i} panicked: {msg}");
            }
        }

        let _ = tx.send(());
    });

    match rx.recv_timeout(TIMEOUT) {
        Ok(()) => {}
        Err(RecvTimeoutError::Timeout) => {
            panic!("test_image_processors_concurrent_operations timed out after {TIMEOUT:?}")
        }
        // The sender was dropped without sending: the coordinator unwound
        // because a worker panicked, which is not a timeout.
        Err(RecvTimeoutError::Disconnected) => panic!(
            "test_image_processors_concurrent_operations failed: a worker thread panicked \
             (see the panic output above)"
        ),
    }
}
7050}