Skip to main content

edgefirst_image/
lib.rs

1// SPDX-FileCopyrightText: Copyright 2025 Au-Zone Technologies
2// SPDX-License-Identifier: Apache-2.0
3
4/*!
5
6## EdgeFirst HAL - Image Converter
7
8The `edgefirst_image` crate is part of the EdgeFirst Hardware Abstraction
9Layer (HAL) and provides functionality for converting images between
10different formats and sizes.  The crate is designed to work with hardware
11acceleration when available, but also provides a CPU-based fallback for
12environments where hardware acceleration is not present or not suitable.
13
14The main features of the `edgefirst_image` crate include:
15- Support for various image formats, including YUYV, RGB, RGBA, and GREY.
16- Support for source crop, destination crop, rotation, and flipping.
17- Image conversion using hardware acceleration (G2D, OpenGL) when available.
18- CPU-based image conversion as a fallback option.
19
20The crate uses [`TensorDyn`] from `edgefirst_tensor` to represent images,
21with [`PixelFormat`] metadata describing the pixel layout. The
22[`ImageProcessor`] struct manages the conversion process, selecting
23the appropriate conversion method based on the available hardware.
24
25## Examples
26
27```rust
28# use edgefirst_image::{ImageProcessor, Rotation, Flip, Crop, ImageProcessorTrait};
29# use edgefirst_codec::{peek_info, ImageDecoder, ImageLoad, DecodeOptions};
30# use edgefirst_tensor::{PixelFormat, DType, Tensor, TensorMemory};
31# fn main() -> Result<(), edgefirst_image::Error> {
32let image = edgefirst_bench::testdata::read("zidane.jpg");
33let opts = DecodeOptions::default().with_format(PixelFormat::Rgba);
34let info = peek_info(&image, &opts).expect("peek");
35let mut src = Tensor::<u8>::image(info.width, info.height, info.format,
36                                   Some(TensorMemory::Mem))?;
37let mut decoder = ImageDecoder::new();
38src.load_image(&mut decoder, &image, &opts).expect("decode");
39let mut converter = ImageProcessor::new()?;
40let mut dst = converter.create_image(640, 480, PixelFormat::Rgb, DType::U8, None)?;
41converter.convert(&src.into(), &mut dst, Rotation::None, Flip::None, Crop::default())?;
42# Ok(())
43# }
44```
45
46## Environment Variables
47The behavior of the `edgefirst_image::ImageProcessor` struct can be influenced by the
48following environment variables:
49- `EDGEFIRST_FORCE_BACKEND`: When set to `cpu`, `g2d`, or `opengl` (case-insensitive),
50  only that single backend is initialized and no fallback chain is used. If the
51  forced backend fails to initialize, an error is returned immediately. This is
52  useful for benchmarking individual backends in isolation. When this variable is
53  set, the `EDGEFIRST_DISABLE_*` variables are ignored.
54- `EDGEFIRST_DISABLE_GL`: If set to `1`, disables the use of OpenGL for image
55  conversion, forcing the use of CPU or other available hardware methods.
56- `EDGEFIRST_DISABLE_G2D`: If set to `1`, disables the use of G2D for image
57  conversion, forcing the use of CPU or other available hardware methods.
58- `EDGEFIRST_DISABLE_CPU`: If set to `1`, disables the use of CPU for image
59  conversion, forcing the use of hardware acceleration methods. If no hardware
60  acceleration methods are available, an error will be returned when attempting
61  to create an `ImageProcessor`.
62
Additionally, the `TensorMemory` used by default allocations can be controlled with the
`EDGEFIRST_TENSOR_FORCE_MEM` environment variable. If set to `1`, default tensor allocations
use system memory. This disables the use of specialized memory regions for tensors and
hardware acceleration, but it increases the performance of the CPU converter.
67*/
68#![cfg_attr(coverage_nightly, feature(coverage_attribute))]
69
70/// Pitch alignment requirement for DMA-BUF tensors that may be imported as
71/// EGLImages by the GL backend. Mali Valhall (i.MX 95 / G310) rejects
72/// `eglCreateImageKHR` with `EGL_BAD_ALLOC` for any DMA-BUF whose row pitch
73/// is not a multiple of 64 bytes; Vivante GC7000UL (i.MX 8MP) accepts any
74/// pitch so the constant is harmless on that path. 64 is the smallest
75/// alignment that satisfies every embedded ARM GPU we ship to.
76///
77/// Applied automatically inside [`ImageProcessor::create_image`] when the
78/// allocation lands on `TensorMemory::Dma`. External callers that allocate
79/// their own DMA-BUF tensors (e.g. GStreamer plugins, video pipelines) can
80/// use [`align_width_for_gpu_pitch`] to compute a width whose resulting row
81/// stride satisfies this requirement.
82pub const GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES: usize = 64;
83
84/// Round `width` (in pixels) up so the resulting row stride
85/// `width * bpp` is a multiple of [`GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES`]
86/// AND a multiple of `bpp` (so the rounded width is an integer pixel count).
87///
88/// `bpp` must be the per-pixel byte count for the image's primary plane
89/// (e.g. 4 for RGBA8/BGRA8, 3 for RGB888, 1 for Grey/NV12-luma).
90///
91/// External callers — GStreamer plugins, video pipelines, anyone wrapping a
92/// foreign DMA-BUF — should call this when sizing the destination so that
93/// `eglCreateImageKHR` doesn't reject the import on Mali. Pre-aligned widths
94/// (640, 1280, 1920, 3008, 3840 …) round-trip unchanged; misaligned widths
95/// are bumped up to the next valid value.
96///
97/// # Overflow behaviour
98///
99/// All arithmetic is checked. If the alignment computation or the rounded
100/// width would overflow `usize`, the function logs a warning and returns the
101/// original `width` unchanged rather than wrapping or producing a smaller
102/// value. Callers can rely on the returned width being **at least** the
103/// requested width.
104///
105/// `bpp == 0` and `width == 0` short-circuit to return the input unchanged.
106///
107/// # Examples
108///
109/// ```
110/// use edgefirst_image::align_width_for_gpu_pitch;
111///
112/// // RGBA8 (bpp=4): width must round to a multiple of 16 pixels (64-byte stride).
113/// assert_eq!(align_width_for_gpu_pitch(1920, 4), 1920); // already aligned
114/// assert_eq!(align_width_for_gpu_pitch(3004, 4), 3008); // crowd.png case: +4 px
115/// assert_eq!(align_width_for_gpu_pitch(1281, 4), 1296); // +15 px
116///
117/// // RGB888 (bpp=3): width must round to a multiple of 64 pixels (192-byte stride).
118/// assert_eq!(align_width_for_gpu_pitch(640, 3), 640);
119/// assert_eq!(align_width_for_gpu_pitch(641, 3), 704);
120/// ```
121pub fn align_width_for_gpu_pitch(width: usize, bpp: usize) -> usize {
122    if bpp == 0 || width == 0 {
123        return width;
124    }
125
126    // The minimum aligned stride must be a common multiple of both the
127    // GPU's pitch alignment and the per-pixel byte count. Using the LCM
128    // guarantees the rounded stride is an integer multiple of `bpp`, so
129    // converting back to a pixel count is exact.
130    //
131    // Compute the alignment in pixels (`width_alignment`) so we never need
132    // to multiply `width * bpp`, which is the only operation that could
133    // realistically overflow for large caller-supplied widths.
134    let Some(lcm_alignment) = checked_num_integer_lcm(GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES, bpp)
135    else {
136        log::warn!(
137            "align_width_for_gpu_pitch: lcm({GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES}, {bpp}) \
138             overflows usize, returning unaligned width {width}"
139        );
140        return width;
141    };
142    if lcm_alignment == 0 {
143        return width;
144    }
145
146    debug_assert_eq!(lcm_alignment % bpp, 0);
147    let width_alignment = lcm_alignment / bpp;
148    if width_alignment == 0 {
149        return width;
150    }
151
152    let remainder = width % width_alignment;
153    if remainder == 0 {
154        return width;
155    }
156
157    let pad = width_alignment - remainder;
158    match width.checked_add(pad) {
159        Some(aligned) => aligned,
160        None => {
161            log::warn!(
162                "align_width_for_gpu_pitch: width {width} + pad {pad} overflows usize, \
163                 returning unaligned (caller should use a smaller width or pre-aligned size)"
164            );
165            width
166        }
167    }
168}
169
170/// Round `min_pitch_bytes` up to the next multiple of
171/// [`GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES`]. Returns `None` if the rounded
172/// value would overflow `usize`. Returns `Some(0)` for input 0.
173///
174/// Used internally by [`ImageProcessor::create_image`] to compute the
175/// padded row stride for DMA-backed image allocations. External callers
176/// that need pixel-counted alignment (instead of raw byte pitch) should
177/// use [`align_width_for_gpu_pitch`] instead.
178#[cfg(target_os = "linux")]
179pub(crate) fn align_pitch_bytes_to_gpu_alignment(min_pitch_bytes: usize) -> Option<usize> {
180    let alignment = GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES;
181    if min_pitch_bytes == 0 {
182        return Some(0);
183    }
184    let remainder = min_pitch_bytes % alignment;
185    if remainder == 0 {
186        return Some(min_pitch_bytes);
187    }
188    min_pitch_bytes.checked_add(alignment - remainder)
189}
190
191/// Overflow-safe least common multiple. Returns `None` when `(a / gcd) * b`
192/// would wrap.
193fn checked_num_integer_lcm(a: usize, b: usize) -> Option<usize> {
194    if a == 0 || b == 0 {
195        return Some(0);
196    }
197    let g = num_integer_gcd(a, b);
198    // a / g is exact (g divides a by definition) and at most a, so this
199    // division never panics. Only the subsequent multiply can overflow.
200    (a / g).checked_mul(b)
201}
202
/// Greatest common divisor of `a` and `b` (Euclid's algorithm).
///
/// Returns the other operand when one of them is zero, and `0` when both are.
fn num_integer_gcd(a: usize, b: usize) -> usize {
    let (mut x, mut y) = (a, b);
    // Replace (x, y) with (y, x mod y) until the remainder vanishes.
    while y != 0 {
        (x, y) = (y, x % y);
    }
    x
}
210
211/// Bytes-per-pixel for the primary plane of `format` at element size `elem`.
212/// Returns `None` for formats that don't have a single packed BPP (semi-planar
213/// chroma is handled separately, returning the luma-plane bpp).
214///
215/// External callers can use this together with [`align_width_for_gpu_pitch`]
216/// to size their own DMA-BUFs without having to remember per-format BPPs:
217///
218/// ```
219/// use edgefirst_image::{align_width_for_gpu_pitch, primary_plane_bpp};
220/// use edgefirst_tensor::PixelFormat;
221///
222/// let bpp = primary_plane_bpp(PixelFormat::Rgba, 1).unwrap();
223/// let aligned = align_width_for_gpu_pitch(3004, bpp);
224/// assert_eq!(aligned, 3008);
225/// ```
226pub fn primary_plane_bpp(format: PixelFormat, elem: usize) -> Option<usize> {
227    use edgefirst_tensor::PixelLayout;
228    match format.layout() {
229        PixelLayout::Packed => Some(format.channels() * elem),
230        PixelLayout::Planar => Some(elem),
231        // For NV12/NV16 the luma plane is single-channel so the pitch
232        // matches `elem`; the chroma plane uses the same pitch in bytes
233        // (UV is half-width but two interleaved channels = same pitch).
234        PixelLayout::SemiPlanar => Some(elem),
235        // `PixelLayout` is non-exhaustive — fall through unaligned for
236        // any future variant we don't yet recognise.
237        _ => None,
238    }
239}
240
241/// Return the GPU-aligned pitch in bytes when a DMA-backed image of
242/// `width × fmt` would need row-stride padding, or `None` when the
243/// natural pitch already satisfies `GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES`
244/// or the caller has explicitly requested non-DMA memory.
245///
246/// Mali G310 (i.MX 95) rejects `eglCreateImage` from DMA-BUFs whose
247/// `PLANE0_PITCH_EXT` is not a multiple of 64 bytes, surfacing as
248/// `EGL_BAD_ALLOC`. The `load_image_test_helper` test-only helper
249/// in this crate uses this to decide whether to allocate a tensor
250/// with padded row stride before invoking the decode path; production
251/// callers do the equivalent peek → allocate → decode dance themselves
252/// (see crate-level docs).
#[cfg(all(target_os = "linux", test))]
pub(crate) fn padded_dma_pitch_for(
    fmt: PixelFormat,
    width: usize,
    memory: &Option<TensorMemory>,
) -> Option<usize> {
    // Padding is only relevant when the allocation will land on DMA: either
    // the caller asked for it explicitly, or they left the choice to the
    // allocator and DMA is actually usable. Treating `None` as "DMA wanted"
    // unconditionally would turn a working image load into a hard failure on
    // hosts without DMA (sandboxed CI, missing `/dev/dma_heap`,
    // permission-denied containers), because `Tensor::image_with_stride(..., None)`
    // always routes through DMA allocation whereas `Tensor::image(..., None)`
    // would have fallen back to SHM/Mem there.
    let dma_targeted = match memory {
        Some(TensorMemory::Dma) => true,
        None => edgefirst_tensor::is_dma_available(),
        Some(_) => false,
    };
    if !dma_targeted {
        return None;
    }

    // Only packed layouts are padded: `Tensor::image_with_stride` rejects
    // planar / semi-planar formats, which carry per-plane pitches on import.
    if fmt.layout() != PixelLayout::Packed {
        return None;
    }

    let natural_pitch = width.checked_mul(primary_plane_bpp(fmt, 1)?)?;
    let padded_pitch = align_pitch_bytes_to_gpu_alignment(natural_pitch)?;
    // Report a pitch only when padding is really required.
    (padded_pitch > natural_pitch).then_some(padded_pitch)
}
288
289pub use cpu::CPUProcessor;
290pub use edgefirst_codec as codec;
291
292#[cfg(test)]
293use edgefirst_decoder::ProtoLayout;
294use edgefirst_decoder::{DetectBox, ProtoData, Segmentation};
295#[cfg(any(test, all(target_os = "linux", feature = "opengl")))]
296use edgefirst_tensor::Tensor;
297use edgefirst_tensor::{
298    DType, PixelFormat, PixelLayout, TensorDyn, TensorMemory, TensorTrait as _,
299};
300use enum_dispatch::enum_dispatch;
301pub use error::{Error, Result};
302#[cfg(target_os = "linux")]
303pub use g2d::G2DProcessor;
304#[cfg(target_os = "linux")]
305#[cfg(feature = "opengl")]
306pub use opengl_headless::GLProcessorThreaded;
307#[cfg(target_os = "linux")]
308#[cfg(feature = "opengl")]
309pub use opengl_headless::Int8InterpolationMode;
310#[cfg(target_os = "linux")]
311#[cfg(feature = "opengl")]
312pub use opengl_headless::{probe_egl_displays, EglDisplayInfo, EglDisplayKind};
313use std::{fmt::Display, time::Instant};
314
315mod cpu;
316mod error;
317mod g2d;
318#[path = "gl/mod.rs"]
319mod opengl_headless;
320
321// Use `edgefirst_tensor::PixelFormat` variants (Rgb, Rgba, Grey, etc.) and
322// `TensorDyn` / `Tensor<u8>` with `.format()` metadata instead.
323
/// Rotation applied by [`ImageProcessorTrait::convert`], in quarter turns.
///
/// Each discriminant equals the number of clockwise 90-degree steps.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Rotation {
    /// No rotation.
    None = 0,
    /// One quarter turn clockwise (90 degrees).
    Clockwise90 = 1,
    /// Half turn (180 degrees).
    Rotate180 = 2,
    /// One quarter turn counter-clockwise (270 degrees clockwise).
    CounterClockwise90 = 3,
}
impl Rotation {
    /// Builds a `Rotation` from a clockwise angle expressed in degrees.
    ///
    /// The angle is reduced modulo 360 first, so `450` is treated as `90`.
    ///
    /// # Panics
    /// Panics if the angle is not a multiple of 90.
    ///
    /// # Examples
    /// ```rust
    /// # use edgefirst_image::Rotation;
    /// let rotation = Rotation::from_degrees_clockwise(270);
    /// assert_eq!(rotation, Rotation::CounterClockwise90);
    /// ```
    pub fn from_degrees_clockwise(angle: usize) -> Rotation {
        let wrapped = angle % 360;
        if wrapped % 90 != 0 {
            panic!("rotation angle is not a multiple of 90");
        }
        // `wrapped` is one of 0, 90, 180, 270 at this point.
        match wrapped / 90 {
            0 => Rotation::None,
            1 => Rotation::Clockwise90,
            2 => Rotation::Rotate180,
            _ => Rotation::CounterClockwise90,
        }
    }
}
354
/// Mirroring applied by [`ImageProcessorTrait::convert`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Flip {
    /// Leave the image unflipped.
    None = 0,
    /// Vertical flip.
    Vertical = 1,
    /// Horizontal flip.
    Horizontal = 2,
}
361
/// Selects which value drives the palette lookup for each detected object.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum ColorMode {
    /// Palette index is the object's class label (`det.label`). Default.
    ///
    /// Keeps backward compatibility and suits semantic segmentation, where
    /// a color is expected to carry class meaning.
    #[default]
    Class,
    /// Palette index is the zero-based position of the detection in the loop.
    ///
    /// Every detected object gets its own color regardless of class, which
    /// is useful for instance segmentation.
    Instance,
    /// Palette index by track ID (future use; currently behaves like
    /// [`Instance`](Self::Instance)).
    Track,
}

impl ColorMode {
    /// Picks the palette index for one detection.
    ///
    /// `idx` is the detection's loop index and `label` its class label; the
    /// mode decides which of the two is returned.
    #[inline]
    pub fn index(self, idx: usize, label: usize) -> usize {
        match self {
            ColorMode::Class => label,
            ColorMode::Instance => idx,
            // Track IDs are not wired in yet, so fall back to the loop index.
            ColorMode::Track => idx,
        }
    }
}
391
392/// Controls the resolution and coordinate frame of masks produced by
393/// [`ImageProcessor::materialize_masks`].
394///
395/// - [`Proto`](Self::Proto) returns per-detection tiles at proto-plane
396///   resolution (e.g. 48×32 u8 for a typical COCO bbox on a 160×160 proto
397///   plane). This is the historical behavior of `materialize_masks` and the
398///   fastest path because no upsample runs inside HAL. Mask values are
399///   continuous sigmoid output quantized to `uint8 [0, 255]`.
400/// - [`Scaled`](Self::Scaled) returns per-detection tiles at caller-specified
401///   pixel resolution by upsampling the full proto plane once and cropping by
402///   bbox after sigmoid. The upsample uses bilinear interpolation with
403///   edge-clamp sampling — semantically equivalent to Ultralytics'
404///   `process_masks_retina` reference. When a `letterbox` is also passed to
405///   [`materialize_masks`], the inverse letterbox transform is applied during
406///   the upsample so mask pixels land in original-content coordinates
407///   (drop-in for overlay on the original image). Mask values are binary
408///   `uint8 {0, 255}` after thresholding sigmoid > 0.5 — interchangeable
409///   with `Proto` output via the same `> 127` test.
410///
411/// [`materialize_masks`]: ImageProcessor::materialize_masks
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum MaskResolution {
    /// One tile per detection, kept at proto-plane resolution (default).
    #[default]
    Proto,
    /// One tile per detection at `(width, height)` pixel resolution, in the
    /// coordinate frame selected by the `letterbox` parameter of
    /// [`ImageProcessor::materialize_masks`].
    Scaled {
        /// Pixel width of the output coordinate frame.
        width: u32,
        /// Pixel height of the output coordinate frame.
        height: u32,
    },
}
427
428/// Options for mask overlay rendering.
429///
430/// Controls how segmentation masks are composited onto the destination image:
431/// - `background`: when set, the background image is drawn first and masks
432///   are composited over it (result written to `dst`). When `None`, `dst` is
433///   cleared to `0x00000000` (fully transparent) before masks are drawn.
434///   **`dst` is always fully overwritten — its prior contents are never
435///   preserved.** Callers who used to pre-load an image into `dst` before
436///   calling `draw_decoded_masks` / `draw_proto_masks` must now supply that
437///   image via `background` instead (behaviour changed in v0.16.4).
438/// - `opacity`: scales the alpha of rendered mask colors. `1.0` (default)
439///   preserves the class color's alpha unchanged; `0.5` makes masks
440///   semi-transparent.
441/// - `color_mode`: controls whether colors are assigned by class label,
442///   instance index, or track ID. Defaults to [`ColorMode::Class`].
443#[derive(Debug, Clone, Copy)]
444pub struct MaskOverlay<'a> {
445    /// Compositing source image. Must have the same dimensions and pixel
446    /// format as `dst`. When `Some`, the output is `background + masks`.
447    /// When `None`, `dst` is cleared to `0x00000000` before masks are drawn.
448    pub background: Option<&'a TensorDyn>,
449    pub opacity: f32,
450    /// Normalized letterbox region `[xmin, ymin, xmax, ymax]` in model-input
451    /// space that contains actual image content (the rest is padding).
452    ///
453    /// When set, bounding boxes and mask coordinates from the decoder (which
454    /// are in model-input normalized space) are mapped back to the original
455    /// image coordinate space before rendering.
456    ///
457    /// Use [`with_letterbox_crop`](Self::with_letterbox_crop) to compute this
458    /// from the [`Crop`] that was used in the model input [`convert`](crate::ImageProcessorTrait::convert) call.
459    pub letterbox: Option<[f32; 4]>,
460    pub color_mode: ColorMode,
461}
462
463impl Default for MaskOverlay<'_> {
464    fn default() -> Self {
465        Self {
466            background: None,
467            opacity: 1.0,
468            letterbox: None,
469            color_mode: ColorMode::Class,
470        }
471    }
472}
473
474impl<'a> MaskOverlay<'a> {
475    pub fn new() -> Self {
476        Self::default()
477    }
478
479    /// Set the compositing source image.
480    ///
481    /// `bg` must have the same dimensions and pixel format as the `dst` passed
482    /// to [`draw_decoded_masks`](crate::ImageProcessorTrait::draw_decoded_masks) /
483    /// [`draw_proto_masks`](crate::ImageProcessorTrait::draw_proto_masks).
484    /// The output will be `bg + masks`. Without a background, `dst` is cleared
485    /// to `0x00000000`.
486    pub fn with_background(mut self, bg: &'a TensorDyn) -> Self {
487        self.background = Some(bg);
488        self
489    }
490
491    pub fn with_opacity(mut self, opacity: f32) -> Self {
492        self.opacity = opacity.clamp(0.0, 1.0);
493        self
494    }
495
496    pub fn with_color_mode(mut self, mode: ColorMode) -> Self {
497        self.color_mode = mode;
498        self
499    }
500
501    /// Set the letterbox transform from the [`Crop`] used when preparing the
502    /// model input, so that bounding boxes and masks are correctly mapped back
503    /// to the original image coordinate space during rendering.
504    ///
505    /// Pass the same `crop` that was given to
506    /// [`convert`](crate::ImageProcessorTrait::convert) along with the model
507    /// input dimensions (`model_w` × `model_h`).
508    ///
509    /// Has no effect when `crop.dst_rect` is `None` (no letterbox applied).
510    pub fn with_letterbox_crop(mut self, crop: &Crop, model_w: usize, model_h: usize) -> Self {
511        if let Some(r) = crop.dst_rect {
512            self.letterbox = Some([
513                r.left as f32 / model_w as f32,
514                r.top as f32 / model_h as f32,
515                (r.left + r.width) as f32 / model_w as f32,
516                (r.top + r.height) as f32 / model_h as f32,
517            ]);
518        }
519        self
520    }
521}
522
523/// Apply the inverse letterbox transform to a bounding box.
524///
525/// `letterbox` is `[lx0, ly0, lx1, ly1]` — the normalized region of the model
526/// input that contains actual image content (output of
527/// [`MaskOverlay::with_letterbox_crop`]).
528///
529/// Converts model-input-normalized coords to output-image-normalized coords,
530/// clamped to `[0.0, 1.0]`. Also canonicalises the bbox (ensures xmin ≤ xmax).
531#[inline]
532fn unletter_bbox(bbox: DetectBox, lb: [f32; 4]) -> DetectBox {
533    let b = bbox.bbox.to_canonical();
534    let [lx0, ly0, lx1, ly1] = lb;
535    let inv_w = if lx1 > lx0 { 1.0 / (lx1 - lx0) } else { 1.0 };
536    let inv_h = if ly1 > ly0 { 1.0 / (ly1 - ly0) } else { 1.0 };
537    DetectBox {
538        bbox: edgefirst_decoder::BoundingBox {
539            xmin: ((b.xmin - lx0) * inv_w).clamp(0.0, 1.0),
540            ymin: ((b.ymin - ly0) * inv_h).clamp(0.0, 1.0),
541            xmax: ((b.xmax - lx0) * inv_w).clamp(0.0, 1.0),
542            ymax: ((b.ymax - ly0) * inv_h).clamp(0.0, 1.0),
543        },
544        ..bbox
545    }
546}
547
548#[derive(Debug, Clone, Copy, PartialEq, Eq)]
549pub struct Crop {
550    pub src_rect: Option<Rect>,
551    pub dst_rect: Option<Rect>,
552    pub dst_color: Option<[u8; 4]>,
553}
554
555impl Default for Crop {
556    fn default() -> Self {
557        Crop::new()
558    }
559}
560impl Crop {
561    // Creates a new Crop with default values (no cropping).
562    pub fn new() -> Self {
563        Crop {
564            src_rect: None,
565            dst_rect: None,
566            dst_color: None,
567        }
568    }
569
570    // Sets the source rectangle for cropping.
571    pub fn with_src_rect(mut self, src_rect: Option<Rect>) -> Self {
572        self.src_rect = src_rect;
573        self
574    }
575
576    // Sets the destination rectangle for cropping.
577    pub fn with_dst_rect(mut self, dst_rect: Option<Rect>) -> Self {
578        self.dst_rect = dst_rect;
579        self
580    }
581
582    // Sets the destination color for areas outside the cropped region.
583    pub fn with_dst_color(mut self, dst_color: Option<[u8; 4]>) -> Self {
584        self.dst_color = dst_color;
585        self
586    }
587
588    // Creates a new Crop with no cropping.
589    pub fn no_crop() -> Self {
590        Crop::new()
591    }
592
593    /// Validate crop rectangles against explicit dimensions.
594    pub(crate) fn check_crop_dims(
595        &self,
596        src_w: usize,
597        src_h: usize,
598        dst_w: usize,
599        dst_h: usize,
600    ) -> Result<(), Error> {
601        let src_ok = self
602            .src_rect
603            .is_none_or(|r| r.left + r.width <= src_w && r.top + r.height <= src_h);
604        let dst_ok = self
605            .dst_rect
606            .is_none_or(|r| r.left + r.width <= dst_w && r.top + r.height <= dst_h);
607        match (src_ok, dst_ok) {
608            (true, true) => Ok(()),
609            (true, false) => Err(Error::CropInvalid(format!(
610                "Dest crop invalid: {:?}",
611                self.dst_rect
612            ))),
613            (false, true) => Err(Error::CropInvalid(format!(
614                "Src crop invalid: {:?}",
615                self.src_rect
616            ))),
617            (false, false) => Err(Error::CropInvalid(format!(
618                "Dest and Src crop invalid: {:?} {:?}",
619                self.dst_rect, self.src_rect
620            ))),
621        }
622    }
623
624    /// Validate crop rectangles against TensorDyn source and destination.
625    pub fn check_crop_dyn(
626        &self,
627        src: &edgefirst_tensor::TensorDyn,
628        dst: &edgefirst_tensor::TensorDyn,
629    ) -> Result<(), Error> {
630        self.check_crop_dims(
631            src.width().unwrap_or(0),
632            src.height().unwrap_or(0),
633            dst.width().unwrap_or(0),
634            dst.height().unwrap_or(0),
635        )
636    }
637}
638
639#[derive(Debug, Clone, Copy, PartialEq, Eq)]
640pub struct Rect {
641    pub left: usize,
642    pub top: usize,
643    pub width: usize,
644    pub height: usize,
645}
646
647impl Rect {
648    // Creates a new Rect with the specified left, top, width, and height.
649    pub fn new(left: usize, top: usize, width: usize, height: usize) -> Self {
650        Self {
651            left,
652            top,
653            width,
654            height,
655        }
656    }
657
658    // Checks if the rectangle is valid for the given TensorDyn image.
659    pub fn check_rect_dyn(&self, image: &TensorDyn) -> bool {
660        let w = image.width().unwrap_or(0);
661        let h = image.height().unwrap_or(0);
662        self.left + self.width <= w && self.top + self.height <= h
663    }
664}
665
666#[enum_dispatch(ImageProcessor)]
667pub trait ImageProcessorTrait {
668    /// Converts the source image to the destination image format and size. The
669    /// image is cropped first, then flipped, then rotated
670    ///
671    /// # Arguments
672    ///
673    /// * `dst` - The destination image to be converted to.
674    /// * `src` - The source image to convert from.
675    /// * `rotation` - The rotation to apply to the destination image.
676    /// * `flip` - Flips the image
677    /// * `crop` - An optional rectangle specifying the area to crop from the
678    ///   source image
679    ///
680    /// # Returns
681    ///
682    /// A `Result` indicating success or failure of the conversion.
683    fn convert(
684        &mut self,
685        src: &TensorDyn,
686        dst: &mut TensorDyn,
687        rotation: Rotation,
688        flip: Flip,
689        crop: Crop,
690    ) -> Result<()>;
691
692    /// Draw pre-decoded detection boxes and segmentation masks onto `dst`.
693    ///
694    /// Supports two segmentation modes based on the mask channel count:
695    /// - **Instance segmentation** (`C=1`): one `Segmentation` per detection,
696    ///   `segmentation` and `detect` are zipped.
697    /// - **Semantic segmentation** (`C>1`): a single `Segmentation` covering
698    ///   all classes; only the first element is used.
699    ///
700    /// # Format requirements
701    ///
702    /// - CPU backend: `dst` must be `RGBA` or `RGB`.
703    /// - OpenGL backend: `dst` must be `RGBA`, `BGRA`, or `RGB`.
704    /// - G2D backend: only produces the base frame (empty detections);
705    ///   returns `NotImplemented` when any detection or segmentation is
706    ///   supplied.
707    ///
708    /// # Output contract
709    ///
710    /// This function always fully writes `dst` — it never relies on the
711    /// caller having pre-cleared the destination. The four cases are:
712    ///
713    /// | detections | background | output                              |
714    /// |------------|------------|-------------------------------------|
715    /// | none       | none       | dst cleared to `0x00000000`         |
716    /// | none       | set        | dst ← background                    |
717    /// | set        | none       | masks drawn over cleared dst        |
718    /// | set        | set        | masks drawn over background         |
719    ///
720    /// Each backend implements this with its native primitives: G2D uses
721    /// `g2d_clear` / `g2d_blit`, OpenGL uses `glClear` / DMA-BUF GPU blit
722    /// plus the mask program, and CPU uses direct buffer fill / memcpy as
723    /// the terminal fallback. CPU-memcpy of DMA buffers is avoided on the
724    /// accelerated paths.
725    ///
726    /// An empty `segmentation` slice is valid — only bounding boxes are drawn.
727    ///
728    /// `overlay` controls compositing: `background` is the compositing source
729    /// (must match `dst` in size and format); `opacity` scales mask alpha.
730    ///
731    /// # Buffer aliasing
732    ///
733    /// `dst` and `overlay.background` must reference **distinct underlying
734    /// buffers**. An aliased pair returns [`Error::AliasedBuffers`] without
735    /// dispatching to any backend — the GL path would otherwise read and
736    /// write the same texture in a single draw, which is undefined behaviour
737    /// on most drivers. Aliasing is detected via
738    /// [`TensorDyn::aliases`](edgefirst_tensor::TensorDyn::aliases), which
739    /// catches both shared-allocation clones and separate imports over the
740    /// same dmabuf fd.
741    ///
742    /// # Migration from v0.16.3 and earlier
743    ///
744    /// Prior to v0.16.4 the call silently preserved `dst`'s contents on empty
745    /// detections. That invariant no longer holds — `dst` is always fully
746    /// written. Callers who pre-loaded an image into `dst` before calling this
747    /// function must now pass that image via `overlay.background` instead.
748    fn draw_decoded_masks(
749        &mut self,
750        dst: &mut TensorDyn,
751        detect: &[DetectBox],
752        segmentation: &[Segmentation],
753        overlay: MaskOverlay<'_>,
754    ) -> Result<()>;
755
756    /// Draw masks from proto data onto image (fused decode+draw).
757    ///
758    /// For YOLO segmentation models, this avoids materializing intermediate
759    /// `Array3<u8>` masks. The `ProtoData` contains mask coefficients and the
760    /// prototype tensor; the renderer computes `mask_coeff @ protos` directly
761    /// at the output resolution using bilinear sampling.
762    ///
763    /// `detect` and `proto_data.mask_coefficients` are expected to have the
764    /// same length. They are paired with zip, so a mismatch is not reported as
    /// an error — surplus entries in the longer one are silently ignored. An empty
765    /// `detect` slice is valid and produces the base frame — cleared or
766    /// background-blitted — via the selected backend's native primitive.
767    ///
768    /// # Format requirements and output contract
769    ///
770    /// Same as [`draw_decoded_masks`](Self::draw_decoded_masks), including
771    /// the "always fully writes dst" guarantee across all four
772    /// detection/background combinations.
773    ///
774    /// `overlay` controls compositing — see [`draw_decoded_masks`](Self::draw_decoded_masks).
    ///
    /// # Arguments
    ///
    /// * `dst` - Destination image; always fully written by the call.
    /// * `detect` - Detections, paired index-by-index with
    ///   `proto_data.mask_coefficients`.
    /// * `proto_data` - Mask coefficients plus the prototype tensor.
    /// * `overlay` - Compositing options (background source and mask opacity).
775    fn draw_proto_masks(
776        &mut self,
777        dst: &mut TensorDyn,
778        detect: &[DetectBox],
779        proto_data: &ProtoData,
780        overlay: MaskOverlay<'_>,
781    ) -> Result<()>;
782
783    /// Sets the colors used for rendering segmentation masks. Up to 20 colors
784    /// can be set.
    ///
    /// Each entry in `colors` is a 4-byte color (presumably RGBA — TODO
    /// confirm channel order against the backends).
    ///
    /// NOTE(review): what happens when more than 20 entries are passed is not
    /// visible from this declaration — confirm whether the extras are ignored
    /// or an error is returned.
785    fn set_class_colors(&mut self, colors: &[[u8; 4]]) -> Result<()>;
786}
787
788/// Configuration for [`ImageProcessor`] construction.
789///
790/// Use with [`ImageProcessor::with_config`] to override the default EGL
791/// display auto-detection and backend selection. The default configuration
792/// preserves the existing auto-detection behaviour.
793#[derive(Debug, Clone, Default)]
794pub struct ImageProcessorConfig {
795    /// Force OpenGL to use this EGL display type instead of auto-detecting.
796    ///
797    /// When `None`, the processor probes displays in priority order: GBM,
798    /// PlatformDevice, Default. Use [`probe_egl_displays`] to discover
799    /// which displays are available on the current system.
800    ///
801    /// Ignored when `EDGEFIRST_DISABLE_GL=1` is set.
802    #[cfg(target_os = "linux")]
803    #[cfg(feature = "opengl")]
804    pub egl_display: Option<EglDisplayKind>,
805
806    /// Preferred compute backend.
807    ///
808    /// When set to a specific backend (not [`ComputeBackend::Auto`]), the
809    /// processor initializes that backend plus CPU as a fallback.
    /// If the requested hardware backend fails to initialize (or is not
    /// available on this platform), a warning is logged and construction
    /// still succeeds with the CPU backend only.
810    /// This takes precedence over `EDGEFIRST_FORCE_BACKEND` and the
811    /// `EDGEFIRST_DISABLE_*` environment variables.
812    ///
813    /// - [`ComputeBackend::OpenGl`]: init OpenGL + CPU, skip G2D
814    /// - [`ComputeBackend::G2d`]: init G2D + CPU, skip OpenGL
815    /// - [`ComputeBackend::Cpu`]: init CPU only
816    /// - [`ComputeBackend::Auto`]: existing env-var-driven selection
817    pub backend: ComputeBackend,
818}
819
820/// Compute backend selection for [`ImageProcessor`].
821///
822/// Use with [`ImageProcessorConfig::backend`] to select which backend the
823/// processor should prefer. When a specific backend is selected, the
824/// processor initializes that backend plus CPU as a fallback. When `Auto`
825/// is used, the existing environment-variable-driven selection applies.
///
/// Selecting anything other than `Auto` makes
/// [`ImageProcessor::with_config`] skip the `EDGEFIRST_FORCE_BACKEND` and
/// `EDGEFIRST_DISABLE_*` environment variables entirely.
826#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
827pub enum ComputeBackend {
828    /// Auto-detect based on available hardware and environment variables.
829    #[default]
830    Auto,
831    /// CPU-only processing (no hardware acceleration).
832    Cpu,
833    /// Prefer G2D hardware blitter (+ CPU fallback).
    ///
    /// If G2D fails to initialize, or the platform is not Linux, a warning
    /// is logged and only the CPU backend is used.
834    G2d,
835    /// Prefer OpenGL ES (+ CPU fallback).
    ///
    /// If OpenGL fails to initialize, or the platform is not Linux, a warning
    /// is logged and only the CPU backend is used.
836    OpenGl,
837}
838
839/// Backend forced via the `EDGEFIRST_FORCE_BACKEND` environment variable
840/// (consulted only when [`ImageProcessorConfig::backend`] is `Auto`).
841///
842/// When set, the [`ImageProcessor`] only initializes and dispatches to the
843/// selected backend — no fallback chain is used.
///
/// NOTE(review): in `ImageProcessor::with_config`, a backend chosen through
/// [`ImageProcessorConfig::backend`] keeps the CPU fallback and leaves
/// `forced_backend` as `None`; only the environment variable produces a
/// `ForcedBackend` there. Confirm no other constructor sets it from config.
844#[derive(Debug, Clone, Copy, PartialEq, Eq)]
845pub(crate) enum ForcedBackend {
    /// Only the CPU backend is initialized.
846    Cpu,
    /// Only the G2D backend is initialized (no CPU backend).
847    G2d,
    /// Only the OpenGL backend is initialized (no CPU backend).
848    OpenGl,
849}
850
851/// Image converter that uses available hardware acceleration or CPU as a
852/// fallback.
853#[derive(Debug)]
854pub struct ImageProcessor {
855    /// CPU-based image converter as a fallback. This is `None` when the
856    /// EDGEFIRST_DISABLE_CPU environment variable is set, and also when
    /// EDGEFIRST_FORCE_BACKEND selects `g2d` or `opengl` (a forced hardware
    /// backend is created without a CPU backend).
857    pub cpu: Option<CPUProcessor>,
858
859    #[cfg(target_os = "linux")]
860    /// G2D-based image converter for Linux systems. This is only available if
861    /// the EDGEFIRST_DISABLE_G2D environment variable is not set and libg2d.so
862    /// is available.
863    pub g2d: Option<G2DProcessor>,
864    #[cfg(target_os = "linux")]
865    #[cfg(feature = "opengl")]
866    /// OpenGL-based image converter for Linux systems. This is only available
867    /// if the EDGEFIRST_DISABLE_GL environment variable is not set and OpenGL
868    /// ES is available.
869    pub opengl: Option<GLProcessorThreaded>,
870
871    /// When set, only the specified backend is used — no fallback chain.
    /// `None` for every processor built from an explicit
    /// `ImageProcessorConfig::backend` or from the `EDGEFIRST_DISABLE_*` path.
872    pub(crate) forced_backend: Option<ForcedBackend>,
873}
874
// SAFETY: NOTE(review): no justification is recorded here. These impls assert
// that every field of `ImageProcessor` (`CPUProcessor`, `G2DProcessor`,
// `GLProcessorThreaded`) may be moved to, and shared between, threads.
// Confirm that each backend upholds this (e.g. that raw handles it owns are
// not tied to the creating thread) and document the invariant.
875unsafe impl Send for ImageProcessor {}
876unsafe impl Sync for ImageProcessor {}
877
878impl ImageProcessor {
879    /// Creates a new `ImageProcessor` instance, initializing available
880    /// hardware converters based on the system capabilities and environment
881    /// variables.
    ///
    /// Equivalent to [`ImageProcessor::with_config`] with
    /// `ImageProcessorConfig::default()`, i.e. [`ComputeBackend::Auto`].
    ///
    /// # Errors
    ///
    /// Returns whatever `with_config` returns for the default configuration:
    /// [`Error::ForcedBackendUnavailable`] when `EDGEFIRST_FORCE_BACKEND` is
    /// set to an unknown value or to a backend that cannot be initialized.
882    ///
883    /// # Examples
884    /// ```rust,no_run
885    /// # use edgefirst_image::{ImageProcessor, Rotation, Flip, Crop, ImageProcessorTrait};
886    /// # use edgefirst_codec::{peek_info, ImageDecoder, ImageLoad, DecodeOptions};
887    /// # use edgefirst_tensor::{PixelFormat, DType, Tensor, TensorMemory};
888    /// # fn main() -> Result<(), edgefirst_image::Error> {
889    /// let image = std::fs::read("zidane.jpg")?;
890    /// let opts = DecodeOptions::default().with_format(PixelFormat::Rgba);
891    /// let info = peek_info(&image, &opts).expect("peek");
892    /// let mut src = Tensor::<u8>::image(info.width, info.height, info.format,
893    ///                                    Some(TensorMemory::Mem))?;
894    /// let mut decoder = ImageDecoder::new();
895    /// src.load_image(&mut decoder, &image, &opts).expect("decode");
896    /// let mut converter = ImageProcessor::new()?;
897    /// let mut dst = converter.create_image(640, 480, PixelFormat::Rgb, DType::U8, None)?;
898    /// converter.convert(&src.into(), &mut dst, Rotation::None, Flip::None, Crop::default())?;
899    /// # Ok(())
900    /// # }
901    /// ```
902    pub fn new() -> Result<Self> {
903        Self::with_config(ImageProcessorConfig::default())
904    }
905
906    /// Creates a new `ImageProcessor` with the given configuration.
907    ///
908    /// When [`ImageProcessorConfig::backend`] is set to a specific backend,
909    /// environment variables are ignored and the processor initializes the
910    /// requested backend plus CPU as a fallback.
911    ///
912    /// When `Auto`, the existing `EDGEFIRST_FORCE_BACKEND` and
913    /// `EDGEFIRST_DISABLE_*` environment variables apply.
    ///
    /// # Environment variables (`Auto` only)
    ///
    /// * `EDGEFIRST_FORCE_BACKEND` — `cpu`, `g2d` or `opengl`
    ///   (case-insensitive). Only that backend is created; a forced `g2d` or
    ///   `opengl` processor has no CPU backend.
    /// * `EDGEFIRST_DISABLE_G2D`, `EDGEFIRST_DISABLE_GL`,
    ///   `EDGEFIRST_DISABLE_CPU` — a flag counts as set for any value other
    ///   than exactly `0` or a case-insensitive `false`.
    ///
    /// # Errors
    ///
    /// Returns [`Error::ForcedBackendUnavailable`] when
    /// `EDGEFIRST_FORCE_BACKEND` holds an unrecognised value, or names a
    /// backend that is not available on this platform/build or fails to
    /// initialize. Every other path returns `Ok`: a hardware backend that
    /// fails to initialize is logged and left as `None`.
    // NOTE(review): the reason for this allow is not evident from here —
    // `config` is always read via `config.backend`. Confirm which cfg
    // combination triggers the warning and record it.
914    #[allow(unused_variables)]
915    pub fn with_config(config: ImageProcessorConfig) -> Result<Self> {
916        // ── Config-driven backend selection ──────────────────────────
917        // When the caller explicitly requests a backend via the config,
918        // skip all environment variable logic.
919        match config.backend {
920            ComputeBackend::Cpu => {
921                log::info!("ComputeBackend::Cpu — CPU only");
922                return Ok(Self {
923                    cpu: Some(CPUProcessor::new()),
924                    #[cfg(target_os = "linux")]
925                    g2d: None,
926                    #[cfg(target_os = "linux")]
927                    #[cfg(feature = "opengl")]
928                    opengl: None,
929                    forced_backend: None,
930                });
931            }
932            ComputeBackend::G2d => {
933                log::info!("ComputeBackend::G2d — G2D + CPU fallback");
934                #[cfg(target_os = "linux")]
935                {
                    // Init failure is not fatal here: warn and continue with
                    // CPU only.
936                    let g2d = match G2DProcessor::new() {
937                        Ok(g) => Some(g),
938                        Err(e) => {
939                            log::warn!("G2D requested but failed to initialize: {e:?}");
940                            None
941                        }
942                    };
943                    return Ok(Self {
944                        cpu: Some(CPUProcessor::new()),
945                        g2d,
946                        #[cfg(feature = "opengl")]
947                        opengl: None,
948                        forced_backend: None,
949                    });
950                }
951                #[cfg(not(target_os = "linux"))]
952                {
953                    log::warn!("G2D requested but not available on this platform, using CPU");
954                    return Ok(Self {
955                        cpu: Some(CPUProcessor::new()),
956                        forced_backend: None,
957                    });
958                }
959            }
960            ComputeBackend::OpenGl => {
961                log::info!("ComputeBackend::OpenGl — OpenGL + CPU fallback");
962                #[cfg(target_os = "linux")]
963                {
964                    #[cfg(feature = "opengl")]
965                    let opengl = match GLProcessorThreaded::new(config.egl_display) {
966                        Ok(gl) => Some(gl),
967                        Err(e) => {
968                            log::warn!("OpenGL requested but failed to initialize: {e:?}");
969                            None
970                        }
971                    };
                    // NOTE(review): on a Linux build without the `opengl`
                    // feature this returns a CPU-only processor without any
                    // warning, unlike the non-Linux branch below.
972                    return Ok(Self {
973                        cpu: Some(CPUProcessor::new()),
974                        g2d: None,
975                        #[cfg(feature = "opengl")]
976                        opengl,
977                        forced_backend: None,
978                    });
979                }
980                #[cfg(not(target_os = "linux"))]
981                {
982                    log::warn!("OpenGL requested but not available on this platform, using CPU");
983                    return Ok(Self {
984                        cpu: Some(CPUProcessor::new()),
985                        forced_backend: None,
986                    });
987                }
988            }
989            ComputeBackend::Auto => { /* fall through to env-var logic below */ }
990        }
991
992        // ── EDGEFIRST_FORCE_BACKEND ──────────────────────────────────
993        // When set, only the requested backend is initialised and no
994        // fallback chain is used. Accepted values (case-insensitive):
995        //   "cpu", "g2d", "opengl"
996        if let Ok(val) = std::env::var("EDGEFIRST_FORCE_BACKEND") {
997            let val_lower = val.to_lowercase();
998            let forced = match val_lower.as_str() {
999                "cpu" => ForcedBackend::Cpu,
1000                "g2d" => ForcedBackend::G2d,
1001                "opengl" => ForcedBackend::OpenGl,
1002                other => {
1003                    return Err(Error::ForcedBackendUnavailable(format!(
1004                        "unknown EDGEFIRST_FORCE_BACKEND value: {other:?} (expected cpu, g2d, or opengl)"
1005                    )));
1006                }
1007            };
1008
1009            log::info!("EDGEFIRST_FORCE_BACKEND={val} — only initializing {val_lower} backend");
1010
            // Unlike the config-driven path above, an init failure here is a
            // hard error and no CPU backend is created for g2d/opengl.
1011            return match forced {
1012                ForcedBackend::Cpu => Ok(Self {
1013                    cpu: Some(CPUProcessor::new()),
1014                    #[cfg(target_os = "linux")]
1015                    g2d: None,
1016                    #[cfg(target_os = "linux")]
1017                    #[cfg(feature = "opengl")]
1018                    opengl: None,
1019                    forced_backend: Some(ForcedBackend::Cpu),
1020                }),
1021                ForcedBackend::G2d => {
1022                    #[cfg(target_os = "linux")]
1023                    {
1024                        let g2d = G2DProcessor::new().map_err(|e| {
1025                            Error::ForcedBackendUnavailable(format!(
1026                                "g2d forced but failed to initialize: {e:?}"
1027                            ))
1028                        })?;
1029                        Ok(Self {
1030                            cpu: None,
1031                            g2d: Some(g2d),
1032                            #[cfg(feature = "opengl")]
1033                            opengl: None,
1034                            forced_backend: Some(ForcedBackend::G2d),
1035                        })
1036                    }
1037                    #[cfg(not(target_os = "linux"))]
1038                    {
1039                        Err(Error::ForcedBackendUnavailable(
1040                            "g2d backend is only available on Linux".into(),
1041                        ))
1042                    }
1043                }
1044                ForcedBackend::OpenGl => {
1045                    #[cfg(target_os = "linux")]
1046                    #[cfg(feature = "opengl")]
1047                    {
1048                        let opengl = GLProcessorThreaded::new(config.egl_display).map_err(|e| {
1049                            Error::ForcedBackendUnavailable(format!(
1050                                "opengl forced but failed to initialize: {e:?}"
1051                            ))
1052                        })?;
1053                        Ok(Self {
1054                            cpu: None,
1055                            g2d: None,
1056                            opengl: Some(opengl),
1057                            forced_backend: Some(ForcedBackend::OpenGl),
1058                        })
1059                    }
1060                    #[cfg(not(all(target_os = "linux", feature = "opengl")))]
1061                    {
1062                        Err(Error::ForcedBackendUnavailable(
1063                            "opengl backend requires Linux with the 'opengl' feature enabled"
1064                                .into(),
1065                        ))
1066                    }
1067                }
1068            };
1069        }
1070
1071        // ── EDGEFIRST_DISABLE_* opt-outs ─────────────────────────────
        // A flag counts as set for any value other than exactly "0" or a
        // case-insensitive "false"; an unset (or unreadable) variable means
        // "not disabled".
1072        #[cfg(target_os = "linux")]
1073        let g2d = if std::env::var("EDGEFIRST_DISABLE_G2D")
1074            .map(|x| x != "0" && x.to_lowercase() != "false")
1075            .unwrap_or(false)
1076        {
1077            log::debug!("EDGEFIRST_DISABLE_G2D is set");
1078            None
1079        } else {
1080            match G2DProcessor::new() {
1081                Ok(g2d_converter) => Some(g2d_converter),
1082                Err(err) => {
1083                    log::warn!("Failed to initialize G2D converter: {err:?}");
1084                    None
1085                }
1086            }
1087        };
1088
1089        #[cfg(target_os = "linux")]
1090        #[cfg(feature = "opengl")]
1091        let opengl = if std::env::var("EDGEFIRST_DISABLE_GL")
1092            .map(|x| x != "0" && x.to_lowercase() != "false")
1093            .unwrap_or(false)
1094        {
1095            log::debug!("EDGEFIRST_DISABLE_GL is set");
1096            None
1097        } else {
1098            match GLProcessorThreaded::new(config.egl_display) {
1099                Ok(gl_converter) => Some(gl_converter),
1100                Err(err) => {
1101                    log::warn!("Failed to initialize GL converter: {err:?}");
1102                    None
1103                }
1104            }
1105        };
1106
        // NOTE(review): with EDGEFIRST_DISABLE_CPU set and no hardware
        // backend available, every backend field ends up `None` and this
        // still returns `Ok` — confirm that callers handle that.
1107        let cpu = if std::env::var("EDGEFIRST_DISABLE_CPU")
1108            .map(|x| x != "0" && x.to_lowercase() != "false")
1109            .unwrap_or(false)
1110        {
1111            log::debug!("EDGEFIRST_DISABLE_CPU is set");
1112            None
1113        } else {
1114            Some(CPUProcessor::new())
1115        };
1116        Ok(Self {
1117            cpu,
1118            #[cfg(target_os = "linux")]
1119            g2d,
1120            #[cfg(target_os = "linux")]
1121            #[cfg(feature = "opengl")]
1122            opengl,
1123            forced_backend: None,
1124        })
1125    }
1126
1127    /// Sets the interpolation mode for int8 proto textures on the OpenGL
1128    /// backend. No-op if OpenGL is not available.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the OpenGL backend's own
    /// `set_int8_interpolation_mode`. Returns `Ok(())` when there is no
    /// OpenGL backend.
1129    #[cfg(target_os = "linux")]
1130    #[cfg(feature = "opengl")]
1131    pub fn set_int8_interpolation_mode(&mut self, mode: Int8InterpolationMode) -> Result<()> {
1132        if let Some(ref mut gl) = self.opengl {
1133            gl.set_int8_interpolation_mode(mode)?;
1134        }
1135        Ok(())
1136    }
1137
1138    /// Create a [`TensorDyn`] image with the best available memory backend.
1139    ///
1140    /// Priority: DMA-buf → PBO (byte-sized types: u8, i8) → system memory.
    /// The DMA step is skipped when the OpenGL backend reports PBO as its
    /// transfer backend. All of this applies only when `memory` is `None`.
1141    ///
1142    /// Use this method instead of [`TensorDyn::image()`] when the tensor will
1143    /// be used with [`ImageProcessor::convert()`]. It selects the optimal
1144    /// memory backing (including PBO for GPU zero-copy) which direct
1145    /// allocation cannot achieve.
1146    ///
1147    /// This method is on [`ImageProcessor`] rather than [`ImageProcessorTrait`]
1148    /// because optimal allocation requires knowledge of the active compute
1149    /// backends (e.g. the GL context handle for PBO allocation). Individual
1150    /// backend implementations ([`CPUProcessor`], etc.) do not have this
1151    /// cross-backend visibility.
1152    ///
1153    /// # Arguments
1154    ///
1155    /// * `width` - Image width in pixels
1156    /// * `height` - Image height in pixels
1157    /// * `format` - Pixel format
1158    /// * `dtype` - Element data type (e.g. `DType::U8`, `DType::I8`)
1159    /// * `memory` - Optional memory type override; when `None`, the best
1160    ///   available backend is selected automatically.
1161    ///
1162    /// # Returns
1163    ///
1164    /// A [`TensorDyn`] backed by the highest-performance memory type
1165    /// available on this system.
1166    ///
1167    /// # Pitch alignment for DMA-backed allocations
1168    ///
1169    /// DMA-BUF imports into the GL backend (Mali Valhall on i.MX 95
1170    /// specifically) require every row pitch to be a multiple of
1171    /// [`GPU_DMA_BUF_PITCH_ALIGNMENT_BYTES`] (currently 64). When this
1172    /// method lands on `TensorMemory::Dma`, the underlying allocation is
1173    /// silently padded so the row stride satisfies that requirement.
1174    ///
1175    /// **The user-requested `width` is preserved** — `tensor.width()`
1176    /// returns the same value you passed in. The padding is carried by
1177    /// [`TensorDyn::row_stride`] / `effective_row_stride()`, which the
1178    /// GL backend reads when importing the buffer as an EGLImage.
1179    /// Callers that compute byte offsets from the tensor must use the
1180    /// stride, not `width × bytes_per_pixel`; the CPU mapping spans the
1181    /// full `stride × height` bytes.
1182    ///
1183    /// Pre-aligned widths (640, 1280, 1920, 3008, 3840 …) allocate
1184    /// exactly `width × bpp × height` bytes with no padding. PBO and
1185    /// Mem fallbacks never pad — they don't go through EGLImage import.
1186    ///
1187    /// See also [`align_width_for_gpu_pitch`] for an advisory helper
1188    /// that external callers (GStreamer plugins, video pipelines) can
1189    /// use to size their own DMA-BUFs for GL compatibility.
1190    ///
1191    /// # Errors
1192    ///
1193    /// With an explicit `memory`, the error of that single allocation is
    /// returned directly — there is no fallback. With `memory == None`, DMA
    /// and PBO failures are swallowed (PBO failures are logged at debug
    /// level) and only a failure of the final system-memory allocation is
    /// returned.
1194    pub fn create_image(
1195        &self,
1196        width: usize,
1197        height: usize,
1198        format: PixelFormat,
1199        dtype: DType,
1200        memory: Option<TensorMemory>,
1201    ) -> Result<TensorDyn> {
1202        // Compute the GPU-aligned row stride in bytes for this image.
1203        // `None` means either the format has no defined primary-plane bpp
1204        // (unknown future layout) or the stride calculation would overflow
1205        // — in both cases we fall back to the natural layout via the plain
1206        // `TensorDyn::image` constructor, and the slow-path warning inside
1207        // `draw_*_masks` will fire if the subsequent GL import fails.
1208        //
1209        // DMA allocation is Linux-only (see `TensorMemory::Dma` cfg gate),
1210        // so both the stride computation and the helper closure are gated
1211        // accordingly — the callers below are already Linux-only.
1212        #[cfg(target_os = "linux")]
1213        let dma_stride_bytes: Option<usize> = primary_plane_bpp(format, dtype.size())
1214            .and_then(|bpp| width.checked_mul(bpp))
1215            .and_then(align_pitch_bytes_to_gpu_alignment);
1216
1217        // Helper: allocate a DMA image, using the padded-stride constructor
1218        // when the computed stride exceeds the natural pitch, otherwise the
1219        // plain constructor (byte-identical result in the common case).
1220        #[cfg(target_os = "linux")]
1221        let try_dma = || -> Result<TensorDyn> {
1222            // Stride padding is only meaningful for packed pixel layouts
1223            // (RGBA8, BGRA8, RGB888, Grey) — the formats the GL backend
1224            // renders into. Semi-planar (NV12, NV16) and planar (PlanarRgb,
1225            // PlanarRgba) tensors go through `TensorDyn::image(...)` with
1226            // their natural layout; they're imported from camera capture
1227            // via `from_fd` far more often than allocated here, and
1228            // `Tensor::image_with_stride` explicitly rejects them.
1229            let packed = format.layout() == edgefirst_tensor::PixelLayout::Packed;
1230            match dma_stride_bytes {
                // Padded path: packed layout AND the aligned stride is
                // strictly larger than the natural `width * bpp` pitch.
1231                Some(stride)
1232                    if packed
1233                        && primary_plane_bpp(format, dtype.size())
1234                            .and_then(|bpp| width.checked_mul(bpp))
1235                            .is_some_and(|natural| stride > natural) =>
1236                {
1237                    log::debug!(
1238                        "create_image: padding row stride for {format:?} {width}x{height} \
1239                         from natural pitch to {stride} bytes for GPU alignment"
1240                    );
1241                    Ok(TensorDyn::image_with_stride(
1242                        width,
1243                        height,
1244                        format,
1245                        dtype,
1246                        stride,
1247                        Some(edgefirst_tensor::TensorMemory::Dma),
1248                    )?)
1249                }
                // Everything else (already aligned, non-packed, or no
                // computable stride) uses the natural layout.
1250                _ => Ok(TensorDyn::image(
1251                    width,
1252                    height,
1253                    format,
1254                    dtype,
1255                    Some(edgefirst_tensor::TensorMemory::Dma),
1256                )?),
1257            }
1258        };
1259
1260        // If an explicit memory type is requested, honour it directly.
1261        // On Linux, `TensorMemory::Dma` gets the padded-stride treatment;
1262        // other memory types take the user-requested width verbatim.
        // Errors are returned as-is: an explicit request never falls back.
1263        match memory {
1264            #[cfg(target_os = "linux")]
1265            Some(TensorMemory::Dma) => {
1266                return try_dma();
1267            }
1268            Some(mem) => {
1269                return Ok(TensorDyn::image(width, height, format, dtype, Some(mem))?);
1270            }
1271            None => {}
1272        }
1273
1274        // Try DMA first on Linux — skip only when GL has explicitly selected PBO
1275        // as the preferred transfer path (PBO is better than DMA in that case).
1276        #[cfg(target_os = "linux")]
1277        {
1278            #[cfg(feature = "opengl")]
1279            let gl_uses_pbo = self
1280                .opengl
1281                .as_ref()
1282                .is_some_and(|gl| gl.transfer_backend() == opengl_headless::TransferBackend::Pbo);
1283            #[cfg(not(feature = "opengl"))]
1284            let gl_uses_pbo = false;
1285
1286            if !gl_uses_pbo {
                // A DMA failure is discarded here (not logged); the next
                // strategy is tried instead.
1287                if let Ok(img) = try_dma() {
1288                    return Ok(img);
1289                }
1290            }
1291        }
1292
1293        // Try PBO (if GL available).
1294        // PBO buffers are u8-sized; the int8 shader emulates i8 output via
1295        // XOR 0x80 on the same underlying buffer, so both U8 and I8 work.
1296        #[cfg(target_os = "linux")]
1297        #[cfg(feature = "opengl")]
1298        if dtype.size() == 1 {
1299            if let Some(gl) = &self.opengl {
1300                match gl.create_pbo_image(width, height, format) {
1301                    Ok(t) => {
1302                        if dtype == DType::I8 {
1303                            // SAFETY: Tensor<u8> and Tensor<i8> are layout-
1304                            // identical (same element size, no T-dependent
1305                            // drop glue). The int8 shader applies XOR 0x80
1306                            // on the same PBO buffer. Same rationale as
1307                            // gl::processor::tensor_i8_as_u8_mut.
1308                            // Invariant: PBO tensors never have chroma
1309                            // (create_pbo_image → Tensor::wrap sets it None).
1310                            debug_assert!(
1311                                t.chroma().is_none(),
1312                                "PBO i8 transmute requires chroma == None"
1313                            );
1314                            let t_i8: Tensor<i8> = unsafe { std::mem::transmute(t) };
1315                            return Ok(TensorDyn::from(t_i8));
1316                        }
1317                        return Ok(TensorDyn::from(t));
1318                    }
1319                    Err(e) => log::debug!("PBO image creation failed, falling back to Mem: {e:?}"),
1320                }
1321            }
1322        }
1323
1324        // Fallback to Mem
        // This is the only allocation in the auto path whose error reaches
        // the caller.
1325        Ok(TensorDyn::image(
1326            width,
1327            height,
1328            format,
1329            dtype,
1330            Some(edgefirst_tensor::TensorMemory::Mem),
1331        )?)
1332    }
1333
1334    /// Import an external DMA-BUF image.
1335    ///
1336    /// Each [`PlaneDescriptor`] owns an already-duped fd; this method
1337    /// consumes the descriptors and takes ownership of those fds (whether
1338    /// the call succeeds or fails).
1339    ///
1340    /// The caller must ensure the DMA-BUF allocation is large enough for the
1341    /// specified width, height, format, and any stride/offset on the plane
1342    /// descriptors. No buffer-size validation is performed; an undersized
1343    /// buffer may cause GPU faults or EGL import failure.
1344    ///
1345    /// # Arguments
1346    ///
1347    /// * `image` - Plane descriptor for the primary (or only) plane
1348    /// * `chroma` - Optional plane descriptor for the UV chroma plane
1349    ///   (required for multiplane NV12)
1350    /// * `width` - Image width in pixels
1351    /// * `height` - Image height in pixels
1352    /// * `format` - Pixel format of the buffer
1353    /// * `dtype` - Element data type (e.g. `DType::U8`)
1354    ///
1355    /// # Returns
1356    ///
1357    /// A `TensorDyn` configured as an image.
1358    ///
1359    /// # Errors
1360    ///
1361    /// * [`Error::NotSupported`] if `chroma` is `Some` for a non-semi-planar
1362    ///   format, or multiplane NV16 (not yet supported), or the fd is not
1363    ///   DMA-backed
1364    /// * [`Error::InvalidShape`] if NV12 height is odd
1365    ///
1366    /// # Platform
1367    ///
1368    /// Linux only.
1369    ///
1370    /// # Examples
1371    ///
1372    /// ```rust,ignore
1373    /// use edgefirst_tensor::PlaneDescriptor;
1374    ///
1375    /// // Single-plane RGBA
1376    /// let pd = PlaneDescriptor::new(fd.as_fd())?;
1377    /// let src = proc.import_image(pd, None, 1920, 1080, PixelFormat::Rgba, DType::U8)?;
1378    ///
1379    /// // Multi-plane NV12 with stride
1380    /// let y_pd = PlaneDescriptor::new(y_fd.as_fd())?.with_stride(2048);
1381    /// let uv_pd = PlaneDescriptor::new(uv_fd.as_fd())?.with_stride(2048);
1382    /// let src = proc.import_image(y_pd, Some(uv_pd), 1920, 1080,
1383    ///                             PixelFormat::Nv12, DType::U8)?;
1384    /// ```
1385    #[cfg(target_os = "linux")]
1386    pub fn import_image(
1387        &self,
1388        image: edgefirst_tensor::PlaneDescriptor,
1389        chroma: Option<edgefirst_tensor::PlaneDescriptor>,
1390        width: usize,
1391        height: usize,
1392        format: PixelFormat,
1393        dtype: DType,
1394    ) -> Result<TensorDyn> {
1395        use edgefirst_tensor::{Tensor, TensorMemory};
1396
1397        // Capture stride/offset from descriptors before consuming them
1398        let image_stride = image.stride();
1399        let image_offset = image.offset();
1400        let chroma_stride = chroma.as_ref().and_then(|c| c.stride());
1401        let chroma_offset = chroma.as_ref().and_then(|c| c.offset());
1402
1403        if let Some(chroma_pd) = chroma {
1404            // ── Multiplane path ──────────────────────────────────────
1405            // Multiplane tensors are backed by Tensor<u8> (or transmuted to
1406            // Tensor<i8>). Reject other dtypes to avoid silently returning a
1407            // tensor with the wrong element type.
1408            if dtype != DType::U8 && dtype != DType::I8 {
1409                return Err(Error::NotSupported(format!(
1410                    "multiplane import only supports U8/I8, got {dtype:?}"
1411                )));
1412            }
1413            if format.layout() != PixelLayout::SemiPlanar {
1414                return Err(Error::NotSupported(format!(
1415                    "import_image with chroma requires a semi-planar format, got {format:?}"
1416                )));
1417            }
1418
1419            let chroma_h = match format {
1420                PixelFormat::Nv12 => {
1421                    if !height.is_multiple_of(2) {
1422                        return Err(Error::InvalidShape(format!(
1423                            "NV12 requires even height, got {height}"
1424                        )));
1425                    }
1426                    height / 2
1427                }
1428                // NV16 multiplane will be supported in a future release;
1429                // the GL backend currently only handles NV12 plane1 attributes.
1430                PixelFormat::Nv16 => {
1431                    return Err(Error::NotSupported(
1432                        "multiplane NV16 is not yet supported; use contiguous NV16 instead".into(),
1433                    ))
1434                }
1435                _ => {
1436                    return Err(Error::NotSupported(format!(
1437                        "unsupported semi-planar format: {format:?}"
1438                    )))
1439                }
1440            };
1441
1442            let luma = Tensor::<u8>::from_fd(image.into_fd(), &[height, width], Some("luma"))?;
1443            if luma.memory() != TensorMemory::Dma {
1444                return Err(Error::NotSupported(format!(
1445                    "luma fd must be DMA-backed, got {:?}",
1446                    luma.memory()
1447                )));
1448            }
1449
1450            let chroma_tensor =
1451                Tensor::<u8>::from_fd(chroma_pd.into_fd(), &[chroma_h, width], Some("chroma"))?;
1452            if chroma_tensor.memory() != TensorMemory::Dma {
1453                return Err(Error::NotSupported(format!(
1454                    "chroma fd must be DMA-backed, got {:?}",
1455                    chroma_tensor.memory()
1456                )));
1457            }
1458
1459            // from_planes creates the combined tensor with format set,
1460            // preserving luma's row_stride (currently None since luma was raw).
1461            let mut tensor = Tensor::<u8>::from_planes(luma, chroma_tensor, format)?;
1462
1463            // Apply stride/offset to the combined tensor (luma plane)
1464            if let Some(s) = image_stride {
1465                tensor.set_row_stride(s)?;
1466            }
1467            if let Some(o) = image_offset {
1468                tensor.set_plane_offset(o);
1469            }
1470
1471            // Apply stride/offset to the chroma sub-tensor.
1472            // The chroma tensor is a raw 2D [chroma_h, width] tensor without
1473            // format metadata, so we validate stride manually rather than
1474            // using set_row_stride (which requires format).
1475            if let Some(chroma_ref) = tensor.chroma_mut() {
1476                if let Some(s) = chroma_stride {
1477                    if s < width {
1478                        return Err(Error::InvalidShape(format!(
1479                            "chroma stride {s} < minimum {width} for {format:?}"
1480                        )));
1481                    }
1482                    chroma_ref.set_row_stride_unchecked(s);
1483                }
1484                if let Some(o) = chroma_offset {
1485                    chroma_ref.set_plane_offset(o);
1486                }
1487            }
1488
1489            if dtype == DType::I8 {
1490                // SAFETY: Tensor<u8> and Tensor<i8> have identical layout because
1491                // the struct contains only type-erased storage (OwnedFd, shape, name),
1492                // no inline T values. This assertion catches layout drift at compile time.
1493                const {
1494                    assert!(std::mem::size_of::<Tensor<u8>>() == std::mem::size_of::<Tensor<i8>>());
1495                    assert!(
1496                        std::mem::align_of::<Tensor<u8>>() == std::mem::align_of::<Tensor<i8>>()
1497                    );
1498                }
1499                let tensor_i8: Tensor<i8> = unsafe { std::mem::transmute(tensor) };
1500                return Ok(TensorDyn::from(tensor_i8));
1501            }
1502            Ok(TensorDyn::from(tensor))
1503        } else {
1504            // ── Single-plane path ────────────────────────────────────
1505            let shape = match format.layout() {
1506                PixelLayout::Packed => vec![height, width, format.channels()],
1507                PixelLayout::Planar => vec![format.channels(), height, width],
1508                PixelLayout::SemiPlanar => {
1509                    let total_h = match format {
1510                        PixelFormat::Nv12 => {
1511                            if !height.is_multiple_of(2) {
1512                                return Err(Error::InvalidShape(format!(
1513                                    "NV12 requires even height, got {height}"
1514                                )));
1515                            }
1516                            height * 3 / 2
1517                        }
1518                        PixelFormat::Nv16 => height * 2,
1519                        _ => {
1520                            return Err(Error::InvalidShape(format!(
1521                                "unknown semi-planar height multiplier for {format:?}"
1522                            )))
1523                        }
1524                    };
1525                    vec![total_h, width]
1526                }
1527                _ => {
1528                    return Err(Error::NotSupported(format!(
1529                        "unsupported pixel layout for import_image: {:?}",
1530                        format.layout()
1531                    )));
1532                }
1533            };
1534            let tensor = TensorDyn::from_fd(image.into_fd(), &shape, dtype, None)?;
1535            if tensor.memory() != TensorMemory::Dma {
1536                return Err(Error::NotSupported(format!(
1537                    "import_image requires DMA-backed fd, got {:?}",
1538                    tensor.memory()
1539                )));
1540            }
1541            let mut tensor = tensor.with_format(format)?;
1542            if let Some(s) = image_stride {
1543                tensor.set_row_stride(s)?;
1544            }
1545            if let Some(o) = image_offset {
1546                tensor.set_plane_offset(o);
1547            }
1548            Ok(tensor)
1549        }
1550    }
1551
1552    /// Decode model outputs and draw segmentation masks onto `dst`.
1553    ///
1554    /// This is the primary mask rendering API. The processor decodes via the
1555    /// provided [`Decoder`], selects the optimal rendering path (hybrid
1556    /// CPU+GL or fused GPU), and composites masks onto `dst`.
1557    ///
1558    /// Returns the detected bounding boxes.
1559    pub fn draw_masks(
1560        &mut self,
1561        decoder: &edgefirst_decoder::Decoder,
1562        outputs: &[&TensorDyn],
1563        dst: &mut TensorDyn,
1564        overlay: MaskOverlay<'_>,
1565    ) -> Result<Vec<DetectBox>> {
1566        let mut output_boxes = Vec::with_capacity(100);
1567
1568        // Try proto path first (fused rendering without materializing masks)
1569        let proto_result = decoder
1570            .decode_proto(outputs, &mut output_boxes)
1571            .map_err(|e| Error::Internal(format!("decode_proto: {e:#?}")))?;
1572
1573        if let Some(proto_data) = proto_result {
1574            self.draw_proto_masks(dst, &output_boxes, &proto_data, overlay)?;
1575        } else {
1576            // Detection-only or unsupported model: full decode + render
1577            let mut output_masks = Vec::with_capacity(100);
1578            decoder
1579                .decode(outputs, &mut output_boxes, &mut output_masks)
1580                .map_err(|e| Error::Internal(format!("decode: {e:#?}")))?;
1581            self.draw_decoded_masks(dst, &output_boxes, &output_masks, overlay)?;
1582        }
1583        Ok(output_boxes)
1584    }
1585
1586    /// Decode tracked model outputs and draw segmentation masks onto `dst`.
1587    ///
1588    /// Like [`draw_masks`](Self::draw_masks) but integrates a tracker for
1589    /// maintaining object identities across frames. The tracker runs after
1590    /// NMS but before mask extraction.
1591    ///
1592    /// Returns detected boxes and track info.
1593    #[cfg(feature = "tracker")]
1594    pub fn draw_masks_tracked<TR: edgefirst_tracker::Tracker<DetectBox>>(
1595        &mut self,
1596        decoder: &edgefirst_decoder::Decoder,
1597        tracker: &mut TR,
1598        timestamp: u64,
1599        outputs: &[&TensorDyn],
1600        dst: &mut TensorDyn,
1601        overlay: MaskOverlay<'_>,
1602    ) -> Result<(Vec<DetectBox>, Vec<edgefirst_tracker::TrackInfo>)> {
1603        let mut output_boxes = Vec::with_capacity(100);
1604        let mut output_tracks = Vec::new();
1605
1606        let proto_result = decoder
1607            .decode_proto_tracked(
1608                tracker,
1609                timestamp,
1610                outputs,
1611                &mut output_boxes,
1612                &mut output_tracks,
1613            )
1614            .map_err(|e| Error::Internal(format!("decode_proto_tracked: {e:#?}")))?;
1615
1616        if let Some(proto_data) = proto_result {
1617            self.draw_proto_masks(dst, &output_boxes, &proto_data, overlay)?;
1618        } else {
1619            // Note: decode_proto_tracked returns None for detection-only/ModelPack
1620            // models WITHOUT calling the tracker. The else branch below is the
1621            // first (and only) tracker call for those model types.
1622            let mut output_masks = Vec::with_capacity(100);
1623            decoder
1624                .decode_tracked(
1625                    tracker,
1626                    timestamp,
1627                    outputs,
1628                    &mut output_boxes,
1629                    &mut output_masks,
1630                    &mut output_tracks,
1631                )
1632                .map_err(|e| Error::Internal(format!("decode_tracked: {e:#?}")))?;
1633            self.draw_decoded_masks(dst, &output_boxes, &output_masks, overlay)?;
1634        }
1635        Ok((output_boxes, output_tracks))
1636    }
1637
1638    /// Materialize per-instance segmentation masks from raw prototype data.
1639    ///
1640    /// Computes `mask_coeff @ protos` with sigmoid activation for each detection,
1641    /// producing compact masks at prototype resolution (e.g., 160×160 crops).
1642    /// Mask values are continuous sigmoid confidence outputs quantized to u8
1643    /// (0 = background, 255 = full confidence), NOT binary thresholded.
1644    ///
1645    /// The returned [`Vec<Segmentation>`] can be:
1646    /// - Inspected or exported for analytics, IoU computation, etc.
1647    /// - Passed directly to [`ImageProcessorTrait::draw_decoded_masks`] for
1648    ///   GPU-interpolated rendering.
1649    ///
1650    /// # Performance Note
1651    ///
1652    /// Calling `materialize_masks` + `draw_decoded_masks` separately prevents
1653    /// the HAL from using its internal fused optimization path. For render-only
1654    /// use cases, prefer [`ImageProcessorTrait::draw_proto_masks`] which selects
1655    /// the fastest path automatically (currently 1.6×–27× faster on tested
1656    /// platforms). Use this method when you need access to the intermediate masks.
1657    ///
1658    /// # Errors
1659    ///
1660    /// Returns [`Error::NoConverter`] if the CPU backend is not available.
1661    pub fn materialize_masks(
1662        &mut self,
1663        detect: &[DetectBox],
1664        proto_data: &ProtoData,
1665        letterbox: Option<[f32; 4]>,
1666        resolution: MaskResolution,
1667    ) -> Result<Vec<Segmentation>> {
1668        let cpu = self.cpu.as_mut().ok_or(Error::NoConverter)?;
1669        match resolution {
1670            MaskResolution::Proto => cpu.materialize_segmentations(detect, proto_data, letterbox),
1671            MaskResolution::Scaled { width, height } => {
1672                cpu.materialize_scaled_segmentations(detect, proto_data, letterbox, width, height)
1673            }
1674        }
1675    }
1676}
1677
1678impl ImageProcessorTrait for ImageProcessor {
1679    /// Converts the source image to the destination image format and size. The
1680    /// image is cropped first, then flipped, then rotated
1681    ///
1682    /// Prefer hardware accelerators when available, falling back to CPU if
1683    /// necessary.
1684    fn convert(
1685        &mut self,
1686        src: &TensorDyn,
1687        dst: &mut TensorDyn,
1688        rotation: Rotation,
1689        flip: Flip,
1690        crop: Crop,
1691    ) -> Result<()> {
1692        let start = Instant::now();
1693        let src_fmt = src.format();
1694        let dst_fmt = dst.format();
1695        let _span = tracing::trace_span!(
1696            "image.convert",
1697            ?src_fmt,
1698            ?dst_fmt,
1699            src_memory = ?src.memory(),
1700            dst_memory = ?dst.memory(),
1701            ?rotation,
1702            ?flip,
1703        )
1704        .entered();
1705        log::trace!(
1706            "convert: {src_fmt:?}({:?}/{:?}) → {dst_fmt:?}({:?}/{:?}), \
1707             rotation={rotation:?}, flip={flip:?}, backend={:?}",
1708            src.dtype(),
1709            src.memory(),
1710            dst.dtype(),
1711            dst.memory(),
1712            self.forced_backend,
1713        );
1714
1715        // ── Forced backend: no fallback chain ────────────────────────
1716        if let Some(forced) = self.forced_backend {
1717            return match forced {
1718                ForcedBackend::Cpu => {
1719                    if let Some(cpu) = self.cpu.as_mut() {
1720                        let r = cpu.convert(src, dst, rotation, flip, crop);
1721                        log::trace!(
1722                            "convert: forced=cpu result={} ({:?})",
1723                            if r.is_ok() { "ok" } else { "err" },
1724                            start.elapsed()
1725                        );
1726                        return r;
1727                    }
1728                    Err(Error::ForcedBackendUnavailable("cpu".into()))
1729                }
1730                ForcedBackend::G2d => {
1731                    #[cfg(target_os = "linux")]
1732                    if let Some(g2d) = self.g2d.as_mut() {
1733                        let r = g2d.convert(src, dst, rotation, flip, crop);
1734                        log::trace!(
1735                            "convert: forced=g2d result={} ({:?})",
1736                            if r.is_ok() { "ok" } else { "err" },
1737                            start.elapsed()
1738                        );
1739                        return r;
1740                    }
1741                    Err(Error::ForcedBackendUnavailable("g2d".into()))
1742                }
1743                ForcedBackend::OpenGl => {
1744                    #[cfg(target_os = "linux")]
1745                    #[cfg(feature = "opengl")]
1746                    if let Some(opengl) = self.opengl.as_mut() {
1747                        let r = opengl.convert(src, dst, rotation, flip, crop);
1748                        log::trace!(
1749                            "convert: forced=opengl result={} ({:?})",
1750                            if r.is_ok() { "ok" } else { "err" },
1751                            start.elapsed()
1752                        );
1753                        return r;
1754                    }
1755                    Err(Error::ForcedBackendUnavailable("opengl".into()))
1756                }
1757            };
1758        }
1759
1760        // ── Auto fallback chain: OpenGL → G2D → CPU ──────────────────
1761        #[cfg(target_os = "linux")]
1762        #[cfg(feature = "opengl")]
1763        if let Some(opengl) = self.opengl.as_mut() {
1764            match opengl.convert(src, dst, rotation, flip, crop) {
1765                Ok(_) => {
1766                    log::trace!(
1767                        "convert: auto selected=opengl for {src_fmt:?}→{dst_fmt:?} ({:?})",
1768                        start.elapsed()
1769                    );
1770                    return Ok(());
1771                }
1772                Err(e) => {
1773                    log::trace!("convert: auto opengl declined {src_fmt:?}→{dst_fmt:?}: {e}");
1774                }
1775            }
1776        }
1777
1778        #[cfg(target_os = "linux")]
1779        if let Some(g2d) = self.g2d.as_mut() {
1780            match g2d.convert(src, dst, rotation, flip, crop) {
1781                Ok(_) => {
1782                    log::trace!(
1783                        "convert: auto selected=g2d for {src_fmt:?}→{dst_fmt:?} ({:?})",
1784                        start.elapsed()
1785                    );
1786                    return Ok(());
1787                }
1788                Err(e) => {
1789                    log::trace!("convert: auto g2d declined {src_fmt:?}→{dst_fmt:?}: {e}");
1790                }
1791            }
1792        }
1793
1794        if let Some(cpu) = self.cpu.as_mut() {
1795            match cpu.convert(src, dst, rotation, flip, crop) {
1796                Ok(_) => {
1797                    log::trace!(
1798                        "convert: auto selected=cpu for {src_fmt:?}→{dst_fmt:?} ({:?})",
1799                        start.elapsed()
1800                    );
1801                    return Ok(());
1802                }
1803                Err(e) => {
1804                    log::trace!("convert: auto cpu failed {src_fmt:?}→{dst_fmt:?}: {e}");
1805                    return Err(e);
1806                }
1807            }
1808        }
1809        Err(Error::NoConverter)
1810    }
1811
1812    fn draw_decoded_masks(
1813        &mut self,
1814        dst: &mut TensorDyn,
1815        detect: &[DetectBox],
1816        segmentation: &[Segmentation],
1817        overlay: MaskOverlay<'_>,
1818    ) -> Result<()> {
1819        let _span = tracing::trace_span!(
1820            "image.draw_decoded_masks",
1821            n_detections = detect.len(),
1822            n_segmentations = segmentation.len(),
1823        )
1824        .entered();
1825        let start = Instant::now();
1826
1827        if let Some(bg) = overlay.background {
1828            if bg.aliases(dst) {
1829                return Err(Error::AliasedBuffers(
1830                    "background must not reference the same buffer as dst".to_string(),
1831                ));
1832            }
1833        }
1834
1835        // Un-letterbox detect boxes and segmentation bboxes for rendering when
1836        // a letterbox was applied to prepare the model input.
1837        let lb_boxes: Vec<DetectBox>;
1838        let lb_segs: Vec<Segmentation>;
1839        let (detect, segmentation) = if let Some(lb) = overlay.letterbox {
1840            lb_boxes = detect.iter().map(|&d| unletter_bbox(d, lb)).collect();
1841            // Keep segmentation bboxes in sync with the transformed detect boxes
1842            // when we have a 1:1 correspondence (instance segmentation).
1843            lb_segs = if segmentation.len() == lb_boxes.len() {
1844                segmentation
1845                    .iter()
1846                    .zip(lb_boxes.iter())
1847                    .map(|(s, d)| Segmentation {
1848                        xmin: d.bbox.xmin,
1849                        ymin: d.bbox.ymin,
1850                        xmax: d.bbox.xmax,
1851                        ymax: d.bbox.ymax,
1852                        segmentation: s.segmentation.clone(),
1853                    })
1854                    .collect()
1855            } else {
1856                segmentation.to_vec()
1857            };
1858            (lb_boxes.as_slice(), lb_segs.as_slice())
1859        } else {
1860            (detect, segmentation)
1861        };
1862        #[cfg(target_os = "linux")]
1863        let is_empty_frame = detect.is_empty() && segmentation.is_empty();
1864
1865        // ── Forced backend: no fallback chain ────────────────────────
1866        if let Some(forced) = self.forced_backend {
1867            return match forced {
1868                ForcedBackend::Cpu => {
1869                    if let Some(cpu) = self.cpu.as_mut() {
1870                        return cpu.draw_decoded_masks(dst, detect, segmentation, overlay);
1871                    }
1872                    Err(Error::ForcedBackendUnavailable("cpu".into()))
1873                }
1874                ForcedBackend::G2d => {
1875                    // G2D can only produce empty frames (clear / bg blit).
1876                    // For populated frames it has no rasterizer — fail loudly.
1877                    #[cfg(target_os = "linux")]
1878                    if let Some(g2d) = self.g2d.as_mut() {
1879                        return g2d.draw_decoded_masks(dst, detect, segmentation, overlay);
1880                    }
1881                    Err(Error::ForcedBackendUnavailable("g2d".into()))
1882                }
1883                ForcedBackend::OpenGl => {
1884                    // GL handles background natively via GPU blit, and now
1885                    // actively clears when there is no background.
1886                    #[cfg(target_os = "linux")]
1887                    #[cfg(feature = "opengl")]
1888                    if let Some(opengl) = self.opengl.as_mut() {
1889                        return opengl.draw_decoded_masks(dst, detect, segmentation, overlay);
1890                    }
1891                    Err(Error::ForcedBackendUnavailable("opengl".into()))
1892                }
1893            };
1894        }
1895
1896        // ── Auto dispatch ──────────────────────────────────────────
1897        // Empty frames prefer G2D when available — a single g2d_clear or
1898        // g2d_blit is the cheapest HW path to produce the correct output
1899        // and avoids spinning up the GL pipeline every zero-detection
1900        // frame in a triple-buffered display loop.
1901        #[cfg(target_os = "linux")]
1902        if is_empty_frame {
1903            if let Some(g2d) = self.g2d.as_mut() {
1904                match g2d.draw_decoded_masks(dst, detect, segmentation, overlay) {
1905                    Ok(_) => {
1906                        log::trace!(
1907                            "draw_decoded_masks empty frame via g2d in {:?}",
1908                            start.elapsed()
1909                        );
1910                        return Ok(());
1911                    }
1912                    Err(e) => log::trace!("g2d empty-frame path unavailable: {e:?}"),
1913                }
1914            }
1915        }
1916
1917        // Populated frames (or G2D unavailable): GL first, CPU fallback.
1918        // Both backends now own their own base-layer handling (bg blit
1919        // or clear), so we hand the overlay through untouched.
1920        #[cfg(target_os = "linux")]
1921        #[cfg(feature = "opengl")]
1922        if let Some(opengl) = self.opengl.as_mut() {
1923            log::trace!(
1924                "draw_decoded_masks started with opengl in {:?}",
1925                start.elapsed()
1926            );
1927            match opengl.draw_decoded_masks(dst, detect, segmentation, overlay) {
1928                Ok(_) => {
1929                    log::trace!("draw_decoded_masks with opengl in {:?}", start.elapsed());
1930                    return Ok(());
1931                }
1932                Err(e) => {
1933                    log::trace!("draw_decoded_masks didn't work with opengl: {e:?}")
1934                }
1935            }
1936        }
1937
1938        log::trace!(
1939            "draw_decoded_masks started with cpu in {:?}",
1940            start.elapsed()
1941        );
1942        if let Some(cpu) = self.cpu.as_mut() {
1943            match cpu.draw_decoded_masks(dst, detect, segmentation, overlay) {
1944                Ok(_) => {
1945                    log::trace!("draw_decoded_masks with cpu in {:?}", start.elapsed());
1946                    return Ok(());
1947                }
1948                Err(e) => {
1949                    log::trace!("draw_decoded_masks didn't work with cpu: {e:?}");
1950                    return Err(e);
1951                }
1952            }
1953        }
1954        Err(Error::NoConverter)
1955    }
1956
1957    fn draw_proto_masks(
1958        &mut self,
1959        dst: &mut TensorDyn,
1960        detect: &[DetectBox],
1961        proto_data: &ProtoData,
1962        overlay: MaskOverlay<'_>,
1963    ) -> Result<()> {
1964        let start = Instant::now();
1965
1966        if let Some(bg) = overlay.background {
1967            if bg.aliases(dst) {
1968                return Err(Error::AliasedBuffers(
1969                    "background must not reference the same buffer as dst".to_string(),
1970                ));
1971            }
1972        }
1973
1974        // Un-letterbox detect boxes for rendering when a letterbox was applied
1975        // to prepare the model input.  The original `detect` coords are still
1976        // passed to `materialize_segmentations` (which needs model-space coords
1977        // to correctly crop the proto tensor) alongside `overlay.letterbox` so
1978        // it can emit `Segmentation` structs in output-image space.
1979        let lb_boxes: Vec<DetectBox>;
1980        let render_detect = if let Some(lb) = overlay.letterbox {
1981            lb_boxes = detect.iter().map(|&d| unletter_bbox(d, lb)).collect();
1982            lb_boxes.as_slice()
1983        } else {
1984            detect
1985        };
1986        #[cfg(target_os = "linux")]
1987        let is_empty_frame = detect.is_empty();
1988
1989        // ── Forced backend: no fallback chain ────────────────────────
1990        if let Some(forced) = self.forced_backend {
1991            return match forced {
1992                ForcedBackend::Cpu => {
1993                    if let Some(cpu) = self.cpu.as_mut() {
1994                        return cpu.draw_proto_masks(dst, render_detect, proto_data, overlay);
1995                    }
1996                    Err(Error::ForcedBackendUnavailable("cpu".into()))
1997                }
1998                ForcedBackend::G2d => {
1999                    #[cfg(target_os = "linux")]
2000                    if let Some(g2d) = self.g2d.as_mut() {
2001                        return g2d.draw_proto_masks(dst, render_detect, proto_data, overlay);
2002                    }
2003                    Err(Error::ForcedBackendUnavailable("g2d".into()))
2004                }
2005                ForcedBackend::OpenGl => {
2006                    #[cfg(target_os = "linux")]
2007                    #[cfg(feature = "opengl")]
2008                    if let Some(opengl) = self.opengl.as_mut() {
2009                        return opengl.draw_proto_masks(dst, render_detect, proto_data, overlay);
2010                    }
2011                    Err(Error::ForcedBackendUnavailable("opengl".into()))
2012                }
2013            };
2014        }
2015
2016        // ── Auto dispatch ──────────────────────────────────────────
2017        // Empty frames: prefer G2D — cheapest HW path (clear or bg blit).
2018        #[cfg(target_os = "linux")]
2019        if is_empty_frame {
2020            if let Some(g2d) = self.g2d.as_mut() {
2021                match g2d.draw_proto_masks(dst, render_detect, proto_data, overlay) {
2022                    Ok(_) => {
2023                        log::trace!(
2024                            "draw_proto_masks empty frame via g2d in {:?}",
2025                            start.elapsed()
2026                        );
2027                        return Ok(());
2028                    }
2029                    Err(e) => log::trace!("g2d empty-frame path unavailable: {e:?}"),
2030                }
2031            }
2032        }
2033
2034        // Hybrid path: CPU materialize + GL overlay (benchmarked faster than
2035        // full-GPU draw_proto_masks on all tested platforms: 27× on imx8mp,
2036        // 4× on imx95, 2.5× on rpi5, 1.6× on x86).
2037        // GL owns its own bg-blit / glClear — we pass the overlay through.
2038        //
2039        // CPU materialize needs `&mut` for its MaskScratch buffers; GL also
2040        // needs `&mut`. The CPU borrow is scoped to its block so the
2041        // subsequent GL borrow is free to take over `self`.
2042        #[cfg(target_os = "linux")]
2043        #[cfg(feature = "opengl")]
2044        if let (Some(_), Some(_)) = (self.cpu.as_ref(), self.opengl.as_ref()) {
2045            let segmentation = match self.cpu.as_mut() {
2046                Some(cpu) => {
2047                    log::trace!(
2048                        "draw_proto_masks started with hybrid (cpu+opengl) in {:?}",
2049                        start.elapsed()
2050                    );
2051                    cpu.materialize_segmentations(detect, proto_data, overlay.letterbox)?
2052                }
2053                None => unreachable!("cpu presence checked above"),
2054            };
2055            if let Some(opengl) = self.opengl.as_mut() {
2056                match opengl.draw_decoded_masks(dst, render_detect, &segmentation, overlay) {
2057                    Ok(_) => {
2058                        log::trace!(
2059                            "draw_proto_masks with hybrid (cpu+opengl) in {:?}",
2060                            start.elapsed()
2061                        );
2062                        return Ok(());
2063                    }
2064                    Err(e) => {
2065                        log::trace!(
2066                            "draw_proto_masks hybrid path failed, falling back to cpu: {e:?}"
2067                        );
2068                    }
2069                }
2070            }
2071        }
2072
2073        let Some(cpu) = self.cpu.as_mut() else {
2074            return Err(Error::Internal(
2075                "draw_proto_masks requires CPU backend for fallback path".into(),
2076            ));
2077        };
2078        log::trace!("draw_proto_masks started with cpu in {:?}", start.elapsed());
2079        cpu.draw_proto_masks(dst, render_detect, proto_data, overlay)
2080    }
2081
2082    fn set_class_colors(&mut self, colors: &[[u8; 4]]) -> Result<()> {
2083        let start = Instant::now();
2084
2085        // ── Forced backend: no fallback chain ────────────────────────
2086        if let Some(forced) = self.forced_backend {
2087            return match forced {
2088                ForcedBackend::Cpu => {
2089                    if let Some(cpu) = self.cpu.as_mut() {
2090                        return cpu.set_class_colors(colors);
2091                    }
2092                    Err(Error::ForcedBackendUnavailable("cpu".into()))
2093                }
2094                ForcedBackend::G2d => Err(Error::NotSupported(
2095                    "g2d does not support set_class_colors".into(),
2096                )),
2097                ForcedBackend::OpenGl => {
2098                    #[cfg(target_os = "linux")]
2099                    #[cfg(feature = "opengl")]
2100                    if let Some(opengl) = self.opengl.as_mut() {
2101                        return opengl.set_class_colors(colors);
2102                    }
2103                    Err(Error::ForcedBackendUnavailable("opengl".into()))
2104                }
2105            };
2106        }
2107
2108        // skip G2D as it doesn't support rendering to image
2109
2110        #[cfg(target_os = "linux")]
2111        #[cfg(feature = "opengl")]
2112        if let Some(opengl) = self.opengl.as_mut() {
2113            log::trace!("image started with opengl in {:?}", start.elapsed());
2114            match opengl.set_class_colors(colors) {
2115                Ok(_) => {
2116                    log::trace!("colors set with opengl in {:?}", start.elapsed());
2117                    return Ok(());
2118                }
2119                Err(e) => {
2120                    log::trace!("colors didn't set with opengl: {e:?}")
2121                }
2122            }
2123        }
2124        log::trace!("image started with cpu in {:?}", start.elapsed());
2125        if let Some(cpu) = self.cpu.as_mut() {
2126            match cpu.set_class_colors(colors) {
2127                Ok(_) => {
2128                    log::trace!("colors set with cpu in {:?}", start.elapsed());
2129                    return Ok(());
2130                }
2131                Err(e) => {
2132                    log::trace!("colors didn't set with cpu: {e:?}");
2133                    return Err(e);
2134                }
2135            }
2136        }
2137        Err(Error::NoConverter)
2138    }
2139}
2140
2141// ---------------------------------------------------------------------------
2142// Image loading / saving helpers
2143// ---------------------------------------------------------------------------
2144
/// Test-only shortcut for "peek → allocate → decode".
///
/// Reads the image header with [`edgefirst_codec`], allocates a tensor that
/// matches the reported width, height and format (honoring DMA pitch padding
/// on Linux when requested), and decodes `image` into it. `format`, when
/// given, is passed to the decoder as the requested pixel format; `memory`
/// selects the tensor backing.
///
/// Mirrors the semantics of the removed public `load_image` API for test
/// sites; production callers should use the explicit peek → allocate →
/// decode pattern directly.
#[cfg(test)]
pub(crate) fn load_image_test_helper(
    image: &[u8],
    format: Option<PixelFormat>,
    memory: Option<TensorMemory>,
) -> Result<TensorDyn> {
    use edgefirst_codec::{peek_info, DecodeOptions, ImageDecoder, ImageLoad};

    let base_opts = DecodeOptions::default();
    let opts = if let Some(requested) = format {
        base_opts.with_format(requested)
    } else {
        base_opts
    };

    let info = peek_info(image, &opts)?;
    let (width, height, pixel_format) = (info.width, info.height, info.format);

    let mut decoder = ImageDecoder::new();

    // On Linux a padded row pitch may be called for; in that case the tensor
    // is allocated as DMA memory with that stride.
    #[cfg(target_os = "linux")]
    if let Some(pitch) = padded_dma_pitch_for(pixel_format, width, &memory) {
        let mut padded = Tensor::<u8>::image_with_stride(
            width,
            height,
            pixel_format,
            pitch,
            Some(TensorMemory::Dma),
        )?;
        padded.load_image(&mut decoder, image, &opts)?;
        return Ok(TensorDyn::from(padded));
    }

    let mut tensor = Tensor::<u8>::image(width, height, pixel_format, memory)?;
    tensor.load_image(&mut decoder, image, &opts)?;
    Ok(TensorDyn::from(tensor))
}
2186
2187/// Save a [`TensorDyn`] image as a JPEG file.
2188///
2189/// Only packed RGB and RGBA formats are supported.
2190pub fn save_jpeg(tensor: &TensorDyn, path: impl AsRef<std::path::Path>, quality: u8) -> Result<()> {
2191    let t = tensor.as_u8().ok_or(Error::UnsupportedFormat(
2192        "save_jpeg requires u8 tensor".to_string(),
2193    ))?;
2194    let fmt = t.format().ok_or(Error::NotAnImage)?;
2195    if fmt.layout() != PixelLayout::Packed {
2196        return Err(Error::NotImplemented(
2197            "Saving planar images is not supported".to_string(),
2198        ));
2199    }
2200
2201    let colour = match fmt {
2202        PixelFormat::Rgb => jpeg_encoder::ColorType::Rgb,
2203        PixelFormat::Rgba => jpeg_encoder::ColorType::Rgba,
2204        _ => {
2205            return Err(Error::NotImplemented(
2206                "Unsupported image format for saving".to_string(),
2207            ));
2208        }
2209    };
2210
2211    let w = t.width().ok_or(Error::NotAnImage)?;
2212    let h = t.height().ok_or(Error::NotAnImage)?;
2213    let encoder = jpeg_encoder::Encoder::new_file(path, quality)?;
2214    let tensor_map = t.map()?;
2215
2216    encoder.encode(&tensor_map, w as u16, h as u16, colour)?;
2217
2218    Ok(())
2219}
2220
2221pub(crate) struct FunctionTimer<T: Display> {
2222    name: T,
2223    start: std::time::Instant,
2224}
2225
2226impl<T: Display> FunctionTimer<T> {
2227    pub fn new(name: T) -> Self {
2228        Self {
2229            name,
2230            start: std::time::Instant::now(),
2231        }
2232    }
2233}
2234
2235impl<T: Display> Drop for FunctionTimer<T> {
2236    fn drop(&mut self) {
2237        log::trace!("{} elapsed: {:?}", self.name, self.start.elapsed())
2238    }
2239}
2240
2241const DEFAULT_COLORS: [[f32; 4]; 20] = [
2242    [0., 1., 0., 0.7],
2243    [1., 0.5568628, 0., 0.7],
2244    [0.25882353, 0.15294118, 0.13333333, 0.7],
2245    [0.8, 0.7647059, 0.78039216, 0.7],
2246    [0.3137255, 0.3137255, 0.3137255, 0.7],
2247    [0.1411765, 0.3098039, 0.1215686, 0.7],
2248    [1., 0.95686275, 0.5137255, 0.7],
2249    [0.3529412, 0.32156863, 0., 0.7],
2250    [0.4235294, 0.6235294, 0.6509804, 0.7],
2251    [0.5098039, 0.5098039, 0.7294118, 0.7],
2252    [0.00784314, 0.18823529, 0.29411765, 0.7],
2253    [0.0, 0.2706, 1.0, 0.7],
2254    [0.0, 0.0, 0.0, 0.7],
2255    [0.0, 0.5, 0.0, 0.7],
2256    [1.0, 0.0, 0.0, 0.7],
2257    [0.0, 0.0, 1.0, 0.7],
2258    [1.0, 0.5, 0.5, 0.7],
2259    [0.1333, 0.5451, 0.1333, 0.7],
2260    [0.1176, 0.4118, 0.8235, 0.7],
2261    [1., 1., 1., 0.7],
2262];
2263
2264const fn denorm<const M: usize, const N: usize>(a: [[f32; M]; N]) -> [[u8; M]; N] {
2265    let mut result = [[0; M]; N];
2266    let mut i = 0;
2267    while i < N {
2268        let mut j = 0;
2269        while j < M {
2270            result[i][j] = (a[i][j] * 255.0).round() as u8;
2271            j += 1;
2272        }
2273        i += 1;
2274    }
2275    result
2276}
2277
2278const DEFAULT_COLORS_U8: [[u8; 4]; 20] = denorm(DEFAULT_COLORS);
2279
#[cfg(test)]
#[cfg_attr(coverage_nightly, coverage(off))]
mod alignment_tests {
    use super::*;

    /// Checks `align_width_for_gpu_pitch(width, bpp)` against every
    /// `(width, expected)` pair in `cases`.
    fn check_widths(bpp: usize, cases: &[(usize, usize)]) {
        for &(width, expected) in cases {
            assert_eq!(
                align_width_for_gpu_pitch(width, bpp),
                expected,
                "width {width} at {bpp} bytes per pixel"
            );
        }
    }

    #[test]
    fn align_width_rgba8_common_widths() {
        // RGBA8 (bpp=4, lcm(64,4)=64, so width must round to multiple of 16 px).
        check_widths(
            4,
            &[
                (640, 640),   // 2560 byte pitch — already aligned
                (1280, 1280), // 5120
                (1920, 1920), // 7680
                (3840, 3840), // 15360
                (3004, 3008), // crowd.png case from the imx95 investigation: 12016 → 12032
                (3000, 3008), // 12000 → 12032
                (17, 32),     // 68 → 128
                (1, 16),      // 4 → 64
            ],
        );
    }

    #[test]
    fn align_width_rgb888_packed() {
        // RGB888 (bpp=3, lcm(64,3)=192, so width must round to multiple of 64 px).
        check_widths(
            3,
            &[
                (64, 64),   // 192 byte pitch
                (640, 640), // 1920
                (1, 64),    // 3 → 192
                (65, 128),  // 195 → 384
            ],
        );

        // Verify the rounded width × bpp is a clean multiple of the LCM.
        // `padded * 3` is always divisible by 3, so being a multiple of 192
        // is exactly the same condition as being a multiple of 64.
        for w in [3004usize, 1281, 100, 17] {
            let padded = align_width_for_gpu_pitch(w, 3);
            assert!(padded >= w);
            assert_eq!((padded * 3) % 192, 0);
        }
    }

    #[test]
    fn align_width_grey_u8() {
        // Grey (bpp=1, lcm(64,1)=64, so width must round to multiple of 64 px).
        check_widths(1, &[(64, 64), (640, 640), (1, 64), (65, 128)]);
    }

    #[test]
    fn align_width_zero_inputs() {
        check_widths(4, &[(0, 0)]);
        check_widths(0, &[(640, 640)]);
    }

    #[test]
    fn align_width_never_returns_smaller_than_input() {
        // Spot-check the "returned width >= input width" contract across a
        // range of values that would previously have hit `width * bpp`
        // overflow paths.
        const BPPS: [usize; 5] = [1, 2, 3, 4, 8];
        const WIDTHS: [usize; 16] = [
            1,
            17,
            64,
            65,
            100,
            1280,
            1281,
            1920,
            3004,
            3072,
            3840,
            usize::MAX / 8,
            usize::MAX / 4,
            usize::MAX / 2,
            usize::MAX - 1,
            usize::MAX,
        ];

        for bpp in BPPS {
            for w in WIDTHS {
                let aligned = align_width_for_gpu_pitch(w, bpp);
                assert!(
                    aligned >= w,
                    "align_width_for_gpu_pitch({w}, {bpp}) = {aligned} < {w}"
                );
            }
        }
    }

    #[test]
    fn align_width_overflow_returns_unaligned_not_smaller() {
        // For width values close to usize::MAX, padding up would wrap. The
        // function must not wrap or panic. A pre-aligned width round-trips
        // unchanged even at the extreme.
        let aligned_extreme = usize::MAX - 15; // 16-pixel boundary for RGBA8
        assert_eq!(
            align_width_for_gpu_pitch(aligned_extreme, 4),
            aligned_extreme
        );

        // A misaligned extreme value cannot be rounded up; the result must
        // at least never be smaller than the input.
        let misaligned_extreme = usize::MAX - 1;
        let result = align_width_for_gpu_pitch(misaligned_extreme, 4);
        assert!(
            result >= misaligned_extreme,
            "extreme misaligned width must not be rounded down to {result}"
        );
    }

    #[test]
    fn checked_lcm_basic_and_overflow() {
        let known = [
            ((64, 4), Some(64)),
            ((64, 3), Some(192)),
            ((64, 1), Some(64)),
            ((0, 4), Some(0)),
            ((64, 0), Some(0)),
        ];
        for ((a, b), expected) in known {
            assert_eq!(checked_num_integer_lcm(a, b), expected);
        }

        // Coprime values whose product exceeds usize::MAX must return None.
        assert_eq!(
            checked_num_integer_lcm(usize::MAX, usize::MAX - 1),
            None,
            "coprime extreme values must overflow detect, not panic"
        );
    }

    #[test]
    fn primary_plane_bpp_known_formats() {
        // Packed formats use channels × elem_size.
        let packed = [
            (PixelFormat::Rgba, 4),
            (PixelFormat::Bgra, 4),
            (PixelFormat::Rgb, 3),
            (PixelFormat::Grey, 1),
        ];
        for (format, bpp) in packed {
            assert_eq!(primary_plane_bpp(format, 1), Some(bpp));
        }

        // Semi-planar (NV12) reports the luma plane's bpp.
        assert_eq!(primary_plane_bpp(PixelFormat::Nv12, 1), Some(1));
    }
}
2410
2411#[cfg(test)]
2412#[cfg_attr(coverage_nightly, coverage(off))]
2413#[allow(deprecated)]
2414mod image_tests {
2415    use super::*;
2416    use crate::{CPUProcessor, Rotation};
2417    #[cfg(target_os = "linux")]
2418    use edgefirst_tensor::is_dma_available;
2419    use edgefirst_tensor::{TensorMapTrait, TensorMemory, TensorTrait};
2420    use image::buffer::ConvertBuffer;
2421
2422    /// Test helper: call `ImageProcessorTrait::convert()` on two `TensorDyn`s
2423    /// by going through the `TensorDyn` API.
2424    ///
2425    /// Returns the `(src_image, dst_image)` reconstructed from the TensorDyn
2426    /// round-trip so the caller can feed them to `compare_images` etc.
2427    fn convert_img(
2428        proc: &mut dyn ImageProcessorTrait,
2429        src: TensorDyn,
2430        dst: TensorDyn,
2431        rotation: Rotation,
2432        flip: Flip,
2433        crop: Crop,
2434    ) -> (Result<()>, TensorDyn, TensorDyn) {
2435        let src_fourcc = src.format().unwrap();
2436        let dst_fourcc = dst.format().unwrap();
2437        let src_dyn = src;
2438        let mut dst_dyn = dst;
2439        let result = proc.convert(&src_dyn, &mut dst_dyn, rotation, flip, crop);
2440        let src_back = {
2441            let mut __t = src_dyn.into_u8().unwrap();
2442            __t.set_format(src_fourcc).unwrap();
2443            TensorDyn::from(__t)
2444        };
2445        let dst_back = {
2446            let mut __t = dst_dyn.into_u8().unwrap();
2447            __t.set_format(dst_fourcc).unwrap();
2448            TensorDyn::from(__t)
2449        };
2450        (result, src_back, dst_back)
2451    }
2452
2453    #[ctor::ctor]
2454    fn init() {
2455        env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
2456    }
2457
2458    macro_rules! function {
2459        () => {{
2460            fn f() {}
2461            fn type_name_of<T>(_: T) -> &'static str {
2462                std::any::type_name::<T>()
2463            }
2464            let name = type_name_of(f);
2465
2466            // Find and cut the rest of the path
2467            match &name[..name.len() - 3].rfind(':') {
2468                Some(pos) => &name[pos + 1..name.len() - 3],
2469                None => &name[..name.len() - 3],
2470            }
2471        }};
2472    }
2473
2474    #[test]
2475    fn test_invalid_crop() {
2476        let src = TensorDyn::image(100, 100, PixelFormat::Rgb, DType::U8, None).unwrap();
2477        let dst = TensorDyn::image(100, 100, PixelFormat::Rgb, DType::U8, None).unwrap();
2478
2479        let crop = Crop::new()
2480            .with_src_rect(Some(Rect::new(50, 50, 60, 60)))
2481            .with_dst_rect(Some(Rect::new(0, 0, 150, 150)));
2482
2483        let result = crop.check_crop_dyn(&src, &dst);
2484        assert!(matches!(
2485            result,
2486            Err(Error::CropInvalid(e)) if e.starts_with("Dest and Src crop invalid")
2487        ));
2488
2489        let crop = crop.with_src_rect(Some(Rect::new(0, 0, 10, 10)));
2490        let result = crop.check_crop_dyn(&src, &dst);
2491        assert!(matches!(
2492            result,
2493            Err(Error::CropInvalid(e)) if e.starts_with("Dest crop invalid")
2494        ));
2495
2496        let crop = crop
2497            .with_src_rect(Some(Rect::new(50, 50, 60, 60)))
2498            .with_dst_rect(Some(Rect::new(0, 0, 50, 50)));
2499        let result = crop.check_crop_dyn(&src, &dst);
2500        assert!(matches!(
2501            result,
2502            Err(Error::CropInvalid(e)) if e.starts_with("Src crop invalid")
2503        ));
2504
2505        let crop = crop.with_src_rect(Some(Rect::new(50, 50, 50, 50)));
2506
2507        let result = crop.check_crop_dyn(&src, &dst);
2508        assert!(result.is_ok());
2509    }
2510
2511    #[test]
2512    fn test_invalid_tensor_format() -> Result<(), Error> {
2513        // 4D tensor cannot be set to a 3-channel pixel format
2514        let mut tensor = Tensor::<u8>::new(&[720, 1280, 4, 1], None, None)?;
2515        let result = tensor.set_format(PixelFormat::Rgb);
2516        assert!(result.is_err(), "4D tensor should reject set_format");
2517
2518        // Tensor with wrong channel count for the format
2519        let mut tensor = Tensor::<u8>::new(&[720, 1280, 4], None, None)?;
2520        let result = tensor.set_format(PixelFormat::Rgb);
2521        assert!(result.is_err(), "4-channel tensor should reject RGB format");
2522
2523        Ok(())
2524    }
2525
2526    #[test]
2527    fn test_invalid_image_file() -> Result<(), Error> {
2528        let result = crate::load_image_test_helper(&[123; 5000], None, None);
2529        assert!(
2530            matches!(result, Err(Error::Codec(_))),
2531            "unrecognised bytes should surface as Error::Codec, got {result:?}"
2532        );
2533        Ok(())
2534    }
2535
2536    #[test]
2537    fn test_invalid_jpeg_format() -> Result<(), Error> {
2538        let result = crate::load_image_test_helper(&[123; 5000], Some(PixelFormat::Yuyv), None);
2539        // YUYV is not a valid decode target; peek_info fails before the magic-
2540        // bytes check, so the precise variant depends on which error fires first.
2541        assert!(
2542            matches!(result, Err(Error::Codec(_))),
2543            "Yuyv target with garbage bytes should surface as Error::Codec, got {result:?}"
2544        );
2545        Ok(())
2546    }
2547
2548    #[test]
2549    fn test_load_resize_save() {
2550        let file = edgefirst_bench::testdata::read("zidane.jpg");
2551        let img = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
2552        assert_eq!(img.width(), Some(1280));
2553        assert_eq!(img.height(), Some(720));
2554
2555        let dst = TensorDyn::image(640, 360, PixelFormat::Rgba, DType::U8, None).unwrap();
2556        let mut converter = CPUProcessor::new();
2557        let (result, _img, dst) = convert_img(
2558            &mut converter,
2559            img,
2560            dst,
2561            Rotation::None,
2562            Flip::None,
2563            Crop::no_crop(),
2564        );
2565        result.unwrap();
2566        assert_eq!(dst.width(), Some(640));
2567        assert_eq!(dst.height(), Some(360));
2568
2569        crate::save_jpeg(&dst, "zidane_resized.jpg", 80).unwrap();
2570
2571        let file = std::fs::read("zidane_resized.jpg").unwrap();
2572        let img = crate::load_image_test_helper(&file, None, None).unwrap();
2573        assert_eq!(img.width(), Some(640));
2574        assert_eq!(img.height(), Some(360));
2575        assert_eq!(img.format().unwrap(), PixelFormat::Rgb);
2576    }
2577
2578    #[test]
2579    fn test_from_tensor_planar() -> Result<(), Error> {
2580        let mut tensor = Tensor::new(&[3, 720, 1280], None, None)?;
2581        tensor
2582            .map()?
2583            .copy_from_slice(&edgefirst_bench::testdata::read("camera720p.8bps"));
2584        let planar = {
2585            tensor
2586                .set_format(PixelFormat::PlanarRgb)
2587                .map_err(|e| crate::Error::Internal(e.to_string()))?;
2588            TensorDyn::from(tensor)
2589        };
2590
2591        let rbga = load_bytes_to_tensor(
2592            1280,
2593            720,
2594            PixelFormat::Rgba,
2595            None,
2596            &edgefirst_bench::testdata::read("camera720p.rgba"),
2597        )?;
2598        compare_images_convert_to_rgb(&planar, &rbga, 0.98, function!());
2599
2600        Ok(())
2601    }
2602
2603    #[test]
2604    fn test_from_tensor_invalid_format() {
2605        // PixelFormat::from_fourcc_str returns None for unknown FourCC codes.
2606        // Since there's no "TEST" pixel format, this validates graceful handling.
2607        assert!(PixelFormat::from_fourcc(u32::from_le_bytes(*b"TEST")).is_none());
2608    }
2609
2610    #[test]
2611    #[should_panic(expected = "Failed to save planar RGB image")]
2612    fn test_save_planar() {
2613        let planar_img = load_bytes_to_tensor(
2614            1280,
2615            720,
2616            PixelFormat::PlanarRgb,
2617            None,
2618            &edgefirst_bench::testdata::read("camera720p.8bps"),
2619        )
2620        .unwrap();
2621
2622        let save_path = "/tmp/planar_rgb.jpg";
2623        crate::save_jpeg(&planar_img, save_path, 90).expect("Failed to save planar RGB image");
2624    }
2625
2626    #[test]
2627    #[should_panic(expected = "Failed to save YUYV image")]
2628    fn test_save_yuyv() {
2629        let planar_img = load_bytes_to_tensor(
2630            1280,
2631            720,
2632            PixelFormat::Yuyv,
2633            None,
2634            &edgefirst_bench::testdata::read("camera720p.yuyv"),
2635        )
2636        .unwrap();
2637
2638        let save_path = "/tmp/yuyv.jpg";
2639        crate::save_jpeg(&planar_img, save_path, 90).expect("Failed to save YUYV image");
2640    }
2641
2642    #[test]
2643    fn test_rotation_angle() {
2644        assert_eq!(Rotation::from_degrees_clockwise(0), Rotation::None);
2645        assert_eq!(Rotation::from_degrees_clockwise(90), Rotation::Clockwise90);
2646        assert_eq!(Rotation::from_degrees_clockwise(180), Rotation::Rotate180);
2647        assert_eq!(
2648            Rotation::from_degrees_clockwise(270),
2649            Rotation::CounterClockwise90
2650        );
2651        assert_eq!(Rotation::from_degrees_clockwise(360), Rotation::None);
2652        assert_eq!(Rotation::from_degrees_clockwise(450), Rotation::Clockwise90);
2653        assert_eq!(Rotation::from_degrees_clockwise(540), Rotation::Rotate180);
2654        assert_eq!(
2655            Rotation::from_degrees_clockwise(630),
2656            Rotation::CounterClockwise90
2657        );
2658    }
2659
2660    #[test]
2661    #[should_panic(expected = "rotation angle is not a multiple of 90")]
2662    fn test_rotation_angle_panic() {
2663        Rotation::from_degrees_clockwise(361);
2664    }
2665
2666    #[test]
2667    fn test_disable_env_var() -> Result<(), Error> {
2668        // EDGEFIRST_FORCE_BACKEND takes precedence over EDGEFIRST_DISABLE_*,
2669        // so clear it for the duration of this test to avoid races with
2670        // test_force_backend_cpu running in parallel.
2671        let saved_force = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
2672        unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") };
2673
2674        #[cfg(target_os = "linux")]
2675        {
2676            let original = std::env::var("EDGEFIRST_DISABLE_G2D").ok();
2677            unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", "1") };
2678            let converter = ImageProcessor::new()?;
2679            match original {
2680                Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", s) },
2681                None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_G2D") },
2682            }
2683            assert!(converter.g2d.is_none());
2684        }
2685
2686        #[cfg(target_os = "linux")]
2687        #[cfg(feature = "opengl")]
2688        {
2689            let original = std::env::var("EDGEFIRST_DISABLE_GL").ok();
2690            unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", "1") };
2691            let converter = ImageProcessor::new()?;
2692            match original {
2693                Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", s) },
2694                None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_GL") },
2695            }
2696            assert!(converter.opengl.is_none());
2697        }
2698
2699        let original = std::env::var("EDGEFIRST_DISABLE_CPU").ok();
2700        unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", "1") };
2701        let converter = ImageProcessor::new()?;
2702        match original {
2703            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", s) },
2704            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_CPU") },
2705        }
2706        assert!(converter.cpu.is_none());
2707
2708        let original_cpu = std::env::var("EDGEFIRST_DISABLE_CPU").ok();
2709        unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", "1") };
2710        let original_gl = std::env::var("EDGEFIRST_DISABLE_GL").ok();
2711        unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", "1") };
2712        let original_g2d = std::env::var("EDGEFIRST_DISABLE_G2D").ok();
2713        unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", "1") };
2714        let mut converter = ImageProcessor::new()?;
2715
2716        let src = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None)?;
2717        let dst = TensorDyn::image(640, 360, PixelFormat::Rgba, DType::U8, None)?;
2718        let (result, _src, _dst) = convert_img(
2719            &mut converter,
2720            src,
2721            dst,
2722            Rotation::None,
2723            Flip::None,
2724            Crop::no_crop(),
2725        );
2726        assert!(matches!(result, Err(Error::NoConverter)));
2727
2728        match original_cpu {
2729            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", s) },
2730            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_CPU") },
2731        }
2732        match original_gl {
2733            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", s) },
2734            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_GL") },
2735        }
2736        match original_g2d {
2737            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", s) },
2738            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_G2D") },
2739        }
2740        match saved_force {
2741            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
2742            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
2743        }
2744
2745        Ok(())
2746    }
2747
2748    #[test]
2749    fn test_unsupported_conversion() {
2750        let src = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
2751        let dst = TensorDyn::image(640, 360, PixelFormat::Nv12, DType::U8, None).unwrap();
2752        let mut converter = ImageProcessor::new().unwrap();
2753        let (result, _src, _dst) = convert_img(
2754            &mut converter,
2755            src,
2756            dst,
2757            Rotation::None,
2758            Flip::None,
2759            Crop::no_crop(),
2760        );
2761        log::debug!("result: {:?}", result);
2762        assert!(matches!(
2763            result,
2764            Err(Error::NotSupported(e)) if e.starts_with("Conversion from NV12 to NV12")
2765        ));
2766    }
2767
2768    #[test]
2769    fn test_load_grey() {
2770        let grey_img = crate::load_image_test_helper(
2771            &edgefirst_bench::testdata::read("grey.jpg"),
2772            Some(PixelFormat::Rgba),
2773            None,
2774        )
2775        .unwrap();
2776
2777        let grey_but_rgb_img = crate::load_image_test_helper(
2778            &edgefirst_bench::testdata::read("grey-rgb.jpg"),
2779            Some(PixelFormat::Rgba),
2780            None,
2781        )
2782        .unwrap();
2783
2784        compare_images(&grey_img, &grey_but_rgb_img, 0.99, function!());
2785    }
2786
2787    #[test]
2788    fn test_new_nv12() {
2789        let nv12 = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
2790        assert_eq!(nv12.height(), Some(720));
2791        assert_eq!(nv12.width(), Some(1280));
2792        assert_eq!(nv12.format().unwrap(), PixelFormat::Nv12);
2793        // PixelFormat::Nv12.channels() returns 1 (luma plane channel count)
2794        assert_eq!(nv12.format().unwrap().channels(), 1);
2795        assert!(nv12.format().is_some_and(
2796            |f| f.layout() == PixelLayout::Planar || f.layout() == PixelLayout::SemiPlanar
2797        ))
2798    }
2799
2800    #[test]
2801    #[cfg(target_os = "linux")]
2802    fn test_new_image_converter() {
2803        let dst_width = 640;
2804        let dst_height = 360;
2805        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
2806        let src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
2807
2808        let mut converter = ImageProcessor::new().unwrap();
2809        let converter_dst = converter
2810            .create_image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None)
2811            .unwrap();
2812        let (result, src, converter_dst) = convert_img(
2813            &mut converter,
2814            src,
2815            converter_dst,
2816            Rotation::None,
2817            Flip::None,
2818            Crop::no_crop(),
2819        );
2820        result.unwrap();
2821
2822        let cpu_dst =
2823            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
2824        let mut cpu_converter = CPUProcessor::new();
2825        let (result, _src, cpu_dst) = convert_img(
2826            &mut cpu_converter,
2827            src,
2828            cpu_dst,
2829            Rotation::None,
2830            Flip::None,
2831            Crop::no_crop(),
2832        );
2833        result.unwrap();
2834
2835        compare_images(&converter_dst, &cpu_dst, 0.98, function!());
2836    }
2837
2838    #[test]
2839    #[cfg(target_os = "linux")]
2840    fn test_create_image_dtype_i8() {
2841        let mut converter = ImageProcessor::new().unwrap();
2842
2843        // I8 image should allocate successfully via create_image
2844        let dst = converter
2845            .create_image(320, 240, PixelFormat::Rgb, DType::I8, None)
2846            .unwrap();
2847        assert_eq!(dst.dtype(), DType::I8);
2848        assert!(dst.width() == Some(320));
2849        assert!(dst.height() == Some(240));
2850        assert_eq!(dst.format(), Some(PixelFormat::Rgb));
2851
2852        // U8 for comparison
2853        let dst_u8 = converter
2854            .create_image(320, 240, PixelFormat::Rgb, DType::U8, None)
2855            .unwrap();
2856        assert_eq!(dst_u8.dtype(), DType::U8);
2857
2858        // Convert into I8 dst should succeed
2859        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
2860        let src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
2861        let mut dst_i8 = converter
2862            .create_image(320, 240, PixelFormat::Rgb, DType::I8, None)
2863            .unwrap();
2864        converter
2865            .convert(
2866                &src,
2867                &mut dst_i8,
2868                Rotation::None,
2869                Flip::None,
2870                Crop::no_crop(),
2871            )
2872            .unwrap();
2873    }
2874
2875    #[test]
2876    #[cfg(target_os = "linux")]
2877    fn test_create_image_nv12_dma_non_aligned_width() {
2878        // Regression for C2: create_image must not apply stride padding to
2879        // non-packed formats. NV12 is semi-planar (PixelLayout::SemiPlanar),
2880        // so the try_dma path should fall through to the plain
2881        // TensorDyn::image allocation for any width, regardless of the
2882        // 64-byte GPU pitch alignment.
2883        let converter = ImageProcessor::new().unwrap();
2884
2885        // 100 is intentionally not a multiple of 64 (the Mali pitch
2886        // alignment) to prove that non-packed layouts do not take the
2887        // stride-padded branch.
2888        let result = converter.create_image(
2889            100,
2890            64,
2891            PixelFormat::Nv12,
2892            DType::U8,
2893            Some(TensorMemory::Dma),
2894        );
2895
2896        match result {
2897            Ok(img) => {
2898                assert_eq!(img.width(), Some(100));
2899                assert_eq!(img.height(), Some(64));
2900                assert_eq!(img.format(), Some(PixelFormat::Nv12));
2901                // Non-packed formats must never carry a row_stride override.
2902                assert!(
2903                    img.row_stride().is_none(),
2904                    "NV12 must not be stride-padded by create_image",
2905                );
2906            }
2907            Err(e) => {
2908                // Accept skip on hosts without a dma-heap, but never the
2909                // "NotImplemented" we used to return for non-packed layouts.
2910                let msg = format!("{e}");
2911                assert!(
2912                    !msg.contains("image_with_stride"),
2913                    "NV12 should not hit the stride-padded path: {msg}",
2914                );
2915            }
2916        }
2917    }
2918
2919    #[test]
2920    #[ignore] // Hangs on desktop platforms where DMA-buf is unavailable and PBO
2921              // fallback triggers a GPU driver hang during SHM→texture upload (e.g.,
2922              // NVIDIA without /dev/dma_heap permissions). Works on embedded targets.
2923    fn test_crop_skip() {
2924        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
2925        let src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
2926
2927        let mut converter = ImageProcessor::new().unwrap();
2928        let converter_dst = converter
2929            .create_image(1280, 720, PixelFormat::Rgba, DType::U8, None)
2930            .unwrap();
2931        let crop = Crop::new()
2932            .with_src_rect(Some(Rect::new(0, 0, 640, 640)))
2933            .with_dst_rect(Some(Rect::new(0, 0, 640, 640)));
2934        let (result, src, converter_dst) = convert_img(
2935            &mut converter,
2936            src,
2937            converter_dst,
2938            Rotation::None,
2939            Flip::None,
2940            crop,
2941        );
2942        result.unwrap();
2943
2944        let cpu_dst = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
2945        let mut cpu_converter = CPUProcessor::new();
2946        let (result, _src, cpu_dst) = convert_img(
2947            &mut cpu_converter,
2948            src,
2949            cpu_dst,
2950            Rotation::None,
2951            Flip::None,
2952            crop,
2953        );
2954        result.unwrap();
2955
2956        compare_images(&converter_dst, &cpu_dst, 0.99999, function!());
2957    }
2958
2959    #[test]
2960    fn test_invalid_pixel_format() {
2961        // PixelFormat::from_fourcc returns None for unknown formats,
2962        // so TensorDyn::image cannot be called with an invalid format.
2963        assert!(PixelFormat::from_fourcc(u32::from_le_bytes(*b"TEST")).is_none());
2964    }
2965
2966    // Helper function to check if G2D library is available (Linux/i.MX8 only)
2967    #[cfg(target_os = "linux")]
2968    static G2D_AVAILABLE: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
2969
2970    #[cfg(target_os = "linux")]
2971    fn is_g2d_available() -> bool {
2972        *G2D_AVAILABLE.get_or_init(|| G2DProcessor::new().is_ok())
2973    }
2974
2975    #[cfg(target_os = "linux")]
2976    #[cfg(feature = "opengl")]
2977    static GL_AVAILABLE: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
2978
2979    #[cfg(target_os = "linux")]
2980    #[cfg(feature = "opengl")]
2981    // Helper function to check if OpenGL is available
2982    fn is_opengl_available() -> bool {
2983        #[cfg(all(target_os = "linux", feature = "opengl"))]
2984        {
2985            *GL_AVAILABLE.get_or_init(|| GLProcessorThreaded::new(None).is_ok())
2986        }
2987
2988        #[cfg(not(all(target_os = "linux", feature = "opengl")))]
2989        {
2990            false
2991        }
2992    }
2993
2994    #[test]
2995    fn test_load_jpeg_with_exif() {
2996        let file = edgefirst_bench::testdata::read("zidane_rotated_exif.jpg").to_vec();
2997        let loaded = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
2998
2999        assert_eq!(loaded.height(), Some(1280));
3000        assert_eq!(loaded.width(), Some(720));
3001
3002        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
3003        let cpu_src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
3004
3005        let (dst_width, dst_height) = (cpu_src.height().unwrap(), cpu_src.width().unwrap());
3006
3007        let cpu_dst =
3008            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3009        let mut cpu_converter = CPUProcessor::new();
3010
3011        let (result, _cpu_src, cpu_dst) = convert_img(
3012            &mut cpu_converter,
3013            cpu_src,
3014            cpu_dst,
3015            Rotation::Clockwise90,
3016            Flip::None,
3017            Crop::no_crop(),
3018        );
3019        result.unwrap();
3020
3021        compare_images(&loaded, &cpu_dst, 0.98, function!());
3022    }
3023
3024    #[test]
3025    fn test_load_png_with_exif() {
3026        let file = edgefirst_bench::testdata::read("zidane_rotated_exif_180.png").to_vec();
3027        let loaded = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
3028
3029        assert_eq!(loaded.height(), Some(720));
3030        assert_eq!(loaded.width(), Some(1280));
3031
3032        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
3033        let cpu_src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
3034
3035        let cpu_dst = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
3036        let mut cpu_converter = CPUProcessor::new();
3037
3038        let (result, _cpu_src, cpu_dst) = convert_img(
3039            &mut cpu_converter,
3040            cpu_src,
3041            cpu_dst,
3042            Rotation::Rotate180,
3043            Flip::None,
3044            Crop::no_crop(),
3045        );
3046        result.unwrap();
3047
3048        compare_images(&loaded, &cpu_dst, 0.98, function!());
3049    }
3050
3051    /// Synthesise an RGB JPEG with a deterministic pattern at `(width, height)`
3052    /// using the workspace's `jpeg-encoder` crate (the `image` crate is
3053    /// compiled without its JPEG feature). Used to exercise the decoder /
3054    /// pitch-padding paths for arbitrary dimensions without having to bundle
3055    /// a fixture file per test size.
3056    #[cfg(target_os = "linux")]
3057    fn make_rgb_jpeg(width: u32, height: u32) -> Vec<u8> {
3058        let mut bytes = Vec::with_capacity((width * height * 3) as usize);
3059        for y in 0..height {
3060            for x in 0..width {
3061                bytes.push(((x + y) & 0xFF) as u8);
3062                bytes.push(((x.wrapping_mul(3)) & 0xFF) as u8);
3063                bytes.push(((y.wrapping_mul(5)) & 0xFF) as u8);
3064            }
3065        }
3066        let mut out = Vec::new();
3067        let encoder = jpeg_encoder::Encoder::new(&mut out, 85);
3068        encoder
3069            .encode(
3070                &bytes,
3071                width as u16,
3072                height as u16,
3073                jpeg_encoder::ColorType::Rgb,
3074            )
3075            .expect("jpeg-encoder must succeed on trivial input");
3076        out
3077    }
3078
3079    /// End-to-end: a 375×333 RGBA JPEG (width NOT divisible by 4) loaded
3080    /// via the pitch-padded DMA path and letterboxed through the GL
3081    /// backend must produce correct output. Before the Rgba/Bgra
3082    /// width%4 relaxation in `DmaImportAttrs::from_tensor`, this case
3083    /// failed the pre-check and forced a CPU texture upload fallback;
3084    /// with the relaxation, EGL import succeeds at the driver level and
3085    /// the GL fast path runs. Output correctness is checked against a
3086    /// CPU reference (convert ran with `EDGEFIRST_FORCE_BACKEND=cpu`).
3087    #[test]
3088    #[cfg(target_os = "linux")]
3089    #[cfg(feature = "opengl")]
3090    fn test_convert_rgba_non_4_aligned_width_end_to_end() {
3091        use edgefirst_tensor::is_dma_available;
3092        if !is_dma_available() {
3093            eprintln!(
3094                "SKIPPED: test_convert_rgba_non_4_aligned_width_end_to_end — DMA not available"
3095            );
3096            return;
3097        }
3098        // 375 is the canonical failure width from dataset loaders —
3099        // 375 * 4 = 1500 bytes/row, pitch-padded to 1536. Width%4 = 3,
3100        // so the old pre-check rejected it; new code accepts it.
3101        let jpeg = make_rgb_jpeg(375, 333);
3102        let src_gl = crate::load_image_test_helper(&jpeg, Some(PixelFormat::Rgba), None).unwrap();
3103        assert_eq!(src_gl.width(), Some(375));
3104        // Row stride must still be pitch-padded (separate concern from width).
3105        let stride = src_gl.row_stride().unwrap();
3106        assert_eq!(stride, 1536, "expected padded pitch 1536, got {stride}");
3107
3108        // GL-backed convert into a pitch-aligned 640×640 Rgba dest.
3109        let mut gl_proc = ImageProcessor::new().unwrap();
3110        let gl_dst = gl_proc
3111            .create_image(640, 640, PixelFormat::Rgba, DType::U8, None)
3112            .unwrap();
3113        let (r_gl, _src_gl, gl_dst) = convert_img(
3114            &mut gl_proc,
3115            src_gl,
3116            gl_dst,
3117            Rotation::None,
3118            Flip::None,
3119            Crop::no_crop(),
3120        );
3121        r_gl.expect("GL-backed convert must succeed for 375x333 Rgba src");
3122
3123        // CPU reference via a fresh load so the two paths start from
3124        // byte-identical inputs. `with_config(backend=Cpu)` forces the
3125        // CPU-only processor regardless of which backends the host has
3126        // available.
3127        let src_cpu =
3128            crate::load_image_test_helper(&jpeg, Some(PixelFormat::Rgba), Some(TensorMemory::Mem))
3129                .unwrap();
3130        let mut cpu_proc = ImageProcessor::with_config(ImageProcessorConfig {
3131            backend: ComputeBackend::Cpu,
3132            ..Default::default()
3133        })
3134        .unwrap();
3135        let cpu_dst = TensorDyn::image(
3136            640,
3137            640,
3138            PixelFormat::Rgba,
3139            DType::U8,
3140            Some(TensorMemory::Mem),
3141        )
3142        .unwrap();
3143        let (r_cpu, _src_cpu, cpu_dst) = convert_img(
3144            &mut cpu_proc,
3145            src_cpu,
3146            cpu_dst,
3147            Rotation::None,
3148            Flip::None,
3149            Crop::no_crop(),
3150        );
3151        r_cpu.unwrap();
3152
3153        // Structural similarity: the GL path may have gone through EGL
3154        // import OR fallen back to CPU texture upload — either way, the
3155        // output must match the CPU reference closely.
3156        compare_images(&gl_dst, &cpu_dst, 0.95, function!());
3157    }
3158
3159    /// Regression lock: loading a JPEG at a non-64-aligned RGBA pitch (e.g.
3160    /// 500×333 → natural pitch 2000, needs to be padded to 2048) must go
3161    /// through `image_with_stride` and set `row_stride()` / `effective_row_stride()`
3162    /// to the padded value. The earlier pitch-padding commit fixed this in
3163    /// `load_jpeg`; a regression would surface as `row_stride == None` or
3164    /// `effective_row_stride == 2000`.
3165    #[test]
3166    #[cfg(target_os = "linux")]
3167    fn test_load_jpeg_rgba_non_aligned_pitch_padded_dma() {
3168        use edgefirst_tensor::is_dma_available;
3169        if !is_dma_available() {
3170            eprintln!(
3171                "SKIPPED: test_load_jpeg_rgba_non_aligned_pitch_padded_dma — DMA not available"
3172            );
3173            return;
3174        }
3175        // Widths that force a non-64-aligned natural RGBA pitch. All three
3176        // are divisible by 4 so the EGL width-alignment pre-check passes.
3177        // The pitch-padding fix is what makes these importable at all.
3178        for &w in &[500u32, 612, 428] {
3179            let jpeg = make_rgb_jpeg(w, 333);
3180            let loaded =
3181                crate::load_image_test_helper(&jpeg, Some(PixelFormat::Rgba), None).unwrap();
3182            let natural = (w as usize) * 4;
3183            let aligned = crate::align_pitch_bytes_to_gpu_alignment(natural).unwrap();
3184            assert!(
3185                aligned > natural,
3186                "test sanity: width {w} should be unaligned"
3187            );
3188            let stride = loaded
3189                .row_stride()
3190                .expect("padded DMA path must set an explicit row_stride — regression if None");
3191            assert_eq!(
3192                stride, aligned,
3193                "width {w}: expected padded stride {aligned}, got {stride} \
3194                 (regression: pitch-padding branch skipped?)"
3195            );
3196            let eff = loaded.effective_row_stride().unwrap();
3197            assert_eq!(
3198                eff, aligned,
3199                "effective_row_stride must match stored stride"
3200            );
3201            assert_eq!(loaded.width(), Some(w as usize));
3202            assert_eq!(loaded.height(), Some(333));
3203        }
3204    }
3205
3206    /// `padded_dma_pitch_for` must respect the caller's memory choice and
3207    /// must NOT route into the pitch-padded DMA path when the caller left
3208    /// the choice to the allocator (`None`) but DMA is unavailable on the
3209    /// host. The padded path requires `image_with_stride`, which always
3210    /// allocates DMA — taking it on a system without `/dev/dma_heap`
3211    /// would convert a normally-working image load into a hard failure
3212    /// (since `Tensor::image(..., None)` would have fallen back to
3213    /// SHM/Mem).
3214    #[test]
3215    #[cfg(target_os = "linux")]
3216    fn test_padded_dma_pitch_for_respects_memory_choice() {
3217        use edgefirst_tensor::{is_dma_available, TensorMemory};
3218
3219        // 500×4 = 2000 → padded to 2048 by GPU alignment. Use it for
3220        // every case so any "no padding" answer is unambiguous.
3221        let unaligned_w = 500;
3222
3223        // Caller asks for Mem / Shm: never pad, regardless of DMA.
3224        assert_eq!(
3225            crate::padded_dma_pitch_for(PixelFormat::Rgba, unaligned_w, &Some(TensorMemory::Mem),),
3226            None,
3227            "Mem must never trigger DMA padding"
3228        );
3229        assert_eq!(
3230            crate::padded_dma_pitch_for(PixelFormat::Rgba, unaligned_w, &Some(TensorMemory::Shm),),
3231            None,
3232            "Shm must never trigger DMA padding"
3233        );
3234
3235        // Caller explicitly asks for DMA: always pad if width needs it.
3236        // Even if the runtime can't actually allocate DMA, the caller
3237        // owns that decision and the resulting allocation error is
3238        // their problem, not ours.
3239        assert_eq!(
3240            crate::padded_dma_pitch_for(PixelFormat::Rgba, unaligned_w, &Some(TensorMemory::Dma),),
3241            Some(2048),
3242            "explicit Dma must pad regardless of runtime DMA availability"
3243        );
3244
3245        // Caller leaves it to the allocator: behaviour depends on
3246        // host-runtime DMA availability. This is the case the fix
3247        // guards against.
3248        let none_result = crate::padded_dma_pitch_for(PixelFormat::Rgba, unaligned_w, &None);
3249        if is_dma_available() {
3250            assert_eq!(
3251                none_result,
3252                Some(2048),
3253                "memory=None + DMA available → pad (will route through DMA)"
3254            );
3255        } else {
3256            assert_eq!(
3257                none_result, None,
3258                "memory=None + DMA unavailable → must NOT pad (would force \
3259                 image_with_stride into a DMA-only allocation that fails). \
3260                 Regression: padded_dma_pitch_for ignored is_dma_available()."
3261            );
3262        }
3263    }
3264
3265    // Synthesise a small greyscale PNG in memory at `(width, height)` with a
3266    // deterministic ramp pattern so multiple tests can cross-check output
3267    // without bundling an extra fixture file.
3268    fn make_grey_png(width: u32, height: u32) -> Vec<u8> {
3269        let mut bytes = Vec::with_capacity((width * height) as usize);
3270        for y in 0..height {
3271            for x in 0..width {
3272                bytes.push(((x + y) & 0xFF) as u8);
3273            }
3274        }
3275        let img = image::GrayImage::from_vec(width, height, bytes).unwrap();
3276        let mut buf = Vec::new();
3277        img.write_to(&mut std::io::Cursor::new(&mut buf), image::ImageFormat::Png)
3278            .unwrap();
3279        buf
3280    }
3281
3282    /// Greyscale PNG with a width that forces a pitch-misaligned natural
3283    /// row stride (612 bytes is not a multiple of the 64-byte GPU pitch
3284    /// alignment) must still load via the pitch-padded DMA path. Gated on
3285    /// DMA availability because `image_with_stride` is DMA-only.
3286    #[test]
3287    #[cfg(target_os = "linux")]
3288    fn test_load_png_grey_misaligned_width_dma() {
3289        use edgefirst_tensor::is_dma_available;
3290        if !is_dma_available() {
3291            eprintln!("SKIPPED: test_load_png_grey_misaligned_width_dma — DMA not available");
3292            return;
3293        }
3294        let png = make_grey_png(612, 388);
3295        let loaded = crate::load_image_test_helper(&png, Some(PixelFormat::Grey), None).unwrap();
3296        assert_eq!(loaded.width(), Some(612));
3297        assert_eq!(loaded.height(), Some(388));
3298        assert_eq!(loaded.format(), Some(PixelFormat::Grey));
3299
3300        // Round-trip pixels — natural-pitch DMA-BUFs pad the stride so we
3301        // must indirect through row_stride() rather than assume width.
3302        let map = loaded.as_u8().unwrap().map().unwrap();
3303        let stride = loaded.row_stride().unwrap_or(612);
3304        assert!(stride >= 612);
3305        let bytes: &[u8] = &map;
3306        for y in 0..388usize {
3307            for x in 0..612usize {
3308                let expected = ((x + y) & 0xFF) as u8;
3309                let got = bytes[y * stride + x];
3310                assert_eq!(
3311                    got, expected,
3312                    "grey png mismatch at ({x},{y}): got {got} expected {expected}"
3313                );
3314            }
3315        }
3316    }
3317
3318    /// Greyscale PNG loaded with explicit Mem backing — runs on any
3319    /// platform (no DMA permission requirement) and covers the
3320    /// decoder-native Luma → Grey no-conversion path.
3321    #[test]
3322    fn test_load_png_grey_mem() {
3323        use edgefirst_tensor::TensorMemory;
3324        let png = make_grey_png(612, 100);
3325        let loaded =
3326            crate::load_image_test_helper(&png, Some(PixelFormat::Grey), Some(TensorMemory::Mem))
3327                .unwrap();
3328        assert_eq!(loaded.width(), Some(612));
3329        assert_eq!(loaded.height(), Some(100));
3330        assert_eq!(loaded.format(), Some(PixelFormat::Grey));
3331        let map = loaded.as_u8().unwrap().map().unwrap();
3332        let bytes: &[u8] = &map;
3333        // Mem allocation uses the natural pitch — 612 bytes per row, exact.
3334        assert_eq!(bytes.len(), 612 * 100);
3335        for y in 0..100 {
3336            for x in 0..612 {
3337                assert_eq!(bytes[y * 612 + x], ((x + y) & 0xFF) as u8);
3338            }
3339        }
3340    }
3341
3342    /// Greyscale PNG decoded into RGB — exercises the decoder-colorspace
3343    /// mismatch path (Luma → Rgb via CPU converter). Uses Mem memory to
3344    /// stay portable to host-side test environments.
3345    #[test]
3346    fn test_load_png_grey_to_rgb_mem() {
3347        use edgefirst_tensor::TensorMemory;
3348        let png = make_grey_png(620, 240);
3349        let loaded =
3350            crate::load_image_test_helper(&png, Some(PixelFormat::Rgb), Some(TensorMemory::Mem))
3351                .unwrap();
3352        assert_eq!(loaded.width(), Some(620));
3353        assert_eq!(loaded.height(), Some(240));
3354        assert_eq!(loaded.format(), Some(PixelFormat::Rgb));
3355
3356        // Greyscale promoted to RGB replicates luma into each channel.
3357        let map = loaded.as_u8().unwrap().map().unwrap();
3358        let bytes: &[u8] = &map;
3359        for (x, y) in [(0usize, 0usize), (100, 50), (619, 239)] {
3360            let expected = ((x + y) & 0xFF) as u8;
3361            let off = (y * 620 + x) * 3;
3362            assert_eq!(bytes[off], expected, "R@{x},{y}");
3363            assert_eq!(bytes[off + 1], expected, "G@{x},{y}");
3364            assert_eq!(bytes[off + 2], expected, "B@{x},{y}");
3365        }
3366    }
3367
3368    #[test]
3369    #[cfg(target_os = "linux")]
3370    fn test_g2d_resize() {
3371        if !is_g2d_available() {
3372            eprintln!("SKIPPED: test_g2d_resize - G2D library (libg2d.so.2) not available");
3373            return;
3374        }
3375        if !is_dma_available() {
3376            eprintln!(
3377                "SKIPPED: test_g2d_resize - DMA memory allocation not available (permission denied or no DMA-BUF support)"
3378            );
3379            return;
3380        }
3381
3382        let dst_width = 640;
3383        let dst_height = 360;
3384        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
3385        let src =
3386            crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), Some(TensorMemory::Dma))
3387                .unwrap();
3388
3389        let g2d_dst = TensorDyn::image(
3390            dst_width,
3391            dst_height,
3392            PixelFormat::Rgba,
3393            DType::U8,
3394            Some(TensorMemory::Dma),
3395        )
3396        .unwrap();
3397        let mut g2d_converter = G2DProcessor::new().unwrap();
3398        let (result, src, g2d_dst) = convert_img(
3399            &mut g2d_converter,
3400            src,
3401            g2d_dst,
3402            Rotation::None,
3403            Flip::None,
3404            Crop::no_crop(),
3405        );
3406        result.unwrap();
3407
3408        let cpu_dst =
3409            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3410        let mut cpu_converter = CPUProcessor::new();
3411        let (result, _src, cpu_dst) = convert_img(
3412            &mut cpu_converter,
3413            src,
3414            cpu_dst,
3415            Rotation::None,
3416            Flip::None,
3417            Crop::no_crop(),
3418        );
3419        result.unwrap();
3420
3421        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
3422    }
3423
3424    #[test]
3425    #[cfg(target_os = "linux")]
3426    #[cfg(feature = "opengl")]
3427    fn test_opengl_resize() {
3428        if !is_opengl_available() {
3429            eprintln!("SKIPPED: {} - OpenGL not available", function!());
3430            return;
3431        }
3432
3433        let dst_width = 640;
3434        let dst_height = 360;
3435        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
3436        let src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
3437
3438        let cpu_dst =
3439            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3440        let mut cpu_converter = CPUProcessor::new();
3441        let (result, src, cpu_dst) = convert_img(
3442            &mut cpu_converter,
3443            src,
3444            cpu_dst,
3445            Rotation::None,
3446            Flip::None,
3447            Crop::no_crop(),
3448        );
3449        result.unwrap();
3450
3451        let mut src = src;
3452        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
3453
3454        for _ in 0..5 {
3455            let gl_dst =
3456                TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None)
3457                    .unwrap();
3458            let (result, src_back, gl_dst) = convert_img(
3459                &mut gl_converter,
3460                src,
3461                gl_dst,
3462                Rotation::None,
3463                Flip::None,
3464                Crop::no_crop(),
3465            );
3466            result.unwrap();
3467            src = src_back;
3468
3469            compare_images(&gl_dst, &cpu_dst, 0.98, function!());
3470        }
3471    }
3472
3473    #[test]
3474    #[cfg(target_os = "linux")]
3475    #[cfg(feature = "opengl")]
3476    fn test_opengl_10_threads() {
3477        if !is_opengl_available() {
3478            eprintln!("SKIPPED: {} - OpenGL not available", function!());
3479            return;
3480        }
3481
3482        let handles: Vec<_> = (0..10)
3483            .map(|i| {
3484                std::thread::Builder::new()
3485                    .name(format!("Thread {i}"))
3486                    .spawn(test_opengl_resize)
3487                    .unwrap()
3488            })
3489            .collect();
3490        handles.into_iter().for_each(|h| {
3491            if let Err(e) = h.join() {
3492                std::panic::resume_unwind(e)
3493            }
3494        });
3495    }
3496
3497    #[test]
3498    #[cfg(target_os = "linux")]
3499    #[cfg(feature = "opengl")]
3500    fn test_opengl_grey() {
3501        if !is_opengl_available() {
3502            eprintln!("SKIPPED: {} - OpenGL not available", function!());
3503            return;
3504        }
3505
3506        let img = crate::load_image_test_helper(
3507            &edgefirst_bench::testdata::read("grey.jpg"),
3508            Some(PixelFormat::Grey),
3509            None,
3510        )
3511        .unwrap();
3512
3513        let gl_dst = TensorDyn::image(640, 640, PixelFormat::Grey, DType::U8, None).unwrap();
3514        let cpu_dst = TensorDyn::image(640, 640, PixelFormat::Grey, DType::U8, None).unwrap();
3515
3516        let mut converter = CPUProcessor::new();
3517
3518        let (result, img, cpu_dst) = convert_img(
3519            &mut converter,
3520            img,
3521            cpu_dst,
3522            Rotation::None,
3523            Flip::None,
3524            Crop::no_crop(),
3525        );
3526        result.unwrap();
3527
3528        let mut gl = GLProcessorThreaded::new(None).unwrap();
3529        let (result, _img, gl_dst) = convert_img(
3530            &mut gl,
3531            img,
3532            gl_dst,
3533            Rotation::None,
3534            Flip::None,
3535            Crop::no_crop(),
3536        );
3537        result.unwrap();
3538
3539        compare_images(&gl_dst, &cpu_dst, 0.98, function!());
3540    }
3541
3542    #[test]
3543    #[cfg(target_os = "linux")]
3544    fn test_g2d_src_crop() {
3545        if !is_g2d_available() {
3546            eprintln!("SKIPPED: test_g2d_src_crop - G2D library (libg2d.so.2) not available");
3547            return;
3548        }
3549        if !is_dma_available() {
3550            eprintln!(
3551                "SKIPPED: test_g2d_src_crop - DMA memory allocation not available (permission denied or no DMA-BUF support)"
3552            );
3553            return;
3554        }
3555
3556        let dst_width = 640;
3557        let dst_height = 640;
3558        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
3559        let src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
3560
3561        let cpu_dst =
3562            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3563        let mut cpu_converter = CPUProcessor::new();
3564        let crop = Crop {
3565            src_rect: Some(Rect {
3566                left: 0,
3567                top: 0,
3568                width: 640,
3569                height: 360,
3570            }),
3571            dst_rect: None,
3572            dst_color: None,
3573        };
3574        let (result, src, cpu_dst) = convert_img(
3575            &mut cpu_converter,
3576            src,
3577            cpu_dst,
3578            Rotation::None,
3579            Flip::None,
3580            crop,
3581        );
3582        result.unwrap();
3583
3584        let g2d_dst =
3585            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3586        let mut g2d_converter = G2DProcessor::new().unwrap();
3587        let (result, _src, g2d_dst) = convert_img(
3588            &mut g2d_converter,
3589            src,
3590            g2d_dst,
3591            Rotation::None,
3592            Flip::None,
3593            crop,
3594        );
3595        result.unwrap();
3596
3597        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
3598    }
3599
3600    #[test]
3601    #[cfg(target_os = "linux")]
3602    fn test_g2d_dst_crop() {
3603        if !is_g2d_available() {
3604            eprintln!("SKIPPED: test_g2d_dst_crop - G2D library (libg2d.so.2) not available");
3605            return;
3606        }
3607        if !is_dma_available() {
3608            eprintln!(
3609                "SKIPPED: test_g2d_dst_crop - DMA memory allocation not available (permission denied or no DMA-BUF support)"
3610            );
3611            return;
3612        }
3613
3614        let dst_width = 640;
3615        let dst_height = 640;
3616        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
3617        let src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
3618
3619        let cpu_dst =
3620            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3621        let mut cpu_converter = CPUProcessor::new();
3622        let crop = Crop {
3623            src_rect: None,
3624            dst_rect: Some(Rect::new(100, 100, 512, 288)),
3625            dst_color: None,
3626        };
3627        let (result, src, cpu_dst) = convert_img(
3628            &mut cpu_converter,
3629            src,
3630            cpu_dst,
3631            Rotation::None,
3632            Flip::None,
3633            crop,
3634        );
3635        result.unwrap();
3636
3637        let g2d_dst =
3638            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3639        let mut g2d_converter = G2DProcessor::new().unwrap();
3640        let (result, _src, g2d_dst) = convert_img(
3641            &mut g2d_converter,
3642            src,
3643            g2d_dst,
3644            Rotation::None,
3645            Flip::None,
3646            crop,
3647        );
3648        result.unwrap();
3649
3650        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
3651    }
3652
3653    #[test]
3654    #[cfg(target_os = "linux")]
3655    fn test_g2d_all_rgba() {
3656        if !is_g2d_available() {
3657            eprintln!("SKIPPED: test_g2d_all_rgba - G2D library (libg2d.so.2) not available");
3658            return;
3659        }
3660        if !is_dma_available() {
3661            eprintln!(
3662                "SKIPPED: test_g2d_all_rgba - DMA memory allocation not available (permission denied or no DMA-BUF support)"
3663            );
3664            return;
3665        }
3666
3667        let dst_width = 640;
3668        let dst_height = 640;
3669        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
3670        let src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
3671        let src_dyn = src;
3672
3673        let mut cpu_dst =
3674            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3675        let mut cpu_converter = CPUProcessor::new();
3676        let mut g2d_dst =
3677            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3678        let mut g2d_converter = G2DProcessor::new().unwrap();
3679
3680        let crop = Crop {
3681            src_rect: Some(Rect::new(50, 120, 1024, 576)),
3682            dst_rect: Some(Rect::new(100, 100, 512, 288)),
3683            dst_color: None,
3684        };
3685
3686        for rot in [
3687            Rotation::None,
3688            Rotation::Clockwise90,
3689            Rotation::Rotate180,
3690            Rotation::CounterClockwise90,
3691        ] {
3692            cpu_dst
3693                .as_u8()
3694                .unwrap()
3695                .map()
3696                .unwrap()
3697                .as_mut_slice()
3698                .fill(114);
3699            g2d_dst
3700                .as_u8()
3701                .unwrap()
3702                .map()
3703                .unwrap()
3704                .as_mut_slice()
3705                .fill(114);
3706            for flip in [Flip::None, Flip::Horizontal, Flip::Vertical] {
3707                let mut cpu_dst_dyn = cpu_dst;
3708                cpu_converter
3709                    .convert(&src_dyn, &mut cpu_dst_dyn, Rotation::None, Flip::None, crop)
3710                    .unwrap();
3711                cpu_dst = {
3712                    let mut __t = cpu_dst_dyn.into_u8().unwrap();
3713                    __t.set_format(PixelFormat::Rgba).unwrap();
3714                    TensorDyn::from(__t)
3715                };
3716
3717                let mut g2d_dst_dyn = g2d_dst;
3718                g2d_converter
3719                    .convert(&src_dyn, &mut g2d_dst_dyn, Rotation::None, Flip::None, crop)
3720                    .unwrap();
3721                g2d_dst = {
3722                    let mut __t = g2d_dst_dyn.into_u8().unwrap();
3723                    __t.set_format(PixelFormat::Rgba).unwrap();
3724                    TensorDyn::from(__t)
3725                };
3726
3727                compare_images(
3728                    &g2d_dst,
3729                    &cpu_dst,
3730                    0.98,
3731                    &format!("{} {:?} {:?}", function!(), rot, flip),
3732                );
3733            }
3734        }
3735    }
3736
3737    #[test]
3738    #[cfg(target_os = "linux")]
3739    #[cfg(feature = "opengl")]
3740    fn test_opengl_src_crop() {
3741        if !is_opengl_available() {
3742            eprintln!("SKIPPED: {} - OpenGL not available", function!());
3743            return;
3744        }
3745
3746        let dst_width = 640;
3747        let dst_height = 360;
3748        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
3749        let src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
3750        let crop = Crop {
3751            src_rect: Some(Rect {
3752                left: 320,
3753                top: 180,
3754                width: 1280 - 320,
3755                height: 720 - 180,
3756            }),
3757            dst_rect: None,
3758            dst_color: None,
3759        };
3760
3761        let cpu_dst =
3762            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3763        let mut cpu_converter = CPUProcessor::new();
3764        let (result, src, cpu_dst) = convert_img(
3765            &mut cpu_converter,
3766            src,
3767            cpu_dst,
3768            Rotation::None,
3769            Flip::None,
3770            crop,
3771        );
3772        result.unwrap();
3773
3774        let gl_dst =
3775            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3776        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
3777        let (result, _src, gl_dst) = convert_img(
3778            &mut gl_converter,
3779            src,
3780            gl_dst,
3781            Rotation::None,
3782            Flip::None,
3783            crop,
3784        );
3785        result.unwrap();
3786
3787        compare_images(&gl_dst, &cpu_dst, 0.98, function!());
3788    }
3789
3790    #[test]
3791    #[cfg(target_os = "linux")]
3792    #[cfg(feature = "opengl")]
3793    fn test_opengl_dst_crop() {
3794        if !is_opengl_available() {
3795            eprintln!("SKIPPED: {} - OpenGL not available", function!());
3796            return;
3797        }
3798
3799        let dst_width = 640;
3800        let dst_height = 640;
3801        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
3802        let src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
3803
3804        let cpu_dst =
3805            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3806        let mut cpu_converter = CPUProcessor::new();
3807        let crop = Crop {
3808            src_rect: None,
3809            dst_rect: Some(Rect::new(100, 100, 512, 288)),
3810            dst_color: None,
3811        };
3812        let (result, src, cpu_dst) = convert_img(
3813            &mut cpu_converter,
3814            src,
3815            cpu_dst,
3816            Rotation::None,
3817            Flip::None,
3818            crop,
3819        );
3820        result.unwrap();
3821
3822        let gl_dst =
3823            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3824        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
3825        let (result, _src, gl_dst) = convert_img(
3826            &mut gl_converter,
3827            src,
3828            gl_dst,
3829            Rotation::None,
3830            Flip::None,
3831            crop,
3832        );
3833        result.unwrap();
3834
3835        compare_images(&gl_dst, &cpu_dst, 0.98, function!());
3836    }
3837
3838    #[test]
3839    #[cfg(target_os = "linux")]
3840    #[cfg(feature = "opengl")]
3841    fn test_opengl_all_rgba() {
3842        if !is_opengl_available() {
3843            eprintln!("SKIPPED: {} - OpenGL not available", function!());
3844            return;
3845        }
3846
3847        let dst_width = 640;
3848        let dst_height = 640;
3849        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
3850
3851        let mut cpu_converter = CPUProcessor::new();
3852
3853        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
3854
3855        let mut mem = vec![None, Some(TensorMemory::Mem), Some(TensorMemory::Shm)];
3856        if is_dma_available() {
3857            mem.push(Some(TensorMemory::Dma));
3858        }
3859        let crop = Crop {
3860            src_rect: Some(Rect::new(50, 120, 1024, 576)),
3861            dst_rect: Some(Rect::new(100, 100, 512, 288)),
3862            dst_color: None,
3863        };
3864        for m in mem {
3865            let src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), m).unwrap();
3866            let src_dyn = src;
3867
3868            for rot in [
3869                Rotation::None,
3870                Rotation::Clockwise90,
3871                Rotation::Rotate180,
3872                Rotation::CounterClockwise90,
3873            ] {
3874                for flip in [Flip::None, Flip::Horizontal, Flip::Vertical] {
3875                    let cpu_dst =
3876                        TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, m)
3877                            .unwrap();
3878                    let gl_dst =
3879                        TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, m)
3880                            .unwrap();
3881                    cpu_dst
3882                        .as_u8()
3883                        .unwrap()
3884                        .map()
3885                        .unwrap()
3886                        .as_mut_slice()
3887                        .fill(114);
3888                    gl_dst
3889                        .as_u8()
3890                        .unwrap()
3891                        .map()
3892                        .unwrap()
3893                        .as_mut_slice()
3894                        .fill(114);
3895
3896                    let mut cpu_dst_dyn = cpu_dst;
3897                    cpu_converter
3898                        .convert(&src_dyn, &mut cpu_dst_dyn, Rotation::None, Flip::None, crop)
3899                        .unwrap();
3900                    let cpu_dst = {
3901                        let mut __t = cpu_dst_dyn.into_u8().unwrap();
3902                        __t.set_format(PixelFormat::Rgba).unwrap();
3903                        TensorDyn::from(__t)
3904                    };
3905
3906                    let mut gl_dst_dyn = gl_dst;
3907                    gl_converter
3908                        .convert(&src_dyn, &mut gl_dst_dyn, Rotation::None, Flip::None, crop)
3909                        .map_err(|e| {
3910                            log::error!("error mem {m:?} rot {rot:?} error: {e:?}");
3911                            e
3912                        })
3913                        .unwrap();
3914                    let gl_dst = {
3915                        let mut __t = gl_dst_dyn.into_u8().unwrap();
3916                        __t.set_format(PixelFormat::Rgba).unwrap();
3917                        TensorDyn::from(__t)
3918                    };
3919
3920                    compare_images(
3921                        &gl_dst,
3922                        &cpu_dst,
3923                        0.98,
3924                        &format!("{} {:?} {:?}", function!(), rot, flip),
3925                    );
3926                }
3927            }
3928        }
3929    }
3930
3931    #[test]
3932    #[cfg(target_os = "linux")]
3933    fn test_cpu_rotate() {
3934        for rot in [
3935            Rotation::Clockwise90,
3936            Rotation::Rotate180,
3937            Rotation::CounterClockwise90,
3938        ] {
3939            test_cpu_rotate_(rot);
3940        }
3941    }
3942
3943    #[cfg(target_os = "linux")]
3944    fn test_cpu_rotate_(rot: Rotation) {
3945        // This test rotates the image 4 times and checks that the image was returned to
3946        // be the same Currently doesn't check if rotations actually rotated in
3947        // right direction
3948        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
3949
3950        let unchanged_src =
3951            crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
3952        let src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
3953
3954        let (dst_width, dst_height) = match rot {
3955            Rotation::None | Rotation::Rotate180 => (src.width().unwrap(), src.height().unwrap()),
3956            Rotation::Clockwise90 | Rotation::CounterClockwise90 => {
3957                (src.height().unwrap(), src.width().unwrap())
3958            }
3959        };
3960
3961        let cpu_dst =
3962            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
3963        let mut cpu_converter = CPUProcessor::new();
3964
3965        // After rotating 4 times, the image should be the same as the original
3966
3967        let (result, src, cpu_dst) = convert_img(
3968            &mut cpu_converter,
3969            src,
3970            cpu_dst,
3971            rot,
3972            Flip::None,
3973            Crop::no_crop(),
3974        );
3975        result.unwrap();
3976
3977        let (result, cpu_dst, src) = convert_img(
3978            &mut cpu_converter,
3979            cpu_dst,
3980            src,
3981            rot,
3982            Flip::None,
3983            Crop::no_crop(),
3984        );
3985        result.unwrap();
3986
3987        let (result, src, cpu_dst) = convert_img(
3988            &mut cpu_converter,
3989            src,
3990            cpu_dst,
3991            rot,
3992            Flip::None,
3993            Crop::no_crop(),
3994        );
3995        result.unwrap();
3996
3997        let (result, _cpu_dst, src) = convert_img(
3998            &mut cpu_converter,
3999            cpu_dst,
4000            src,
4001            rot,
4002            Flip::None,
4003            Crop::no_crop(),
4004        );
4005        result.unwrap();
4006
4007        compare_images(&src, &unchanged_src, 0.98, function!());
4008    }
4009
4010    #[test]
4011    #[cfg(target_os = "linux")]
4012    #[cfg(feature = "opengl")]
4013    fn test_opengl_rotate() {
4014        if !is_opengl_available() {
4015            eprintln!("SKIPPED: {} - OpenGL not available", function!());
4016            return;
4017        }
4018
4019        let size = (1280, 720);
4020        let mut mem = vec![None, Some(TensorMemory::Shm), Some(TensorMemory::Mem)];
4021
4022        if is_dma_available() {
4023            mem.push(Some(TensorMemory::Dma));
4024        }
4025        for m in mem {
4026            for rot in [
4027                Rotation::Clockwise90,
4028                Rotation::Rotate180,
4029                Rotation::CounterClockwise90,
4030            ] {
4031                test_opengl_rotate_(size, rot, m);
4032            }
4033        }
4034    }
4035
4036    #[cfg(target_os = "linux")]
4037    #[cfg(feature = "opengl")]
4038    fn test_opengl_rotate_(
4039        size: (usize, usize),
4040        rot: Rotation,
4041        tensor_memory: Option<TensorMemory>,
4042    ) {
4043        let (dst_width, dst_height) = match rot {
4044            Rotation::None | Rotation::Rotate180 => size,
4045            Rotation::Clockwise90 | Rotation::CounterClockwise90 => (size.1, size.0),
4046        };
4047
4048        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
4049        let src =
4050            crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), tensor_memory).unwrap();
4051
4052        let cpu_dst =
4053            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4054        let mut cpu_converter = CPUProcessor::new();
4055
4056        let (result, mut src, cpu_dst) = convert_img(
4057            &mut cpu_converter,
4058            src,
4059            cpu_dst,
4060            rot,
4061            Flip::None,
4062            Crop::no_crop(),
4063        );
4064        result.unwrap();
4065
4066        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
4067
4068        for _ in 0..5 {
4069            let gl_dst = TensorDyn::image(
4070                dst_width,
4071                dst_height,
4072                PixelFormat::Rgba,
4073                DType::U8,
4074                tensor_memory,
4075            )
4076            .unwrap();
4077            let (result, src_back, gl_dst) = convert_img(
4078                &mut gl_converter,
4079                src,
4080                gl_dst,
4081                rot,
4082                Flip::None,
4083                Crop::no_crop(),
4084            );
4085            result.unwrap();
4086            src = src_back;
4087            compare_images(&gl_dst, &cpu_dst, 0.98, function!());
4088        }
4089    }
4090
4091    #[test]
4092    #[cfg(target_os = "linux")]
4093    fn test_g2d_rotate() {
4094        if !is_g2d_available() {
4095            eprintln!("SKIPPED: test_g2d_rotate - G2D library (libg2d.so.2) not available");
4096            return;
4097        }
4098        if !is_dma_available() {
4099            eprintln!(
4100                "SKIPPED: test_g2d_rotate - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4101            );
4102            return;
4103        }
4104
4105        let size = (1280, 720);
4106        for rot in [
4107            Rotation::Clockwise90,
4108            Rotation::Rotate180,
4109            Rotation::CounterClockwise90,
4110        ] {
4111            test_g2d_rotate_(size, rot);
4112        }
4113    }
4114
4115    #[cfg(target_os = "linux")]
4116    fn test_g2d_rotate_(size: (usize, usize), rot: Rotation) {
4117        let (dst_width, dst_height) = match rot {
4118            Rotation::None | Rotation::Rotate180 => size,
4119            Rotation::Clockwise90 | Rotation::CounterClockwise90 => (size.1, size.0),
4120        };
4121
4122        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
4123        let src =
4124            crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), Some(TensorMemory::Dma))
4125                .unwrap();
4126
4127        let cpu_dst =
4128            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4129        let mut cpu_converter = CPUProcessor::new();
4130
4131        let (result, src, cpu_dst) = convert_img(
4132            &mut cpu_converter,
4133            src,
4134            cpu_dst,
4135            rot,
4136            Flip::None,
4137            Crop::no_crop(),
4138        );
4139        result.unwrap();
4140
4141        let g2d_dst = TensorDyn::image(
4142            dst_width,
4143            dst_height,
4144            PixelFormat::Rgba,
4145            DType::U8,
4146            Some(TensorMemory::Dma),
4147        )
4148        .unwrap();
4149        let mut g2d_converter = G2DProcessor::new().unwrap();
4150
4151        let (result, _src, g2d_dst) = convert_img(
4152            &mut g2d_converter,
4153            src,
4154            g2d_dst,
4155            rot,
4156            Flip::None,
4157            Crop::no_crop(),
4158        );
4159        result.unwrap();
4160
4161        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
4162    }
4163
4164    #[test]
4165    fn test_rgba_to_yuyv_resize_cpu() {
4166        let src = load_bytes_to_tensor(
4167            1280,
4168            720,
4169            PixelFormat::Rgba,
4170            None,
4171            &edgefirst_bench::testdata::read("camera720p.rgba"),
4172        )
4173        .unwrap();
4174
4175        let (dst_width, dst_height) = (640, 360);
4176
4177        let dst =
4178            TensorDyn::image(dst_width, dst_height, PixelFormat::Yuyv, DType::U8, None).unwrap();
4179
4180        let dst_through_yuyv =
4181            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4182        let dst_direct =
4183            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4184
4185        let mut cpu_converter = CPUProcessor::new();
4186
4187        let (result, src, dst) = convert_img(
4188            &mut cpu_converter,
4189            src,
4190            dst,
4191            Rotation::None,
4192            Flip::None,
4193            Crop::no_crop(),
4194        );
4195        result.unwrap();
4196
4197        let (result, _dst, dst_through_yuyv) = convert_img(
4198            &mut cpu_converter,
4199            dst,
4200            dst_through_yuyv,
4201            Rotation::None,
4202            Flip::None,
4203            Crop::no_crop(),
4204        );
4205        result.unwrap();
4206
4207        let (result, _src, dst_direct) = convert_img(
4208            &mut cpu_converter,
4209            src,
4210            dst_direct,
4211            Rotation::None,
4212            Flip::None,
4213            Crop::no_crop(),
4214        );
4215        result.unwrap();
4216
4217        compare_images(&dst_through_yuyv, &dst_direct, 0.98, function!());
4218    }
4219
4220    #[test]
4221    #[cfg(target_os = "linux")]
4222    #[cfg(feature = "opengl")]
4223    #[ignore = "opengl doesn't support rendering to PixelFormat::Yuyv texture"]
4224    fn test_rgba_to_yuyv_resize_opengl() {
4225        if !is_opengl_available() {
4226            eprintln!("SKIPPED: {} - OpenGL not available", function!());
4227            return;
4228        }
4229
4230        if !is_dma_available() {
4231            eprintln!(
4232                "SKIPPED: {} - DMA memory allocation not available (permission denied or no DMA-BUF support)",
4233                function!()
4234            );
4235            return;
4236        }
4237
4238        let src = load_bytes_to_tensor(
4239            1280,
4240            720,
4241            PixelFormat::Rgba,
4242            None,
4243            &edgefirst_bench::testdata::read("camera720p.rgba"),
4244        )
4245        .unwrap();
4246
4247        let (dst_width, dst_height) = (640, 360);
4248
4249        let dst = TensorDyn::image(
4250            dst_width,
4251            dst_height,
4252            PixelFormat::Yuyv,
4253            DType::U8,
4254            Some(TensorMemory::Dma),
4255        )
4256        .unwrap();
4257
4258        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
4259
4260        let (result, src, dst) = convert_img(
4261            &mut gl_converter,
4262            src,
4263            dst,
4264            Rotation::None,
4265            Flip::None,
4266            Crop::new()
4267                .with_dst_rect(Some(Rect::new(100, 100, 100, 100)))
4268                .with_dst_color(Some([255, 255, 255, 255])),
4269        );
4270        result.unwrap();
4271
4272        std::fs::write(
4273            "rgba_to_yuyv_opengl.yuyv",
4274            dst.as_u8().unwrap().map().unwrap().as_slice(),
4275        )
4276        .unwrap();
4277        let cpu_dst = TensorDyn::image(
4278            dst_width,
4279            dst_height,
4280            PixelFormat::Yuyv,
4281            DType::U8,
4282            Some(TensorMemory::Dma),
4283        )
4284        .unwrap();
4285        let (result, _src, cpu_dst) = convert_img(
4286            &mut CPUProcessor::new(),
4287            src,
4288            cpu_dst,
4289            Rotation::None,
4290            Flip::None,
4291            Crop::no_crop(),
4292        );
4293        result.unwrap();
4294
4295        compare_images_convert_to_rgb(&dst, &cpu_dst, 0.98, function!());
4296    }
4297
4298    #[test]
4299    #[cfg(target_os = "linux")]
4300    fn test_rgba_to_yuyv_resize_g2d() {
4301        if !is_g2d_available() {
4302            eprintln!(
4303                "SKIPPED: test_rgba_to_yuyv_resize_g2d - G2D library (libg2d.so.2) not available"
4304            );
4305            return;
4306        }
4307        if !is_dma_available() {
4308            eprintln!(
4309                "SKIPPED: test_rgba_to_yuyv_resize_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4310            );
4311            return;
4312        }
4313
4314        let src = load_bytes_to_tensor(
4315            1280,
4316            720,
4317            PixelFormat::Rgba,
4318            Some(TensorMemory::Dma),
4319            &edgefirst_bench::testdata::read("camera720p.rgba"),
4320        )
4321        .unwrap();
4322
4323        let (dst_width, dst_height) = (1280, 720);
4324
4325        let cpu_dst = TensorDyn::image(
4326            dst_width,
4327            dst_height,
4328            PixelFormat::Yuyv,
4329            DType::U8,
4330            Some(TensorMemory::Dma),
4331        )
4332        .unwrap();
4333
4334        let g2d_dst = TensorDyn::image(
4335            dst_width,
4336            dst_height,
4337            PixelFormat::Yuyv,
4338            DType::U8,
4339            Some(TensorMemory::Dma),
4340        )
4341        .unwrap();
4342
4343        let mut g2d_converter = G2DProcessor::new().unwrap();
4344        let crop = Crop {
4345            src_rect: None,
4346            dst_rect: Some(Rect::new(100, 100, 2, 2)),
4347            dst_color: None,
4348        };
4349
4350        g2d_dst
4351            .as_u8()
4352            .unwrap()
4353            .map()
4354            .unwrap()
4355            .as_mut_slice()
4356            .fill(128);
4357        let (result, src, g2d_dst) = convert_img(
4358            &mut g2d_converter,
4359            src,
4360            g2d_dst,
4361            Rotation::None,
4362            Flip::None,
4363            crop,
4364        );
4365        result.unwrap();
4366
4367        let cpu_dst_img = cpu_dst;
4368        cpu_dst_img
4369            .as_u8()
4370            .unwrap()
4371            .map()
4372            .unwrap()
4373            .as_mut_slice()
4374            .fill(128);
4375        let (result, _src, cpu_dst) = convert_img(
4376            &mut CPUProcessor::new(),
4377            src,
4378            cpu_dst_img,
4379            Rotation::None,
4380            Flip::None,
4381            crop,
4382        );
4383        result.unwrap();
4384
4385        compare_images_convert_to_rgb(&cpu_dst, &g2d_dst, 0.98, function!());
4386    }
4387
4388    #[test]
4389    fn test_yuyv_to_rgba_cpu() {
4390        let file = edgefirst_bench::testdata::read("camera720p.yuyv").to_vec();
4391        let src = TensorDyn::image(1280, 720, PixelFormat::Yuyv, DType::U8, None).unwrap();
4392        src.as_u8()
4393            .unwrap()
4394            .map()
4395            .unwrap()
4396            .as_mut_slice()
4397            .copy_from_slice(&file);
4398
4399        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
4400        let mut cpu_converter = CPUProcessor::new();
4401
4402        let (result, _src, dst) = convert_img(
4403            &mut cpu_converter,
4404            src,
4405            dst,
4406            Rotation::None,
4407            Flip::None,
4408            Crop::no_crop(),
4409        );
4410        result.unwrap();
4411
4412        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
4413        target_image
4414            .as_u8()
4415            .unwrap()
4416            .map()
4417            .unwrap()
4418            .as_mut_slice()
4419            .copy_from_slice(&edgefirst_bench::testdata::read("camera720p.rgba"));
4420
4421        compare_images(&dst, &target_image, 0.98, function!());
4422    }
4423
4424    #[test]
4425    fn test_yuyv_to_rgb_cpu() {
4426        let file = edgefirst_bench::testdata::read("camera720p.yuyv").to_vec();
4427        let src = TensorDyn::image(1280, 720, PixelFormat::Yuyv, DType::U8, None).unwrap();
4428        src.as_u8()
4429            .unwrap()
4430            .map()
4431            .unwrap()
4432            .as_mut_slice()
4433            .copy_from_slice(&file);
4434
4435        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
4436        let mut cpu_converter = CPUProcessor::new();
4437
4438        let (result, _src, dst) = convert_img(
4439            &mut cpu_converter,
4440            src,
4441            dst,
4442            Rotation::None,
4443            Flip::None,
4444            Crop::no_crop(),
4445        );
4446        result.unwrap();
4447
4448        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
4449        target_image
4450            .as_u8()
4451            .unwrap()
4452            .map()
4453            .unwrap()
4454            .as_mut_slice()
4455            .as_chunks_mut::<3>()
4456            .0
4457            .iter_mut()
4458            .zip(
4459                edgefirst_bench::testdata::read("camera720p.rgba")
4460                    .as_chunks::<4>()
4461                    .0,
4462            )
4463            .for_each(|(dst, src)| *dst = [src[0], src[1], src[2]]);
4464
4465        compare_images(&dst, &target_image, 0.98, function!());
4466    }
4467
4468    #[test]
4469    #[cfg(target_os = "linux")]
4470    fn test_yuyv_to_rgba_g2d() {
4471        if !is_g2d_available() {
4472            eprintln!("SKIPPED: test_yuyv_to_rgba_g2d - G2D library (libg2d.so.2) not available");
4473            return;
4474        }
4475        if !is_dma_available() {
4476            eprintln!(
4477                "SKIPPED: test_yuyv_to_rgba_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4478            );
4479            return;
4480        }
4481
4482        let src = load_bytes_to_tensor(
4483            1280,
4484            720,
4485            PixelFormat::Yuyv,
4486            None,
4487            &edgefirst_bench::testdata::read("camera720p.yuyv"),
4488        )
4489        .unwrap();
4490
4491        let dst = TensorDyn::image(
4492            1280,
4493            720,
4494            PixelFormat::Rgba,
4495            DType::U8,
4496            Some(TensorMemory::Dma),
4497        )
4498        .unwrap();
4499        let mut g2d_converter = G2DProcessor::new().unwrap();
4500
4501        let (result, _src, dst) = convert_img(
4502            &mut g2d_converter,
4503            src,
4504            dst,
4505            Rotation::None,
4506            Flip::None,
4507            Crop::no_crop(),
4508        );
4509        result.unwrap();
4510
4511        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
4512        target_image
4513            .as_u8()
4514            .unwrap()
4515            .map()
4516            .unwrap()
4517            .as_mut_slice()
4518            .copy_from_slice(&edgefirst_bench::testdata::read("camera720p.rgba"));
4519
4520        compare_images(&dst, &target_image, 0.98, function!());
4521    }
4522
4523    #[test]
4524    #[cfg(target_os = "linux")]
4525    #[cfg(feature = "opengl")]
4526    fn test_yuyv_to_rgba_opengl() {
4527        if !is_opengl_available() {
4528            eprintln!("SKIPPED: {} - OpenGL not available", function!());
4529            return;
4530        }
4531        if !is_dma_available() {
4532            eprintln!(
4533                "SKIPPED: {} - DMA memory allocation not available (permission denied or no DMA-BUF support)",
4534                function!()
4535            );
4536            return;
4537        }
4538
4539        let src = load_bytes_to_tensor(
4540            1280,
4541            720,
4542            PixelFormat::Yuyv,
4543            Some(TensorMemory::Dma),
4544            &edgefirst_bench::testdata::read("camera720p.yuyv"),
4545        )
4546        .unwrap();
4547
4548        let dst = TensorDyn::image(
4549            1280,
4550            720,
4551            PixelFormat::Rgba,
4552            DType::U8,
4553            Some(TensorMemory::Dma),
4554        )
4555        .unwrap();
4556        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
4557
4558        let (result, _src, dst) = convert_img(
4559            &mut gl_converter,
4560            src,
4561            dst,
4562            Rotation::None,
4563            Flip::None,
4564            Crop::no_crop(),
4565        );
4566        result.unwrap();
4567
4568        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
4569        target_image
4570            .as_u8()
4571            .unwrap()
4572            .map()
4573            .unwrap()
4574            .as_mut_slice()
4575            .copy_from_slice(&edgefirst_bench::testdata::read("camera720p.rgba"));
4576
4577        compare_images(&dst, &target_image, 0.98, function!());
4578    }
4579
4580    #[test]
4581    #[cfg(target_os = "linux")]
4582    fn test_yuyv_to_rgb_g2d() {
4583        if !is_g2d_available() {
4584            eprintln!("SKIPPED: test_yuyv_to_rgb_g2d - G2D library (libg2d.so.2) not available");
4585            return;
4586        }
4587        if !is_dma_available() {
4588            eprintln!(
4589                "SKIPPED: test_yuyv_to_rgb_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4590            );
4591            return;
4592        }
4593
4594        let src = load_bytes_to_tensor(
4595            1280,
4596            720,
4597            PixelFormat::Yuyv,
4598            None,
4599            &edgefirst_bench::testdata::read("camera720p.yuyv"),
4600        )
4601        .unwrap();
4602
4603        let g2d_dst = TensorDyn::image(
4604            1280,
4605            720,
4606            PixelFormat::Rgb,
4607            DType::U8,
4608            Some(TensorMemory::Dma),
4609        )
4610        .unwrap();
4611        let mut g2d_converter = G2DProcessor::new().unwrap();
4612
4613        let (result, src, g2d_dst) = convert_img(
4614            &mut g2d_converter,
4615            src,
4616            g2d_dst,
4617            Rotation::None,
4618            Flip::None,
4619            Crop::no_crop(),
4620        );
4621        result.unwrap();
4622
4623        let cpu_dst = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
4624        let mut cpu_converter: CPUProcessor = CPUProcessor::new();
4625
4626        let (result, _src, cpu_dst) = convert_img(
4627            &mut cpu_converter,
4628            src,
4629            cpu_dst,
4630            Rotation::None,
4631            Flip::None,
4632            Crop::no_crop(),
4633        );
4634        result.unwrap();
4635
4636        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
4637    }
4638
4639    #[test]
4640    #[cfg(target_os = "linux")]
4641    fn test_yuyv_to_yuyv_resize_g2d() {
4642        if !is_g2d_available() {
4643            eprintln!(
4644                "SKIPPED: test_yuyv_to_yuyv_resize_g2d - G2D library (libg2d.so.2) not available"
4645            );
4646            return;
4647        }
4648        if !is_dma_available() {
4649            eprintln!(
4650                "SKIPPED: test_yuyv_to_yuyv_resize_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4651            );
4652            return;
4653        }
4654
4655        let src = load_bytes_to_tensor(
4656            1280,
4657            720,
4658            PixelFormat::Yuyv,
4659            None,
4660            &edgefirst_bench::testdata::read("camera720p.yuyv"),
4661        )
4662        .unwrap();
4663
4664        let g2d_dst = TensorDyn::image(
4665            600,
4666            400,
4667            PixelFormat::Yuyv,
4668            DType::U8,
4669            Some(TensorMemory::Dma),
4670        )
4671        .unwrap();
4672        let mut g2d_converter = G2DProcessor::new().unwrap();
4673
4674        let (result, src, g2d_dst) = convert_img(
4675            &mut g2d_converter,
4676            src,
4677            g2d_dst,
4678            Rotation::None,
4679            Flip::None,
4680            Crop::no_crop(),
4681        );
4682        result.unwrap();
4683
4684        let cpu_dst = TensorDyn::image(600, 400, PixelFormat::Yuyv, DType::U8, None).unwrap();
4685        let mut cpu_converter: CPUProcessor = CPUProcessor::new();
4686
4687        let (result, _src, cpu_dst) = convert_img(
4688            &mut cpu_converter,
4689            src,
4690            cpu_dst,
4691            Rotation::None,
4692            Flip::None,
4693            Crop::no_crop(),
4694        );
4695        result.unwrap();
4696
4697        // TODO: compare PixelFormat::Yuyv and PixelFormat::Yuyv images without having to convert them to PixelFormat::Rgb
4698        compare_images_convert_to_rgb(&g2d_dst, &cpu_dst, 0.98, function!());
4699    }
4700
4701    #[test]
4702    fn test_yuyv_to_rgba_resize_cpu() {
4703        let src = load_bytes_to_tensor(
4704            1280,
4705            720,
4706            PixelFormat::Yuyv,
4707            None,
4708            &edgefirst_bench::testdata::read("camera720p.yuyv"),
4709        )
4710        .unwrap();
4711
4712        let (dst_width, dst_height) = (960, 540);
4713
4714        let dst =
4715            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4716        let mut cpu_converter = CPUProcessor::new();
4717
4718        let (result, _src, dst) = convert_img(
4719            &mut cpu_converter,
4720            src,
4721            dst,
4722            Rotation::None,
4723            Flip::None,
4724            Crop::no_crop(),
4725        );
4726        result.unwrap();
4727
4728        let dst_target =
4729            TensorDyn::image(dst_width, dst_height, PixelFormat::Rgba, DType::U8, None).unwrap();
4730        let src_target = load_bytes_to_tensor(
4731            1280,
4732            720,
4733            PixelFormat::Rgba,
4734            None,
4735            &edgefirst_bench::testdata::read("camera720p.rgba"),
4736        )
4737        .unwrap();
4738        let (result, _src_target, dst_target) = convert_img(
4739            &mut cpu_converter,
4740            src_target,
4741            dst_target,
4742            Rotation::None,
4743            Flip::None,
4744            Crop::no_crop(),
4745        );
4746        result.unwrap();
4747
4748        compare_images(&dst, &dst_target, 0.98, function!());
4749    }
4750
4751    #[test]
4752    #[cfg(target_os = "linux")]
4753    fn test_yuyv_to_rgba_crop_flip_g2d() {
4754        if !is_g2d_available() {
4755            eprintln!(
4756                "SKIPPED: test_yuyv_to_rgba_crop_flip_g2d - G2D library (libg2d.so.2) not available"
4757            );
4758            return;
4759        }
4760        if !is_dma_available() {
4761            eprintln!(
4762                "SKIPPED: test_yuyv_to_rgba_crop_flip_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
4763            );
4764            return;
4765        }
4766
4767        let src = load_bytes_to_tensor(
4768            1280,
4769            720,
4770            PixelFormat::Yuyv,
4771            Some(TensorMemory::Dma),
4772            &edgefirst_bench::testdata::read("camera720p.yuyv"),
4773        )
4774        .unwrap();
4775
4776        let (dst_width, dst_height) = (640, 640);
4777
4778        let dst_g2d = TensorDyn::image(
4779            dst_width,
4780            dst_height,
4781            PixelFormat::Rgba,
4782            DType::U8,
4783            Some(TensorMemory::Dma),
4784        )
4785        .unwrap();
4786        let mut g2d_converter = G2DProcessor::new().unwrap();
4787        let crop = Crop {
4788            src_rect: Some(Rect {
4789                left: 20,
4790                top: 15,
4791                width: 400,
4792                height: 300,
4793            }),
4794            dst_rect: None,
4795            dst_color: None,
4796        };
4797
4798        let (result, src, dst_g2d) = convert_img(
4799            &mut g2d_converter,
4800            src,
4801            dst_g2d,
4802            Rotation::None,
4803            Flip::Horizontal,
4804            crop,
4805        );
4806        result.unwrap();
4807
4808        let dst_cpu = TensorDyn::image(
4809            dst_width,
4810            dst_height,
4811            PixelFormat::Rgba,
4812            DType::U8,
4813            Some(TensorMemory::Dma),
4814        )
4815        .unwrap();
4816        let mut cpu_converter = CPUProcessor::new();
4817
4818        let (result, _src, dst_cpu) = convert_img(
4819            &mut cpu_converter,
4820            src,
4821            dst_cpu,
4822            Rotation::None,
4823            Flip::Horizontal,
4824            crop,
4825        );
4826        result.unwrap();
4827        compare_images(&dst_g2d, &dst_cpu, 0.98, function!());
4828    }
4829
4830    #[test]
4831    #[cfg(target_os = "linux")]
4832    #[cfg(feature = "opengl")]
4833    fn test_yuyv_to_rgba_crop_flip_opengl() {
4834        if !is_opengl_available() {
4835            eprintln!("SKIPPED: {} - OpenGL not available", function!());
4836            return;
4837        }
4838
4839        if !is_dma_available() {
4840            eprintln!(
4841                "SKIPPED: {} - DMA memory allocation not available (permission denied or no DMA-BUF support)",
4842                function!()
4843            );
4844            return;
4845        }
4846
4847        let src = load_bytes_to_tensor(
4848            1280,
4849            720,
4850            PixelFormat::Yuyv,
4851            Some(TensorMemory::Dma),
4852            &edgefirst_bench::testdata::read("camera720p.yuyv"),
4853        )
4854        .unwrap();
4855
4856        let (dst_width, dst_height) = (640, 640);
4857
4858        let dst_gl = TensorDyn::image(
4859            dst_width,
4860            dst_height,
4861            PixelFormat::Rgba,
4862            DType::U8,
4863            Some(TensorMemory::Dma),
4864        )
4865        .unwrap();
4866        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
4867        let crop = Crop {
4868            src_rect: Some(Rect {
4869                left: 20,
4870                top: 15,
4871                width: 400,
4872                height: 300,
4873            }),
4874            dst_rect: None,
4875            dst_color: None,
4876        };
4877
4878        let (result, src, dst_gl) = convert_img(
4879            &mut gl_converter,
4880            src,
4881            dst_gl,
4882            Rotation::None,
4883            Flip::Horizontal,
4884            crop,
4885        );
4886        result.unwrap();
4887
4888        let dst_cpu = TensorDyn::image(
4889            dst_width,
4890            dst_height,
4891            PixelFormat::Rgba,
4892            DType::U8,
4893            Some(TensorMemory::Dma),
4894        )
4895        .unwrap();
4896        let mut cpu_converter = CPUProcessor::new();
4897
4898        let (result, _src, dst_cpu) = convert_img(
4899            &mut cpu_converter,
4900            src,
4901            dst_cpu,
4902            Rotation::None,
4903            Flip::Horizontal,
4904            crop,
4905        );
4906        result.unwrap();
4907        compare_images(&dst_gl, &dst_cpu, 0.98, function!());
4908    }
4909
4910    #[test]
4911    fn test_vyuy_to_rgba_cpu() {
4912        let file = edgefirst_bench::testdata::read("camera720p.vyuy").to_vec();
4913        let src = TensorDyn::image(1280, 720, PixelFormat::Vyuy, DType::U8, None).unwrap();
4914        src.as_u8()
4915            .unwrap()
4916            .map()
4917            .unwrap()
4918            .as_mut_slice()
4919            .copy_from_slice(&file);
4920
4921        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
4922        let mut cpu_converter = CPUProcessor::new();
4923
4924        let (result, _src, dst) = convert_img(
4925            &mut cpu_converter,
4926            src,
4927            dst,
4928            Rotation::None,
4929            Flip::None,
4930            Crop::no_crop(),
4931        );
4932        result.unwrap();
4933
4934        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
4935        target_image
4936            .as_u8()
4937            .unwrap()
4938            .map()
4939            .unwrap()
4940            .as_mut_slice()
4941            .copy_from_slice(&edgefirst_bench::testdata::read("camera720p.rgba"));
4942
4943        compare_images(&dst, &target_image, 0.98, function!());
4944    }
4945
4946    #[test]
4947    fn test_vyuy_to_rgb_cpu() {
4948        let file = edgefirst_bench::testdata::read("camera720p.vyuy").to_vec();
4949        let src = TensorDyn::image(1280, 720, PixelFormat::Vyuy, DType::U8, None).unwrap();
4950        src.as_u8()
4951            .unwrap()
4952            .map()
4953            .unwrap()
4954            .as_mut_slice()
4955            .copy_from_slice(&file);
4956
4957        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
4958        let mut cpu_converter = CPUProcessor::new();
4959
4960        let (result, _src, dst) = convert_img(
4961            &mut cpu_converter,
4962            src,
4963            dst,
4964            Rotation::None,
4965            Flip::None,
4966            Crop::no_crop(),
4967        );
4968        result.unwrap();
4969
4970        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
4971        target_image
4972            .as_u8()
4973            .unwrap()
4974            .map()
4975            .unwrap()
4976            .as_mut_slice()
4977            .as_chunks_mut::<3>()
4978            .0
4979            .iter_mut()
4980            .zip(
4981                edgefirst_bench::testdata::read("camera720p.rgba")
4982                    .as_chunks::<4>()
4983                    .0,
4984            )
4985            .for_each(|(dst, src)| *dst = [src[0], src[1], src[2]]);
4986
4987        compare_images(&dst, &target_image, 0.98, function!());
4988    }
4989
4990    #[test]
4991    #[cfg(target_os = "linux")]
4992    #[ignore = "G2D does not support VYUY; re-enable when hardware support is added"]
4993    fn test_vyuy_to_rgba_g2d() {
4994        if !is_g2d_available() {
4995            eprintln!("SKIPPED: test_vyuy_to_rgba_g2d - G2D library (libg2d.so.2) not available");
4996            return;
4997        }
4998        if !is_dma_available() {
4999            eprintln!(
5000                "SKIPPED: test_vyuy_to_rgba_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
5001            );
5002            return;
5003        }
5004
5005        let src = load_bytes_to_tensor(
5006            1280,
5007            720,
5008            PixelFormat::Vyuy,
5009            None,
5010            &edgefirst_bench::testdata::read("camera720p.vyuy"),
5011        )
5012        .unwrap();
5013
5014        let dst = TensorDyn::image(
5015            1280,
5016            720,
5017            PixelFormat::Rgba,
5018            DType::U8,
5019            Some(TensorMemory::Dma),
5020        )
5021        .unwrap();
5022        let mut g2d_converter = G2DProcessor::new().unwrap();
5023
5024        let (result, _src, dst) = convert_img(
5025            &mut g2d_converter,
5026            src,
5027            dst,
5028            Rotation::None,
5029            Flip::None,
5030            Crop::no_crop(),
5031        );
5032        match result {
5033            Err(Error::G2D(_)) => {
5034                eprintln!("SKIPPED: test_vyuy_to_rgba_g2d - G2D does not support PixelFormat::Vyuy format");
5035                return;
5036            }
5037            r => r.unwrap(),
5038        }
5039
5040        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
5041        target_image
5042            .as_u8()
5043            .unwrap()
5044            .map()
5045            .unwrap()
5046            .as_mut_slice()
5047            .copy_from_slice(&edgefirst_bench::testdata::read("camera720p.rgba"));
5048
5049        compare_images(&dst, &target_image, 0.98, function!());
5050    }
5051
5052    #[test]
5053    #[cfg(target_os = "linux")]
5054    #[ignore = "G2D does not support VYUY; re-enable when hardware support is added"]
5055    fn test_vyuy_to_rgb_g2d() {
5056        if !is_g2d_available() {
5057            eprintln!("SKIPPED: test_vyuy_to_rgb_g2d - G2D library (libg2d.so.2) not available");
5058            return;
5059        }
5060        if !is_dma_available() {
5061            eprintln!(
5062                "SKIPPED: test_vyuy_to_rgb_g2d - DMA memory allocation not available (permission denied or no DMA-BUF support)"
5063            );
5064            return;
5065        }
5066
5067        let src = load_bytes_to_tensor(
5068            1280,
5069            720,
5070            PixelFormat::Vyuy,
5071            None,
5072            &edgefirst_bench::testdata::read("camera720p.vyuy"),
5073        )
5074        .unwrap();
5075
5076        let g2d_dst = TensorDyn::image(
5077            1280,
5078            720,
5079            PixelFormat::Rgb,
5080            DType::U8,
5081            Some(TensorMemory::Dma),
5082        )
5083        .unwrap();
5084        let mut g2d_converter = G2DProcessor::new().unwrap();
5085
5086        let (result, src, g2d_dst) = convert_img(
5087            &mut g2d_converter,
5088            src,
5089            g2d_dst,
5090            Rotation::None,
5091            Flip::None,
5092            Crop::no_crop(),
5093        );
5094        match result {
5095            Err(Error::G2D(_)) => {
5096                eprintln!(
5097                    "SKIPPED: test_vyuy_to_rgb_g2d - G2D does not support PixelFormat::Vyuy format"
5098                );
5099                return;
5100            }
5101            r => r.unwrap(),
5102        }
5103
5104        let cpu_dst = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
5105        let mut cpu_converter: CPUProcessor = CPUProcessor::new();
5106
5107        let (result, _src, cpu_dst) = convert_img(
5108            &mut cpu_converter,
5109            src,
5110            cpu_dst,
5111            Rotation::None,
5112            Flip::None,
5113            Crop::no_crop(),
5114        );
5115        result.unwrap();
5116
5117        compare_images(&g2d_dst, &cpu_dst, 0.98, function!());
5118    }
5119
5120    #[test]
5121    #[cfg(target_os = "linux")]
5122    #[cfg(feature = "opengl")]
5123    fn test_vyuy_to_rgba_opengl() {
5124        if !is_opengl_available() {
5125            eprintln!("SKIPPED: {} - OpenGL not available", function!());
5126            return;
5127        }
5128        if !is_dma_available() {
5129            eprintln!(
5130                "SKIPPED: {} - DMA memory allocation not available (permission denied or no DMA-BUF support)",
5131                function!()
5132            );
5133            return;
5134        }
5135
5136        let src = load_bytes_to_tensor(
5137            1280,
5138            720,
5139            PixelFormat::Vyuy,
5140            Some(TensorMemory::Dma),
5141            &edgefirst_bench::testdata::read("camera720p.vyuy"),
5142        )
5143        .unwrap();
5144
5145        let dst = TensorDyn::image(
5146            1280,
5147            720,
5148            PixelFormat::Rgba,
5149            DType::U8,
5150            Some(TensorMemory::Dma),
5151        )
5152        .unwrap();
5153        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
5154
5155        let (result, _src, dst) = convert_img(
5156            &mut gl_converter,
5157            src,
5158            dst,
5159            Rotation::None,
5160            Flip::None,
5161            Crop::no_crop(),
5162        );
5163        match result {
5164            Err(Error::NotSupported(_)) => {
5165                eprintln!(
5166                    "SKIPPED: {} - OpenGL does not support PixelFormat::Vyuy DMA format",
5167                    function!()
5168                );
5169                return;
5170            }
5171            r => r.unwrap(),
5172        }
5173
5174        let target_image = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
5175        target_image
5176            .as_u8()
5177            .unwrap()
5178            .map()
5179            .unwrap()
5180            .as_mut_slice()
5181            .copy_from_slice(&edgefirst_bench::testdata::read("camera720p.rgba"));
5182
5183        compare_images(&dst, &target_image, 0.98, function!());
5184    }
5185
5186    #[test]
5187    fn test_nv12_to_rgba_cpu() {
5188        let file = edgefirst_bench::testdata::read("zidane.nv12").to_vec();
5189        let src = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
5190        src.as_u8().unwrap().map().unwrap().as_mut_slice()[0..(1280 * 720 * 3 / 2)]
5191            .copy_from_slice(&file);
5192
5193        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgba, DType::U8, None).unwrap();
5194        let mut cpu_converter = CPUProcessor::new();
5195
5196        let (result, _src, dst) = convert_img(
5197            &mut cpu_converter,
5198            src,
5199            dst,
5200            Rotation::None,
5201            Flip::None,
5202            Crop::no_crop(),
5203        );
5204        result.unwrap();
5205
5206        let target_image = crate::load_image_test_helper(
5207            &edgefirst_bench::testdata::read("zidane.jpg"),
5208            Some(PixelFormat::Rgba),
5209            None,
5210        )
5211        .unwrap();
5212
5213        compare_images(&dst, &target_image, 0.98, function!());
5214    }
5215
5216    #[test]
5217    fn test_nv12_to_rgb_cpu() {
5218        let file = edgefirst_bench::testdata::read("zidane.nv12").to_vec();
5219        let src = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
5220        src.as_u8().unwrap().map().unwrap().as_mut_slice()[0..(1280 * 720 * 3 / 2)]
5221            .copy_from_slice(&file);
5222
5223        let dst = TensorDyn::image(1280, 720, PixelFormat::Rgb, DType::U8, None).unwrap();
5224        let mut cpu_converter = CPUProcessor::new();
5225
5226        let (result, _src, dst) = convert_img(
5227            &mut cpu_converter,
5228            src,
5229            dst,
5230            Rotation::None,
5231            Flip::None,
5232            Crop::no_crop(),
5233        );
5234        result.unwrap();
5235
5236        let target_image = crate::load_image_test_helper(
5237            &edgefirst_bench::testdata::read("zidane.jpg"),
5238            Some(PixelFormat::Rgb),
5239            None,
5240        )
5241        .unwrap();
5242
5243        compare_images(&dst, &target_image, 0.98, function!());
5244    }
5245
5246    #[test]
5247    fn test_nv12_to_grey_cpu() {
5248        let file = edgefirst_bench::testdata::read("zidane.nv12").to_vec();
5249        let src = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
5250        src.as_u8().unwrap().map().unwrap().as_mut_slice()[0..(1280 * 720 * 3 / 2)]
5251            .copy_from_slice(&file);
5252
5253        let dst = TensorDyn::image(1280, 720, PixelFormat::Grey, DType::U8, None).unwrap();
5254        let mut cpu_converter = CPUProcessor::new();
5255
5256        let (result, _src, dst) = convert_img(
5257            &mut cpu_converter,
5258            src,
5259            dst,
5260            Rotation::None,
5261            Flip::None,
5262            Crop::no_crop(),
5263        );
5264        result.unwrap();
5265
5266        let target_image = crate::load_image_test_helper(
5267            &edgefirst_bench::testdata::read("zidane.jpg"),
5268            Some(PixelFormat::Grey),
5269            None,
5270        )
5271        .unwrap();
5272
5273        compare_images(&dst, &target_image, 0.98, function!());
5274    }
5275
5276    #[test]
5277    fn test_nv12_to_yuyv_cpu() {
5278        let file = edgefirst_bench::testdata::read("zidane.nv12").to_vec();
5279        let src = TensorDyn::image(1280, 720, PixelFormat::Nv12, DType::U8, None).unwrap();
5280        src.as_u8().unwrap().map().unwrap().as_mut_slice()[0..(1280 * 720 * 3 / 2)]
5281            .copy_from_slice(&file);
5282
5283        let dst = TensorDyn::image(1280, 720, PixelFormat::Yuyv, DType::U8, None).unwrap();
5284        let mut cpu_converter = CPUProcessor::new();
5285
5286        let (result, _src, dst) = convert_img(
5287            &mut cpu_converter,
5288            src,
5289            dst,
5290            Rotation::None,
5291            Flip::None,
5292            Crop::no_crop(),
5293        );
5294        result.unwrap();
5295
5296        let target_image = crate::load_image_test_helper(
5297            &edgefirst_bench::testdata::read("zidane.jpg"),
5298            Some(PixelFormat::Rgb),
5299            None,
5300        )
5301        .unwrap();
5302
5303        compare_images_convert_to_rgb(&dst, &target_image, 0.98, function!());
5304    }
5305
5306    #[test]
5307    fn test_cpu_resize_planar_rgb() {
5308        let src = TensorDyn::image(4, 4, PixelFormat::Rgba, DType::U8, None).unwrap();
5309        #[rustfmt::skip]
5310        let src_image = [
5311                    255, 0, 0, 255,     0, 255, 0, 255,     0, 0, 255, 255,     255, 255, 0, 255,
5312                    255, 0, 0, 0,       0, 0, 0, 255,       255,  0, 255, 0,    255, 0, 255, 255,
5313                    0, 0, 255, 0,       0, 255, 255, 255,   255, 255, 0, 0,     0, 0, 0, 255,
5314                    255, 0, 0, 0,       0, 0, 0, 255,       255,  0, 255, 0,    255, 0, 255, 255,
5315        ];
5316        src.as_u8()
5317            .unwrap()
5318            .map()
5319            .unwrap()
5320            .as_mut_slice()
5321            .copy_from_slice(&src_image);
5322
5323        let cpu_dst = TensorDyn::image(5, 5, PixelFormat::PlanarRgb, DType::U8, None).unwrap();
5324        let mut cpu_converter = CPUProcessor::new();
5325
5326        let (result, _src, cpu_dst) = convert_img(
5327            &mut cpu_converter,
5328            src,
5329            cpu_dst,
5330            Rotation::None,
5331            Flip::None,
5332            Crop::new()
5333                .with_dst_rect(Some(Rect {
5334                    left: 1,
5335                    top: 1,
5336                    width: 4,
5337                    height: 4,
5338                }))
5339                .with_dst_color(Some([114, 114, 114, 255])),
5340        );
5341        result.unwrap();
5342
5343        #[rustfmt::skip]
5344        let expected_dst = [
5345            114, 114, 114, 114, 114,    114, 255, 0, 0, 255,    114, 255, 0, 255, 255,      114, 0, 0, 255, 0,        114, 255, 0, 255, 255,
5346            114, 114, 114, 114, 114,    114, 0, 255, 0, 255,    114, 0, 0, 0, 0,            114, 0, 255, 255, 0,      114, 0, 0, 0, 0,
5347            114, 114, 114, 114, 114,    114, 0, 0, 255, 0,      114, 0, 0, 255, 255,        114, 255, 255, 0, 0,      114, 0, 0, 255, 255,
5348        ];
5349
5350        assert_eq!(
5351            cpu_dst.as_u8().unwrap().map().unwrap().as_slice(),
5352            &expected_dst
5353        );
5354    }
5355
5356    #[test]
5357    fn test_cpu_resize_planar_rgba() {
5358        let src = TensorDyn::image(4, 4, PixelFormat::Rgba, DType::U8, None).unwrap();
5359        #[rustfmt::skip]
5360        let src_image = [
5361                    255, 0, 0, 255,     0, 255, 0, 255,     0, 0, 255, 255,     255, 255, 0, 255,
5362                    255, 0, 0, 0,       0, 0, 0, 255,       255,  0, 255, 0,    255, 0, 255, 255,
5363                    0, 0, 255, 0,       0, 255, 255, 255,   255, 255, 0, 0,     0, 0, 0, 255,
5364                    255, 0, 0, 0,       0, 0, 0, 255,       255,  0, 255, 0,    255, 0, 255, 255,
5365        ];
5366        src.as_u8()
5367            .unwrap()
5368            .map()
5369            .unwrap()
5370            .as_mut_slice()
5371            .copy_from_slice(&src_image);
5372
5373        let cpu_dst = TensorDyn::image(5, 5, PixelFormat::PlanarRgba, DType::U8, None).unwrap();
5374        let mut cpu_converter = CPUProcessor::new();
5375
5376        let (result, _src, cpu_dst) = convert_img(
5377            &mut cpu_converter,
5378            src,
5379            cpu_dst,
5380            Rotation::None,
5381            Flip::None,
5382            Crop::new()
5383                .with_dst_rect(Some(Rect {
5384                    left: 1,
5385                    top: 1,
5386                    width: 4,
5387                    height: 4,
5388                }))
5389                .with_dst_color(Some([114, 114, 114, 255])),
5390        );
5391        result.unwrap();
5392
5393        #[rustfmt::skip]
5394        let expected_dst = [
5395            114, 114, 114, 114, 114,    114, 255, 0, 0, 255,        114, 255, 0, 255, 255,      114, 0, 0, 255, 0,        114, 255, 0, 255, 255,
5396            114, 114, 114, 114, 114,    114, 0, 255, 0, 255,        114, 0, 0, 0, 0,            114, 0, 255, 255, 0,      114, 0, 0, 0, 0,
5397            114, 114, 114, 114, 114,    114, 0, 0, 255, 0,          114, 0, 0, 255, 255,        114, 255, 255, 0, 0,      114, 0, 0, 255, 255,
5398            255, 255, 255, 255, 255,    255, 255, 255, 255, 255,    255, 0, 255, 0, 255,        255, 0, 255, 0, 255,      255, 0, 255, 0, 255,
5399        ];
5400
5401        assert_eq!(
5402            cpu_dst.as_u8().unwrap().map().unwrap().as_slice(),
5403            &expected_dst
5404        );
5405    }
5406
5407    #[test]
5408    #[cfg(target_os = "linux")]
5409    #[cfg(feature = "opengl")]
5410    fn test_opengl_resize_planar_rgb() {
5411        if !is_opengl_available() {
5412            eprintln!("SKIPPED: {} - OpenGL not available", function!());
5413            return;
5414        }
5415
5416        if !is_dma_available() {
5417            eprintln!(
5418                "SKIPPED: {} - DMA memory allocation not available (permission denied or no DMA-BUF support)",
5419                function!()
5420            );
5421            return;
5422        }
5423
5424        let dst_width = 640;
5425        let dst_height = 640;
5426        let file = edgefirst_bench::testdata::read("test_image.jpg").to_vec();
5427        let src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
5428
5429        let cpu_dst = TensorDyn::image(
5430            dst_width,
5431            dst_height,
5432            PixelFormat::PlanarRgb,
5433            DType::U8,
5434            None,
5435        )
5436        .unwrap();
5437        let mut cpu_converter = CPUProcessor::new();
5438        let (result, src, cpu_dst) = convert_img(
5439            &mut cpu_converter,
5440            src,
5441            cpu_dst,
5442            Rotation::None,
5443            Flip::None,
5444            Crop::no_crop(),
5445        );
5446        result.unwrap();
5447        let crop_letterbox = Crop::new()
5448            .with_dst_rect(Some(Rect {
5449                left: 102,
5450                top: 102,
5451                width: 440,
5452                height: 440,
5453            }))
5454            .with_dst_color(Some([114, 114, 114, 114]));
5455        let (result, src, cpu_dst) = convert_img(
5456            &mut cpu_converter,
5457            src,
5458            cpu_dst,
5459            Rotation::None,
5460            Flip::None,
5461            crop_letterbox,
5462        );
5463        result.unwrap();
5464
5465        let gl_dst = TensorDyn::image(
5466            dst_width,
5467            dst_height,
5468            PixelFormat::PlanarRgb,
5469            DType::U8,
5470            None,
5471        )
5472        .unwrap();
5473        let mut gl_converter = GLProcessorThreaded::new(None).unwrap();
5474
5475        let (result, _src, gl_dst) = convert_img(
5476            &mut gl_converter,
5477            src,
5478            gl_dst,
5479            Rotation::None,
5480            Flip::None,
5481            crop_letterbox,
5482        );
5483        result.unwrap();
5484        compare_images(&gl_dst, &cpu_dst, 0.98, function!());
5485    }
5486
5487    #[test]
5488    fn test_cpu_resize_nv16() {
5489        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
5490        let src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
5491
5492        let cpu_nv16_dst = TensorDyn::image(640, 640, PixelFormat::Nv16, DType::U8, None).unwrap();
5493        let cpu_rgb_dst = TensorDyn::image(640, 640, PixelFormat::Rgb, DType::U8, None).unwrap();
5494        let mut cpu_converter = CPUProcessor::new();
5495        let crop = Crop::new()
5496            .with_dst_rect(Some(Rect {
5497                left: 20,
5498                top: 140,
5499                width: 600,
5500                height: 360,
5501            }))
5502            .with_dst_color(Some([255, 128, 0, 255]));
5503
5504        let (result, src, cpu_nv16_dst) = convert_img(
5505            &mut cpu_converter,
5506            src,
5507            cpu_nv16_dst,
5508            Rotation::None,
5509            Flip::None,
5510            crop,
5511        );
5512        result.unwrap();
5513
5514        let (result, _src, cpu_rgb_dst) = convert_img(
5515            &mut cpu_converter,
5516            src,
5517            cpu_rgb_dst,
5518            Rotation::None,
5519            Flip::None,
5520            crop,
5521        );
5522        result.unwrap();
5523        compare_images_convert_to_rgb(&cpu_nv16_dst, &cpu_rgb_dst, 0.99, function!());
5524    }
5525
5526    fn load_bytes_to_tensor(
5527        width: usize,
5528        height: usize,
5529        format: PixelFormat,
5530        memory: Option<TensorMemory>,
5531        bytes: &[u8],
5532    ) -> Result<TensorDyn, Error> {
5533        let src = TensorDyn::image(width, height, format, DType::U8, memory)?;
5534        src.as_u8()
5535            .unwrap()
5536            .map()?
5537            .as_mut_slice()
5538            .copy_from_slice(bytes);
5539        Ok(src)
5540    }
5541
5542    fn compare_images(img1: &TensorDyn, img2: &TensorDyn, threshold: f64, name: &str) {
5543        assert_eq!(img1.height(), img2.height(), "Heights differ");
5544        assert_eq!(img1.width(), img2.width(), "Widths differ");
5545        assert_eq!(
5546            img1.format().unwrap(),
5547            img2.format().unwrap(),
5548            "PixelFormat differ"
5549        );
5550        assert!(
5551            matches!(
5552                img1.format().unwrap(),
5553                PixelFormat::Rgb | PixelFormat::Rgba | PixelFormat::Grey | PixelFormat::PlanarRgb
5554            ),
5555            "format must be Rgb or Rgba for comparison"
5556        );
5557
5558        let image1 = match img1.format().unwrap() {
5559            PixelFormat::Rgb => image::RgbImage::from_vec(
5560                img1.width().unwrap() as u32,
5561                img1.height().unwrap() as u32,
5562                img1.as_u8().unwrap().map().unwrap().to_vec(),
5563            )
5564            .unwrap(),
5565            PixelFormat::Rgba => image::RgbaImage::from_vec(
5566                img1.width().unwrap() as u32,
5567                img1.height().unwrap() as u32,
5568                img1.as_u8().unwrap().map().unwrap().to_vec(),
5569            )
5570            .unwrap()
5571            .convert(),
5572            PixelFormat::Grey => image::GrayImage::from_vec(
5573                img1.width().unwrap() as u32,
5574                img1.height().unwrap() as u32,
5575                img1.as_u8().unwrap().map().unwrap().to_vec(),
5576            )
5577            .unwrap()
5578            .convert(),
5579            PixelFormat::PlanarRgb => image::GrayImage::from_vec(
5580                img1.width().unwrap() as u32,
5581                (img1.height().unwrap() * 3) as u32,
5582                img1.as_u8().unwrap().map().unwrap().to_vec(),
5583            )
5584            .unwrap()
5585            .convert(),
5586            _ => return,
5587        };
5588
5589        let image2 = match img2.format().unwrap() {
5590            PixelFormat::Rgb => image::RgbImage::from_vec(
5591                img2.width().unwrap() as u32,
5592                img2.height().unwrap() as u32,
5593                img2.as_u8().unwrap().map().unwrap().to_vec(),
5594            )
5595            .unwrap(),
5596            PixelFormat::Rgba => image::RgbaImage::from_vec(
5597                img2.width().unwrap() as u32,
5598                img2.height().unwrap() as u32,
5599                img2.as_u8().unwrap().map().unwrap().to_vec(),
5600            )
5601            .unwrap()
5602            .convert(),
5603            PixelFormat::Grey => image::GrayImage::from_vec(
5604                img2.width().unwrap() as u32,
5605                img2.height().unwrap() as u32,
5606                img2.as_u8().unwrap().map().unwrap().to_vec(),
5607            )
5608            .unwrap()
5609            .convert(),
5610            PixelFormat::PlanarRgb => image::GrayImage::from_vec(
5611                img2.width().unwrap() as u32,
5612                (img2.height().unwrap() * 3) as u32,
5613                img2.as_u8().unwrap().map().unwrap().to_vec(),
5614            )
5615            .unwrap()
5616            .convert(),
5617            _ => return,
5618        };
5619
5620        let similarity = image_compare::rgb_similarity_structure(
5621            &image_compare::Algorithm::RootMeanSquared,
5622            &image1,
5623            &image2,
5624        )
5625        .expect("Image Comparison failed");
5626        if similarity.score < threshold {
5627            // image1.save(format!("{name}_1.png"));
5628            // image2.save(format!("{name}_2.png"));
5629            similarity
5630                .image
5631                .to_color_map()
5632                .save(format!("{name}.png"))
5633                .unwrap();
5634            panic!(
5635                "{name}: converted image and target image have similarity score too low: {} < {}",
5636                similarity.score, threshold
5637            )
5638        }
5639    }
5640
5641    fn compare_images_convert_to_rgb(
5642        img1: &TensorDyn,
5643        img2: &TensorDyn,
5644        threshold: f64,
5645        name: &str,
5646    ) {
5647        assert_eq!(img1.height(), img2.height(), "Heights differ");
5648        assert_eq!(img1.width(), img2.width(), "Widths differ");
5649
5650        let mut img_rgb1 = TensorDyn::image(
5651            img1.width().unwrap(),
5652            img1.height().unwrap(),
5653            PixelFormat::Rgb,
5654            DType::U8,
5655            Some(TensorMemory::Mem),
5656        )
5657        .unwrap();
5658        let mut img_rgb2 = TensorDyn::image(
5659            img1.width().unwrap(),
5660            img1.height().unwrap(),
5661            PixelFormat::Rgb,
5662            DType::U8,
5663            Some(TensorMemory::Mem),
5664        )
5665        .unwrap();
5666        let mut __cv = CPUProcessor::default();
5667        let r1 = __cv.convert(
5668            img1,
5669            &mut img_rgb1,
5670            crate::Rotation::None,
5671            crate::Flip::None,
5672            crate::Crop::default(),
5673        );
5674        let r2 = __cv.convert(
5675            img2,
5676            &mut img_rgb2,
5677            crate::Rotation::None,
5678            crate::Flip::None,
5679            crate::Crop::default(),
5680        );
5681        if r1.is_err() || r2.is_err() {
5682            // Fallback: compare raw bytes as greyscale strip
5683            let w = img1.width().unwrap() as u32;
5684            let data1 = img1.as_u8().unwrap().map().unwrap().to_vec();
5685            let data2 = img2.as_u8().unwrap().map().unwrap().to_vec();
5686            let h1 = (data1.len() as u32) / w;
5687            let h2 = (data2.len() as u32) / w;
5688            let g1 = image::GrayImage::from_vec(w, h1, data1).unwrap();
5689            let g2 = image::GrayImage::from_vec(w, h2, data2).unwrap();
5690            let similarity = image_compare::gray_similarity_structure(
5691                &image_compare::Algorithm::RootMeanSquared,
5692                &g1,
5693                &g2,
5694            )
5695            .expect("Image Comparison failed");
5696            if similarity.score < threshold {
5697                panic!(
5698                    "{name}: converted image and target image have similarity score too low: {} < {}",
5699                    similarity.score, threshold
5700                )
5701            }
5702            return;
5703        }
5704
5705        let image1 = image::RgbImage::from_vec(
5706            img_rgb1.width().unwrap() as u32,
5707            img_rgb1.height().unwrap() as u32,
5708            img_rgb1.as_u8().unwrap().map().unwrap().to_vec(),
5709        )
5710        .unwrap();
5711
5712        let image2 = image::RgbImage::from_vec(
5713            img_rgb2.width().unwrap() as u32,
5714            img_rgb2.height().unwrap() as u32,
5715            img_rgb2.as_u8().unwrap().map().unwrap().to_vec(),
5716        )
5717        .unwrap();
5718
5719        let similarity = image_compare::rgb_similarity_structure(
5720            &image_compare::Algorithm::RootMeanSquared,
5721            &image1,
5722            &image2,
5723        )
5724        .expect("Image Comparison failed");
5725        if similarity.score < threshold {
5726            // image1.save(format!("{name}_1.png"));
5727            // image2.save(format!("{name}_2.png"));
5728            similarity
5729                .image
5730                .to_color_map()
5731                .save(format!("{name}.png"))
5732                .unwrap();
5733            panic!(
5734                "{name}: converted image and target image have similarity score too low: {} < {}",
5735                similarity.score, threshold
5736            )
5737        }
5738    }
5739
5740    // =========================================================================
5741    // PixelFormat::Nv12 Format Tests
5742    // =========================================================================
5743
5744    #[test]
5745    fn test_nv12_image_creation() {
5746        let width = 640;
5747        let height = 480;
5748        let img = TensorDyn::image(width, height, PixelFormat::Nv12, DType::U8, None).unwrap();
5749
5750        assert_eq!(img.width(), Some(width));
5751        assert_eq!(img.height(), Some(height));
5752        assert_eq!(img.format().unwrap(), PixelFormat::Nv12);
5753        // PixelFormat::Nv12 uses shape [H*3/2, W] to store Y plane + UV plane
5754        assert_eq!(img.as_u8().unwrap().shape(), &[height * 3 / 2, width]);
5755    }
5756
5757    #[test]
5758    fn test_nv12_channels() {
5759        let img = TensorDyn::image(640, 480, PixelFormat::Nv12, DType::U8, None).unwrap();
5760        // PixelFormat::Nv12.channels() returns 1 (luma plane)
5761        assert_eq!(img.format().unwrap().channels(), 1);
5762    }
5763
5764    // =========================================================================
5765    // Tensor Format Metadata Tests
5766    // =========================================================================
5767
5768    #[test]
5769    fn test_tensor_set_format_planar() {
5770        let mut tensor = Tensor::<u8>::new(&[3, 480, 640], None, None).unwrap();
5771        tensor.set_format(PixelFormat::PlanarRgb).unwrap();
5772        assert_eq!(tensor.format(), Some(PixelFormat::PlanarRgb));
5773        assert_eq!(tensor.width(), Some(640));
5774        assert_eq!(tensor.height(), Some(480));
5775    }
5776
5777    #[test]
5778    fn test_tensor_set_format_interleaved() {
5779        let mut tensor = Tensor::<u8>::new(&[480, 640, 4], None, None).unwrap();
5780        tensor.set_format(PixelFormat::Rgba).unwrap();
5781        assert_eq!(tensor.format(), Some(PixelFormat::Rgba));
5782        assert_eq!(tensor.width(), Some(640));
5783        assert_eq!(tensor.height(), Some(480));
5784    }
5785
5786    #[test]
5787    fn test_tensordyn_image_rgb() {
5788        let img = TensorDyn::image(640, 480, PixelFormat::Rgb, DType::U8, None).unwrap();
5789        assert_eq!(img.width(), Some(640));
5790        assert_eq!(img.height(), Some(480));
5791        assert_eq!(img.format(), Some(PixelFormat::Rgb));
5792    }
5793
5794    #[test]
5795    fn test_tensordyn_image_planar_rgb() {
5796        let img = TensorDyn::image(640, 480, PixelFormat::PlanarRgb, DType::U8, None).unwrap();
5797        assert_eq!(img.width(), Some(640));
5798        assert_eq!(img.height(), Some(480));
5799        assert_eq!(img.format(), Some(PixelFormat::PlanarRgb));
5800    }
5801
5802    #[test]
5803    fn test_rgb_int8_format() {
5804        // Int8 variant: same PixelFormat::Rgb but with DType::I8
5805        let img = TensorDyn::image(
5806            1280,
5807            720,
5808            PixelFormat::Rgb,
5809            DType::I8,
5810            Some(TensorMemory::Mem),
5811        )
5812        .unwrap();
5813        assert_eq!(img.width(), Some(1280));
5814        assert_eq!(img.height(), Some(720));
5815        assert_eq!(img.format(), Some(PixelFormat::Rgb));
5816        assert_eq!(img.dtype(), DType::I8);
5817    }
5818
5819    #[test]
5820    fn test_planar_rgb_int8_format() {
5821        let img = TensorDyn::image(
5822            1280,
5823            720,
5824            PixelFormat::PlanarRgb,
5825            DType::I8,
5826            Some(TensorMemory::Mem),
5827        )
5828        .unwrap();
5829        assert_eq!(img.width(), Some(1280));
5830        assert_eq!(img.height(), Some(720));
5831        assert_eq!(img.format(), Some(PixelFormat::PlanarRgb));
5832        assert_eq!(img.dtype(), DType::I8);
5833    }
5834
5835    #[test]
5836    fn test_rgb_from_tensor() {
5837        let mut tensor = Tensor::<u8>::new(&[720, 1280, 3], None, None).unwrap();
5838        tensor.set_format(PixelFormat::Rgb).unwrap();
5839        let img = TensorDyn::from(tensor);
5840        assert_eq!(img.width(), Some(1280));
5841        assert_eq!(img.height(), Some(720));
5842        assert_eq!(img.format(), Some(PixelFormat::Rgb));
5843    }
5844
5845    #[test]
5846    fn test_planar_rgb_from_tensor() {
5847        let mut tensor = Tensor::<u8>::new(&[3, 720, 1280], None, None).unwrap();
5848        tensor.set_format(PixelFormat::PlanarRgb).unwrap();
5849        let img = TensorDyn::from(tensor);
5850        assert_eq!(img.width(), Some(1280));
5851        assert_eq!(img.height(), Some(720));
5852        assert_eq!(img.format(), Some(PixelFormat::PlanarRgb));
5853    }
5854
5855    #[test]
5856    fn test_dtype_determines_int8() {
5857        // DType::I8 indicates int8 data
5858        let u8_img = TensorDyn::image(64, 64, PixelFormat::Rgb, DType::U8, None).unwrap();
5859        let i8_img = TensorDyn::image(64, 64, PixelFormat::Rgb, DType::I8, None).unwrap();
5860        assert_eq!(u8_img.dtype(), DType::U8);
5861        assert_eq!(i8_img.dtype(), DType::I8);
5862    }
5863
5864    #[test]
5865    fn test_pixel_layout_packed_vs_planar() {
5866        // Packed vs planar layout classification
5867        assert_eq!(PixelFormat::Rgb.layout(), PixelLayout::Packed);
5868        assert_eq!(PixelFormat::Rgba.layout(), PixelLayout::Packed);
5869        assert_eq!(PixelFormat::PlanarRgb.layout(), PixelLayout::Planar);
5870        assert_eq!(PixelFormat::Nv12.layout(), PixelLayout::SemiPlanar);
5871    }
5872
5873    /// Integration test that exercises the PBO-to-PBO convert path.
5874    /// Uses ImageProcessor::create_image() to allocate PBO-backed tensors,
5875    /// then converts between them. Skipped when GL is unavailable or the
5876    /// backend is not PBO (e.g. DMA-buf systems).
5877    #[cfg(target_os = "linux")]
5878    #[cfg(feature = "opengl")]
5879    #[test]
5880    fn test_convert_pbo_to_pbo() {
5881        let mut converter = ImageProcessor::new().unwrap();
5882
5883        // Skip if GL is not available or backend is not PBO
5884        let is_pbo = converter
5885            .opengl
5886            .as_ref()
5887            .is_some_and(|gl| gl.transfer_backend() == opengl_headless::TransferBackend::Pbo);
5888        if !is_pbo {
5889            eprintln!("Skipping test_convert_pbo_to_pbo: backend is not PBO");
5890            return;
5891        }
5892
5893        let src_w = 640;
5894        let src_h = 480;
5895        let dst_w = 320;
5896        let dst_h = 240;
5897
5898        // Create PBO-backed source image
5899        let pbo_src = converter
5900            .create_image(src_w, src_h, PixelFormat::Rgba, DType::U8, None)
5901            .unwrap();
5902        assert_eq!(
5903            pbo_src.as_u8().unwrap().memory(),
5904            TensorMemory::Pbo,
5905            "create_image should produce a PBO tensor"
5906        );
5907
5908        // Fill source PBO with test pattern: load JPEG then convert Mem→PBO
5909        let file = edgefirst_bench::testdata::read("zidane.jpg").to_vec();
5910        let jpeg_src = crate::load_image_test_helper(&file, Some(PixelFormat::Rgba), None).unwrap();
5911
5912        // Resize JPEG into a Mem temp of the right size, then copy into PBO
5913        let mem_src = TensorDyn::image(
5914            src_w,
5915            src_h,
5916            PixelFormat::Rgba,
5917            DType::U8,
5918            Some(TensorMemory::Mem),
5919        )
5920        .unwrap();
5921        let (result, _jpeg_src, mem_src) = convert_img(
5922            &mut CPUProcessor::new(),
5923            jpeg_src,
5924            mem_src,
5925            Rotation::None,
5926            Flip::None,
5927            Crop::no_crop(),
5928        );
5929        result.unwrap();
5930
5931        // Copy pixel data into the PBO source by mapping it
5932        {
5933            let src_data = mem_src.as_u8().unwrap().map().unwrap();
5934            let mut pbo_map = pbo_src.as_u8().unwrap().map().unwrap();
5935            pbo_map.copy_from_slice(&src_data);
5936        }
5937
5938        // Create PBO-backed destination image
5939        let pbo_dst = converter
5940            .create_image(dst_w, dst_h, PixelFormat::Rgba, DType::U8, None)
5941            .unwrap();
5942        assert_eq!(pbo_dst.as_u8().unwrap().memory(), TensorMemory::Pbo);
5943
5944        // Convert PBO→PBO (this exercises convert_pbo_to_pbo)
5945        let mut pbo_dst = pbo_dst;
5946        let result = converter.convert(
5947            &pbo_src,
5948            &mut pbo_dst,
5949            Rotation::None,
5950            Flip::None,
5951            Crop::no_crop(),
5952        );
5953        result.unwrap();
5954
5955        // Verify: compare with CPU-only conversion of the same input
5956        let cpu_dst = TensorDyn::image(
5957            dst_w,
5958            dst_h,
5959            PixelFormat::Rgba,
5960            DType::U8,
5961            Some(TensorMemory::Mem),
5962        )
5963        .unwrap();
5964        let (result, _mem_src, cpu_dst) = convert_img(
5965            &mut CPUProcessor::new(),
5966            mem_src,
5967            cpu_dst,
5968            Rotation::None,
5969            Flip::None,
5970            Crop::no_crop(),
5971        );
5972        result.unwrap();
5973
5974        let pbo_dst_img = {
5975            let mut __t = pbo_dst.into_u8().unwrap();
5976            __t.set_format(PixelFormat::Rgba).unwrap();
5977            TensorDyn::from(__t)
5978        };
5979        compare_images(&pbo_dst_img, &cpu_dst, 0.95, function!());
5980        log::info!("test_convert_pbo_to_pbo: PASS — PBO-to-PBO convert matches CPU reference");
5981    }
5982
5983    #[test]
5984    fn test_image_bgra() {
5985        let img = TensorDyn::image(
5986            640,
5987            480,
5988            PixelFormat::Bgra,
5989            DType::U8,
5990            Some(edgefirst_tensor::TensorMemory::Mem),
5991        )
5992        .unwrap();
5993        assert_eq!(img.width(), Some(640));
5994        assert_eq!(img.height(), Some(480));
5995        assert_eq!(img.format().unwrap().channels(), 4);
5996        assert_eq!(img.format().unwrap(), PixelFormat::Bgra);
5997    }
5998
5999    // ========================================================================
6000    // Tests for EDGEFIRST_FORCE_BACKEND env var
6001    // ========================================================================
6002
6003    #[test]
6004    fn test_force_backend_cpu() {
6005        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
6006        unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", "cpu") };
6007        let result = ImageProcessor::new();
6008        match original {
6009            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
6010            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6011        }
6012        let converter = result.unwrap();
6013        assert!(converter.cpu.is_some());
6014        assert_eq!(converter.forced_backend, Some(ForcedBackend::Cpu));
6015    }
6016
6017    #[test]
6018    fn test_force_backend_invalid() {
6019        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
6020        unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", "invalid") };
6021        let result = ImageProcessor::new();
6022        match original {
6023            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
6024            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6025        }
6026        assert!(
6027            matches!(&result, Err(Error::ForcedBackendUnavailable(s)) if s.contains("unknown")),
6028            "invalid backend value should return ForcedBackendUnavailable error: {result:?}"
6029        );
6030    }
6031
6032    #[test]
6033    fn test_force_backend_unset() {
6034        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
6035        unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") };
6036        let result = ImageProcessor::new();
6037        match original {
6038            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
6039            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6040        }
6041        let converter = result.unwrap();
6042        assert!(converter.forced_backend.is_none());
6043    }
6044
6045    // ========================================================================
6046    // Tests for hybrid mask path error handling
6047    // ========================================================================
6048
6049    #[test]
6050    fn test_draw_proto_masks_no_cpu_returns_error() {
6051        // Disable CPU backend to trigger the error path
6052        let original_cpu = std::env::var("EDGEFIRST_DISABLE_CPU").ok();
6053        unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", "1") };
6054        let original_gl = std::env::var("EDGEFIRST_DISABLE_GL").ok();
6055        unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", "1") };
6056        let original_g2d = std::env::var("EDGEFIRST_DISABLE_G2D").ok();
6057        unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", "1") };
6058
6059        let result = ImageProcessor::new();
6060
6061        match original_cpu {
6062            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_CPU", s) },
6063            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_CPU") },
6064        }
6065        match original_gl {
6066            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_GL", s) },
6067            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_GL") },
6068        }
6069        match original_g2d {
6070            Some(s) => unsafe { std::env::set_var("EDGEFIRST_DISABLE_G2D", s) },
6071            None => unsafe { std::env::remove_var("EDGEFIRST_DISABLE_G2D") },
6072        }
6073
6074        let mut converter = result.unwrap();
6075        assert!(converter.cpu.is_none(), "CPU should be disabled");
6076
6077        let dst = TensorDyn::image(
6078            640,
6079            480,
6080            PixelFormat::Rgba,
6081            DType::U8,
6082            Some(TensorMemory::Mem),
6083        )
6084        .unwrap();
6085        let mut dst_dyn = dst;
6086        let det = [DetectBox {
6087            bbox: edgefirst_decoder::BoundingBox {
6088                xmin: 0.1,
6089                ymin: 0.1,
6090                xmax: 0.5,
6091                ymax: 0.5,
6092            },
6093            score: 0.9,
6094            label: 0,
6095        }];
6096        let proto_data = {
6097            use edgefirst_tensor::{Tensor, TensorDyn};
6098            let coeff_t = Tensor::<f32>::from_slice(&[0.5_f32; 4], &[1, 4]).unwrap();
6099            let protos_t =
6100                Tensor::<f32>::from_slice(&vec![0.0_f32; 8 * 8 * 4], &[8, 8, 4]).unwrap();
6101            ProtoData {
6102                mask_coefficients: TensorDyn::F32(coeff_t),
6103                protos: TensorDyn::F32(protos_t),
6104                layout: ProtoLayout::Nhwc,
6105            }
6106        };
6107        let result =
6108            converter.draw_proto_masks(&mut dst_dyn, &det, &proto_data, Default::default());
6109        assert!(
6110            matches!(&result, Err(Error::Internal(s)) if s.contains("CPU backend")),
6111            "draw_proto_masks without CPU should return Internal error: {result:?}"
6112        );
6113    }
6114
6115    #[test]
6116    fn test_draw_proto_masks_cpu_fallback_works() {
6117        // Force CPU-only backend to ensure the CPU fallback path executes
6118        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
6119        unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", "cpu") };
6120        let result = ImageProcessor::new();
6121        match original {
6122            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
6123            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6124        }
6125
6126        let mut converter = result.unwrap();
6127        assert!(converter.cpu.is_some());
6128
6129        let dst = TensorDyn::image(
6130            64,
6131            64,
6132            PixelFormat::Rgba,
6133            DType::U8,
6134            Some(TensorMemory::Mem),
6135        )
6136        .unwrap();
6137        let mut dst_dyn = dst;
6138        let det = [DetectBox {
6139            bbox: edgefirst_decoder::BoundingBox {
6140                xmin: 0.1,
6141                ymin: 0.1,
6142                xmax: 0.5,
6143                ymax: 0.5,
6144            },
6145            score: 0.9,
6146            label: 0,
6147        }];
6148        let proto_data = {
6149            use edgefirst_tensor::{Tensor, TensorDyn};
6150            let coeff_t = Tensor::<f32>::from_slice(&[0.5_f32; 4], &[1, 4]).unwrap();
6151            let protos_t =
6152                Tensor::<f32>::from_slice(&vec![0.0_f32; 8 * 8 * 4], &[8, 8, 4]).unwrap();
6153            ProtoData {
6154                mask_coefficients: TensorDyn::F32(coeff_t),
6155                protos: TensorDyn::F32(protos_t),
6156                layout: ProtoLayout::Nhwc,
6157            }
6158        };
6159        let result =
6160            converter.draw_proto_masks(&mut dst_dyn, &det, &proto_data, Default::default());
6161        assert!(result.is_ok(), "CPU fallback path should work: {result:?}");
6162    }
6163
6164    // ============================================================
6165    // draw_decoded_masks / draw_proto_masks — 4-scenario pixel-
6166    // verified tests. Exercises each backend against the full
6167    // output-contract matrix:
6168    //
6169    //   | detections | background | expected dst             |
6170    //   |------------|------------|--------------------------|
6171    //   | empty      | none       | fully cleared (0x00)     |
6172    //   | empty      | set        | fully equal to bg        |
6173    //   | set        | none       | cleared outside box +    |
6174    //   |            |            | mask-coloured inside     |
6175    //   | set        | set        | bg outside box + mask    |
6176    //   |            |            | blended inside           |
6177    //
6178    // Every test pre-fills dst with a non-zero "dirty" pattern so
6179    // that any silent `return Ok(())` leaks the pattern into the
6180    // asserted output and fails loudly.
6181    // ============================================================
6182
6183    /// Run `body` with `EDGEFIRST_FORCE_BACKEND` temporarily set (or
6184    /// removed), restoring the prior value afterward. Tests are mutated
6185    /// env-serialized via the process-wide `FORCE_BACKEND_MUTEX`.
6186    fn with_force_backend<R>(value: Option<&str>, body: impl FnOnce() -> R) -> R {
6187        use std::sync::{Mutex, MutexGuard, OnceLock};
6188        static LOCK: OnceLock<Mutex<()>> = OnceLock::new();
6189        let _guard: MutexGuard<()> = LOCK
6190            .get_or_init(|| Mutex::new(()))
6191            .lock()
6192            .unwrap_or_else(|e| e.into_inner());
6193        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
6194        match value {
6195            Some(v) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", v) },
6196            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6197        }
6198        let r = body();
6199        match original {
6200            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
6201            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6202        }
6203        r
6204    }
6205
6206    /// Allocate an RGBA image tensor and pre-fill every byte with a
6207    /// distinctive non-zero pattern. Any test that relies on the old
6208    /// "dst is already cleared" assumption will see this pattern leak
6209    /// through to the output and fail.
6210    fn make_dirty_dst(w: usize, h: usize, mem: Option<TensorMemory>) -> TensorDyn {
6211        let dst = TensorDyn::image(w, h, PixelFormat::Rgba, DType::U8, mem).unwrap();
6212        {
6213            use edgefirst_tensor::TensorMapTrait;
6214            let u8t = dst.as_u8().unwrap();
6215            let mut map = u8t.map().unwrap();
6216            for (i, b) in map.as_mut_slice().iter_mut().enumerate() {
6217                *b = 0xA0u8.wrapping_add((i as u8) & 0x3F);
6218            }
6219        }
6220        dst
6221    }
6222
6223    /// Allocate an RGBA background filled with a constant colour.
6224    fn make_bg(w: usize, h: usize, mem: Option<TensorMemory>, rgba: [u8; 4]) -> TensorDyn {
6225        let bg = TensorDyn::image(w, h, PixelFormat::Rgba, DType::U8, mem).unwrap();
6226        {
6227            use edgefirst_tensor::TensorMapTrait;
6228            let u8t = bg.as_u8().unwrap();
6229            let mut map = u8t.map().unwrap();
6230            for chunk in map.as_mut_slice().chunks_exact_mut(4) {
6231                chunk.copy_from_slice(&rgba);
6232            }
6233        }
6234        bg
6235    }
6236
6237    fn pixel_at(dst: &TensorDyn, x: usize, y: usize) -> [u8; 4] {
6238        use edgefirst_tensor::TensorMapTrait;
6239        let w = dst.width().unwrap();
6240        let off = (y * w + x) * 4;
6241        let u8t = dst.as_u8().unwrap();
6242        let map = u8t.map().unwrap();
6243        let s = map.as_slice();
6244        [s[off], s[off + 1], s[off + 2], s[off + 3]]
6245    }
6246
6247    fn assert_every_pixel_eq(dst: &TensorDyn, expected: [u8; 4], case: &str) {
6248        use edgefirst_tensor::TensorMapTrait;
6249        let u8t = dst.as_u8().unwrap();
6250        let map = u8t.map().unwrap();
6251        for (i, chunk) in map.as_slice().chunks_exact(4).enumerate() {
6252            assert_eq!(
6253                chunk, &expected,
6254                "{case}: pixel idx {i} = {chunk:?}, expected {expected:?}"
6255            );
6256        }
6257    }
6258
6259    /// Scenario 1: empty detections, empty segmentation, no background
6260    /// → dst must be fully cleared to 0x00000000.
6261    fn scenario_empty_no_bg(processor: &mut ImageProcessor, case: &str) {
6262        let mut dst = make_dirty_dst(64, 64, None);
6263        processor
6264            .draw_decoded_masks(&mut dst, &[], &[], MaskOverlay::default())
6265            .unwrap_or_else(|e| panic!("{case}/decoded_masks empty+no-bg failed: {e:?}"));
6266        assert_every_pixel_eq(&dst, [0, 0, 0, 0], &format!("{case}/decoded"));
6267
6268        let mut dst = make_dirty_dst(64, 64, None);
6269        let proto = {
6270            use edgefirst_tensor::{Tensor, TensorDyn};
6271            // Placeholder (no detections); shape [1, 4] to keep the tensor well-formed.
6272            let coeff_t = Tensor::<f32>::from_slice(&[0.0_f32; 4], &[1, 4]).unwrap();
6273            let protos_t =
6274                Tensor::<f32>::from_slice(&vec![0.0_f32; 8 * 8 * 4], &[8, 8, 4]).unwrap();
6275            ProtoData {
6276                mask_coefficients: TensorDyn::F32(coeff_t),
6277                protos: TensorDyn::F32(protos_t),
6278                layout: ProtoLayout::Nhwc,
6279            }
6280        };
6281        processor
6282            .draw_proto_masks(&mut dst, &[], &proto, MaskOverlay::default())
6283            .unwrap_or_else(|e| panic!("{case}/proto_masks empty+no-bg failed: {e:?}"));
6284        assert_every_pixel_eq(&dst, [0, 0, 0, 0], &format!("{case}/proto"));
6285    }
6286
6287    /// Scenario 2: empty detections, empty segmentation, background set
6288    /// → dst must be fully equal to bg.
6289    fn scenario_empty_with_bg(processor: &mut ImageProcessor, case: &str) {
6290        let bg_color = [42, 99, 200, 255];
6291        let bg = make_bg(64, 64, None, bg_color);
6292        let overlay = MaskOverlay::new().with_background(&bg);
6293
6294        let mut dst = make_dirty_dst(64, 64, None);
6295        processor
6296            .draw_decoded_masks(&mut dst, &[], &[], overlay)
6297            .unwrap_or_else(|e| panic!("{case}/decoded_masks empty+bg failed: {e:?}"));
6298        assert_every_pixel_eq(&dst, bg_color, &format!("{case}/decoded bg blit"));
6299
6300        let mut dst = make_dirty_dst(64, 64, None);
6301        let proto = {
6302            use edgefirst_tensor::{Tensor, TensorDyn};
6303            // Placeholder (no detections); shape [1, 4] to keep the tensor well-formed.
6304            let coeff_t = Tensor::<f32>::from_slice(&[0.0_f32; 4], &[1, 4]).unwrap();
6305            let protos_t =
6306                Tensor::<f32>::from_slice(&vec![0.0_f32; 8 * 8 * 4], &[8, 8, 4]).unwrap();
6307            ProtoData {
6308                mask_coefficients: TensorDyn::F32(coeff_t),
6309                protos: TensorDyn::F32(protos_t),
6310                layout: ProtoLayout::Nhwc,
6311            }
6312        };
6313        processor
6314            .draw_proto_masks(&mut dst, &[], &proto, overlay)
6315            .unwrap_or_else(|e| panic!("{case}/proto_masks empty+bg failed: {e:?}"));
6316        assert_every_pixel_eq(&dst, bg_color, &format!("{case}/proto bg blit"));
6317    }
6318
6319    /// Scenario 3: one detection with a fully-opaque segmentation fill,
6320    /// no background → outside the box dst must be 0x00, inside it must
6321    /// be a non-zero mask colour (the render_segmentation output).
6322    fn scenario_detect_no_bg(processor: &mut ImageProcessor, case: &str) {
6323        use edgefirst_decoder::Segmentation;
6324        use ndarray::Array3;
6325        processor
6326            .set_class_colors(&[[200, 80, 40, 255]])
6327            .expect("set_class_colors");
6328
6329        let detect = DetectBox {
6330            bbox: [0.25, 0.25, 0.75, 0.75].into(),
6331            score: 0.99,
6332            label: 0,
6333        };
6334        let seg_arr = Array3::from_shape_fn((4, 4, 1), |_| 255u8);
6335        let seg = Segmentation {
6336            segmentation: seg_arr,
6337            xmin: 0.25,
6338            ymin: 0.25,
6339            xmax: 0.75,
6340            ymax: 0.75,
6341        };
6342
6343        let mut dst = make_dirty_dst(64, 64, None);
6344        processor
6345            .draw_decoded_masks(&mut dst, &[detect], &[seg], MaskOverlay::default())
6346            .unwrap_or_else(|e| panic!("{case}/decoded_masks detect+no-bg failed: {e:?}"));
6347
6348        // Outside the bbox (corner): must be cleared black.
6349        let corner = pixel_at(&dst, 2, 2);
6350        assert_eq!(
6351            corner,
6352            [0, 0, 0, 0],
6353            "{case}/decoded: corner (2,2) leaked dirty pattern: {corner:?}"
6354        );
6355        // Inside the bbox (center): the mask colour must be visible.
6356        // Any non-zero pixel is acceptable — exact rendering varies
6357        // between backends (GL smoothstep, CPU nearest).
6358        let center = pixel_at(&dst, 32, 32);
6359        assert!(
6360            center != [0, 0, 0, 0],
6361            "{case}/decoded: center (32,32) was not coloured: {center:?}"
6362        );
6363    }
6364
6365    /// Scenario 4: detection + background. Outside the box must match
6366    /// bg; inside the box must NOT match bg (mask blended on top).
6367    fn scenario_detect_with_bg(processor: &mut ImageProcessor, case: &str) {
6368        use edgefirst_decoder::Segmentation;
6369        use ndarray::Array3;
6370        processor
6371            .set_class_colors(&[[200, 80, 40, 255]])
6372            .expect("set_class_colors");
6373        let bg_color = [10, 20, 30, 255];
6374        let bg = make_bg(64, 64, None, bg_color);
6375
6376        let detect = DetectBox {
6377            bbox: [0.25, 0.25, 0.75, 0.75].into(),
6378            score: 0.99,
6379            label: 0,
6380        };
6381        let seg_arr = Array3::from_shape_fn((4, 4, 1), |_| 255u8);
6382        let seg = Segmentation {
6383            segmentation: seg_arr,
6384            xmin: 0.25,
6385            ymin: 0.25,
6386            xmax: 0.75,
6387            ymax: 0.75,
6388        };
6389
6390        let overlay = MaskOverlay::new().with_background(&bg);
6391        let mut dst = make_dirty_dst(64, 64, None);
6392        processor
6393            .draw_decoded_masks(&mut dst, &[detect], &[seg], overlay)
6394            .unwrap_or_else(|e| panic!("{case}/decoded_masks detect+bg failed: {e:?}"));
6395
6396        // Outside the bbox (corner): bg colour.
6397        let corner = pixel_at(&dst, 2, 2);
6398        assert_eq!(
6399            corner, bg_color,
6400            "{case}/decoded: corner (2,2) should show bg {bg_color:?} got {corner:?}"
6401        );
6402        // Inside the bbox (center): mask blended on bg, must differ from
6403        // pure bg (alpha-blend with mask colour produces a distinct shade).
6404        let center = pixel_at(&dst, 32, 32);
6405        assert!(
6406            center != bg_color,
6407            "{case}/decoded: center (32,32) should differ from bg {bg_color:?}, got {center:?}"
6408        );
6409    }
6410
6411    /// Run all 4 scenarios against the processor. Skip gracefully if
6412    /// construction fails (backend unavailable on this host).
6413    fn run_all_scenarios(
6414        force_backend: Option<&'static str>,
6415        case: &'static str,
6416        require_dma_for_bg: bool,
6417    ) {
6418        if require_dma_for_bg && !edgefirst_tensor::is_dma_available() {
6419            eprintln!("SKIPPED: {case} — DMA not available on this host");
6420            return;
6421        }
6422        let processor_result = with_force_backend(force_backend, ImageProcessor::new);
6423        let mut processor = match processor_result {
6424            Ok(p) => p,
6425            Err(e) => {
6426                eprintln!("SKIPPED: {case} — backend init failed: {e:?}");
6427                return;
6428            }
6429        };
6430        scenario_empty_no_bg(&mut processor, case);
6431        scenario_empty_with_bg(&mut processor, case);
6432        scenario_detect_no_bg(&mut processor, case);
6433        scenario_detect_with_bg(&mut processor, case);
6434    }
6435
6436    #[test]
6437    fn test_draw_masks_4_scenarios_cpu() {
6438        run_all_scenarios(Some("cpu"), "cpu", false);
6439    }
6440
6441    #[test]
6442    fn test_draw_masks_4_scenarios_auto() {
6443        run_all_scenarios(None, "auto", false);
6444    }
6445
6446    #[cfg(target_os = "linux")]
6447    #[cfg(feature = "opengl")]
6448    #[test]
6449    fn test_draw_masks_4_scenarios_opengl() {
6450        run_all_scenarios(Some("opengl"), "opengl", false);
6451    }
6452
6453    /// G2D forced backend: exercises the zero-detection empty-frame
6454    /// paths via `g2d_clear` and `g2d_blit`. Scenarios 3 and 4 (with
6455    /// detections) expect `NotImplemented` since G2D has no rasterizer
6456    /// for boxes / masks.
6457    #[cfg(target_os = "linux")]
6458    #[test]
6459    fn test_draw_masks_zero_detection_g2d_forced() {
6460        if !edgefirst_tensor::is_dma_available() {
6461            eprintln!("SKIPPED: g2d forced — DMA not available on this host");
6462            return;
6463        }
6464        let processor_result = with_force_backend(Some("g2d"), ImageProcessor::new);
6465        let mut processor = match processor_result {
6466            Ok(p) => p,
6467            Err(e) => {
6468                eprintln!("SKIPPED: g2d forced — init failed: {e:?}");
6469                return;
6470            }
6471        };
6472
6473        // Case 1: empty + no bg. G2D requires DMA-backed dst.
6474        let mut dst = TensorDyn::image(
6475            64,
6476            64,
6477            PixelFormat::Rgba,
6478            DType::U8,
6479            Some(TensorMemory::Dma),
6480        )
6481        .unwrap();
6482        {
6483            use edgefirst_tensor::TensorMapTrait;
6484            let u8t = dst.as_u8_mut().unwrap();
6485            let mut map = u8t.map().unwrap();
6486            map.as_mut_slice().fill(0xBB);
6487        }
6488        processor
6489            .draw_decoded_masks(&mut dst, &[], &[], MaskOverlay::default())
6490            .expect("g2d empty+no-bg");
6491        assert_every_pixel_eq(&dst, [0, 0, 0, 0], "g2d/case1 cleared");
6492
6493        // Case 2: empty + bg. Both surfaces DMA-backed for g2d_blit.
6494        let bg_color = [7, 11, 13, 255];
6495        let bg = {
6496            let t = TensorDyn::image(
6497                64,
6498                64,
6499                PixelFormat::Rgba,
6500                DType::U8,
6501                Some(TensorMemory::Dma),
6502            )
6503            .unwrap();
6504            {
6505                use edgefirst_tensor::TensorMapTrait;
6506                let u8t = t.as_u8().unwrap();
6507                let mut map = u8t.map().unwrap();
6508                for chunk in map.as_mut_slice().chunks_exact_mut(4) {
6509                    chunk.copy_from_slice(&bg_color);
6510                }
6511            }
6512            t
6513        };
6514        let mut dst = TensorDyn::image(
6515            64,
6516            64,
6517            PixelFormat::Rgba,
6518            DType::U8,
6519            Some(TensorMemory::Dma),
6520        )
6521        .unwrap();
6522        {
6523            use edgefirst_tensor::TensorMapTrait;
6524            let u8t = dst.as_u8_mut().unwrap();
6525            let mut map = u8t.map().unwrap();
6526            map.as_mut_slice().fill(0x55);
6527        }
6528        processor
6529            .draw_decoded_masks(&mut dst, &[], &[], MaskOverlay::new().with_background(&bg))
6530            .expect("g2d empty+bg");
6531        assert_every_pixel_eq(&dst, bg_color, "g2d/case2 bg blit");
6532
6533        // Case 3 and 4: detect present — must return NotImplemented.
6534        let detect = DetectBox {
6535            bbox: [0.25, 0.25, 0.75, 0.75].into(),
6536            score: 0.9,
6537            label: 0,
6538        };
6539        let mut dst = TensorDyn::image(
6540            64,
6541            64,
6542            PixelFormat::Rgba,
6543            DType::U8,
6544            Some(TensorMemory::Dma),
6545        )
6546        .unwrap();
6547        let err = processor
6548            .draw_decoded_masks(&mut dst, &[detect], &[], MaskOverlay::default())
6549            .expect_err("g2d must reject detect-present draw_decoded_masks");
6550        assert!(
6551            matches!(err, Error::NotImplemented(_)),
6552            "g2d case3 wrong error: {err:?}"
6553        );
6554    }
6555
6556    #[test]
6557    fn test_set_format_then_cpu_convert() {
6558        // Force CPU backend (save/restore to avoid leaking into other tests)
6559        let original = std::env::var("EDGEFIRST_FORCE_BACKEND").ok();
6560        unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", "cpu") };
6561        let mut processor = ImageProcessor::new().unwrap();
6562        match original {
6563            Some(s) => unsafe { std::env::set_var("EDGEFIRST_FORCE_BACKEND", s) },
6564            None => unsafe { std::env::remove_var("EDGEFIRST_FORCE_BACKEND") },
6565        }
6566
6567        // Load a source image
6568        let image = edgefirst_bench::testdata::read("zidane.jpg");
6569        let src = load_image_test_helper(&image, Some(PixelFormat::Rgba), None).unwrap();
6570
6571        // Create a raw tensor, then attach format — simulating the from_fd workflow
6572        let mut dst =
6573            TensorDyn::new(&[640, 640, 3], DType::U8, Some(TensorMemory::Mem), None).unwrap();
6574        dst.set_format(PixelFormat::Rgb).unwrap();
6575
6576        // Convert should work with the set_format-annotated tensor
6577        processor
6578            .convert(&src, &mut dst, Rotation::None, Flip::None, Crop::default())
6579            .unwrap();
6580
6581        // Verify format survived conversion
6582        assert_eq!(dst.format(), Some(PixelFormat::Rgb));
6583        assert_eq!(dst.width(), Some(640));
6584        assert_eq!(dst.height(), Some(640));
6585    }
6586
6587    /// Verify that creating multiple ImageProcessors on the same thread and
6588    /// performing a resize on each does not deadlock or error.
6589    ///
6590    /// Uses automatic memory allocation (DMA → PBO → Mem fallback) so that
6591    /// hardware backends (OpenGL, G2D) are exercised on capable targets.
6592    #[test]
6593    fn test_multiple_image_processors_same_thread() {
6594        let mut processors: Vec<ImageProcessor> = (0..4)
6595            .map(|_| ImageProcessor::new().expect("ImageProcessor::new() failed"))
6596            .collect();
6597
6598        for proc in &mut processors {
6599            let src = proc
6600                .create_image(128, 128, PixelFormat::Rgb, DType::U8, None)
6601                .expect("create src failed");
6602            let mut dst = proc
6603                .create_image(64, 64, PixelFormat::Rgb, DType::U8, None)
6604                .expect("create dst failed");
6605            proc.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::default())
6606                .expect("convert failed");
6607            assert_eq!(dst.width(), Some(64));
6608            assert_eq!(dst.height(), Some(64));
6609        }
6610    }
6611
6612    /// Verify that creating ImageProcessors on separate threads and performing
6613    /// a resize on each does not deadlock or error.
6614    ///
6615    /// Uses automatic memory allocation (DMA → PBO → Mem fallback) so that
6616    /// hardware backends (OpenGL, G2D) are exercised on capable targets.
6617    /// A 60-second timeout prevents CI from hanging on deadlock regressions.
6618    #[test]
6619    fn test_multiple_image_processors_separate_threads() {
6620        use std::sync::mpsc;
6621        use std::time::Duration;
6622
6623        const TIMEOUT: Duration = Duration::from_secs(60);
6624
6625        let (tx, rx) = mpsc::channel::<()>();
6626
6627        std::thread::spawn(move || {
6628            let handles: Vec<_> = (0..4)
6629                .map(|i| {
6630                    std::thread::spawn(move || {
6631                        let mut proc = ImageProcessor::new().unwrap_or_else(|e| {
6632                            panic!("ImageProcessor::new() failed on thread {i}: {e}")
6633                        });
6634                        let src = proc
6635                            .create_image(128, 128, PixelFormat::Rgb, DType::U8, None)
6636                            .unwrap_or_else(|e| panic!("create src failed on thread {i}: {e}"));
6637                        let mut dst = proc
6638                            .create_image(64, 64, PixelFormat::Rgb, DType::U8, None)
6639                            .unwrap_or_else(|e| panic!("create dst failed on thread {i}: {e}"));
6640                        proc.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::default())
6641                            .unwrap_or_else(|e| panic!("convert failed on thread {i}: {e}"));
6642                        assert_eq!(dst.width(), Some(64));
6643                        assert_eq!(dst.height(), Some(64));
6644                    })
6645                })
6646                .collect();
6647
6648            for (i, h) in handles.into_iter().enumerate() {
6649                h.join()
6650                    .unwrap_or_else(|e| panic!("thread {i} panicked: {e:?}"));
6651            }
6652
6653            let _ = tx.send(());
6654        });
6655
6656        rx.recv_timeout(TIMEOUT).unwrap_or_else(|_| {
6657            panic!("test_multiple_image_processors_separate_threads timed out after {TIMEOUT:?}")
6658        });
6659    }
6660
6661    /// Verify that 4 fully-initialized ImageProcessors on separate threads can
6662    /// all operate concurrently without deadlocking each other.
6663    ///
6664    /// All processors are created first, then a barrier synchronizes them so
6665    /// they all start converting at the same instant — maximizing contention.
6666    /// A 60-second timeout prevents CI from hanging on deadlock regressions.
6667    #[test]
6668    fn test_image_processors_concurrent_operations() {
6669        use std::sync::{mpsc, Arc, Barrier};
6670        use std::time::Duration;
6671
6672        const N: usize = 4;
6673        const ROUNDS: usize = 10;
6674        const TIMEOUT: Duration = Duration::from_secs(60);
6675
6676        let (tx, rx) = mpsc::channel::<()>();
6677
6678        std::thread::spawn(move || {
6679            let barrier = Arc::new(Barrier::new(N));
6680
6681            let handles: Vec<_> = (0..N)
6682                .map(|i| {
6683                    let barrier = Arc::clone(&barrier);
6684                    std::thread::spawn(move || {
6685                        let mut proc = ImageProcessor::new().unwrap_or_else(|e| {
6686                            panic!("ImageProcessor::new() failed on thread {i}: {e}")
6687                        });
6688
6689                        // All threads wait here until every processor is initialized.
6690                        barrier.wait();
6691
6692                        // Now all 4 hammer the GPU concurrently.
6693                        for round in 0..ROUNDS {
6694                            let src = proc
6695                                .create_image(128, 128, PixelFormat::Rgb, DType::U8, None)
6696                                .unwrap_or_else(|e| {
6697                                    panic!("create src failed on thread {i} round {round}: {e}")
6698                                });
6699                            let mut dst = proc
6700                                .create_image(64, 64, PixelFormat::Rgb, DType::U8, None)
6701                                .unwrap_or_else(|e| {
6702                                    panic!("create dst failed on thread {i} round {round}: {e}")
6703                                });
6704                            proc.convert(
6705                                &src,
6706                                &mut dst,
6707                                Rotation::None,
6708                                Flip::None,
6709                                Crop::default(),
6710                            )
6711                            .unwrap_or_else(|e| {
6712                                panic!("convert failed on thread {i} round {round}: {e}")
6713                            });
6714                            assert_eq!(dst.width(), Some(64));
6715                            assert_eq!(dst.height(), Some(64));
6716                        }
6717                    })
6718                })
6719                .collect();
6720
6721            for (i, h) in handles.into_iter().enumerate() {
6722                h.join()
6723                    .unwrap_or_else(|e| panic!("thread {i} panicked: {e:?}"));
6724            }
6725
6726            let _ = tx.send(());
6727        });
6728
6729        rx.recv_timeout(TIMEOUT).unwrap_or_else(|_| {
6730            panic!("test_image_processors_concurrent_operations timed out after {TIMEOUT:?}")
6731        });
6732    }
6733}