Skip to main content

edgefirst_image/
cpu.rs

1// SPDX-FileCopyrightText: Copyright 2025 Au-Zone Technologies
2// SPDX-License-Identifier: Apache-2.0
3
4use crate::{
5    fourcc_is_int8, fourcc_uint8_equivalent, Crop, Error, Flip, FunctionTimer, ImageProcessorTrait,
6    Rect, Result, Rotation, TensorImage, TensorImageDst, TensorImageRef, BGRA, GREY, NV12, NV16,
7    PLANAR_RGB, PLANAR_RGBA, RGB, RGBA, VYUY, YUYV,
8};
9use edgefirst_decoder::{DetectBox, ProtoData, Segmentation};
10use edgefirst_tensor::{TensorMapTrait, TensorTrait};
11use four_char_code::FourCharCode;
12use ndarray::{ArrayView3, ArrayViewMut3, Axis};
13use rayon::iter::{
14    IndexedParallelIterator, IntoParallelRefIterator, IntoParallelRefMutIterator, ParallelIterator,
15};
16use std::ops::Shr;
17
/// CPUConverter implements the ImageProcessor trait using the fallback CPU
/// implementation for image processing.
#[derive(Debug, Clone)]
pub struct CPUProcessor {
    // Reusable resizer state from the `fast_image_resize` crate.
    resizer: fast_image_resize::Resizer,
    // Resize algorithm and options applied on every resize call
    // (bilinear convolution or nearest neighbor, alpha disabled).
    options: fast_image_resize::ResizeOptions,
    // RGBA color table, initialized from `crate::DEFAULT_COLORS_U8`
    // (presumably used for detection/segmentation overlays — see decoder
    // imports; confirm against the drawing code).
    colors: [[u8; 4]; 20],
}
26
// SAFETY(review): these manual impls assert that every field — notably
// `fast_image_resize::Resizer` and `ResizeOptions` — is safe to move to and
// share between threads. That is not established anywhere in this file;
// confirm the crate's types are `Send`/`Sync` (or why the manual impls are
// needed at all) before relying on cross-thread use.
unsafe impl Send for CPUProcessor {}
unsafe impl Sync for CPUProcessor {}
29
/// Maps a limited-range (BT.601/709 "video" range, 16..=240) luma/chroma
/// value to full range 0..=255 with round-to-nearest.
///
/// Inputs outside the nominal 16..=240 range are clamped first: the previous
/// arithmetic underflowed `l as u16 - 16` for l < 16 (panic in debug builds,
/// wrap in release) and for l > 240 the result exceeded 255 and wrapped in
/// the `as u8` cast (e.g. 255 mapped to 16).
#[inline(always)]
fn limit_to_full(l: u8) -> u8 {
    const SPAN: u16 = 240 - 16;
    // Clamp to the limited-range excursion, then scale with rounding.
    // Max intermediate: 224 * 255 + 112 = 57_232, which fits in u16.
    let v = (l as u16).saturating_sub(16).min(SPAN);
    ((v * 255 + SPAN / 2) / SPAN) as u8
}
34
/// Maps a full-range (0..=255) value to limited "video" range 16..=240 with
/// round-to-nearest. The inverse direction of `limit_to_full`.
#[inline(always)]
fn full_to_limit(l: u8) -> u8 {
    // Limited-range excursion is 240 - 16 = 224; max intermediate is
    // 255 * 224 + 127 = 57_247, which fits in u16.
    let scaled = (l as u16 * (240 - 16) + 255 / 2) / 255;
    (scaled + 16) as u8
}
39
40impl Default for CPUProcessor {
41    fn default() -> Self {
42        Self::new_bilinear()
43    }
44}
45
46impl CPUProcessor {
47    /// Creates a new CPUConverter with bilinear resizing.
48    pub fn new() -> Self {
49        Self::new_bilinear()
50    }
51
52    /// Creates a new CPUConverter with bilinear resizing.
53    fn new_bilinear() -> Self {
54        let resizer = fast_image_resize::Resizer::new();
55        let options = fast_image_resize::ResizeOptions::new()
56            .resize_alg(fast_image_resize::ResizeAlg::Convolution(
57                fast_image_resize::FilterType::Bilinear,
58            ))
59            .use_alpha(false);
60
61        log::debug!("CPUConverter created");
62        Self {
63            resizer,
64            options,
65            colors: crate::DEFAULT_COLORS_U8,
66        }
67    }
68
69    /// Creates a new CPUConverter with nearest neighbor resizing.
70    pub fn new_nearest() -> Self {
71        let resizer = fast_image_resize::Resizer::new();
72        let options = fast_image_resize::ResizeOptions::new()
73            .resize_alg(fast_image_resize::ResizeAlg::Nearest)
74            .use_alpha(false);
75        log::debug!("CPUConverter created");
76        Self {
77            resizer,
78            options,
79            colors: crate::DEFAULT_COLORS_U8,
80        }
81    }
82
    /// Applies `flip` then `rotation` to the interleaved HWC `src_map`
    /// pixels, writing the result into `dst_map`.
    ///
    /// `dst` supplies the output geometry; for 90° rotations the source is
    /// interpreted with the destination's transposed shape
    /// (width, height, channels). Errors if either slice length does not
    /// match the computed shape (via `from_shape`).
    ///
    /// The flip is applied in the source orientation *before* the rotation —
    /// the two `match` blocks below must stay in this order.
    pub(crate) fn flip_rotate_ndarray(
        src_map: &[u8],
        dst_map: &mut [u8],
        dst: &TensorImage,
        rotation: Rotation,
        flip: Flip,
    ) -> Result<(), crate::Error> {
        let mut dst_view =
            ArrayViewMut3::from_shape((dst.height(), dst.width(), dst.channels()), dst_map)?;
        let mut src_view = match rotation {
            Rotation::None | Rotation::Rotate180 => {
                ArrayView3::from_shape((dst.height(), dst.width(), dst.channels()), src_map)?
            }
            Rotation::Clockwise90 | Rotation::CounterClockwise90 => {
                // 90° rotations swap width and height, so the source view has
                // the destination's transposed shape.
                ArrayView3::from_shape((dst.width(), dst.height(), dst.channels()), src_map)?
            }
        };

        // Flips are lazy axis inversions on the source view — no data is
        // copied until the final assign.
        match flip {
            Flip::None => {}
            Flip::Vertical => {
                src_view.invert_axis(Axis(0));
            }
            Flip::Horizontal => {
                src_view.invert_axis(Axis(1));
            }
        }

        // Rotations are expressed as a transpose plus one axis inversion
        // (180° inverts both axes without transposing). These compose with
        // any flip applied above.
        match rotation {
            Rotation::None => {}
            Rotation::Clockwise90 => {
                src_view.swap_axes(0, 1);
                src_view.invert_axis(Axis(1));
            }
            Rotation::Rotate180 => {
                src_view.invert_axis(Axis(0));
                src_view.invert_axis(Axis(1));
            }
            Rotation::CounterClockwise90 => {
                src_view.swap_axes(0, 1);
                src_view.invert_axis(Axis(0));
            }
        }

        // Single strided copy realizes the combined transform.
        dst_view.assign(&src_view);

        Ok(())
    }
131
    /// Converts NV12 (planar Y followed by interleaved UV, 4:2:0) to packed
    /// RGB using BT.709 limited-range coefficients via the `yuv` crate.
    fn convert_nv12_to_rgb(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
        assert_eq!(src.fourcc(), NV12);
        assert_eq!(dst.fourcc(), RGB);
        let map = src.tensor.map()?;
        // NOTE(review): strides are taken as `width`, i.e. the NV12 source is
        // assumed tightly packed — confirm row_stride == width upstream.
        let y_stride = src.width() as u32;
        let uv_stride = src.width() as u32;
        // Y plane occupies the first height * stride bytes; UV follows.
        let slices = map.as_slice().split_at(y_stride as usize * src.height());

        let src = yuv::YuvBiPlanarImage {
            y_plane: slices.0,
            y_stride,
            uv_plane: slices.1,
            uv_stride,
            width: src.width() as u32,
            height: src.height() as u32,
        };

        Ok(yuv::yuv_nv12_to_rgb(
            &src,
            dst.tensor.map()?.as_mut_slice(),
            dst.row_stride() as u32,
            yuv::YuvRange::Limited,
            yuv::YuvStandardMatrix::Bt709,
            yuv::YuvConversionMode::Balanced,
        )?)
    }

    // NOTE: The `*_to_rgba` helpers below all accept BGRA destinations
    // (`assert!(matches!(dst.fourcc(), RGBA | BGRA))`). They always write
    // pixels in RGBA channel order; for BGRA destinations the caller applies
    // an R↔B swizzle afterwards via `swizzle_rb_4chan`.
    /// Converts NV12 to RGBA/BGRA (RGBA channel order — see note above).
    fn convert_nv12_to_rgba(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
        assert_eq!(src.fourcc(), NV12);
        assert!(matches!(dst.fourcc(), RGBA | BGRA));
        let map = src.tensor.map()?;
        // Same packed-stride assumption as convert_nv12_to_rgb.
        let y_stride = src.width() as u32;
        let uv_stride = src.width() as u32;
        let slices = map.as_slice().split_at(y_stride as usize * src.height());

        let src = yuv::YuvBiPlanarImage {
            y_plane: slices.0,
            y_stride,
            uv_plane: slices.1,
            uv_stride,
            width: src.width() as u32,
            height: src.height() as u32,
        };

        Ok(yuv::yuv_nv12_to_rgba(
            &src,
            dst.tensor.map()?.as_mut_slice(),
            dst.row_stride() as u32,
            yuv::YuvRange::Limited,
            yuv::YuvStandardMatrix::Bt709,
            yuv::YuvConversionMode::Balanced,
        )?)
    }

    /// Converts NV12 to GREY by expanding the limited-range Y plane to full
    /// range (the UV plane is discarded).
    fn convert_nv12_to_grey(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
        assert_eq!(src.fourcc(), NV12);
        assert_eq!(dst.fourcc(), GREY);
        let src_map = src.tensor.map()?;
        let mut dst_map = dst.tensor.map()?;
        // Packed-Y assumption, as in the other NV12 converters.
        let y_stride = src.width() as u32;
        let y_slice = src_map
            .as_slice()
            .split_at(y_stride as usize * src.height())
            .0;
        // Process in fixed 8-byte groups (helps vectorization), then the
        // remainder byte-by-byte.
        let src_chunks = y_slice.as_chunks::<8>();
        let dst_chunks = dst_map.as_chunks_mut::<8>();
        for (s, d) in src_chunks.0.iter().zip(dst_chunks.0) {
            s.iter().zip(d).for_each(|(s, d)| *d = limit_to_full(*s));
        }

        for (s, d) in src_chunks.1.iter().zip(dst_chunks.1) {
            *d = limit_to_full(*s);
        }

        Ok(())
    }
212
    /// Converts packed YUYV (4:2:2) to packed RGB, BT.709 limited range.
    fn convert_yuyv_to_rgb(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
        assert_eq!(src.fourcc(), YUYV);
        assert_eq!(dst.fourcc(), RGB);
        let src = yuv::YuvPackedImage::<u8> {
            yuy: &src.tensor.map()?,
            yuy_stride: src.row_stride() as u32, // we assume packed yuyv
            width: src.width() as u32,
            height: src.height() as u32,
        };

        Ok(yuv::yuyv422_to_rgb(
            &src,
            dst.tensor.map()?.as_mut_slice(),
            // NOTE(review): destination stride is width * 3 here, while
            // convert_yuyv_to_rgba uses dst.row_stride(). Equivalent only if
            // the RGB destination is tightly packed — confirm.
            dst.width() as u32 * 3,
            yuv::YuvRange::Limited,
            yuv::YuvStandardMatrix::Bt709,
        )?)
    }

    /// Converts packed YUYV (4:2:2) to RGBA/BGRA. Always writes RGBA channel
    /// order; BGRA destinations are swizzled afterwards by the caller.
    fn convert_yuyv_to_rgba(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
        assert_eq!(src.fourcc(), YUYV);
        assert!(matches!(dst.fourcc(), RGBA | BGRA));
        let src = yuv::YuvPackedImage::<u8> {
            yuy: &src.tensor.map()?,
            yuy_stride: src.row_stride() as u32, // we assume packed yuyv
            width: src.width() as u32,
            height: src.height() as u32,
        };

        Ok(yuv::yuyv422_to_rgba(
            &src,
            dst.tensor.map()?.as_mut_slice(),
            dst.row_stride() as u32,
            yuv::YuvRange::Limited,
            yuv::YuvStandardMatrix::Bt709,
        )?)
    }
250
251    fn convert_yuyv_to_8bps(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
252        assert_eq!(src.fourcc(), YUYV);
253        assert_eq!(dst.fourcc(), PLANAR_RGB);
254        let mut tmp = TensorImage::new(src.width(), src.height(), RGB, None)?;
255        Self::convert_yuyv_to_rgb(src, &mut tmp)?;
256        Self::convert_rgb_to_8bps(&tmp, dst)
257    }
258
259    fn convert_yuyv_to_prgba(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
260        assert_eq!(src.fourcc(), YUYV);
261        assert_eq!(dst.fourcc(), PLANAR_RGBA);
262        let mut tmp = TensorImage::new(src.width(), src.height(), RGB, None)?;
263        Self::convert_yuyv_to_rgb(src, &mut tmp)?;
264        Self::convert_rgb_to_prgba(&tmp, dst)
265    }
266
    /// Converts YUYV to GREY: Y bytes sit at even offsets ([Y0, U, Y1, V]),
    /// and limited-range luma is expanded to full range; chroma is dropped.
    fn convert_yuyv_to_grey(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
        assert_eq!(src.fourcc(), YUYV);
        assert_eq!(dst.fourcc(), GREY);
        let src_map = src.tensor.map()?;
        let mut dst_map = dst.tensor.map()?;
        // Fixed-size 16 -> 8 byte groups (8 macropixel bytes yield 4 luma
        // samples per 8 input bytes); the remainders are handled below.
        let src_chunks = src_map.as_chunks::<16>();
        let dst_chunks = dst_map.as_chunks_mut::<8>();
        for (s, d) in src_chunks.0.iter().zip(dst_chunks.0) {
            s.iter()
                .step_by(2)
                .zip(d)
                .for_each(|(s, d)| *d = limit_to_full(*s));
        }

        for (s, d) in src_chunks.1.iter().step_by(2).zip(dst_chunks.1) {
            *d = limit_to_full(*s);
        }

        Ok(())
    }

    /// Converts YUYV to NV16 (4:2:2 with separate Y plane and interleaved UV
    /// plane) by de-interleaving: even bytes are Y, odd bytes are U/V.
    fn convert_yuyv_to_nv16(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
        assert_eq!(src.fourcc(), YUYV);
        assert_eq!(dst.fourcc(), NV16);
        let src_map = src.tensor.map()?;
        let mut dst_map = dst.tensor.map()?;

        // Each 2-byte pair is [Y, U] or [Y, V]; splitting the pairs keeps the
        // UV interleaving order NV16 expects.
        let src_chunks = src_map.as_chunks::<2>().0;
        let (y_plane, uv_plane) = dst_map.split_at_mut(dst.row_stride() * dst.height());

        for ((s, y), uv) in src_chunks.iter().zip(y_plane).zip(uv_plane) {
            *y = s[0];
            *uv = s[1];
        }
        Ok(())
    }
303
    /// Converts packed VYUY (4:2:2, byte order [V, Y0, U, Y1]) to packed RGB,
    /// BT.709 limited range.
    fn convert_vyuy_to_rgb(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
        assert_eq!(src.fourcc(), VYUY);
        assert_eq!(dst.fourcc(), RGB);
        let src = yuv::YuvPackedImage::<u8> {
            yuy: &src.tensor.map()?,
            yuy_stride: src.row_stride() as u32,
            width: src.width() as u32,
            height: src.height() as u32,
        };

        Ok(yuv::vyuy422_to_rgb(
            &src,
            dst.tensor.map()?.as_mut_slice(),
            // NOTE(review): width * 3 assumes a tightly packed RGB
            // destination (other converters use dst.row_stride()) — confirm.
            dst.width() as u32 * 3,
            yuv::YuvRange::Limited,
            yuv::YuvStandardMatrix::Bt709,
        )?)
    }

    /// Converts packed VYUY to RGBA/BGRA. Always writes RGBA channel order;
    /// BGRA destinations are swizzled afterwards by the caller.
    fn convert_vyuy_to_rgba(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
        assert_eq!(src.fourcc(), VYUY);
        assert!(matches!(dst.fourcc(), RGBA | BGRA));
        let src = yuv::YuvPackedImage::<u8> {
            yuy: &src.tensor.map()?,
            yuy_stride: src.row_stride() as u32,
            width: src.width() as u32,
            height: src.height() as u32,
        };

        Ok(yuv::vyuy422_to_rgba(
            &src,
            dst.tensor.map()?.as_mut_slice(),
            dst.row_stride() as u32,
            yuv::YuvRange::Limited,
            yuv::YuvStandardMatrix::Bt709,
        )?)
    }
341
342    fn convert_vyuy_to_8bps(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
343        assert_eq!(src.fourcc(), VYUY);
344        assert_eq!(dst.fourcc(), PLANAR_RGB);
345        let mut tmp = TensorImage::new(src.width(), src.height(), RGB, None)?;
346        Self::convert_vyuy_to_rgb(src, &mut tmp)?;
347        Self::convert_rgb_to_8bps(&tmp, dst)
348    }
349
350    fn convert_vyuy_to_prgba(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
351        assert_eq!(src.fourcc(), VYUY);
352        assert_eq!(dst.fourcc(), PLANAR_RGBA);
353        let mut tmp = TensorImage::new(src.width(), src.height(), RGB, None)?;
354        Self::convert_vyuy_to_rgb(src, &mut tmp)?;
355        Self::convert_rgb_to_prgba(&tmp, dst)
356    }
357
    /// Converts VYUY to GREY: Y bytes sit at odd offsets ([V, Y0, U, Y1]),
    /// and limited-range luma is expanded to full range; chroma is dropped.
    fn convert_vyuy_to_grey(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
        assert_eq!(src.fourcc(), VYUY);
        assert_eq!(dst.fourcc(), GREY);
        let src_map = src.tensor.map()?;
        let mut dst_map = dst.tensor.map()?;
        // VYUY byte order: [V, Y0, U, Y1] — Y at offsets 1, 3
        let src_chunks = src_map.as_chunks::<16>();
        let dst_chunks = dst_map.as_chunks_mut::<8>();
        for (s, d) in src_chunks.0.iter().zip(dst_chunks.0) {
            // Odd source offsets 1,3,..,15 map to dense destination indices.
            for (di, si) in (1..16).step_by(2).enumerate() {
                d[di] = limit_to_full(s[si]);
            }
        }

        // Remainder: same odd-offset walk over the trailing partial chunk.
        for (di, si) in (1..src_chunks.1.len()).step_by(2).enumerate() {
            dst_chunks.1[di] = limit_to_full(src_chunks.1[si]);
        }

        Ok(())
    }

    /// Converts VYUY to NV16 by de-interleaving each 4-byte macropixel into
    /// two Y samples and one interleaved UV pair.
    fn convert_vyuy_to_nv16(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
        assert_eq!(src.fourcc(), VYUY);
        assert_eq!(dst.fourcc(), NV16);
        let src_map = src.tensor.map()?;
        let mut dst_map = dst.tensor.map()?;

        // VYUY byte order: [V, Y0, U, Y1] — per 4-byte macropixel
        let src_chunks = src_map.as_chunks::<4>().0;
        let (y_plane, uv_plane) = dst_map.split_at_mut(dst.row_stride() * dst.height());
        let y_pairs = y_plane.as_chunks_mut::<2>().0;
        let uv_pairs = uv_plane.as_chunks_mut::<2>().0;

        for ((s, y), uv) in src_chunks.iter().zip(y_pairs).zip(uv_pairs) {
            y[0] = s[1]; // Y0
            y[1] = s[3]; // Y1
            uv[0] = s[2]; // U
            uv[1] = s[0]; // V
        }
        Ok(())
    }
399
    /// Converts GREY to packed RGB, treating the grey plane as full-range
    /// luma with no chroma (YUV 4:0:0).
    fn convert_grey_to_rgb(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
        assert_eq!(src.fourcc(), GREY);
        assert_eq!(dst.fourcc(), RGB);
        let src = yuv::YuvGrayImage::<u8> {
            y_plane: &src.tensor.map()?,
            y_stride: src.row_stride() as u32, // we assume packed Y
            width: src.width() as u32,
            height: src.height() as u32,
        };
        Ok(yuv::yuv400_to_rgb(
            &src,
            dst.tensor.map()?.as_mut_slice(),
            dst.row_stride() as u32,
            // Full range: grey pixels are used as-is, no 16..240 expansion.
            yuv::YuvRange::Full,
            yuv::YuvStandardMatrix::Bt709,
        )?)
    }

    /// Converts GREY to RGBA/BGRA (RGBA channel order; BGRA destinations are
    /// swizzled afterwards by the caller).
    fn convert_grey_to_rgba(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
        assert_eq!(src.fourcc(), GREY);
        assert!(matches!(dst.fourcc(), RGBA | BGRA));
        let src = yuv::YuvGrayImage::<u8> {
            y_plane: &src.tensor.map()?,
            y_stride: src.row_stride() as u32,
            width: src.width() as u32,
            height: src.height() as u32,
        };
        Ok(yuv::yuv400_to_rgba(
            &src,
            dst.tensor.map()?.as_mut_slice(),
            dst.row_stride() as u32,
            yuv::YuvRange::Full,
            yuv::YuvStandardMatrix::Bt709,
        )?)
    }
435
    /// Converts GREY to planar RGB by copying the grey plane into each of the
    /// three color planes in parallel.
    fn convert_grey_to_8bps(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
        assert_eq!(src.fourcc(), GREY);
        assert_eq!(dst.fourcc(), PLANAR_RGB);

        let src = src.tensor().map()?;
        let src = src.as_slice();

        let mut dst_map = dst.tensor().map()?;
        let dst_ = dst_map.as_mut_slice();

        // Planes are assumed tightly packed at width * height bytes each.
        // NOTE(review): copy_from_slice panics if src.len() != plane length —
        // i.e. the grey source must also be packed; confirm.
        let (dst0, dst1) = dst_.split_at_mut(dst.width() * dst.height());
        let (dst1, dst2) = dst1.split_at_mut(dst.width() * dst.height());

        // The three plane copies are independent, so run them concurrently.
        rayon::scope(|s| {
            s.spawn(|_| dst0.copy_from_slice(src));
            s.spawn(|_| dst1.copy_from_slice(src));
            s.spawn(|_| dst2.copy_from_slice(src));
        });
        Ok(())
    }

    /// Converts GREY to planar RGBA: grey copied into R/G/B planes, alpha
    /// plane filled with 255 (opaque).
    fn convert_grey_to_prgba(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
        assert_eq!(src.fourcc(), GREY);
        assert_eq!(dst.fourcc(), PLANAR_RGBA);

        let src = src.tensor().map()?;
        let src = src.as_slice();

        let mut dst_map = dst.tensor().map()?;
        let dst_ = dst_map.as_mut_slice();

        // Same packed-plane assumption as convert_grey_to_8bps.
        let (dst0, dst1) = dst_.split_at_mut(dst.width() * dst.height());
        let (dst1, dst2) = dst1.split_at_mut(dst.width() * dst.height());
        let (dst2, dst3) = dst2.split_at_mut(dst.width() * dst.height());
        rayon::scope(|s| {
            s.spawn(|_| dst0.copy_from_slice(src));
            s.spawn(|_| dst1.copy_from_slice(src));
            s.spawn(|_| dst2.copy_from_slice(src));
            s.spawn(|_| dst3.fill(255));
        });
        Ok(())
    }
478
479    fn convert_grey_to_yuyv(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
480        assert_eq!(src.fourcc(), GREY);
481        assert_eq!(dst.fourcc(), YUYV);
482
483        let src = src.tensor().map()?;
484        let src = src.as_slice();
485
486        let mut dst = dst.tensor().map()?;
487        let dst = dst.as_mut_slice();
488        for (s, d) in src
489            .as_chunks::<2>()
490            .0
491            .iter()
492            .zip(dst.as_chunks_mut::<4>().0.iter_mut())
493        {
494            d[0] = full_to_limit(s[0]);
495            d[1] = 128;
496
497            d[2] = full_to_limit(s[1]);
498            d[3] = 128;
499        }
500        Ok(())
501    }
502
503    fn convert_grey_to_nv16(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
504        assert_eq!(src.fourcc(), GREY);
505        assert_eq!(dst.fourcc(), NV16);
506
507        let src = src.tensor().map()?;
508        let src = src.as_slice();
509
510        let mut dst = dst.tensor().map()?;
511        let dst = dst.as_mut_slice();
512
513        for (s, d) in src.iter().zip(dst[0..src.len()].iter_mut()) {
514            *d = full_to_limit(*s);
515        }
516        dst[src.len()..].fill(128);
517
518        Ok(())
519    }
520
    /// Converts packed RGBA to packed RGB by dropping the alpha channel.
    fn convert_rgba_to_rgb(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
        assert_eq!(src.fourcc(), RGBA);
        assert_eq!(dst.fourcc(), RGB);

        Ok(yuv::rgba_to_rgb(
            src.tensor.map()?.as_slice(),
            // Strides computed as width * channels — assumes packed rows.
            (src.width() * src.channels()) as u32,
            dst.tensor.map()?.as_mut_slice(),
            (dst.width() * dst.channels()) as u32,
            src.width() as u32,
            src.height() as u32,
        )?)
    }

    /// Converts packed RGBA to GREY via a full-range BT.709 luma transform
    /// (YUV 4:0:0); alpha is ignored.
    fn convert_rgba_to_grey(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
        assert_eq!(src.fourcc(), RGBA);
        assert_eq!(dst.fourcc(), GREY);

        let mut dst = yuv::YuvGrayImageMut::<u8> {
            y_plane: yuv::BufferStoreMut::Borrowed(&mut dst.tensor.map()?),
            y_stride: dst.row_stride() as u32,
            width: dst.width() as u32,
            height: dst.height() as u32,
        };
        Ok(yuv::rgba_to_yuv400(
            &mut dst,
            src.tensor.map()?.as_slice(),
            src.row_stride() as u32,
            yuv::YuvRange::Full,
            yuv::YuvStandardMatrix::Bt709,
        )?)
    }
553
    /// Converts packed RGBA to planar RGB, dropping alpha, with a parallel
    /// per-pixel de-interleave.
    fn convert_rgba_to_8bps(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
        assert_eq!(src.fourcc(), RGBA);
        assert_eq!(dst.fourcc(), PLANAR_RGB);

        let src = src.tensor().map()?;
        let src = src.as_slice();
        // One 4-byte chunk per pixel: [R, G, B, A].
        let src = src.as_chunks::<4>().0;

        let mut dst_map = dst.tensor().map()?;
        let dst_ = dst_map.as_mut_slice();

        // Packed-plane assumption: each plane is width * height bytes.
        let (dst0, dst1) = dst_.split_at_mut(dst.width() * dst.height());
        let (dst1, dst2) = dst1.split_at_mut(dst.width() * dst.height());

        // NOTE(review): zip_eq panics if pixel count != plane length, i.e.
        // the RGBA source must also be tightly packed — confirm.
        src.par_iter()
            .zip_eq(dst0)
            .zip_eq(dst1)
            .zip_eq(dst2)
            .for_each(|(((s, d0), d1), d2)| {
                *d0 = s[0];
                *d1 = s[1];
                *d2 = s[2];
            });
        Ok(())
    }

    /// Converts packed RGBA to planar RGBA (one plane per channel, alpha
    /// preserved), with a parallel per-pixel de-interleave.
    fn convert_rgba_to_prgba(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
        assert_eq!(src.fourcc(), RGBA);
        assert_eq!(dst.fourcc(), PLANAR_RGBA);

        let src = src.tensor().map()?;
        let src = src.as_slice();
        let src = src.as_chunks::<4>().0;

        let mut dst_map = dst.tensor().map()?;
        let dst_ = dst_map.as_mut_slice();

        // Same packed-plane and zip_eq length assumptions as above.
        let (dst0, dst1) = dst_.split_at_mut(dst.width() * dst.height());
        let (dst1, dst2) = dst1.split_at_mut(dst.width() * dst.height());
        let (dst2, dst3) = dst2.split_at_mut(dst.width() * dst.height());

        src.par_iter()
            .zip_eq(dst0)
            .zip_eq(dst1)
            .zip_eq(dst2)
            .zip_eq(dst3)
            .for_each(|((((s, d0), d1), d2), d3)| {
                *d0 = s[0];
                *d1 = s[1];
                *d2 = s[2];
                *d3 = s[3];
            });
        Ok(())
    }
608
    /// Converts packed RGBA to packed YUYV using a Q20 fixed-point BT.709
    /// limited-range RGB→YUV matrix; alpha is ignored. Two source pixels
    /// produce one macropixel, with chroma averaged over the pair.
    fn convert_rgba_to_yuyv(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
        assert_eq!(src.fourcc(), RGBA);
        assert_eq!(dst.fourcc(), YUYV);

        let src = src.tensor().map()?;
        let src = src.as_slice();

        let mut dst = dst.tensor().map()?;
        let dst = dst.as_mut_slice();

        // compute quantized Bt.709 limited range RGB to YUV matrix
        // Y' = 16  + 219/255 * (KR·R + KG·G + KB·B)
        // U  = 128 + 224/255 * (B − Y) / (2·(1−KB)), with 1−KB = KR+KG
        // V  = 128 + 224/255 * (R − Y) / (2·(1−KR)), with 1−KR = KG+KB
        // Coefficients are pre-scaled by 2^BIAS (Q20 fixed point).
        const KR: f64 = 0.2126f64;
        const KB: f64 = 0.0722f64;
        const KG: f64 = 1.0 - KR - KB;
        const BIAS: i32 = 20;

        const Y_R: i32 = (KR * (219 << BIAS) as f64 / 255.0).round() as i32;
        const Y_G: i32 = (KG * (219 << BIAS) as f64 / 255.0).round() as i32;
        const Y_B: i32 = (KB * (219 << BIAS) as f64 / 255.0).round() as i32;

        const U_R: i32 = (-KR / (KR + KG) / 2.0 * (224 << BIAS) as f64 / 255.0).round() as i32;
        const U_G: i32 = (-KG / (KR + KG) / 2.0 * (224 << BIAS) as f64 / 255.0).round() as i32;
        const U_B: i32 = (0.5_f64 * (224 << BIAS) as f64 / 255.0).ceil() as i32;

        const V_R: i32 = (0.5_f64 * (224 << BIAS) as f64 / 255.0).ceil() as i32;
        const V_G: i32 = (-KG / (KG + KB) / 2.0 * (224 << BIAS) as f64 / 255.0).round() as i32;
        const V_B: i32 = (-KB / (KG + KB) / 2.0 * (224 << BIAS) as f64 / 255.0).round() as i32;
        // Rounding terms for the >>BIAS and >>(BIAS+1) shifts below.
        const ROUND: i32 = 1 << (BIAS - 1);
        const ROUND2: i32 = 1 << BIAS;
        // Two RGBA pixels (8 bytes, alpha skipped) -> one YUYV macropixel
        // (4 bytes). U and V sum both pixels' contributions and shift by
        // BIAS + 1, i.e. the chroma pair average.
        let process_rgba_to_yuyv = |s: &[u8; 8], d: &mut [u8; 4]| {
            let [r0, g0, b0, _, r1, g1, b1, _] = *s;
            let r0 = r0 as i32;
            let g0 = g0 as i32;
            let b0 = b0 as i32;
            let r1 = r1 as i32;
            let g1 = g1 as i32;
            let b1 = b1 as i32;
            d[0] = ((Y_R * r0 + Y_G * g0 + Y_B * b0 + ROUND).shr(BIAS) + 16) as u8;
            d[1] = ((U_R * r0 + U_G * g0 + U_B * b0 + U_R * r1 + U_G * g1 + U_B * b1 + ROUND2)
                .shr(BIAS + 1)
                + 128) as u8;
            d[2] = ((Y_R * r1 + Y_G * g1 + Y_B * b1 + ROUND).shr(BIAS) + 16) as u8;
            d[3] = ((V_R * r0 + V_G * g0 + V_B * b0 + V_R * r1 + V_G * g1 + V_B * b1 + ROUND2)
                .shr(BIAS + 1)
                + 128) as u8;
        };

        // Process in 32-macropixel blocks (helps vectorization), then the
        // remainder. A trailing partial macropixel (< 8 source bytes, i.e. an
        // odd final pixel) is left unwritten.
        let src = src.as_chunks::<{ 8 * 32 }>();
        let dst = dst.as_chunks_mut::<{ 4 * 32 }>();

        for (s, d) in src.0.iter().zip(dst.0.iter_mut()) {
            let s = s.as_chunks::<8>().0;
            let d = d.as_chunks_mut::<4>().0;
            for (s, d) in s.iter().zip(d.iter_mut()) {
                process_rgba_to_yuyv(s, d);
            }
        }

        let s = src.1.as_chunks::<8>().0;
        let d = dst.1.as_chunks_mut::<4>().0;
        for (s, d) in s.iter().zip(d.iter_mut()) {
            process_rgba_to_yuyv(s, d);
        }

        Ok(())
    }
675
    /// Converts packed RGBA to NV16 (planar Y plus interleaved UV, 4:2:2)
    /// with BT.709 limited range; alpha is ignored.
    fn convert_rgba_to_nv16(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
        assert_eq!(src.fourcc(), RGBA);
        assert_eq!(dst.fourcc(), NV16);

        let mut dst_map = dst.tensor().map()?;

        // NOTE(review): plane split and strides use width, i.e. the NV16
        // destination is assumed tightly packed — confirm row_stride == width.
        let (y_plane, uv_plane) = dst_map.split_at_mut(dst.width() * dst.height());
        let mut bi_planar_image = yuv::YuvBiPlanarImageMut::<u8> {
            y_plane: yuv::BufferStoreMut::Borrowed(y_plane),
            y_stride: dst.width() as u32,
            uv_plane: yuv::BufferStoreMut::Borrowed(uv_plane),
            uv_stride: dst.width() as u32,
            width: dst.width() as u32,
            height: dst.height() as u32,
        };

        Ok(yuv::rgba_to_yuv_nv16(
            &mut bi_planar_image,
            src.tensor.map()?.as_slice(),
            src.row_stride() as u32,
            yuv::YuvRange::Limited,
            yuv::YuvStandardMatrix::Bt709,
            yuv::YuvConversionMode::Balanced,
        )?)
    }
701
    /// Converts packed RGB to RGBA/BGRA by appending an opaque alpha channel.
    /// Always writes RGBA channel order; BGRA destinations are swizzled
    /// afterwards by the caller.
    fn convert_rgb_to_rgba(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
        assert_eq!(src.fourcc(), RGB);
        assert!(matches!(dst.fourcc(), RGBA | BGRA));

        Ok(yuv::rgb_to_rgba(
            src.tensor.map()?.as_slice(),
            // Strides computed as width * channels — assumes packed rows.
            (src.width() * src.channels()) as u32,
            dst.tensor.map()?.as_mut_slice(),
            (dst.width() * dst.channels()) as u32,
            src.width() as u32,
            src.height() as u32,
        )?)
    }

    /// Converts packed RGB to GREY via a full-range BT.709 luma transform
    /// (YUV 4:0:0).
    fn convert_rgb_to_grey(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
        assert_eq!(src.fourcc(), RGB);
        assert_eq!(dst.fourcc(), GREY);

        let mut dst = yuv::YuvGrayImageMut::<u8> {
            y_plane: yuv::BufferStoreMut::Borrowed(&mut dst.tensor.map()?),
            y_stride: dst.row_stride() as u32,
            width: dst.width() as u32,
            height: dst.height() as u32,
        };
        Ok(yuv::rgb_to_yuv400(
            &mut dst,
            src.tensor.map()?.as_slice(),
            src.row_stride() as u32,
            yuv::YuvRange::Full,
            yuv::YuvStandardMatrix::Bt709,
        )?)
    }

    /// Converts packed RGB to planar RGB with a parallel per-pixel
    /// de-interleave.
    fn convert_rgb_to_8bps(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
        assert_eq!(src.fourcc(), RGB);
        assert_eq!(dst.fourcc(), PLANAR_RGB);

        let src = src.tensor().map()?;
        let src = src.as_slice();
        // One 3-byte chunk per pixel: [R, G, B].
        let src = src.as_chunks::<3>().0;

        let mut dst_map = dst.tensor().map()?;
        let dst_ = dst_map.as_mut_slice();

        // Packed-plane assumption: each plane is width * height bytes.
        // NOTE(review): zip_eq panics if pixel count != plane length — the
        // RGB source must be tightly packed; confirm.
        let (dst0, dst1) = dst_.split_at_mut(dst.width() * dst.height());
        let (dst1, dst2) = dst1.split_at_mut(dst.width() * dst.height());

        src.par_iter()
            .zip_eq(dst0)
            .zip_eq(dst1)
            .zip_eq(dst2)
            .for_each(|(((s, d0), d1), d2)| {
                *d0 = s[0];
                *d1 = s[1];
                *d2 = s[2];
            });
        Ok(())
    }
760
    /// Converts packed RGB to planar RGBA: R/G/B de-interleaved in parallel
    /// while the alpha plane is filled with 255 (opaque) concurrently.
    fn convert_rgb_to_prgba(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
        assert_eq!(src.fourcc(), RGB);
        assert_eq!(dst.fourcc(), PLANAR_RGBA);

        let src = src.tensor().map()?;
        let src = src.as_slice();
        let src = src.as_chunks::<3>().0;

        let mut dst_map = dst.tensor().map()?;
        let dst_ = dst_map.as_mut_slice();

        // Packed-plane assumption, as in convert_rgb_to_8bps; zip_eq panics
        // on a length mismatch.
        let (dst0, dst1) = dst_.split_at_mut(dst.width() * dst.height());
        let (dst1, dst2) = dst1.split_at_mut(dst.width() * dst.height());
        let (dst2, dst3) = dst2.split_at_mut(dst.width() * dst.height());

        // The de-interleave and the alpha fill touch disjoint planes, so they
        // run as two concurrent tasks.
        rayon::scope(|s| {
            s.spawn(|_| {
                src.par_iter()
                    .zip_eq(dst0)
                    .zip_eq(dst1)
                    .zip_eq(dst2)
                    .for_each(|(((s, d0), d1), d2)| {
                        *d0 = s[0];
                        *d1 = s[1];
                        *d2 = s[2];
                    })
            });
            s.spawn(|_| dst3.fill(255));
        });
        Ok(())
    }
792
    /// Converts packed RGB to packed YUYV using a Q20 fixed-point BT.709
    /// limited-range RGB→YUV matrix. Two source pixels produce one
    /// macropixel, with chroma averaged over the pair. Same math as
    /// `convert_rgba_to_yuyv` but reading 3-byte pixels.
    fn convert_rgb_to_yuyv(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
        assert_eq!(src.fourcc(), RGB);
        assert_eq!(dst.fourcc(), YUYV);

        let src = src.tensor().map()?;
        let src = src.as_slice();

        let mut dst = dst.tensor().map()?;
        let dst = dst.as_mut_slice();

        // compute quantized Bt.709 limited range RGB to YUV matrix
        // Y' = 16  + 219/255 * (KR·R + KG·G + KB·B)
        // U  = 128 + 224/255 * (B − Y) / (2·(1−KB)), with 1−KB = KR+KG
        // V  = 128 + 224/255 * (R − Y) / (2·(1−KR)), with 1−KR = KG+KB
        // Coefficients are pre-scaled by 2^BIAS (Q20 fixed point).
        const BIAS: i32 = 20;
        const KR: f64 = 0.2126f64;
        const KB: f64 = 0.0722f64;
        const KG: f64 = 1.0 - KR - KB;
        const Y_R: i32 = (KR * (219 << BIAS) as f64 / 255.0).round() as i32;
        const Y_G: i32 = (KG * (219 << BIAS) as f64 / 255.0).round() as i32;
        const Y_B: i32 = (KB * (219 << BIAS) as f64 / 255.0).round() as i32;

        const U_R: i32 = (-KR / (KR + KG) / 2.0 * (224 << BIAS) as f64 / 255.0).round() as i32;
        const U_G: i32 = (-KG / (KR + KG) / 2.0 * (224 << BIAS) as f64 / 255.0).round() as i32;
        const U_B: i32 = (0.5_f64 * (224 << BIAS) as f64 / 255.0).ceil() as i32;

        const V_R: i32 = (0.5_f64 * (224 << BIAS) as f64 / 255.0).ceil() as i32;
        const V_G: i32 = (-KG / (KG + KB) / 2.0 * (224 << BIAS) as f64 / 255.0).round() as i32;
        const V_B: i32 = (-KB / (KG + KB) / 2.0 * (224 << BIAS) as f64 / 255.0).round() as i32;
        // Rounding terms for the >>BIAS and >>(BIAS+1) shifts below.
        const ROUND: i32 = 1 << (BIAS - 1);
        const ROUND2: i32 = 1 << BIAS;
        // Two RGB pixels (6 bytes) -> one YUYV macropixel (4 bytes); U and V
        // sum both pixels' contributions and shift by BIAS + 1 (pair average).
        let process_rgb_to_yuyv = |s: &[u8; 6], d: &mut [u8; 4]| {
            let [r0, g0, b0, r1, g1, b1] = *s;
            let r0 = r0 as i32;
            let g0 = g0 as i32;
            let b0 = b0 as i32;
            let r1 = r1 as i32;
            let g1 = g1 as i32;
            let b1 = b1 as i32;
            d[0] = ((Y_R * r0 + Y_G * g0 + Y_B * b0 + ROUND).shr(BIAS) + 16) as u8;
            d[1] = ((U_R * r0 + U_G * g0 + U_B * b0 + U_R * r1 + U_G * g1 + U_B * b1 + ROUND2)
                .shr(BIAS + 1)
                + 128) as u8;
            d[2] = ((Y_R * r1 + Y_G * g1 + Y_B * b1 + ROUND).shr(BIAS) + 16) as u8;
            d[3] = ((V_R * r0 + V_G * g0 + V_B * b0 + V_R * r1 + V_G * g1 + V_B * b1 + ROUND2)
                .shr(BIAS + 1)
                + 128) as u8;
        };

        // Process in 32-macropixel blocks (helps vectorization), then the
        // remainder; a trailing odd pixel (< 6 bytes) is left unwritten.
        let src = src.as_chunks::<{ 6 * 32 }>();
        let dst = dst.as_chunks_mut::<{ 4 * 32 }>();
        for (s, d) in src.0.iter().zip(dst.0.iter_mut()) {
            let s = s.as_chunks::<6>().0;
            let d = d.as_chunks_mut::<4>().0;
            for (s, d) in s.iter().zip(d.iter_mut()) {
                process_rgb_to_yuyv(s, d);
            }
        }

        let s = src.1.as_chunks::<6>().0;
        let d = dst.1.as_chunks_mut::<4>().0;
        for (s, d) in s.iter().zip(d.iter_mut()) {
            process_rgb_to_yuyv(s, d);
        }

        Ok(())
    }
857
858    fn convert_rgb_to_nv16(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
859        assert_eq!(src.fourcc(), RGB);
860        assert_eq!(dst.fourcc(), NV16);
861
862        let mut dst_map = dst.tensor().map()?;
863
864        let (y_plane, uv_plane) = dst_map.split_at_mut(dst.width() * dst.height());
865        let mut bi_planar_image = yuv::YuvBiPlanarImageMut::<u8> {
866            y_plane: yuv::BufferStoreMut::Borrowed(y_plane),
867            y_stride: dst.width() as u32,
868            uv_plane: yuv::BufferStoreMut::Borrowed(uv_plane),
869            uv_stride: dst.width() as u32,
870            width: dst.width() as u32,
871            height: dst.height() as u32,
872        };
873
874        Ok(yuv::rgb_to_yuv_nv16(
875            &mut bi_planar_image,
876            src.tensor.map()?.as_slice(),
877            src.row_stride() as u32,
878            yuv::YuvRange::Limited,
879            yuv::YuvStandardMatrix::Bt709,
880            yuv::YuvConversionMode::Balanced,
881        )?)
882    }
883
884    fn copy_image(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
885        assert_eq!(src.fourcc(), dst.fourcc());
886        dst.tensor().map()?.copy_from_slice(&src.tensor().map()?);
887        Ok(())
888    }
889
890    /// Swap R and B channels in-place for an interleaved 4-channel image.
891    fn swizzle_rb_4chan(dst: &mut TensorImage) -> Result<()> {
892        let mut map = dst.tensor().map()?;
893        let buf = map.as_mut_slice();
894        for chunk in buf.chunks_exact_mut(4) {
895            chunk.swap(0, 2);
896        }
897        Ok(())
898    }
899
900    fn convert_nv16_to_rgb(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
901        assert_eq!(src.fourcc(), NV16);
902        assert_eq!(dst.fourcc(), RGB);
903        let map = src.tensor.map()?;
904        let y_stride = src.width() as u32;
905        let uv_stride = src.width() as u32;
906        let slices = map.as_slice().split_at(y_stride as usize * src.height());
907
908        let src = yuv::YuvBiPlanarImage {
909            y_plane: slices.0,
910            y_stride,
911            uv_plane: slices.1,
912            uv_stride,
913            width: src.width() as u32,
914            height: src.height() as u32,
915        };
916
917        Ok(yuv::yuv_nv16_to_rgb(
918            &src,
919            dst.tensor.map()?.as_mut_slice(),
920            dst.row_stride() as u32,
921            yuv::YuvRange::Limited,
922            yuv::YuvStandardMatrix::Bt709,
923            yuv::YuvConversionMode::Balanced,
924        )?)
925    }
926
927    fn convert_nv16_to_rgba(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
928        assert_eq!(src.fourcc(), NV16);
929        assert!(matches!(dst.fourcc(), RGBA | BGRA));
930        let map = src.tensor.map()?;
931        let y_stride = src.width() as u32;
932        let uv_stride = src.width() as u32;
933        let slices = map.as_slice().split_at(y_stride as usize * src.height());
934
935        let src = yuv::YuvBiPlanarImage {
936            y_plane: slices.0,
937            y_stride,
938            uv_plane: slices.1,
939            uv_stride,
940            width: src.width() as u32,
941            height: src.height() as u32,
942        };
943
944        Ok(yuv::yuv_nv16_to_rgba(
945            &src,
946            dst.tensor.map()?.as_mut_slice(),
947            dst.row_stride() as u32,
948            yuv::YuvRange::Limited,
949            yuv::YuvStandardMatrix::Bt709,
950            yuv::YuvConversionMode::Balanced,
951        )?)
952    }
953
954    fn convert_8bps_to_rgb(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
955        assert_eq!(src.fourcc(), PLANAR_RGB);
956        assert_eq!(dst.fourcc(), RGB);
957
958        let src_map = src.tensor().map()?;
959        let src_ = src_map.as_slice();
960
961        let (src0, src1) = src_.split_at(src.width() * src.height());
962        let (src1, src2) = src1.split_at(src.width() * src.height());
963
964        let mut dst_map = dst.tensor().map()?;
965        let dst_ = dst_map.as_mut_slice();
966
967        src0.par_iter()
968            .zip_eq(src1)
969            .zip_eq(src2)
970            .zip_eq(dst_.as_chunks_mut::<3>().0.par_iter_mut())
971            .for_each(|(((s0, s1), s2), d)| {
972                d[0] = *s0;
973                d[1] = *s1;
974                d[2] = *s2;
975            });
976        Ok(())
977    }
978
979    fn convert_8bps_to_rgba(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
980        assert_eq!(src.fourcc(), PLANAR_RGB);
981        assert!(matches!(dst.fourcc(), RGBA | BGRA));
982
983        let src_map = src.tensor().map()?;
984        let src_ = src_map.as_slice();
985
986        let (src0, src1) = src_.split_at(src.width() * src.height());
987        let (src1, src2) = src1.split_at(src.width() * src.height());
988
989        let mut dst_map = dst.tensor().map()?;
990        let dst_ = dst_map.as_mut_slice();
991
992        src0.par_iter()
993            .zip_eq(src1)
994            .zip_eq(src2)
995            .zip_eq(dst_.as_chunks_mut::<4>().0.par_iter_mut())
996            .for_each(|(((s0, s1), s2), d)| {
997                d[0] = *s0;
998                d[1] = *s1;
999                d[2] = *s2;
1000                d[3] = 255;
1001            });
1002        Ok(())
1003    }
1004
1005    fn convert_prgba_to_rgb(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
1006        assert_eq!(src.fourcc(), PLANAR_RGBA);
1007        assert_eq!(dst.fourcc(), RGB);
1008
1009        let src_map = src.tensor().map()?;
1010        let src_ = src_map.as_slice();
1011
1012        let (src0, src1) = src_.split_at(src.width() * src.height());
1013        let (src1, src2) = src1.split_at(src.width() * src.height());
1014        let (src2, _src3) = src2.split_at(src.width() * src.height());
1015
1016        let mut dst_map = dst.tensor().map()?;
1017        let dst_ = dst_map.as_mut_slice();
1018
1019        src0.par_iter()
1020            .zip_eq(src1)
1021            .zip_eq(src2)
1022            .zip_eq(dst_.as_chunks_mut::<3>().0.par_iter_mut())
1023            .for_each(|(((s0, s1), s2), d)| {
1024                d[0] = *s0;
1025                d[1] = *s1;
1026                d[2] = *s2;
1027            });
1028        Ok(())
1029    }
1030
1031    fn convert_prgba_to_rgba(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
1032        assert_eq!(src.fourcc(), PLANAR_RGBA);
1033        assert!(matches!(dst.fourcc(), RGBA | BGRA));
1034
1035        let src_map = src.tensor().map()?;
1036        let src_ = src_map.as_slice();
1037
1038        let (src0, src1) = src_.split_at(src.width() * src.height());
1039        let (src1, src2) = src1.split_at(src.width() * src.height());
1040        let (src2, src3) = src2.split_at(src.width() * src.height());
1041
1042        let mut dst_map = dst.tensor().map()?;
1043        let dst_ = dst_map.as_mut_slice();
1044
1045        src0.par_iter()
1046            .zip_eq(src1)
1047            .zip_eq(src2)
1048            .zip_eq(src3)
1049            .zip_eq(dst_.as_chunks_mut::<4>().0.par_iter_mut())
1050            .for_each(|((((s0, s1), s2), s3), d)| {
1051                d[0] = *s0;
1052                d[1] = *s1;
1053                d[2] = *s2;
1054                d[3] = *s3;
1055            });
1056        Ok(())
1057    }
1058
1059    pub(crate) fn support_conversion(src: FourCharCode, dst: FourCharCode) -> bool {
1060        matches!(
1061            (src, dst),
1062            (NV12, RGB)
1063                | (NV12, RGBA)
1064                | (NV12, GREY)
1065                | (NV16, RGB)
1066                | (NV16, RGBA)
1067                | (YUYV, RGB)
1068                | (YUYV, RGBA)
1069                | (YUYV, GREY)
1070                | (YUYV, YUYV)
1071                | (YUYV, PLANAR_RGB)
1072                | (YUYV, PLANAR_RGBA)
1073                | (YUYV, NV16)
1074                | (VYUY, RGB)
1075                | (VYUY, RGBA)
1076                | (VYUY, GREY)
1077                | (VYUY, VYUY)
1078                | (VYUY, PLANAR_RGB)
1079                | (VYUY, PLANAR_RGBA)
1080                | (VYUY, NV16)
1081                | (RGBA, RGB)
1082                | (RGBA, RGBA)
1083                | (RGBA, GREY)
1084                | (RGBA, YUYV)
1085                | (RGBA, PLANAR_RGB)
1086                | (RGBA, PLANAR_RGBA)
1087                | (RGBA, NV16)
1088                | (RGB, RGB)
1089                | (RGB, RGBA)
1090                | (RGB, GREY)
1091                | (RGB, YUYV)
1092                | (RGB, PLANAR_RGB)
1093                | (RGB, PLANAR_RGBA)
1094                | (RGB, NV16)
1095                | (GREY, RGB)
1096                | (GREY, RGBA)
1097                | (GREY, GREY)
1098                | (GREY, YUYV)
1099                | (GREY, PLANAR_RGB)
1100                | (GREY, PLANAR_RGBA)
1101                | (GREY, NV16)
1102                | (NV12, BGRA)
1103                | (YUYV, BGRA)
1104                | (VYUY, BGRA)
1105                | (RGBA, BGRA)
1106                | (RGB, BGRA)
1107                | (GREY, BGRA)
1108                | (BGRA, BGRA)
1109        )
1110    }
1111
    /// Converts `src` into `dst`'s pixel format, dispatching on the
    /// (source fourcc, destination fourcc) pair.
    ///
    /// Both images must already have identical width and height (asserted).
    /// Returns `Error::NotSupported` for fourcc pairs without a CPU
    /// implementation.
    pub(crate) fn convert_format(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
        // shapes should be equal
        let _timer = FunctionTimer::new(format!(
            "ImageProcessor::convert_format {} to {}",
            src.fourcc().display(),
            dst.fourcc().display()
        ));
        assert_eq!(src.height(), dst.height());
        assert_eq!(src.width(), dst.width());

        match (src.fourcc(), dst.fourcc()) {
            (NV12, RGB) => Self::convert_nv12_to_rgb(src, dst),
            (NV12, RGBA) => Self::convert_nv12_to_rgba(src, dst),
            (NV12, GREY) => Self::convert_nv12_to_grey(src, dst),
            (YUYV, RGB) => Self::convert_yuyv_to_rgb(src, dst),
            (YUYV, RGBA) => Self::convert_yuyv_to_rgba(src, dst),
            (YUYV, GREY) => Self::convert_yuyv_to_grey(src, dst),
            // same-format pairs degenerate to a plain buffer copy
            (YUYV, YUYV) => Self::copy_image(src, dst),
            (YUYV, PLANAR_RGB) => Self::convert_yuyv_to_8bps(src, dst),
            (YUYV, PLANAR_RGBA) => Self::convert_yuyv_to_prgba(src, dst),
            (YUYV, NV16) => Self::convert_yuyv_to_nv16(src, dst),
            (VYUY, RGB) => Self::convert_vyuy_to_rgb(src, dst),
            (VYUY, RGBA) => Self::convert_vyuy_to_rgba(src, dst),
            (VYUY, GREY) => Self::convert_vyuy_to_grey(src, dst),
            (VYUY, VYUY) => Self::copy_image(src, dst),
            (VYUY, PLANAR_RGB) => Self::convert_vyuy_to_8bps(src, dst),
            (VYUY, PLANAR_RGBA) => Self::convert_vyuy_to_prgba(src, dst),
            (VYUY, NV16) => Self::convert_vyuy_to_nv16(src, dst),
            (RGBA, RGB) => Self::convert_rgba_to_rgb(src, dst),
            (RGBA, RGBA) => Self::copy_image(src, dst),
            (RGBA, GREY) => Self::convert_rgba_to_grey(src, dst),
            (RGBA, YUYV) => Self::convert_rgba_to_yuyv(src, dst),
            (RGBA, PLANAR_RGB) => Self::convert_rgba_to_8bps(src, dst),
            (RGBA, PLANAR_RGBA) => Self::convert_rgba_to_prgba(src, dst),
            (RGBA, NV16) => Self::convert_rgba_to_nv16(src, dst),
            (RGB, RGB) => Self::copy_image(src, dst),
            (RGB, RGBA) => Self::convert_rgb_to_rgba(src, dst),
            (RGB, GREY) => Self::convert_rgb_to_grey(src, dst),
            (RGB, YUYV) => Self::convert_rgb_to_yuyv(src, dst),
            (RGB, PLANAR_RGB) => Self::convert_rgb_to_8bps(src, dst),
            (RGB, PLANAR_RGBA) => Self::convert_rgb_to_prgba(src, dst),
            (RGB, NV16) => Self::convert_rgb_to_nv16(src, dst),
            (GREY, RGB) => Self::convert_grey_to_rgb(src, dst),
            (GREY, RGBA) => Self::convert_grey_to_rgba(src, dst),
            (GREY, GREY) => Self::copy_image(src, dst),
            (GREY, YUYV) => Self::convert_grey_to_yuyv(src, dst),
            (GREY, PLANAR_RGB) => Self::convert_grey_to_8bps(src, dst),
            (GREY, PLANAR_RGBA) => Self::convert_grey_to_prgba(src, dst),
            (GREY, NV16) => Self::convert_grey_to_nv16(src, dst),

            // the following converts are added for use in testing
            (NV16, RGB) => Self::convert_nv16_to_rgb(src, dst),
            (NV16, RGBA) => Self::convert_nv16_to_rgba(src, dst),
            (PLANAR_RGB, RGB) => Self::convert_8bps_to_rgb(src, dst),
            (PLANAR_RGB, RGBA) => Self::convert_8bps_to_rgba(src, dst),
            (PLANAR_RGBA, RGB) => Self::convert_prgba_to_rgb(src, dst),
            (PLANAR_RGBA, RGBA) => Self::convert_prgba_to_rgba(src, dst),

            // BGRA destination: convert to RGBA layout, then swap R and B
            (BGRA, BGRA) => Self::copy_image(src, dst),
            (NV12, BGRA) => {
                Self::convert_nv12_to_rgba(src, dst)?;
                Self::swizzle_rb_4chan(dst)
            }
            (NV16, BGRA) => {
                Self::convert_nv16_to_rgba(src, dst)?;
                Self::swizzle_rb_4chan(dst)
            }
            (YUYV, BGRA) => {
                Self::convert_yuyv_to_rgba(src, dst)?;
                Self::swizzle_rb_4chan(dst)
            }
            (VYUY, BGRA) => {
                Self::convert_vyuy_to_rgba(src, dst)?;
                Self::swizzle_rb_4chan(dst)
            }
            (RGBA, BGRA) => {
                // copy_image asserts equal fourccs, so copy the bytes inline
                // before swapping R/B.
                dst.tensor().map()?.copy_from_slice(&src.tensor().map()?);
                Self::swizzle_rb_4chan(dst)
            }
            (RGB, BGRA) => {
                Self::convert_rgb_to_rgba(src, dst)?;
                Self::swizzle_rb_4chan(dst)
            }
            (GREY, BGRA) => {
                Self::convert_grey_to_rgba(src, dst)?;
                Self::swizzle_rb_4chan(dst)
            }
            (PLANAR_RGB, BGRA) => {
                Self::convert_8bps_to_rgba(src, dst)?;
                Self::swizzle_rb_4chan(dst)
            }
            (PLANAR_RGBA, BGRA) => {
                Self::convert_prgba_to_rgba(src, dst)?;
                Self::swizzle_rb_4chan(dst)
            }

            (s, d) => Err(Error::NotSupported(format!(
                "Conversion from {} to {}",
                s.display(),
                d.display()
            ))),
        }
    }
1216
1217    /// Generic RGB to PLANAR_RGB conversion that works with any TensorImageDst.
1218    fn convert_rgb_to_planar_rgb_generic<D: TensorImageDst>(
1219        src: &TensorImage,
1220        dst: &mut D,
1221    ) -> Result<()> {
1222        assert_eq!(src.fourcc(), RGB);
1223        assert_eq!(dst.fourcc(), PLANAR_RGB);
1224
1225        let src = src.tensor().map()?;
1226        let src = src.as_slice();
1227        let src = src.as_chunks::<3>().0;
1228
1229        let mut dst_map = dst.tensor_mut().map()?;
1230        let dst_ = dst_map.as_mut_slice();
1231
1232        let (dst0, dst1) = dst_.split_at_mut(dst.width() * dst.height());
1233        let (dst1, dst2) = dst1.split_at_mut(dst.width() * dst.height());
1234
1235        src.par_iter()
1236            .zip_eq(dst0)
1237            .zip_eq(dst1)
1238            .zip_eq(dst2)
1239            .for_each(|(((s, d0), d1), d2)| {
1240                *d0 = s[0];
1241                *d1 = s[1];
1242                *d2 = s[2];
1243            });
1244        Ok(())
1245    }
1246
1247    /// Generic RGBA to PLANAR_RGB conversion that works with any
1248    /// TensorImageDst.
1249    fn convert_rgba_to_planar_rgb_generic<D: TensorImageDst>(
1250        src: &TensorImage,
1251        dst: &mut D,
1252    ) -> Result<()> {
1253        assert_eq!(src.fourcc(), RGBA);
1254        assert_eq!(dst.fourcc(), PLANAR_RGB);
1255
1256        let src = src.tensor().map()?;
1257        let src = src.as_slice();
1258        let src = src.as_chunks::<4>().0;
1259
1260        let mut dst_map = dst.tensor_mut().map()?;
1261        let dst_ = dst_map.as_mut_slice();
1262
1263        let (dst0, dst1) = dst_.split_at_mut(dst.width() * dst.height());
1264        let (dst1, dst2) = dst1.split_at_mut(dst.width() * dst.height());
1265
1266        src.par_iter()
1267            .zip_eq(dst0)
1268            .zip_eq(dst1)
1269            .zip_eq(dst2)
1270            .for_each(|(((s, d0), d1), d2)| {
1271                *d0 = s[0];
1272                *d1 = s[1];
1273                *d2 = s[2];
1274            });
1275        Ok(())
1276    }
1277
1278    /// Generic copy for same-format images that works with any TensorImageDst.
1279    fn copy_image_generic<D: TensorImageDst>(src: &TensorImage, dst: &mut D) -> Result<()> {
1280        assert_eq!(src.fourcc(), dst.fourcc());
1281        dst.tensor_mut()
1282            .map()?
1283            .copy_from_slice(&src.tensor().map()?);
1284        Ok(())
1285    }
1286
1287    /// Format conversion that writes to a generic TensorImageDst.
1288    /// Supports common zero-copy preprocessing cases.
1289    pub(crate) fn convert_format_generic<D: TensorImageDst>(
1290        src: &TensorImage,
1291        dst: &mut D,
1292    ) -> Result<()> {
1293        let _timer = FunctionTimer::new(format!(
1294            "ImageProcessor::convert_format_generic {} to {}",
1295            src.fourcc().display(),
1296            dst.fourcc().display()
1297        ));
1298        assert_eq!(src.height(), dst.height());
1299        assert_eq!(src.width(), dst.width());
1300
1301        match (src.fourcc(), dst.fourcc()) {
1302            (RGB, PLANAR_RGB) => Self::convert_rgb_to_planar_rgb_generic(src, dst),
1303            (RGBA, PLANAR_RGB) => Self::convert_rgba_to_planar_rgb_generic(src, dst),
1304            (f1, f2) if f1 == f2 => Self::copy_image_generic(src, dst),
1305            (s, d) => Err(Error::NotSupported(format!(
1306                "Generic conversion from {} to {} not supported",
1307                s.display(),
1308                d.display()
1309            ))),
1310        }
1311    }
1312
    /// Resizes `src` into `dst`, applying the requested rotation, flip, and
    /// crop.
    ///
    /// The src and dest images should be in an interleaved RGB/RGBA/grey
    /// format for correct output. If the format is not 1, 3, or 4 channels
    /// per pixel, an error will be returned. The src and dest img must have
    /// the same fourcc, otherwise the function will panic.
    fn resize_flip_rotate(
        &mut self,
        src: &TensorImage,
        dst: &mut TensorImage,
        rotation: Rotation,
        flip: Flip,
        crop: Crop,
    ) -> Result<()> {
        let _timer = FunctionTimer::new(format!(
            "ImageProcessor::resize_flip_rotate {}x{} to {}x{} {}",
            src.width(),
            src.height(),
            dst.width(),
            dst.height(),
            dst.fourcc().display()
        ));
        assert_eq!(src.fourcc(), dst.fourcc());

        // Map the channel count onto fast_image_resize's pixel types; only
        // interleaved 1/3/4-channel images can be resized here.
        let src_type = match src.channels() {
            1 => fast_image_resize::PixelType::U8,
            3 => fast_image_resize::PixelType::U8x3,
            4 => fast_image_resize::PixelType::U8x4,
            _ => {
                return Err(Error::NotImplemented(
                    "Unsupported source image format".to_string(),
                ));
            }
        };

        let mut src_map = src.tensor().map()?;

        let mut dst_map = dst.tensor().map()?;

        // Narrow the resize to the requested source window, if any.
        let options = if let Some(crop) = crop.src_rect {
            self.options.crop(
                crop.left as f64,
                crop.top as f64,
                crop.width as f64,
                crop.height as f64,
            )
        } else {
            self.options
        };

        // Default to filling the whole destination when no dst rect is given.
        let mut dst_rect = crop.dst_rect.unwrap_or_else(|| Rect {
            left: 0,
            top: 0,
            width: dst.width(),
            height: dst.height(),
        });

        // adjust crop box for rotation/flip
        Self::adjust_dest_rect_for_rotate_flip(&mut dst_rect, dst, rotation, flip);

        // Resizing can be skipped only when the full source maps 1:1 onto
        // the full destination (same dimensions, no sub-rects).
        let needs_resize = src.width() != dst.width()
            || src.height() != dst.height()
            || crop.src_rect.is_some_and(|crop| {
                crop != Rect {
                    left: 0,
                    top: 0,
                    width: src.width(),
                    height: src.height(),
                }
            })
            || crop.dst_rect.is_some_and(|crop| {
                crop != Rect {
                    left: 0,
                    top: 0,
                    width: dst.width(),
                    height: dst.height(),
                }
            });

        if needs_resize {
            let src_view = fast_image_resize::images::Image::from_slice_u8(
                src.width() as u32,
                src.height() as u32,
                &mut src_map,
                src_type,
            )?;

            match (rotation, flip) {
                // No orientation change: resize straight into the
                // destination's (already adjusted) crop window.
                (Rotation::None, Flip::None) => {
                    let mut dst_view = fast_image_resize::images::Image::from_slice_u8(
                        dst.width() as u32,
                        dst.height() as u32,
                        &mut dst_map,
                        src_type,
                    )?;

                    let mut dst_view = fast_image_resize::images::CroppedImageMut::new(
                        &mut dst_view,
                        dst_rect.left as u32,
                        dst_rect.top as u32,
                        dst_rect.width as u32,
                        dst_rect.height as u32,
                    )?;

                    self.resizer.resize(&src_view, &mut dst_view, &options)?;
                }
                // 90-degree rotations: resize into a temporary buffer whose
                // dimensions are the destination's swapped (height x width),
                // then rotate/flip the temporary into the destination.
                (Rotation::Clockwise90, _) | (Rotation::CounterClockwise90, _) => {
                    let mut tmp = vec![0; dst.row_stride() * dst.height()];
                    let mut tmp_view = fast_image_resize::images::Image::from_slice_u8(
                        dst.height() as u32,
                        dst.width() as u32,
                        &mut tmp,
                        src_type,
                    )?;

                    let mut tmp_view = fast_image_resize::images::CroppedImageMut::new(
                        &mut tmp_view,
                        dst_rect.left as u32,
                        dst_rect.top as u32,
                        dst_rect.width as u32,
                        dst_rect.height as u32,
                    )?;

                    self.resizer.resize(&src_view, &mut tmp_view, &options)?;
                    Self::flip_rotate_ndarray(&tmp, &mut dst_map, dst, rotation, flip)?;
                }
                // Flip-only or 180-degree rotation: resize into a same-sized
                // temporary, then flip/rotate it into the destination.
                (Rotation::None, _) | (Rotation::Rotate180, _) => {
                    let mut tmp = vec![0; dst.row_stride() * dst.height()];
                    let mut tmp_view = fast_image_resize::images::Image::from_slice_u8(
                        dst.width() as u32,
                        dst.height() as u32,
                        &mut tmp,
                        src_type,
                    )?;

                    let mut tmp_view = fast_image_resize::images::CroppedImageMut::new(
                        &mut tmp_view,
                        dst_rect.left as u32,
                        dst_rect.top as u32,
                        dst_rect.width as u32,
                        dst_rect.height as u32,
                    )?;

                    self.resizer.resize(&src_view, &mut tmp_view, &options)?;
                    Self::flip_rotate_ndarray(&tmp, &mut dst_map, dst, rotation, flip)?;
                }
            }
        } else {
            // No scaling required: flip/rotate directly from src to dst.
            Self::flip_rotate_ndarray(&src_map, &mut dst_map, dst, rotation, flip)?;
        }
        Ok(())
    }
1463
1464    fn adjust_dest_rect_for_rotate_flip(
1465        crop: &mut Rect,
1466        dst: &TensorImage,
1467        rot: Rotation,
1468        flip: Flip,
1469    ) {
1470        match rot {
1471            Rotation::None => {}
1472            Rotation::Clockwise90 => {
1473                *crop = Rect {
1474                    left: crop.top,
1475                    top: dst.width() - crop.left - crop.width,
1476                    width: crop.height,
1477                    height: crop.width,
1478                }
1479            }
1480            Rotation::Rotate180 => {
1481                *crop = Rect {
1482                    left: dst.width() - crop.left - crop.width,
1483                    top: dst.height() - crop.top - crop.height,
1484                    width: crop.width,
1485                    height: crop.height,
1486                }
1487            }
1488            Rotation::CounterClockwise90 => {
1489                *crop = Rect {
1490                    left: dst.height() - crop.top - crop.height,
1491                    top: crop.left,
1492                    width: crop.height,
1493                    height: crop.width,
1494                }
1495            }
1496        }
1497
1498        match flip {
1499            Flip::None => {}
1500            Flip::Vertical => crop.top = dst.height() - crop.top - crop.height,
1501            Flip::Horizontal => crop.left = dst.width() - crop.left - crop.width,
1502        }
1503    }
1504
1505    /// Fills the area outside a crop rectangle with the specified color.
1506    pub fn fill_image_outside_crop(dst: &mut TensorImage, rgba: [u8; 4], crop: Rect) -> Result<()> {
1507        let dst_fourcc = dst.fourcc();
1508        let mut dst_map = dst.tensor().map()?;
1509        let dst = (dst_map.as_mut_slice(), dst.width(), dst.height());
1510        match dst_fourcc {
1511            RGBA => Self::fill_image_outside_crop_(dst, rgba, crop),
1512            RGB => Self::fill_image_outside_crop_(dst, Self::rgba_to_rgb(rgba), crop),
1513            GREY => Self::fill_image_outside_crop_(dst, Self::rgba_to_grey(rgba), crop),
1514            YUYV => Self::fill_image_outside_crop_(
1515                (dst.0, dst.1 / 2, dst.2),
1516                Self::rgba_to_yuyv(rgba),
1517                Rect::new(crop.left / 2, crop.top, crop.width.div_ceil(2), crop.height),
1518            ),
1519            PLANAR_RGB => Self::fill_image_outside_crop_planar(dst, Self::rgba_to_rgb(rgba), crop),
1520            PLANAR_RGBA => Self::fill_image_outside_crop_planar(dst, rgba, crop),
1521            NV16 => {
1522                let yuyv = Self::rgba_to_yuyv(rgba);
1523                Self::fill_image_outside_crop_yuv_semiplanar(dst, yuyv[0], [yuyv[1], yuyv[3]], crop)
1524            }
1525            _ => Err(Error::Internal(format!(
1526                "Found unexpected destination {}",
1527                dst_fourcc.display()
1528            ))),
1529        }
1530    }
1531
1532    /// Generic fill for TensorImageDst types.
1533    pub(crate) fn fill_image_outside_crop_generic<D: TensorImageDst>(
1534        dst: &mut D,
1535        rgba: [u8; 4],
1536        crop: Rect,
1537    ) -> Result<()> {
1538        let dst_fourcc = dst.fourcc();
1539        let dst_width = dst.width();
1540        let dst_height = dst.height();
1541        let mut dst_map = dst.tensor_mut().map()?;
1542        let dst = (dst_map.as_mut_slice(), dst_width, dst_height);
1543        match dst_fourcc {
1544            RGBA => Self::fill_image_outside_crop_(dst, rgba, crop),
1545            RGB => Self::fill_image_outside_crop_(dst, Self::rgba_to_rgb(rgba), crop),
1546            GREY => Self::fill_image_outside_crop_(dst, Self::rgba_to_grey(rgba), crop),
1547            YUYV => Self::fill_image_outside_crop_(
1548                (dst.0, dst.1 / 2, dst.2),
1549                Self::rgba_to_yuyv(rgba),
1550                Rect::new(crop.left / 2, crop.top, crop.width.div_ceil(2), crop.height),
1551            ),
1552            PLANAR_RGB => Self::fill_image_outside_crop_planar(dst, Self::rgba_to_rgb(rgba), crop),
1553            PLANAR_RGBA => Self::fill_image_outside_crop_planar(dst, rgba, crop),
1554            NV16 => {
1555                let yuyv = Self::rgba_to_yuyv(rgba);
1556                Self::fill_image_outside_crop_yuv_semiplanar(dst, yuyv[0], [yuyv[1], yuyv[3]], crop)
1557            }
1558            _ => Err(Error::Internal(format!(
1559                "Found unexpected destination {}",
1560                dst_fourcc.display()
1561            ))),
1562        }
1563    }
1564
    /// Fills every `N`-byte pixel outside `crop` with `pix` in an interleaved
    /// image that is `dst_width` pixels wide.
    ///
    /// The outside area is covered as three spans over the flattened pixel
    /// array: everything before the crop's first pixel (the top rows plus the
    /// left margin of the first cropped row), everything after the crop's
    /// last pixel, and the per-row "wrap" between one cropped row's right
    /// edge and the next row's left edge.
    ///
    /// NOTE(review): assumes the crop rect lies within the image — the span
    /// starts are not clamped to the buffer length, only the span ends are.
    fn fill_image_outside_crop_<const N: usize>(
        (dst, dst_width, _dst_height): (&mut [u8], usize, usize),
        pix: [u8; N],
        crop: Rect,
    ) -> Result<()> {
        use rayon::{
            iter::{IntoParallelRefMutIterator, ParallelIterator},
            prelude::ParallelSliceMut,
        };

        // View the buffer as N-byte pixels.
        let s = dst.as_chunks_mut::<N>().0;
        // calculate the top/bottom spans (in pixel indices)
        let top_offset = (0, (crop.top * dst_width + crop.left));
        let bottom_offset = (
            ((crop.top + crop.height) * dst_width + crop.left).min(s.len()),
            s.len(),
        );

        s[top_offset.0..top_offset.1]
            .par_iter_mut()
            .for_each(|x| *x = pix);

        s[bottom_offset.0..bottom_offset.1]
            .par_iter_mut()
            .for_each(|x| *x = pix);

        // A full-width crop has no side margins, so we are done.
        if dst_width == crop.width {
            return Ok(());
        }

        // the middle part has a stride as well: starting at the crop's right
        // edge, every dst_width-pixel chunk begins with the gap (right margin
        // of one row plus left margin of the next) that must be filled.
        let middle_stride = dst_width - crop.width;
        let middle_offset = (
            (crop.top * dst_width + crop.left + crop.width),
            ((crop.top + crop.height) * dst_width + crop.left + crop.width).min(s.len()),
        );

        s[middle_offset.0..middle_offset.1]
            .par_chunks_exact_mut(dst_width)
            .for_each(|row| {
                for p in &mut row[0..middle_stride] {
                    *p = pix;
                }
            });

        Ok(())
    }
1612
1613    fn fill_image_outside_crop_planar<const N: usize>(
1614        (dst, dst_width, dst_height): (&mut [u8], usize, usize),
1615        pix: [u8; N],
1616        crop: Rect,
1617    ) -> Result<()> {
1618        use rayon::{
1619            iter::{IntoParallelRefMutIterator, ParallelIterator},
1620            prelude::ParallelSliceMut,
1621        };
1622
1623        // map.as_mut_slice().splitn_mut(n, pred)
1624        let s_rem = dst;
1625
1626        s_rem
1627            .par_chunks_exact_mut(dst_height * dst_width)
1628            .zip(pix)
1629            .for_each(|(s, p)| {
1630                let top_offset = (0, (crop.top * dst_width + crop.left));
1631                let bottom_offset = (
1632                    ((crop.top + crop.height) * dst_width + crop.left).min(s.len()),
1633                    s.len(),
1634                );
1635
1636                s[top_offset.0..top_offset.1]
1637                    .par_iter_mut()
1638                    .for_each(|x| *x = p);
1639
1640                s[bottom_offset.0..bottom_offset.1]
1641                    .par_iter_mut()
1642                    .for_each(|x| *x = p);
1643
1644                if dst_width == crop.width {
1645                    return;
1646                }
1647
1648                // the middle part has a stride as well
1649                let middle_stride = dst_width - crop.width;
1650                let middle_offset = (
1651                    (crop.top * dst_width + crop.left + crop.width),
1652                    ((crop.top + crop.height) * dst_width + crop.left + crop.width).min(s.len()),
1653                );
1654
1655                s[middle_offset.0..middle_offset.1]
1656                    .par_chunks_exact_mut(dst_width)
1657                    .for_each(|row| {
1658                        for x in &mut row[0..middle_stride] {
1659                            *x = p;
1660                        }
1661                    });
1662            });
1663        Ok(())
1664    }
1665
1666    fn fill_image_outside_crop_yuv_semiplanar(
1667        (dst, dst_width, dst_height): (&mut [u8], usize, usize),
1668        y: u8,
1669        uv: [u8; 2],
1670        mut crop: Rect,
1671    ) -> Result<()> {
1672        let (y_plane, uv_plane) = dst.split_at_mut(dst_width * dst_height);
1673        Self::fill_image_outside_crop_::<1>((y_plane, dst_width, dst_height), [y], crop)?;
1674        crop.left /= 2;
1675        crop.width /= 2;
1676        Self::fill_image_outside_crop_::<2>((uv_plane, dst_width / 2, dst_height), uv, crop)?;
1677        Ok(())
1678    }
1679
1680    fn rgba_to_rgb(rgba: [u8; 4]) -> [u8; 3] {
1681        let [r, g, b, _] = rgba;
1682        [r, g, b]
1683    }
1684
1685    fn rgba_to_grey(rgba: [u8; 4]) -> [u8; 1] {
1686        const BIAS: i32 = 20;
1687        const KR: f64 = 0.2126f64;
1688        const KB: f64 = 0.0722f64;
1689        const KG: f64 = 1.0 - KR - KB;
1690        const Y_R: i32 = (KR * (255 << BIAS) as f64 / 255.0).round() as i32;
1691        const Y_G: i32 = (KG * (255 << BIAS) as f64 / 255.0).round() as i32;
1692        const Y_B: i32 = (KB * (255 << BIAS) as f64 / 255.0).round() as i32;
1693
1694        const ROUND: i32 = 1 << (BIAS - 1);
1695
1696        let [r, g, b, _] = rgba;
1697        let y = ((Y_R * r as i32 + Y_G * g as i32 + Y_B * b as i32 + ROUND) >> BIAS) as u8;
1698        [y]
1699    }
1700
1701    fn rgba_to_yuyv(rgba: [u8; 4]) -> [u8; 4] {
1702        const KR: f64 = 0.2126f64;
1703        const KB: f64 = 0.0722f64;
1704        const KG: f64 = 1.0 - KR - KB;
1705        const BIAS: i32 = 20;
1706
1707        const Y_R: i32 = (KR * (219 << BIAS) as f64 / 255.0).round() as i32;
1708        const Y_G: i32 = (KG * (219 << BIAS) as f64 / 255.0).round() as i32;
1709        const Y_B: i32 = (KB * (219 << BIAS) as f64 / 255.0).round() as i32;
1710
1711        const U_R: i32 = (-KR / (KR + KG) / 2.0 * (224 << BIAS) as f64 / 255.0).round() as i32;
1712        const U_G: i32 = (-KG / (KR + KG) / 2.0 * (224 << BIAS) as f64 / 255.0).round() as i32;
1713        const U_B: i32 = (0.5_f64 * (224 << BIAS) as f64 / 255.0).ceil() as i32;
1714
1715        const V_R: i32 = (0.5_f64 * (224 << BIAS) as f64 / 255.0).ceil() as i32;
1716        const V_G: i32 = (-KG / (KG + KB) / 2.0 * (224 << BIAS) as f64 / 255.0).round() as i32;
1717        const V_B: i32 = (-KB / (KG + KB) / 2.0 * (224 << BIAS) as f64 / 255.0).round() as i32;
1718        const ROUND: i32 = 1 << (BIAS - 1);
1719
1720        let [r, g, b, _] = rgba;
1721        let r = r as i32;
1722        let g = g as i32;
1723        let b = b as i32;
1724        let y = (((Y_R * r + Y_G * g + Y_B * b + ROUND) >> BIAS) + 16) as u8;
1725        let u = (((U_R * r + U_G * g + U_B * b + ROUND) >> BIAS) + 128) as u8;
1726        let v = (((V_R * r + V_G * g + V_B * b + ROUND) >> BIAS) + 128) as u8;
1727
1728        [y, u, y, v]
1729    }
1730
    /// Alpha-blends a multi-class (semantic) segmentation map over the first
    /// three channels of `dst_slice`.
    ///
    /// The mask covers the normalized window `[xmin, xmax] x [ymin, ymax]` of
    /// `dst`; each destination pixel inside that window is mapped back to the
    /// nearest mask cell and tinted with the color of that cell's argmax
    /// class. The highest class index is skipped (presumably background —
    /// confirm with the model contract). `color[3]` doubles as blend alpha.
    fn render_modelpack_segmentation(
        &mut self,
        dst: &TensorImage,
        dst_slice: &mut [u8],
        segmentation: &Segmentation,
    ) -> Result<()> {
        use ndarray_stats::QuantileExt;

        let seg = &segmentation.segmentation;
        let [seg_height, seg_width, seg_classes] = *seg.shape() else {
            unreachable!("Array3 did not have [usize; 3] as shape");
        };
        // Window of the destination covered by the mask, fractional pixels.
        let start_y = (dst.height() as f32 * segmentation.ymin).round();
        let end_y = (dst.height() as f32 * segmentation.ymax).round();
        let start_x = (dst.width() as f32 * segmentation.xmin).round();
        let end_x = (dst.width() as f32 * segmentation.xmax).round();

        // Align-corners style scale from destination pixels to mask cells.
        // NOTE(review): a window narrower than 2px divides by zero here; the
        // clamped lookup below keeps that from panicking, but confirm the
        // intended behavior for degenerate windows.
        let scale_x = (seg_width as f32 - 1.0) / ((end_x - start_x) - 1.0);
        let scale_y = (seg_height as f32 - 1.0) / ((end_y - start_y) - 1.0);

        // Integer loop bounds, clamped to the destination dimensions.
        let start_x_u = (start_x as usize).min(dst.width());
        let start_y_u = (start_y as usize).min(dst.height());
        let end_x_u = (end_x as usize).min(dst.width());
        let end_y_u = (end_y as usize).min(dst.height());

        // Winning class per mask cell, computed once for the whole mask.
        let argmax = seg.map_axis(Axis(2), |r| r.argmax().unwrap());
        // Nearest-neighbour class lookup, clamped to the mask bounds.
        let get_value_at_nearest = |x: f32, y: f32| -> usize {
            let x = x.round() as usize;
            let y = y.round() as usize;
            argmax
                .get([y.min(seg_height - 1), x.min(seg_width - 1)])
                .copied()
                .unwrap_or(0)
        };

        for y in start_y_u..end_y_u {
            for x in start_x_u..end_x_u {
                let seg_x = (x as f32 - start_x) * scale_x;
                let seg_y = (y as f32 - start_y) * scale_y;
                let label = get_value_at_nearest(seg_x, seg_y);

                // The last class is never rendered.
                if label == seg_classes - 1 {
                    continue;
                }

                let color = self.colors[label % self.colors.len()];

                let alpha = color[3] as u16;

                // Blend over dst: out = (color * a + dst * (255 - a)) / 255.
                let dst_index = (y * dst.row_stride()) + (x * dst.channels());
                for c in 0..3 {
                    dst_slice[dst_index + c] = ((color[c] as u16 * alpha
                        + dst_slice[dst_index + c] as u16 * (255 - alpha))
                        / 255) as u8;
                }
            }
        }

        Ok(())
    }
1791
1792    fn render_yolo_segmentation(
1793        &mut self,
1794        dst: &TensorImage,
1795        dst_slice: &mut [u8],
1796        segmentation: &Segmentation,
1797        class: usize,
1798    ) -> Result<()> {
1799        let seg = &segmentation.segmentation;
1800        let [seg_height, seg_width, classes] = *seg.shape() else {
1801            unreachable!("Array3 did not have [usize;3] as shape");
1802        };
1803        debug_assert_eq!(classes, 1);
1804
1805        let start_y = (dst.height() as f32 * segmentation.ymin).round();
1806        let end_y = (dst.height() as f32 * segmentation.ymax).round();
1807        let start_x = (dst.width() as f32 * segmentation.xmin).round();
1808        let end_x = (dst.width() as f32 * segmentation.xmax).round();
1809
1810        let scale_x = (seg_width as f32 - 1.0) / ((end_x - start_x) - 1.0);
1811        let scale_y = (seg_height as f32 - 1.0) / ((end_y - start_y) - 1.0);
1812
1813        let start_x_u = (start_x as usize).min(dst.width());
1814        let start_y_u = (start_y as usize).min(dst.height());
1815        let end_x_u = (end_x as usize).min(dst.width());
1816        let end_y_u = (end_y as usize).min(dst.height());
1817
1818        for y in start_y_u..end_y_u {
1819            for x in start_x_u..end_x_u {
1820                let seg_x = ((x as f32 - start_x) * scale_x) as usize;
1821                let seg_y = ((y as f32 - start_y) * scale_y) as usize;
1822                let val = *seg.get([seg_y, seg_x, 0]).unwrap_or(&0);
1823
1824                if val < 127 {
1825                    continue;
1826                }
1827
1828                let color = self.colors[class % self.colors.len()];
1829
1830                let alpha = color[3] as u16;
1831
1832                let dst_index = (y * dst.row_stride()) + (x * dst.channels());
1833                for c in 0..3 {
1834                    dst_slice[dst_index + c] = ((color[c] as u16 * alpha
1835                        + dst_slice[dst_index + c] as u16 * (255 - alpha))
1836                        / 255) as u8;
1837                }
1838            }
1839        }
1840
1841        Ok(())
1842    }
1843
1844    fn render_box(
1845        &mut self,
1846        dst: &TensorImage,
1847        dst_slice: &mut [u8],
1848        detect: &[DetectBox],
1849    ) -> Result<()> {
1850        const LINE_THICKNESS: usize = 3;
1851        for d in detect {
1852            use edgefirst_decoder::BoundingBox;
1853
1854            let label = d.label;
1855            let [r, g, b, _] = self.colors[label % self.colors.len()];
1856            let bbox = d.bbox.to_canonical();
1857            let bbox = BoundingBox {
1858                xmin: bbox.xmin.clamp(0.0, 1.0),
1859                ymin: bbox.ymin.clamp(0.0, 1.0),
1860                xmax: bbox.xmax.clamp(0.0, 1.0),
1861                ymax: bbox.ymax.clamp(0.0, 1.0),
1862            };
1863            let inner = [
1864                ((dst.width() - 1) as f32 * bbox.xmin - 0.5).round() as usize,
1865                ((dst.height() - 1) as f32 * bbox.ymin - 0.5).round() as usize,
1866                ((dst.width() - 1) as f32 * bbox.xmax + 0.5).round() as usize,
1867                ((dst.height() - 1) as f32 * bbox.ymax + 0.5).round() as usize,
1868            ];
1869
1870            let outer = [
1871                inner[0].saturating_sub(LINE_THICKNESS),
1872                inner[1].saturating_sub(LINE_THICKNESS),
1873                (inner[2] + LINE_THICKNESS).min(dst.width()),
1874                (inner[3] + LINE_THICKNESS).min(dst.height()),
1875            ];
1876
1877            // top line
1878            for y in outer[1] + 1..=inner[1] {
1879                for x in outer[0] + 1..outer[2] {
1880                    let index = (y * dst.row_stride()) + (x * dst.channels());
1881                    dst_slice[index..(index + 3)].copy_from_slice(&[r, g, b]);
1882                }
1883            }
1884
1885            // left and right lines
1886            for y in inner[1]..inner[3] {
1887                for x in outer[0] + 1..=inner[0] {
1888                    let index = (y * dst.row_stride()) + (x * dst.channels());
1889                    dst_slice[index..(index + 3)].copy_from_slice(&[r, g, b]);
1890                }
1891
1892                for x in inner[2]..outer[2] {
1893                    let index = (y * dst.row_stride()) + (x * dst.channels());
1894                    dst_slice[index..(index + 3)].copy_from_slice(&[r, g, b]);
1895                }
1896            }
1897
1898            // bottom line
1899            for y in inner[3]..outer[3] {
1900                for x in outer[0] + 1..outer[2] {
1901                    let index = (y * dst.row_stride()) + (x * dst.channels());
1902                    dst_slice[index..(index + 3)].copy_from_slice(&[r, g, b]);
1903                }
1904            }
1905        }
1906        Ok(())
1907    }
1908
    /// Materialize segmentation masks from proto data into `Vec<Segmentation>`.
    ///
    /// This is the CPU-side decode step of the hybrid mask rendering path:
    /// call this to get pre-decoded masks, then pass them to
    /// [`draw_masks`](crate::ImageProcessorTrait::draw_masks) for GPU overlay.
    /// Benchmarks show this hybrid path (CPU decode + GL overlay) is faster
    /// than the fused GPU `draw_masks_proto` on all tested platforms.
    ///
    /// Each detection in `detect` is paired positionally with a coefficient
    /// vector from `proto_data.mask_coefficients`; the mask for a detection
    /// is the sigmoid of `coefficients @ protos` over the detection's proto
    /// ROI, quantized to `u8`.
    pub fn materialize_segmentations(
        &self,
        detect: &[crate::DetectBox],
        proto_data: &crate::ProtoData,
    ) -> crate::Result<Vec<edgefirst_decoder::Segmentation>> {
        if detect.is_empty() || proto_data.mask_coefficients.is_empty() {
            return Ok(Vec::new());
        }

        // Protos are indexed [H, W, P]; widen to f32 once up front.
        let protos_cow = proto_data.protos.as_f32();
        let protos = protos_cow.as_ref();
        let proto_h = protos.shape()[0];
        let proto_w = protos.shape()[1];
        let num_protos = protos.shape()[2];

        detect
            .iter()
            .zip(proto_data.mask_coefficients.iter())
            .map(|(det, coeff)| {
                // Clamp bbox to [0, 1]
                let xmin = det.bbox.xmin.clamp(0.0, 1.0);
                let ymin = det.bbox.ymin.clamp(0.0, 1.0);
                let xmax = det.bbox.xmax.clamp(0.0, 1.0);
                let ymax = det.bbox.ymax.clamp(0.0, 1.0);

                // Map to proto-space pixel coordinates (clamp to valid range)
                // x0/y0 are inclusive, x1/y1 exclusive.
                let x0 = ((xmin * proto_w as f32) as usize).min(proto_w.saturating_sub(1));
                let y0 = ((ymin * proto_h as f32) as usize).min(proto_h.saturating_sub(1));
                let x1 = ((xmax * proto_w as f32).ceil() as usize).min(proto_w);
                let y1 = ((ymax * proto_h as f32).ceil() as usize).min(proto_h);

                // Keep at least a 1x1 ROI even for degenerate boxes.
                let roi_w = x1.saturating_sub(x0).max(1);
                let roi_h = y1.saturating_sub(y0).max(1);

                // Extract proto ROI and compute mask_coeff @ protos:
                // the ROI is flattened to (H*W, P), transposed via
                // `reversed_axes` to (P, H*W), then (1, P) . (P, H*W)
                // yields one mask value per ROI cell.
                let roi = protos.slice(ndarray::s![y0..y0 + roi_h, x0..x0 + roi_w, ..]);
                let coeff_arr = ndarray::Array2::from_shape_vec((1, num_protos), coeff.clone())
                    .map_err(|e| crate::Error::Internal(format!("mask coeff shape: {e}")))?;
                let protos_2d = roi
                    .to_shape((roi_h * roi_w, num_protos))
                    .map_err(|e| crate::Error::Internal(format!("proto reshape: {e}")))?
                    .reversed_axes();
                let mask = coeff_arr.dot(&protos_2d);
                // Sigmoid activation, quantized to u8 (0..=255).
                let mask = mask
                    .into_shape_with_order((roi_h, roi_w, 1))
                    .map_err(|e| crate::Error::Internal(format!("mask reshape: {e}")))?
                    .mapv(|x: f32| {
                        let sigmoid = 1.0 / (1.0 + (-x).exp());
                        (sigmoid * 255.0).round() as u8
                    });

                // Report the ROI back in normalized proto-space coordinates.
                Ok(edgefirst_decoder::Segmentation {
                    xmin: x0 as f32 / proto_w as f32,
                    ymin: y0 as f32 / proto_h as f32,
                    xmax: x1 as f32 / proto_w as f32,
                    ymax: y1 as f32 / proto_h as f32,
                    segmentation: mask,
                })
            })
            .collect::<crate::Result<Vec<_>>>()
    }
1977
1978    /// Renders per-instance grayscale masks from raw prototype data at full
1979    /// output resolution. Used internally by [`decode_masks_atlas`] to generate
1980    /// per-detection mask crops that are then packed into the atlas.
1981    fn render_masks_from_protos(
1982        &mut self,
1983        detect: &[crate::DetectBox],
1984        proto_data: crate::ProtoData,
1985        output_width: usize,
1986        output_height: usize,
1987    ) -> Result<Vec<crate::MaskResult>> {
1988        use crate::FunctionTimer;
1989
1990        let _timer = FunctionTimer::new("CPUProcessor::render_masks_from_protos");
1991
1992        if detect.is_empty() || proto_data.mask_coefficients.is_empty() {
1993            return Ok(Vec::new());
1994        }
1995
1996        let protos_cow = proto_data.protos.as_f32();
1997        let protos = protos_cow.as_ref();
1998        let proto_h = protos.shape()[0];
1999        let proto_w = protos.shape()[1];
2000        let num_protos = protos.shape()[2];
2001
2002        let mut results = Vec::with_capacity(detect.len());
2003
2004        for (det, coeff) in detect.iter().zip(proto_data.mask_coefficients.iter()) {
2005            let start_x = (output_width as f32 * det.bbox.xmin).round() as usize;
2006            let start_y = (output_height as f32 * det.bbox.ymin).round() as usize;
2007            // Use span-based rounding to match the numpy reference convention.
2008            let bbox_w = ((det.bbox.xmax - det.bbox.xmin) * output_width as f32)
2009                .round()
2010                .max(1.0) as usize;
2011            let bbox_h = ((det.bbox.ymax - det.bbox.ymin) * output_height as f32)
2012                .round()
2013                .max(1.0) as usize;
2014            let bbox_w = bbox_w.min(output_width.saturating_sub(start_x));
2015            let bbox_h = bbox_h.min(output_height.saturating_sub(start_y));
2016
2017            let mut pixels = vec![0u8; bbox_w * bbox_h];
2018
2019            for row in 0..bbox_h {
2020                let y = start_y + row;
2021                for col in 0..bbox_w {
2022                    let x = start_x + col;
2023                    let px = (x as f32 / output_width as f32) * proto_w as f32 - 0.5;
2024                    let py = (y as f32 / output_height as f32) * proto_h as f32 - 0.5;
2025                    let acc = bilinear_dot(protos, coeff, num_protos, px, py, proto_w, proto_h);
2026                    let mask = 1.0 / (1.0 + (-acc).exp());
2027                    pixels[row * bbox_w + col] = if mask > 0.5 { 255 } else { 0 };
2028                }
2029            }
2030
2031            results.push(crate::MaskResult {
2032                x: start_x,
2033                y: start_y,
2034                w: bbox_w,
2035                h: bbox_h,
2036                pixels,
2037            });
2038        }
2039
2040        Ok(results)
2041    }
2042}
2043
2044impl ImageProcessorTrait for CPUProcessor {
2045    fn convert(
2046        &mut self,
2047        src: &TensorImage,
2048        dst: &mut TensorImage,
2049        rotation: Rotation,
2050        flip: Flip,
2051        crop: Crop,
2052    ) -> Result<()> {
2053        // Int8 formats: convert directly into dst as uint8 (layouts are
2054        // identical), then XOR 0x80 in-place. Avoids a temporary allocation.
2055        if fourcc_is_int8(dst.fourcc()) {
2056            let int8_fourcc = dst.fourcc();
2057            dst.set_fourcc(fourcc_uint8_equivalent(int8_fourcc));
2058            self.convert(src, dst, rotation, flip, crop)?;
2059            dst.set_fourcc(int8_fourcc);
2060            let mut dst_map = dst.tensor().map()?;
2061            for byte in dst_map.iter_mut() {
2062                *byte ^= 0x80;
2063            }
2064            return Ok(());
2065        }
2066
2067        crop.check_crop(src, dst)?;
2068        // supported destinations and srcs:
2069        let intermediate = match (src.fourcc(), dst.fourcc()) {
2070            (NV12, RGB) => RGB,
2071            (NV12, RGBA) => RGBA,
2072            (NV12, GREY) => GREY,
2073            (NV12, YUYV) => RGBA, // RGBA intermediary for YUYV dest resize/convert/rotation/flip
2074            (NV12, NV16) => RGBA, // RGBA intermediary for YUYV dest resize/convert/rotation/flip
2075            (NV12, PLANAR_RGB) => RGB,
2076            (NV12, PLANAR_RGBA) => RGBA,
2077            (YUYV, RGB) => RGB,
2078            (YUYV, RGBA) => RGBA,
2079            (YUYV, GREY) => GREY,
2080            (YUYV, YUYV) => RGBA, // RGBA intermediary for YUYV dest resize/convert/rotation/flip
2081            (YUYV, PLANAR_RGB) => RGB,
2082            (YUYV, PLANAR_RGBA) => RGBA,
2083            (YUYV, NV16) => RGBA,
2084            (VYUY, RGB) => RGB,
2085            (VYUY, RGBA) => RGBA,
2086            (VYUY, GREY) => GREY,
2087            (VYUY, VYUY) => RGBA, // RGBA intermediary for VYUY dest resize/convert/rotation/flip
2088            (VYUY, PLANAR_RGB) => RGB,
2089            (VYUY, PLANAR_RGBA) => RGBA,
2090            (VYUY, NV16) => RGBA,
2091            (RGBA, RGB) => RGBA,
2092            (RGBA, RGBA) => RGBA,
2093            (RGBA, GREY) => GREY,
2094            (RGBA, YUYV) => RGBA, // RGBA intermediary for YUYV dest resize/convert/rotation/flip
2095            (RGBA, PLANAR_RGB) => RGBA,
2096            (RGBA, PLANAR_RGBA) => RGBA,
2097            (RGBA, NV16) => RGBA,
2098            (RGB, RGB) => RGB,
2099            (RGB, RGBA) => RGB,
2100            (RGB, GREY) => GREY,
2101            (RGB, YUYV) => RGB, // RGB intermediary for YUYV dest resize/convert/rotation/flip
2102            (RGB, PLANAR_RGB) => RGB,
2103            (RGB, PLANAR_RGBA) => RGB,
2104            (RGB, NV16) => RGB,
2105            (GREY, RGB) => RGB,
2106            (GREY, RGBA) => RGBA,
2107            (GREY, GREY) => GREY,
2108            (GREY, YUYV) => GREY,
2109            (GREY, PLANAR_RGB) => GREY,
2110            (GREY, PLANAR_RGBA) => GREY,
2111            (GREY, NV16) => GREY,
2112            (NV12, BGRA) => RGBA,
2113            (YUYV, BGRA) => RGBA,
2114            (VYUY, BGRA) => RGBA,
2115            (RGBA, BGRA) => RGBA,
2116            (RGB, BGRA) => RGB,
2117            (GREY, BGRA) => GREY,
2118            (BGRA, BGRA) => BGRA,
2119            (s, d) => {
2120                return Err(Error::NotSupported(format!(
2121                    "Conversion from {} to {}",
2122                    s.display(),
2123                    d.display()
2124                )));
2125            }
2126        };
2127
2128        // let crop = crop.src_rect;
2129
2130        let need_resize_flip_rotation = rotation != Rotation::None
2131            || flip != Flip::None
2132            || src.width() != dst.width()
2133            || src.height() != dst.height()
2134            || crop.src_rect.is_some_and(|crop| {
2135                crop != Rect {
2136                    left: 0,
2137                    top: 0,
2138                    width: src.width(),
2139                    height: src.height(),
2140                }
2141            })
2142            || crop.dst_rect.is_some_and(|crop| {
2143                crop != Rect {
2144                    left: 0,
2145                    top: 0,
2146                    width: dst.width(),
2147                    height: dst.height(),
2148                }
2149            });
2150
2151        // check if a direct conversion can be done
2152        if !need_resize_flip_rotation && Self::support_conversion(src.fourcc(), dst.fourcc()) {
2153            return Self::convert_format(src, dst);
2154        };
2155
2156        // any extra checks
2157        if dst.fourcc() == YUYV && !dst.width().is_multiple_of(2) {
2158            return Err(Error::NotSupported(format!(
2159                "{} destination must have width divisible by 2",
2160                dst.fourcc().display(),
2161            )));
2162        }
2163
2164        // create tmp buffer
2165        let mut tmp_buffer;
2166        let tmp;
2167        if intermediate != src.fourcc() {
2168            tmp_buffer = TensorImage::new(
2169                src.width(),
2170                src.height(),
2171                intermediate,
2172                Some(edgefirst_tensor::TensorMemory::Mem),
2173            )?;
2174
2175            Self::convert_format(src, &mut tmp_buffer)?;
2176            tmp = &tmp_buffer;
2177        } else {
2178            tmp = src;
2179        }
2180
2181        // format must be RGB/RGBA/GREY
2182        matches!(tmp.fourcc(), RGB | RGBA | GREY);
2183        if tmp.fourcc() == dst.fourcc() {
2184            self.resize_flip_rotate(tmp, dst, rotation, flip, crop)?;
2185        } else if !need_resize_flip_rotation {
2186            Self::convert_format(tmp, dst)?;
2187        } else {
2188            let mut tmp2 = TensorImage::new(
2189                dst.width(),
2190                dst.height(),
2191                tmp.fourcc(),
2192                Some(edgefirst_tensor::TensorMemory::Mem),
2193            )?;
2194            if crop.dst_rect.is_some_and(|crop| {
2195                crop != Rect {
2196                    left: 0,
2197                    top: 0,
2198                    width: dst.width(),
2199                    height: dst.height(),
2200                }
2201            }) && crop.dst_color.is_none()
2202            {
2203                // convert the dst into tmp2 when there is a dst crop
2204                // TODO: this could be optimized by changing convert_format to take a
2205                // destination crop?
2206
2207                Self::convert_format(dst, &mut tmp2)?;
2208            }
2209            self.resize_flip_rotate(tmp, &mut tmp2, rotation, flip, crop)?;
2210            Self::convert_format(&tmp2, dst)?;
2211        }
2212        if let (Some(dst_rect), Some(dst_color)) = (crop.dst_rect, crop.dst_color) {
2213            let full_rect = Rect {
2214                left: 0,
2215                top: 0,
2216                width: dst.width(),
2217                height: dst.height(),
2218            };
2219            if dst_rect != full_rect {
2220                Self::fill_image_outside_crop(dst, dst_color, dst_rect)?;
2221            }
2222        }
2223
2224        Ok(())
2225    }
2226
    /// Like [`convert`](Self::convert), but writes into an externally-owned
    /// buffer via [`TensorImageRef`]. Supports a narrower set of destination
    /// formats than `convert` (no int8, packed-YUV or BGRA destinations).
    fn convert_ref(
        &mut self,
        src: &TensorImage,
        dst: &mut TensorImageRef<'_>,
        rotation: Rotation,
        flip: Flip,
        crop: Crop,
    ) -> Result<()> {
        crop.check_crop_ref(src, dst)?;

        // Determine intermediate format needed for conversion
        let intermediate = match (src.fourcc(), dst.fourcc()) {
            (NV12, RGB) => RGB,
            (NV12, RGBA) => RGBA,
            (NV12, GREY) => GREY,
            (NV12, PLANAR_RGB) => RGB,
            (NV12, PLANAR_RGBA) => RGBA,
            (YUYV, RGB) => RGB,
            (YUYV, RGBA) => RGBA,
            (YUYV, GREY) => GREY,
            (YUYV, PLANAR_RGB) => RGB,
            (YUYV, PLANAR_RGBA) => RGBA,
            (VYUY, RGB) => RGB,
            (VYUY, RGBA) => RGBA,
            (VYUY, GREY) => GREY,
            (VYUY, PLANAR_RGB) => RGB,
            (VYUY, PLANAR_RGBA) => RGBA,
            (RGBA, RGB) => RGBA,
            (RGBA, RGBA) => RGBA,
            (RGBA, GREY) => GREY,
            (RGBA, PLANAR_RGB) => RGBA,
            (RGBA, PLANAR_RGBA) => RGBA,
            (RGB, RGB) => RGB,
            (RGB, RGBA) => RGB,
            (RGB, GREY) => GREY,
            (RGB, PLANAR_RGB) => RGB,
            (RGB, PLANAR_RGBA) => RGB,
            (GREY, RGB) => RGB,
            (GREY, RGBA) => RGBA,
            (GREY, GREY) => GREY,
            (GREY, PLANAR_RGB) => GREY,
            (GREY, PLANAR_RGBA) => GREY,
            (s, d) => {
                return Err(Error::NotSupported(format!(
                    "Conversion from {} to {}",
                    s.display(),
                    d.display()
                )));
            }
        };

        // Geometry work is needed whenever rotation/flip is requested, the
        // sizes differ, or either crop rect is smaller than its full image.
        let need_resize_flip_rotation = rotation != Rotation::None
            || flip != Flip::None
            || src.width() != dst.width()
            || src.height() != dst.height()
            || crop.src_rect.is_some_and(|crop| {
                crop != Rect {
                    left: 0,
                    top: 0,
                    width: src.width(),
                    height: src.height(),
                }
            })
            || crop.dst_rect.is_some_and(|crop| {
                crop != Rect {
                    left: 0,
                    top: 0,
                    width: dst.width(),
                    height: dst.height(),
                }
            });

        // Simple case: no resize/flip/rotation needed
        if !need_resize_flip_rotation {
            // Try direct generic conversion (zero-copy path)
            if let Ok(()) = Self::convert_format_generic(src, dst) {
                return Ok(());
            }
        }

        // Complex case: need intermediate buffers
        // First, convert source to intermediate format if needed
        let mut tmp_buffer;
        let tmp: &TensorImage;
        if intermediate != src.fourcc() {
            tmp_buffer = TensorImage::new(
                src.width(),
                src.height(),
                intermediate,
                Some(edgefirst_tensor::TensorMemory::Mem),
            )?;
            Self::convert_format(src, &mut tmp_buffer)?;
            tmp = &tmp_buffer;
        } else {
            tmp = src;
        }

        // Process resize/flip/rotation if needed
        if need_resize_flip_rotation {
            // Create intermediate buffer for resize output
            let mut tmp2 = TensorImage::new(
                dst.width(),
                dst.height(),
                tmp.fourcc(),
                Some(edgefirst_tensor::TensorMemory::Mem),
            )?;
            self.resize_flip_rotate(tmp, &mut tmp2, rotation, flip, crop)?;

            // Final conversion to destination (zero-copy into dst)
            Self::convert_format_generic(&tmp2, dst)?;
        } else {
            // Direct conversion (already checked above, but handle edge cases)
            Self::convert_format_generic(tmp, dst)?;
        }

        // Handle destination crop fill if needed: when both a destination
        // rect and a fill color were requested, paint everything outside the
        // rect with that color.
        if let (Some(dst_rect), Some(dst_color)) = (crop.dst_rect, crop.dst_color) {
            let full_rect = Rect {
                left: 0,
                top: 0,
                width: dst.width(),
                height: dst.height(),
            };
            if dst_rect != full_rect {
                Self::fill_image_outside_crop_generic(dst, dst_color, dst_rect)?;
            }
        }

        Ok(())
    }
2357
2358    fn draw_masks(
2359        &mut self,
2360        dst: &mut TensorImage,
2361        detect: &[DetectBox],
2362        segmentation: &[Segmentation],
2363    ) -> Result<()> {
2364        if !matches!(dst.fourcc(), RGBA | RGB) {
2365            return Err(crate::Error::NotSupported(
2366                "CPU image rendering only supports RGBA or RGB images".to_string(),
2367            ));
2368        }
2369
2370        let _timer = FunctionTimer::new("CPUProcessor::draw_masks");
2371
2372        let mut map = dst.tensor.map()?;
2373        let dst_slice = map.as_mut_slice();
2374
2375        self.render_box(dst, dst_slice, detect)?;
2376
2377        if segmentation.is_empty() {
2378            return Ok(());
2379        }
2380
2381        // Semantic segmentation (e.g. ModelPack) has C > 1 (multi-class),
2382        // instance segmentation (e.g. YOLO) has C = 1 (binary per-instance).
2383        let is_semantic = segmentation[0].segmentation.shape()[2] > 1;
2384
2385        if is_semantic {
2386            self.render_modelpack_segmentation(dst, dst_slice, &segmentation[0])?;
2387        } else {
2388            for (seg, detect) in segmentation.iter().zip(detect) {
2389                self.render_yolo_segmentation(dst, dst_slice, seg, detect.label)?;
2390            }
2391        }
2392
2393        Ok(())
2394    }
2395
2396    fn draw_masks_proto(
2397        &mut self,
2398        dst: &mut TensorImage,
2399        detect: &[DetectBox],
2400        proto_data: &ProtoData,
2401    ) -> Result<()> {
2402        if !matches!(dst.fourcc(), RGBA | RGB) {
2403            return Err(crate::Error::NotSupported(
2404                "CPU image rendering only supports RGBA or RGB images".to_string(),
2405            ));
2406        }
2407
2408        let _timer = FunctionTimer::new("CPUProcessor::draw_masks_proto");
2409
2410        let mut map = dst.tensor.map()?;
2411        let dst_slice = map.as_mut_slice();
2412
2413        self.render_box(dst, dst_slice, detect)?;
2414
2415        if detect.is_empty() || proto_data.mask_coefficients.is_empty() {
2416            return Ok(());
2417        }
2418
2419        let protos_cow = proto_data.protos.as_f32();
2420        let protos = protos_cow.as_ref();
2421        let proto_h = protos.shape()[0];
2422        let proto_w = protos.shape()[1];
2423        let num_protos = protos.shape()[2];
2424        let dst_w = dst.width();
2425        let dst_h = dst.height();
2426        let row_stride = dst.row_stride();
2427        let channels = dst.channels();
2428
2429        for (det, coeff) in detect.iter().zip(proto_data.mask_coefficients.iter()) {
2430            let color = self.colors[det.label % self.colors.len()];
2431            let alpha = color[3] as u16;
2432
2433            // Pixel bounds of the detection in dst image space
2434            let start_x = (dst_w as f32 * det.bbox.xmin).round() as usize;
2435            let start_y = (dst_h as f32 * det.bbox.ymin).round() as usize;
2436            let end_x = ((dst_w as f32 * det.bbox.xmax).round() as usize).min(dst_w);
2437            let end_y = ((dst_h as f32 * det.bbox.ymax).round() as usize).min(dst_h);
2438
2439            for y in start_y..end_y {
2440                for x in start_x..end_x {
2441                    // Map pixel (x, y) to proto space
2442                    let px = (x as f32 / dst_w as f32) * proto_w as f32 - 0.5;
2443                    let py = (y as f32 / dst_h as f32) * proto_h as f32 - 0.5;
2444
2445                    // Bilinear interpolation + dot product
2446                    let acc = bilinear_dot(protos, coeff, num_protos, px, py, proto_w, proto_h);
2447
2448                    // Sigmoid threshold
2449                    let mask = 1.0 / (1.0 + (-acc).exp());
2450                    if mask < 0.5 {
2451                        continue;
2452                    }
2453
2454                    // Alpha blend
2455                    let dst_index = y * row_stride + x * channels;
2456                    for c in 0..3 {
2457                        dst_slice[dst_index + c] = ((color[c] as u16 * alpha
2458                            + dst_slice[dst_index + c] as u16 * (255 - alpha))
2459                            / 255) as u8;
2460                    }
2461                }
2462            }
2463        }
2464
2465        Ok(())
2466    }
2467
2468    fn decode_masks_atlas(
2469        &mut self,
2470        detect: &[crate::DetectBox],
2471        proto_data: crate::ProtoData,
2472        output_width: usize,
2473        output_height: usize,
2474    ) -> Result<(Vec<u8>, Vec<crate::MaskRegion>)> {
2475        use crate::FunctionTimer;
2476
2477        let _timer = FunctionTimer::new("CPUProcessor::decode_masks_atlas");
2478
2479        let padding = 4usize;
2480
2481        // Render per-detection masks via existing path
2482        let mask_results =
2483            self.render_masks_from_protos(detect, proto_data, output_width, output_height)?;
2484
2485        // Pack into compact atlas: each strip is padded bbox height
2486        let ow = output_width as i32;
2487        let oh = output_height as i32;
2488        let pad = padding as i32;
2489
2490        let mut regions = Vec::with_capacity(mask_results.len());
2491        let mut atlas_y = 0usize;
2492
2493        // Pre-compute regions
2494        for mr in &mask_results {
2495            let bx = mr.x as i32;
2496            let by = mr.y as i32;
2497            let bw = mr.w as i32;
2498            let bh = mr.h as i32;
2499            let padded_x = (bx - pad).max(0);
2500            let padded_y = (by - pad).max(0);
2501            let padded_w = ((bx + bw + pad).min(ow) - padded_x).max(1);
2502            let padded_h = ((by + bh + pad).min(oh) - padded_y).max(1);
2503            regions.push(crate::MaskRegion {
2504                atlas_y_offset: atlas_y,
2505                padded_x: padded_x as usize,
2506                padded_y: padded_y as usize,
2507                padded_w: padded_w as usize,
2508                padded_h: padded_h as usize,
2509                bbox_x: mr.x,
2510                bbox_y: mr.y,
2511                bbox_w: mr.w,
2512                bbox_h: mr.h,
2513            });
2514            atlas_y += padded_h as usize;
2515        }
2516
2517        let atlas_height = atlas_y;
2518        let mut atlas = vec![0u8; output_width * atlas_height];
2519
2520        for (mr, region) in mask_results.iter().zip(regions.iter()) {
2521            // Copy mask pixels into the atlas at the correct position
2522            for row in 0..mr.h {
2523                let dst_row = region.atlas_y_offset + (mr.y - region.padded_y) + row;
2524                let dst_start = dst_row * output_width + mr.x;
2525                let src_start = row * mr.w;
2526                if dst_start + mr.w <= atlas.len() && src_start + mr.w <= mr.pixels.len() {
2527                    atlas[dst_start..dst_start + mr.w]
2528                        .copy_from_slice(&mr.pixels[src_start..src_start + mr.w]);
2529                }
2530            }
2531        }
2532
2533        Ok((atlas, regions))
2534    }
2535
2536    fn set_class_colors(&mut self, colors: &[[u8; 4]]) -> Result<()> {
2537        for (c, new_c) in self.colors.iter_mut().zip(colors.iter()) {
2538            *c = *new_c;
2539        }
2540        Ok(())
2541    }
2542}
2543
2544/// Bilinear interpolation of proto values at `(px, py)` combined with dot
2545/// product against `coeff`. Returns the scalar accumulator before sigmoid.
2546///
2547/// Samples the four nearest proto texels, weights by bilinear coefficients,
2548/// and simultaneously computes the dot product with the mask coefficients.
2549#[inline]
2550fn bilinear_dot(
2551    protos: &ndarray::Array3<f32>,
2552    coeff: &[f32],
2553    num_protos: usize,
2554    px: f32,
2555    py: f32,
2556    proto_w: usize,
2557    proto_h: usize,
2558) -> f32 {
2559    let x0 = (px.floor() as isize).clamp(0, proto_w as isize - 1) as usize;
2560    let y0 = (py.floor() as isize).clamp(0, proto_h as isize - 1) as usize;
2561    let x1 = (x0 + 1).min(proto_w - 1);
2562    let y1 = (y0 + 1).min(proto_h - 1);
2563
2564    let fx = px - px.floor();
2565    let fy = py - py.floor();
2566
2567    let w00 = (1.0 - fx) * (1.0 - fy);
2568    let w10 = fx * (1.0 - fy);
2569    let w01 = (1.0 - fx) * fy;
2570    let w11 = fx * fy;
2571
2572    let mut acc = 0.0f32;
2573    for p in 0..num_protos {
2574        let val = w00 * protos[[y0, x0, p]]
2575            + w10 * protos[[y0, x1, p]]
2576            + w01 * protos[[y1, x0, p]]
2577            + w11 * protos[[y1, x1, p]];
2578        acc += coeff[p] * val;
2579    }
2580    acc
2581}
2582
2583#[cfg(test)]
2584#[cfg_attr(coverage_nightly, coverage(off))]
2585mod cpu_tests {
2586
2587    use super::*;
2588    use crate::{CPUProcessor, Rotation, TensorImageRef, BGRA, RGBA};
2589    use edgefirst_tensor::{Tensor, TensorMapTrait, TensorMemory};
2590    use image::buffer::ConvertBuffer;
2591
    // Expands to the short name of the enclosing function (module path
    // stripped). Used to label comparison-failure artifacts written by the
    // image-comparison helpers below.
    macro_rules! function {
        () => {{
            // `type_name_of` applied to a local item `f` yields
            // "path::to::enclosing_fn::f"; slicing off the trailing "::f"
            // (3 chars) and everything up to the last ':' leaves just the
            // enclosing function's name.
            fn f() {}
            fn type_name_of<T>(_: T) -> &'static str {
                std::any::type_name::<T>()
            }
            let name = type_name_of(f);

            // Find and cut the rest of the path
            match &name[..name.len() - 3].rfind(':') {
                Some(pos) => &name[pos + 1..name.len() - 3],
                None => &name[..name.len() - 3],
            }
        }};
    }
2607
2608    fn compare_images_convert_to_grey(
2609        img1: &TensorImage,
2610        img2: &TensorImage,
2611        threshold: f64,
2612        name: &str,
2613    ) {
2614        assert_eq!(img1.height(), img2.height(), "Heights differ");
2615        assert_eq!(img1.width(), img2.width(), "Widths differ");
2616
2617        let mut img_rgb1 = TensorImage::new(img1.width(), img1.height(), RGBA, None).unwrap();
2618        let mut img_rgb2 = TensorImage::new(img1.width(), img1.height(), RGBA, None).unwrap();
2619        CPUProcessor::convert_format(img1, &mut img_rgb1).unwrap();
2620        CPUProcessor::convert_format(img2, &mut img_rgb2).unwrap();
2621
2622        let image1 = image::RgbaImage::from_vec(
2623            img_rgb1.width() as u32,
2624            img_rgb1.height() as u32,
2625            img_rgb1.tensor().map().unwrap().to_vec(),
2626        )
2627        .unwrap();
2628
2629        let image2 = image::RgbaImage::from_vec(
2630            img_rgb2.width() as u32,
2631            img_rgb2.height() as u32,
2632            img_rgb2.tensor().map().unwrap().to_vec(),
2633        )
2634        .unwrap();
2635
2636        let similarity = image_compare::gray_similarity_structure(
2637            &image_compare::Algorithm::RootMeanSquared,
2638            &image1.convert(),
2639            &image2.convert(),
2640        )
2641        .expect("Image Comparison failed");
2642        if similarity.score < threshold {
2643            // image1.save(format!("{name}_1.png"));
2644            // image2.save(format!("{name}_2.png"));
2645            similarity
2646                .image
2647                .to_color_map()
2648                .save(format!("{name}.png"))
2649                .unwrap();
2650            panic!(
2651                "{name}: converted image and target image have similarity score too low: {} < {}",
2652                similarity.score, threshold
2653            )
2654        }
2655    }
2656
2657    fn compare_images_convert_to_rgb(
2658        img1: &TensorImage,
2659        img2: &TensorImage,
2660        threshold: f64,
2661        name: &str,
2662    ) {
2663        assert_eq!(img1.height(), img2.height(), "Heights differ");
2664        assert_eq!(img1.width(), img2.width(), "Widths differ");
2665
2666        let mut img_rgb1 = TensorImage::new(img1.width(), img1.height(), RGB, None).unwrap();
2667        let mut img_rgb2 = TensorImage::new(img1.width(), img1.height(), RGB, None).unwrap();
2668        CPUProcessor::convert_format(img1, &mut img_rgb1).unwrap();
2669        CPUProcessor::convert_format(img2, &mut img_rgb2).unwrap();
2670
2671        let image1 = image::RgbImage::from_vec(
2672            img_rgb1.width() as u32,
2673            img_rgb1.height() as u32,
2674            img_rgb1.tensor().map().unwrap().to_vec(),
2675        )
2676        .unwrap();
2677
2678        let image2 = image::RgbImage::from_vec(
2679            img_rgb2.width() as u32,
2680            img_rgb2.height() as u32,
2681            img_rgb2.tensor().map().unwrap().to_vec(),
2682        )
2683        .unwrap();
2684
2685        let similarity = image_compare::rgb_similarity_structure(
2686            &image_compare::Algorithm::RootMeanSquared,
2687            &image1,
2688            &image2,
2689        )
2690        .expect("Image Comparison failed");
2691        if similarity.score < threshold {
2692            // image1.save(format!("{name}_1.png"));
2693            // image2.save(format!("{name}_2.png"));
2694            similarity
2695                .image
2696                .to_color_map()
2697                .save(format!("{name}.png"))
2698                .unwrap();
2699            panic!(
2700                "{name}: converted image and target image have similarity score too low: {} < {}",
2701                similarity.score, threshold
2702            )
2703        }
2704    }
2705
2706    fn load_bytes_to_tensor(
2707        width: usize,
2708        height: usize,
2709        fourcc: FourCharCode,
2710        memory: Option<TensorMemory>,
2711        bytes: &[u8],
2712    ) -> Result<TensorImage, Error> {
2713        log::debug!("Current function is {}", function!());
2714        let src = TensorImage::new(width, height, fourcc, memory)?;
2715        src.tensor().map()?.as_mut_slice()[0..bytes.len()].copy_from_slice(bytes);
2716        Ok(src)
2717    }
2718
    // Expands to the body of a format-conversion test: loads a 1280x720
    // source image and a pre-generated reference image from testdata,
    // converts the source into the reference's format with a default
    // (bilinear) CPUProcessor, and requires an RGB structural-similarity
    // score of at least 0.99 against the reference.
    macro_rules! generate_conversion_tests {
        (
        $src_fmt:ident,  $src_file:expr, $dst_fmt:ident, $dst_file:expr
    ) => {{
            // Load source
            let src = load_bytes_to_tensor(
                1280,
                720,
                $src_fmt,
                None,
                include_bytes!(concat!("../../../testdata/", $src_file)),
            )?;

            // Load destination reference
            let dst = load_bytes_to_tensor(
                1280,
                720,
                $dst_fmt,
                None,
                include_bytes!(concat!("../../../testdata/", $dst_file)),
            )?;

            let mut converter = CPUProcessor::default();

            let mut converted = TensorImage::new(src.width(), src.height(), dst.fourcc(), None)?;

            converter.convert(
                &src,
                &mut converted,
                Rotation::None,
                Flip::None,
                Crop::default(),
            )?;

            compare_images_convert_to_rgb(&dst, &converted, 0.99, function!());

            Ok(())
        }};
    }
2758
    // Same as `generate_conversion_tests!` but scores similarity in
    // grayscale and with a slightly lower threshold (0.985) — used for GREY
    // sources, which carry no chroma to compare.
    macro_rules! generate_conversion_tests_greyscale {
        (
        $src_fmt:ident,  $src_file:expr, $dst_fmt:ident, $dst_file:expr
    ) => {{
            // Load source
            let src = load_bytes_to_tensor(
                1280,
                720,
                $src_fmt,
                None,
                include_bytes!(concat!("../../../testdata/", $src_file)),
            )?;

            // Load destination reference
            let dst = load_bytes_to_tensor(
                1280,
                720,
                $dst_fmt,
                None,
                include_bytes!(concat!("../../../testdata/", $dst_file)),
            )?;

            let mut converter = CPUProcessor::default();

            let mut converted = TensorImage::new(src.width(), src.height(), dst.fourcc(), None)?;

            converter.convert(
                &src,
                &mut converted,
                Rotation::None,
                Flip::None,
                Crop::default(),
            )?;

            compare_images_convert_to_grey(&dst, &converted, 0.985, function!());

            Ok(())
        }};
    }
2798
    // The conversion tests below cover the destination formats: YUYV, RGB,
    // RGBA, GREY, NV16, PLANAR_RGB and PLANAR_RGBA.
2800
    // YUYV-source conversion tests: convert camera720p.yuyv to each
    // destination format and compare against the pre-generated reference.
    #[test]
    fn test_cpu_yuyv_to_yuyv() -> Result<()> {
        generate_conversion_tests!(YUYV, "camera720p.yuyv", YUYV, "camera720p.yuyv")
    }

    #[test]
    fn test_cpu_yuyv_to_rgb() -> Result<()> {
        generate_conversion_tests!(YUYV, "camera720p.yuyv", RGB, "camera720p.rgb")
    }

    #[test]
    fn test_cpu_yuyv_to_rgba() -> Result<()> {
        generate_conversion_tests!(YUYV, "camera720p.yuyv", RGBA, "camera720p.rgba")
    }

    #[test]
    fn test_cpu_yuyv_to_grey() -> Result<()> {
        generate_conversion_tests!(YUYV, "camera720p.yuyv", GREY, "camera720p.y800")
    }

    #[test]
    fn test_cpu_yuyv_to_nv16() -> Result<()> {
        generate_conversion_tests!(YUYV, "camera720p.yuyv", NV16, "camera720p.nv16")
    }

    #[test]
    fn test_cpu_yuyv_to_planar_rgb() -> Result<()> {
        generate_conversion_tests!(YUYV, "camera720p.yuyv", PLANAR_RGB, "camera720p.8bps")
    }

    #[test]
    fn test_cpu_yuyv_to_planar_rgba() -> Result<()> {
        generate_conversion_tests!(YUYV, "camera720p.yuyv", PLANAR_RGBA, "camera720p.8bpa")
    }
2835
    // RGB-source conversion tests.
    #[test]
    fn test_cpu_rgb_to_yuyv() -> Result<()> {
        generate_conversion_tests!(RGB, "camera720p.rgb", YUYV, "camera720p.yuyv")
    }

    #[test]
    fn test_cpu_rgb_to_rgb() -> Result<()> {
        generate_conversion_tests!(RGB, "camera720p.rgb", RGB, "camera720p.rgb")
    }

    #[test]
    fn test_cpu_rgb_to_rgba() -> Result<()> {
        generate_conversion_tests!(RGB, "camera720p.rgb", RGBA, "camera720p.rgba")
    }

    #[test]
    fn test_cpu_rgb_to_grey() -> Result<()> {
        generate_conversion_tests!(RGB, "camera720p.rgb", GREY, "camera720p.y800")
    }

    #[test]
    fn test_cpu_rgb_to_nv16() -> Result<()> {
        generate_conversion_tests!(RGB, "camera720p.rgb", NV16, "camera720p.nv16")
    }

    #[test]
    fn test_cpu_rgb_to_planar_rgb() -> Result<()> {
        generate_conversion_tests!(RGB, "camera720p.rgb", PLANAR_RGB, "camera720p.8bps")
    }

    #[test]
    fn test_cpu_rgb_to_planar_rgba() -> Result<()> {
        generate_conversion_tests!(RGB, "camera720p.rgb", PLANAR_RGBA, "camera720p.8bpa")
    }
2870
    // RGBA-source conversion tests.
    #[test]
    fn test_cpu_rgba_to_yuyv() -> Result<()> {
        generate_conversion_tests!(RGBA, "camera720p.rgba", YUYV, "camera720p.yuyv")
    }

    #[test]
    fn test_cpu_rgba_to_rgb() -> Result<()> {
        generate_conversion_tests!(RGBA, "camera720p.rgba", RGB, "camera720p.rgb")
    }

    #[test]
    fn test_cpu_rgba_to_rgba() -> Result<()> {
        generate_conversion_tests!(RGBA, "camera720p.rgba", RGBA, "camera720p.rgba")
    }

    #[test]
    fn test_cpu_rgba_to_grey() -> Result<()> {
        generate_conversion_tests!(RGBA, "camera720p.rgba", GREY, "camera720p.y800")
    }

    #[test]
    fn test_cpu_rgba_to_nv16() -> Result<()> {
        generate_conversion_tests!(RGBA, "camera720p.rgba", NV16, "camera720p.nv16")
    }

    #[test]
    fn test_cpu_rgba_to_planar_rgb() -> Result<()> {
        generate_conversion_tests!(RGBA, "camera720p.rgba", PLANAR_RGB, "camera720p.8bps")
    }

    #[test]
    fn test_cpu_rgba_to_planar_rgba() -> Result<()> {
        generate_conversion_tests!(RGBA, "camera720p.rgba", PLANAR_RGBA, "camera720p.8bpa")
    }
2905
    // NV12-source conversion tests.
    #[test]
    fn test_cpu_nv12_to_rgb() -> Result<()> {
        generate_conversion_tests!(NV12, "camera720p.nv12", RGB, "camera720p.rgb")
    }

    #[test]
    fn test_cpu_nv12_to_yuyv() -> Result<()> {
        generate_conversion_tests!(NV12, "camera720p.nv12", YUYV, "camera720p.yuyv")
    }

    #[test]
    fn test_cpu_nv12_to_rgba() -> Result<()> {
        generate_conversion_tests!(NV12, "camera720p.nv12", RGBA, "camera720p.rgba")
    }

    #[test]
    fn test_cpu_nv12_to_grey() -> Result<()> {
        generate_conversion_tests!(NV12, "camera720p.nv12", GREY, "camera720p.y800")
    }

    #[test]
    fn test_cpu_nv12_to_nv16() -> Result<()> {
        generate_conversion_tests!(NV12, "camera720p.nv12", NV16, "camera720p.nv16")
    }

    #[test]
    fn test_cpu_nv12_to_planar_rgb() -> Result<()> {
        generate_conversion_tests!(NV12, "camera720p.nv12", PLANAR_RGB, "camera720p.8bps")
    }

    #[test]
    fn test_cpu_nv12_to_planar_rgba() -> Result<()> {
        generate_conversion_tests!(NV12, "camera720p.nv12", PLANAR_RGBA, "camera720p.8bpa")
    }
2940
    // GREY-source conversion tests: compared in grayscale since the source
    // carries no chroma.
    #[test]
    fn test_cpu_grey_to_yuyv() -> Result<()> {
        generate_conversion_tests_greyscale!(GREY, "camera720p.y800", YUYV, "camera720p.yuyv")
    }

    #[test]
    fn test_cpu_grey_to_rgb() -> Result<()> {
        generate_conversion_tests_greyscale!(GREY, "camera720p.y800", RGB, "camera720p.rgb")
    }

    #[test]
    fn test_cpu_grey_to_rgba() -> Result<()> {
        generate_conversion_tests_greyscale!(GREY, "camera720p.y800", RGBA, "camera720p.rgba")
    }

    #[test]
    fn test_cpu_grey_to_grey() -> Result<()> {
        generate_conversion_tests_greyscale!(GREY, "camera720p.y800", GREY, "camera720p.y800")
    }

    #[test]
    fn test_cpu_grey_to_nv16() -> Result<()> {
        generate_conversion_tests_greyscale!(GREY, "camera720p.y800", NV16, "camera720p.nv16")
    }

    #[test]
    fn test_cpu_grey_to_planar_rgb() -> Result<()> {
        generate_conversion_tests_greyscale!(GREY, "camera720p.y800", PLANAR_RGB, "camera720p.8bps")
    }

    #[test]
    fn test_cpu_grey_to_planar_rgba() -> Result<()> {
        generate_conversion_tests_greyscale!(
            GREY,
            "camera720p.y800",
            PLANAR_RGBA,
            "camera720p.8bpa"
        )
    }
2980
2981    #[test]
2982    fn test_cpu_nearest() -> Result<()> {
2983        // Load source
2984        let src = load_bytes_to_tensor(2, 1, RGB, None, &[0, 0, 0, 255, 255, 255])?;
2985
2986        let mut converter = CPUProcessor::new_nearest();
2987
2988        let mut converted = TensorImage::new(4, 1, RGB, None)?;
2989
2990        converter.convert(
2991            &src,
2992            &mut converted,
2993            Rotation::None,
2994            Flip::None,
2995            Crop::default(),
2996        )?;
2997
2998        assert_eq!(
2999            &converted.tensor().map()?.as_slice(),
3000            &[0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 255, 255]
3001        );
3002
3003        Ok(())
3004    }
3005
    // Rotation tests: the 2x2 source has pixel values [[0, 1], [2, 3]]
    // (row-major, grey RGBA). It is upscaled to 4x4 and rotated; the asserts
    // sample the four corner pixels of the 4x4 output.
    // CW90 maps [[0, 1], [2, 3]] corners to [[2, 0], [3, 1]].
    #[test]
    fn test_cpu_rotate_cw() -> Result<()> {
        // Load source
        let src = load_bytes_to_tensor(
            2,
            2,
            RGBA,
            None,
            &[0, 0, 0, 255, 1, 1, 1, 255, 2, 2, 2, 255, 3, 3, 3, 255],
        )?;

        let mut converter = CPUProcessor::default();

        let mut converted = TensorImage::new(4, 4, RGBA, None)?;

        converter.convert(
            &src,
            &mut converted,
            Rotation::Clockwise90,
            Flip::None,
            Crop::default(),
        )?;

        // Corners: top-left, top-right, bottom-left, bottom-right.
        assert_eq!(&converted.tensor().map()?.as_slice()[0..4], &[2, 2, 2, 255]);
        assert_eq!(
            &converted.tensor().map()?.as_slice()[12..16],
            &[0, 0, 0, 255]
        );
        assert_eq!(
            &converted.tensor().map()?.as_slice()[48..52],
            &[3, 3, 3, 255]
        );

        assert_eq!(
            &converted.tensor().map()?.as_slice()[60..64],
            &[1, 1, 1, 255]
        );

        Ok(())
    }

    // CCW90 maps [[0, 1], [2, 3]] corners to [[1, 3], [0, 2]].
    #[test]
    fn test_cpu_rotate_ccw() -> Result<()> {
        // Load source
        let src = load_bytes_to_tensor(
            2,
            2,
            RGBA,
            None,
            &[0, 0, 0, 255, 1, 1, 1, 255, 2, 2, 2, 255, 3, 3, 3, 255],
        )?;

        let mut converter = CPUProcessor::default();

        let mut converted = TensorImage::new(4, 4, RGBA, None)?;

        converter.convert(
            &src,
            &mut converted,
            Rotation::CounterClockwise90,
            Flip::None,
            Crop::default(),
        )?;

        // Corners: top-left, top-right, bottom-left, bottom-right.
        assert_eq!(&converted.tensor().map()?.as_slice()[0..4], &[1, 1, 1, 255]);
        assert_eq!(
            &converted.tensor().map()?.as_slice()[12..16],
            &[3, 3, 3, 255]
        );
        assert_eq!(
            &converted.tensor().map()?.as_slice()[48..52],
            &[0, 0, 0, 255]
        );

        assert_eq!(
            &converted.tensor().map()?.as_slice()[60..64],
            &[2, 2, 2, 255]
        );

        Ok(())
    }

    // 180 degrees maps [[0, 1], [2, 3]] corners to [[3, 2], [1, 0]].
    #[test]
    fn test_cpu_rotate_180() -> Result<()> {
        // Load source
        let src = load_bytes_to_tensor(
            2,
            2,
            RGBA,
            None,
            &[0, 0, 0, 255, 1, 1, 1, 255, 2, 2, 2, 255, 3, 3, 3, 255],
        )?;

        let mut converter = CPUProcessor::default();

        let mut converted = TensorImage::new(4, 4, RGBA, None)?;

        converter.convert(
            &src,
            &mut converted,
            Rotation::Rotate180,
            Flip::None,
            Crop::default(),
        )?;

        // Corners: top-left, top-right, bottom-left, bottom-right.
        assert_eq!(&converted.tensor().map()?.as_slice()[0..4], &[3, 3, 3, 255]);
        assert_eq!(
            &converted.tensor().map()?.as_slice()[12..16],
            &[2, 2, 2, 255]
        );
        assert_eq!(
            &converted.tensor().map()?.as_slice()[48..52],
            &[1, 1, 1, 255]
        );

        assert_eq!(
            &converted.tensor().map()?.as_slice()[60..64],
            &[0, 0, 0, 255]
        );

        Ok(())
    }
3128
    // Flip tests: same [[0, 1], [2, 3]] source as the rotation tests above,
    // upscaled to 4x4. Vertical flip maps corners to [[2, 3], [0, 1]].
    #[test]
    fn test_cpu_flip_v() -> Result<()> {
        // Load source
        let src = load_bytes_to_tensor(
            2,
            2,
            RGBA,
            None,
            &[0, 0, 0, 255, 1, 1, 1, 255, 2, 2, 2, 255, 3, 3, 3, 255],
        )?;

        let mut converter = CPUProcessor::default();

        let mut converted = TensorImage::new(4, 4, RGBA, None)?;

        converter.convert(
            &src,
            &mut converted,
            Rotation::None,
            Flip::Vertical,
            Crop::default(),
        )?;

        // Corners: top-left, top-right, bottom-left, bottom-right.
        assert_eq!(&converted.tensor().map()?.as_slice()[0..4], &[2, 2, 2, 255]);
        assert_eq!(
            &converted.tensor().map()?.as_slice()[12..16],
            &[3, 3, 3, 255]
        );
        assert_eq!(
            &converted.tensor().map()?.as_slice()[48..52],
            &[0, 0, 0, 255]
        );

        assert_eq!(
            &converted.tensor().map()?.as_slice()[60..64],
            &[1, 1, 1, 255]
        );

        Ok(())
    }

    // Horizontal flip maps [[0, 1], [2, 3]] corners to [[1, 0], [3, 2]].
    #[test]
    fn test_cpu_flip_h() -> Result<()> {
        // Load source
        let src = load_bytes_to_tensor(
            2,
            2,
            RGBA,
            None,
            &[0, 0, 0, 255, 1, 1, 1, 255, 2, 2, 2, 255, 3, 3, 3, 255],
        )?;

        let mut converter = CPUProcessor::default();

        let mut converted = TensorImage::new(4, 4, RGBA, None)?;

        converter.convert(
            &src,
            &mut converted,
            Rotation::None,
            Flip::Horizontal,
            Crop::default(),
        )?;

        // Corners: top-left, top-right, bottom-left, bottom-right.
        assert_eq!(&converted.tensor().map()?.as_slice()[0..4], &[1, 1, 1, 255]);
        assert_eq!(
            &converted.tensor().map()?.as_slice()[12..16],
            &[0, 0, 0, 255]
        );
        assert_eq!(
            &converted.tensor().map()?.as_slice()[48..52],
            &[3, 3, 3, 255]
        );

        assert_eq!(
            &converted.tensor().map()?.as_slice()[60..64],
            &[2, 2, 2, 255]
        );

        Ok(())
    }
3210
    // Source crop: only the left column [10, 30] of the 2x2 GREY source is
    // used, stretched to a 2x2 RGBA output (grey replicated across R/G/B,
    // alpha 255). NOTE(review): the 13/33 samples appear to come from the
    // resizer's bilinear weighting rather than pure duplication — confirm
    // against the CPU resize path if these golden values change.
    #[test]
    fn test_cpu_src_crop() -> Result<()> {
        // Load source
        let src = load_bytes_to_tensor(2, 2, GREY, None, &[10, 20, 30, 40])?;

        let mut converter = CPUProcessor::default();

        let mut converted = TensorImage::new(2, 2, RGBA, None)?;

        converter.convert(
            &src,
            &mut converted,
            Rotation::None,
            Flip::None,
            Crop::new().with_src_rect(Some(Rect::new(0, 0, 1, 2))),
        )?;

        assert_eq!(
            converted.tensor().map()?.as_slice(),
            &[10, 10, 10, 255, 13, 13, 13, 255, 30, 30, 30, 255, 33, 33, 33, 255]
        );
        Ok(())
    }
3234
    // Destination crop: the converted source is written only into the top
    // row (dst rect 0,0,2x1) of a pre-filled 2x3-byte-per-pixel YUYV buffer;
    // the second row must keep its original [200, 128, ...] bytes untouched.
    #[test]
    fn test_cpu_dst_crop() -> Result<()> {
        // Load source
        let src = load_bytes_to_tensor(2, 2, GREY, None, &[2, 4, 6, 8])?;

        let mut converter = CPUProcessor::default();

        let mut converted =
            load_bytes_to_tensor(2, 2, YUYV, None, &[200, 128, 200, 128, 200, 128, 200, 128])?;

        converter.convert(
            &src,
            &mut converted,
            Rotation::None,
            Flip::None,
            Crop::new().with_dst_rect(Some(Rect::new(0, 0, 2, 1))),
        )?;

        // Top row holds the converted source luma; bottom row is unchanged.
        assert_eq!(
            converted.tensor().map()?.as_slice(),
            &[20, 128, 21, 128, 200, 128, 200, 128]
        );
        Ok(())
    }
3259
    // Fill tests: when a dst rect plus dst_color is given, pixels OUTSIDE
    // the rect are filled with the color while the source lands inside it.
    // Here the source goes to the bottom-right pixel of a 2x2 RGBA output;
    // the other three pixels are filled red.
    #[test]
    fn test_cpu_fill_rgba() -> Result<()> {
        // Load source
        let src = load_bytes_to_tensor(1, 1, RGBA, None, &[3, 3, 3, 255])?;

        let mut converter = CPUProcessor::default();

        let mut converted = TensorImage::new(2, 2, RGBA, None)?;

        converter.convert(
            &src,
            &mut converted,
            Rotation::None,
            Flip::None,
            Crop {
                src_rect: None,
                dst_rect: Some(Rect {
                    left: 1,
                    top: 1,
                    width: 1,
                    height: 1,
                }),
                dst_color: Some([255, 0, 0, 255]),
            },
        )?;

        assert_eq!(
            converted.tensor().map()?.as_slice(),
            &[255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 3, 3, 3, 255]
        );
        Ok(())
    }

    // Same fill behavior for a YUYV destination: rows 0 and 2 receive the
    // red fill converted to YUYV ([63, 102, 63, 240]); the middle row (the
    // dst rect) receives the converted source.
    #[test]
    fn test_cpu_fill_yuyv() -> Result<()> {
        // Load source
        let src = load_bytes_to_tensor(2, 1, RGBA, None, &[3, 3, 3, 255, 3, 3, 3, 255])?;

        let mut converter = CPUProcessor::default();

        let mut converted = TensorImage::new(2, 3, YUYV, None)?;

        converter.convert(
            &src,
            &mut converted,
            Rotation::None,
            Flip::None,
            Crop {
                src_rect: None,
                dst_rect: Some(Rect {
                    left: 0,
                    top: 1,
                    width: 2,
                    height: 1,
                }),
                dst_color: Some([255, 0, 0, 255]),
            },
        )?;

        assert_eq!(
            converted.tensor().map()?.as_slice(),
            &[63, 102, 63, 240, 19, 128, 19, 128, 63, 102, 63, 240]
        );
        Ok(())
    }

    // Same fill behavior for a GREY destination: the [200, 200, 200, 255]
    // fill becomes grey value 200 outside the middle-row dst rect, and the
    // converted source luma (3) lands inside it.
    #[test]
    fn test_cpu_fill_grey() -> Result<()> {
        // Load source
        let src = load_bytes_to_tensor(2, 1, RGBA, None, &[3, 3, 3, 255, 3, 3, 3, 255])?;

        let mut converter = CPUProcessor::default();

        let mut converted = TensorImage::new(2, 3, GREY, None)?;

        converter.convert(
            &src,
            &mut converted,
            Rotation::None,
            Flip::None,
            Crop {
                src_rect: None,
                dst_rect: Some(Rect {
                    left: 0,
                    top: 1,
                    width: 2,
                    height: 1,
                }),
                dst_color: Some([200, 200, 200, 255]),
            },
        )?;

        assert_eq!(
            converted.tensor().map()?.as_slice(),
            &[200, 200, 3, 3, 200, 200]
        );
        Ok(())
    }
3358
    #[test]
    fn test_segmentation() {
        use edgefirst_decoder::Segmentation;
        use ndarray::Array3;

        // Smoke test: renders a full-frame ModelPack segmentation onto a test
        // image. There is no reference comparison — it only verifies that
        // draw_masks and the JPEG save complete without error; the written
        // file can be inspected manually.
        let mut image = TensorImage::load(
            include_bytes!("../../../testdata/giraffe.jpg"),
            Some(RGBA),
            None,
        )
        .unwrap();

        // The fixture is stored class-first (2, 160, 160); the two axis swaps
        // reorder it to (160, 160, 2) before copying into a contiguous
        // (standard) layout.
        let mut segmentation = Array3::from_shape_vec(
            (2, 160, 160),
            include_bytes!("../../../testdata/modelpack_seg_2x160x160.bin").to_vec(),
        )
        .unwrap();
        segmentation.swap_axes(0, 1);
        segmentation.swap_axes(1, 2);
        let segmentation = segmentation.as_standard_layout().to_owned();

        // Mask covers the entire image (normalized extents 0..1).
        let seg = Segmentation {
            segmentation,
            xmin: 0.0,
            ymin: 0.0,
            xmax: 1.0,
            ymax: 1.0,
        };

        let mut renderer = CPUProcessor::new();
        // No detection boxes — masks only.
        renderer.draw_masks(&mut image, &[], &[seg]).unwrap();

        image.save_jpeg("test_segmentation.jpg", 80).unwrap();
    }
3393
    #[test]
    fn test_segmentation_yolo() {
        use edgefirst_decoder::Segmentation;
        use ndarray::Array3;

        // Renders a single YOLOv8 instance mask, cropped to its detection
        // box, and compares the result against a stored reference render.
        let mut image = TensorImage::load(
            include_bytes!("../../../testdata/giraffe.jpg"),
            Some(RGBA),
            None,
        )
        .unwrap();

        // Pre-cropped 76x55 single-channel mask fixture.
        let segmentation = Array3::from_shape_vec(
            (76, 55, 1),
            include_bytes!("../../../testdata/yolov8_seg_crop_76x55.bin").to_vec(),
        )
        .unwrap();

        // Detection box in normalized coordinates; label 1 selects the second
        // palette colour below.
        let detect = DetectBox {
            bbox: [0.59375, 0.25, 0.9375, 0.725].into(),
            score: 0.99,
            label: 1,
        };

        // The segmentation extent matches the detection box exactly.
        let seg = Segmentation {
            segmentation,
            xmin: 0.59375,
            ymin: 0.25,
            xmax: 0.9375,
            ymax: 0.725,
        };

        let mut renderer = CPUProcessor::new();
        // Override the default palette and confirm it was stored.
        renderer
            .set_class_colors(&[[255, 255, 0, 233], [128, 128, 255, 100]])
            .unwrap();
        assert_eq!(renderer.colors[1], [128, 128, 255, 100]);
        renderer.draw_masks(&mut image, &[detect], &[seg]).unwrap();
        // Compare against the stored render with a 0.99 similarity threshold
        // (JPEG round-trips are not byte-exact).
        let expected = TensorImage::load(
            include_bytes!("../../../testdata/output_render_cpu.jpg"),
            Some(RGBA),
            None,
        )
        .unwrap();
        compare_images_convert_to_rgb(&image, &expected, 0.99, function!());
    }
3440
3441    // =========================================================================
3442    // Generic Conversion Tests (TensorImageRef support)
3443    // =========================================================================
3444
    #[test]
    fn test_convert_rgb_to_planar_rgb_generic() {
        // Create RGB source image
        let mut src = TensorImage::new(4, 4, RGB, None).unwrap();
        {
            let mut map = src.tensor_mut().map().unwrap();
            let data = map.as_mut_slice();
            // Fill with pattern: pixel i = [10*i, 10*i + 1, 10*i + 2], so
            // pixel 0 = [0, 1, 2], pixel 1 = [10, 11, 12], etc.
            for i in 0..16 {
                data[i * 3] = (i * 10) as u8;
                data[i * 3 + 1] = (i * 10 + 1) as u8;
                data[i * 3 + 2] = (i * 10 + 2) as u8;
            }
        }

        // Create planar RGB destination using TensorImageRef over a borrowed
        // tensor shaped channels-first (3 planes of 4x4).
        let mut tensor = Tensor::<u8>::new(&[3, 4, 4], None, None).unwrap();
        let mut dst = TensorImageRef::from_borrowed_tensor(&mut tensor, PLANAR_RGB).unwrap();

        CPUProcessor::convert_format_generic(&src, &mut dst).unwrap();

        // Verify the conversion - check first few pixels of each plane
        let map = dst.tensor().map().unwrap();
        let data = map.as_slice();

        // Each plane holds 16 pixels: R plane starts at 0, G at 16, B at 32
        assert_eq!(data[0], 0); // R of pixel 0
        assert_eq!(data[16], 1); // G of pixel 0
        assert_eq!(data[32], 2); // B of pixel 0

        assert_eq!(data[1], 10); // R of pixel 1
        assert_eq!(data[17], 11); // G of pixel 1
        assert_eq!(data[33], 12); // B of pixel 1
    }
3479
3480    #[test]
3481    fn test_convert_rgba_to_planar_rgb_generic() {
3482        // Create RGBA source image
3483        let mut src = TensorImage::new(4, 4, RGBA, None).unwrap();
3484        {
3485            let mut map = src.tensor_mut().map().unwrap();
3486            let data = map.as_mut_slice();
3487            // Fill with pattern
3488            for i in 0..16 {
3489                data[i * 4] = (i * 10) as u8; // R
3490                data[i * 4 + 1] = (i * 10 + 1) as u8; // G
3491                data[i * 4 + 2] = (i * 10 + 2) as u8; // B
3492                data[i * 4 + 3] = 255; // A (ignored)
3493            }
3494        }
3495
3496        // Create planar RGB destination
3497        let mut tensor = Tensor::<u8>::new(&[3, 4, 4], None, None).unwrap();
3498        let mut dst = TensorImageRef::from_borrowed_tensor(&mut tensor, PLANAR_RGB).unwrap();
3499
3500        CPUProcessor::convert_format_generic(&src, &mut dst).unwrap();
3501
3502        // Verify the conversion
3503        let map = dst.tensor().map().unwrap();
3504        let data = map.as_slice();
3505
3506        assert_eq!(data[0], 0); // R of pixel 0
3507        assert_eq!(data[16], 1); // G of pixel 0
3508        assert_eq!(data[32], 2); // B of pixel 0
3509    }
3510
3511    #[test]
3512    fn test_copy_image_generic_same_format() {
3513        // Create source image with data
3514        let mut src = TensorImage::new(4, 4, RGB, None).unwrap();
3515        {
3516            let mut map = src.tensor_mut().map().unwrap();
3517            let data = map.as_mut_slice();
3518            for (i, byte) in data.iter_mut().enumerate() {
3519                *byte = (i % 256) as u8;
3520            }
3521        }
3522
3523        // Create destination tensor
3524        let mut tensor = Tensor::<u8>::new(&[4, 4, 3], None, None).unwrap();
3525        let mut dst = TensorImageRef::from_borrowed_tensor(&mut tensor, RGB).unwrap();
3526
3527        CPUProcessor::convert_format_generic(&src, &mut dst).unwrap();
3528
3529        // Verify data was copied
3530        let src_map = src.tensor().map().unwrap();
3531        let dst_map = dst.tensor().map().unwrap();
3532        assert_eq!(src_map.as_slice(), dst_map.as_slice());
3533    }
3534
3535    #[test]
3536    fn test_convert_format_generic_unsupported() {
3537        // Try unsupported conversion (NV12 to PLANAR_RGB)
3538        let src = TensorImage::new(8, 8, NV12, None).unwrap();
3539        let mut tensor = Tensor::<u8>::new(&[3, 8, 8], None, None).unwrap();
3540        let mut dst = TensorImageRef::from_borrowed_tensor(&mut tensor, PLANAR_RGB).unwrap();
3541
3542        let result = CPUProcessor::convert_format_generic(&src, &mut dst);
3543        assert!(result.is_err());
3544        assert!(matches!(result, Err(Error::NotSupported(_))));
3545    }
3546
3547    #[test]
3548    fn test_fill_image_outside_crop_generic_rgba() {
3549        let mut tensor = Tensor::<u8>::new(&[4, 4, 4], None, None).unwrap();
3550        // Initialize to zeros
3551        tensor.map().unwrap().as_mut_slice().fill(0);
3552
3553        let mut dst = TensorImageRef::from_borrowed_tensor(&mut tensor, RGBA).unwrap();
3554
3555        // Fill outside a 2x2 crop in the center with red
3556        let crop = Rect::new(1, 1, 2, 2);
3557        CPUProcessor::fill_image_outside_crop_generic(&mut dst, [255, 0, 0, 255], crop).unwrap();
3558
3559        let map = dst.tensor().map().unwrap();
3560        let data = map.as_slice();
3561
3562        // Top-left corner should be filled (red)
3563        assert_eq!(&data[0..4], &[255, 0, 0, 255]);
3564
3565        // Center pixel (1,1) should still be zero (inside crop)
3566        // row=1, col=1, width=4, bytes_per_pixel=4 -> offset = (1*4 + 1) * 4 = 20
3567        let center_offset = 20;
3568        assert_eq!(&data[center_offset..center_offset + 4], &[0, 0, 0, 0]);
3569    }
3570
3571    #[test]
3572    fn test_fill_image_outside_crop_generic_rgb() {
3573        let mut tensor = Tensor::<u8>::new(&[4, 4, 3], None, None).unwrap();
3574        tensor.map().unwrap().as_mut_slice().fill(0);
3575
3576        let mut dst = TensorImageRef::from_borrowed_tensor(&mut tensor, RGB).unwrap();
3577
3578        let crop = Rect::new(1, 1, 2, 2);
3579        CPUProcessor::fill_image_outside_crop_generic(&mut dst, [0, 255, 0, 255], crop).unwrap();
3580
3581        let map = dst.tensor().map().unwrap();
3582        let data = map.as_slice();
3583
3584        // Top-left corner should be green
3585        assert_eq!(&data[0..3], &[0, 255, 0]);
3586
3587        // Center pixel (1,1): row=1, col=1, width=4, bytes=3 -> offset = (1*4 + 1) * 3
3588        // = 15
3589        let center_offset = 15;
3590        assert_eq!(&data[center_offset..center_offset + 3], &[0, 0, 0]);
3591    }
3592
    #[test]
    fn test_fill_image_outside_crop_generic_planar_rgb() {
        // Zero-initialized planar RGB buffer (3 planes of 4x4) borrowed as an
        // image; the planar layout has no alpha plane, so only the first
        // three components of the fill colour can land in the output.
        let mut tensor = Tensor::<u8>::new(&[3, 4, 4], None, None).unwrap();
        tensor.map().unwrap().as_mut_slice().fill(0);

        let mut dst = TensorImageRef::from_borrowed_tensor(&mut tensor, PLANAR_RGB).unwrap();

        // Fill everything outside the central 2x2 window.
        let crop = Rect::new(1, 1, 2, 2);
        CPUProcessor::fill_image_outside_crop_generic(&mut dst, [128, 64, 32, 255], crop).unwrap();

        let map = dst.tensor().map().unwrap();
        let data = map.as_slice();

        // For planar: R plane is [0..16], G plane is [16..32], B plane is [32..48]
        // Top-left pixel (0,0) should have R=128, G=64, B=32
        assert_eq!(data[0], 128); // R plane, pixel 0
        assert_eq!(data[16], 64); // G plane, pixel 0
        assert_eq!(data[32], 32); // B plane, pixel 0

        // Center pixel (1,1) is inside the crop and must stay zero in every
        // plane: row=1, col=1, width=4 -> index = 1*4 + 1 = 5
        let center_idx = 5;
        assert_eq!(data[center_idx], 0); // R
        assert_eq!(data[16 + center_idx], 0); // G
        assert_eq!(data[32 + center_idx], 0); // B
    }
3618
    #[test]
    fn test_convert_rgba_to_bgra() {
        use edgefirst_tensor::TensorMemory;
        // 2x1 image: pixel0 = [R=10, G=20, B=30, A=255], pixel1 = [R=40, G=50, B=60, A=128]
        let src = TensorImage::new(2, 1, RGBA, Some(TensorMemory::Mem)).unwrap();
        {
            // Write the source pixels through a mapped view of the tensor.
            let mut map = src.tensor().map().unwrap();
            let buf = map.as_mut_slice();
            buf[0..4].copy_from_slice(&[10, 20, 30, 255]);
            buf[4..8].copy_from_slice(&[40, 50, 60, 128]);
        }
        let mut dst = TensorImage::new(2, 1, BGRA, Some(TensorMemory::Mem)).unwrap();
        CPUProcessor::convert_format(&src, &mut dst).unwrap();
        let map = dst.tensor().map().unwrap();
        let buf = map.as_slice();
        // BGRA byte order: [B, G, R, A] — per-pixel alpha is preserved.
        assert_eq!(&buf[0..4], &[30, 20, 10, 255]);
        assert_eq!(&buf[4..8], &[60, 50, 40, 128]);
    }
3638
    #[test]
    fn test_convert_rgb_to_bgra() {
        // Convert RGB→RGBA and RGB→BGRA, verify R↔B swap matches
        let src = TensorImage::new(2, 1, RGB, Some(TensorMemory::Mem)).unwrap();
        {
            // pixel0 = [R=100, G=150, B=200], pixel1 = [R=50, G=75, B=25]
            let mut map = src.tensor().map().unwrap();
            let buf = map.as_mut_slice();
            buf[0..3].copy_from_slice(&[100, 150, 200]);
            buf[3..6].copy_from_slice(&[50, 75, 25]);
        }
        // Reference path: RGB→RGBA.
        let mut rgba_dst = TensorImage::new(2, 1, RGBA, Some(TensorMemory::Mem)).unwrap();
        CPUProcessor::convert_format(&src, &mut rgba_dst).unwrap();

        // Path under test: RGB→BGRA.
        let mut bgra_dst = TensorImage::new(2, 1, BGRA, Some(TensorMemory::Mem)).unwrap();
        CPUProcessor::convert_format(&src, &mut bgra_dst).unwrap();

        assert_bgra_matches_rgba(&bgra_dst, &rgba_dst);

        // Also verify the B,G,R channels are correct (alpha may vary)
        let map = bgra_dst.tensor().map().unwrap();
        let buf = map.as_slice();
        assert_eq!(buf[0], 200, "pixel 0 B");
        assert_eq!(buf[1], 150, "pixel 0 G");
        assert_eq!(buf[2], 100, "pixel 0 R");
        assert_eq!(buf[4], 25, "pixel 1 B");
        assert_eq!(buf[5], 75, "pixel 1 G");
        assert_eq!(buf[6], 50, "pixel 1 R");
    }
3667
3668    #[test]
3669    fn test_convert_grey_to_bgra() {
3670        // 2x1 greyscale image
3671        let src = TensorImage::new(2, 1, GREY, Some(TensorMemory::Mem)).unwrap();
3672        {
3673            let mut map = src.tensor().map().unwrap();
3674            let buf = map.as_mut_slice();
3675            buf[0] = 128;
3676            buf[1] = 64;
3677        }
3678        let mut dst = TensorImage::new(2, 1, BGRA, Some(TensorMemory::Mem)).unwrap();
3679        CPUProcessor::convert_format(&src, &mut dst).unwrap();
3680        let map = dst.tensor().map().unwrap();
3681        let buf = map.as_slice();
3682        // Grey→BGRA: all channels same value, A=255; R↔B swap is no-op on grey
3683        assert_eq!(&buf[0..4], &[128, 128, 128, 255]);
3684        assert_eq!(&buf[4..8], &[64, 64, 64, 255]);
3685    }
3686
3687    #[test]
3688    fn test_convert_bgra_to_bgra_copy() {
3689        // Verify BGRA→BGRA is a straight copy
3690        let src = TensorImage::new(2, 1, BGRA, Some(TensorMemory::Mem)).unwrap();
3691        {
3692            let mut map = src.tensor().map().unwrap();
3693            let buf = map.as_mut_slice();
3694            buf[0..4].copy_from_slice(&[10, 20, 30, 255]);
3695            buf[4..8].copy_from_slice(&[40, 50, 60, 128]);
3696        }
3697        let mut dst = TensorImage::new(2, 1, BGRA, Some(TensorMemory::Mem)).unwrap();
3698        CPUProcessor::convert_format(&src, &mut dst).unwrap();
3699        let map = dst.tensor().map().unwrap();
3700        let buf = map.as_slice();
3701        assert_eq!(&buf[0..4], &[10, 20, 30, 255]);
3702        assert_eq!(&buf[4..8], &[40, 50, 60, 128]);
3703    }
3704
3705    /// Helper: compare BGRA output against RGBA output by verifying R↔B swap.
3706    /// Since CPU BGRA conversion is RGBA conversion + R↔B swizzle, the results
3707    /// must be byte-exact after accounting for the channel swap.
3708    fn assert_bgra_matches_rgba(bgra: &TensorImage, rgba: &TensorImage) {
3709        assert_eq!(bgra.fourcc(), BGRA);
3710        assert_eq!(rgba.fourcc(), RGBA);
3711        assert_eq!(bgra.width(), rgba.width());
3712        assert_eq!(bgra.height(), rgba.height());
3713
3714        let bgra_map = bgra.tensor().map().unwrap();
3715        let rgba_map = rgba.tensor().map().unwrap();
3716        let bgra_buf = bgra_map.as_slice();
3717        let rgba_buf = rgba_map.as_slice();
3718
3719        assert_eq!(bgra_buf.len(), rgba_buf.len());
3720        for (i, (bc, rc)) in bgra_buf
3721            .chunks_exact(4)
3722            .zip(rgba_buf.chunks_exact(4))
3723            .enumerate()
3724        {
3725            assert_eq!(bc[0], rc[2], "pixel {i}: B(bgra) != B(rgba)");
3726            assert_eq!(bc[1], rc[1], "pixel {i}: G mismatch");
3727            assert_eq!(bc[2], rc[0], "pixel {i}: R(bgra) != R(rgba)");
3728            assert_eq!(bc[3], rc[3], "pixel {i}: A mismatch");
3729        }
3730    }
3731
3732    #[test]
3733    fn test_convert_nv12_to_bgra() {
3734        let src = load_bytes_to_tensor(
3735            1280,
3736            720,
3737            NV12,
3738            None,
3739            include_bytes!("../../../testdata/camera720p.nv12"),
3740        )
3741        .unwrap();
3742
3743        // Convert to both RGBA and BGRA, then compare
3744        let mut rgba_dst = TensorImage::new(1280, 720, RGBA, None).unwrap();
3745        CPUProcessor::convert_format(&src, &mut rgba_dst).unwrap();
3746
3747        let mut bgra_dst = TensorImage::new(1280, 720, BGRA, None).unwrap();
3748        CPUProcessor::convert_format(&src, &mut bgra_dst).unwrap();
3749
3750        assert_bgra_matches_rgba(&bgra_dst, &rgba_dst);
3751    }
3752
3753    #[test]
3754    fn test_convert_yuyv_to_bgra() {
3755        let src = load_bytes_to_tensor(
3756            1280,
3757            720,
3758            YUYV,
3759            None,
3760            include_bytes!("../../../testdata/camera720p.yuyv"),
3761        )
3762        .unwrap();
3763
3764        let mut rgba_dst = TensorImage::new(1280, 720, RGBA, None).unwrap();
3765        CPUProcessor::convert_format(&src, &mut rgba_dst).unwrap();
3766
3767        let mut bgra_dst = TensorImage::new(1280, 720, BGRA, None).unwrap();
3768        CPUProcessor::convert_format(&src, &mut bgra_dst).unwrap();
3769
3770        assert_bgra_matches_rgba(&bgra_dst, &rgba_dst);
3771    }
3772
3773    #[test]
3774    fn test_convert_vyuy_to_bgra() {
3775        let src = load_bytes_to_tensor(
3776            1280,
3777            720,
3778            VYUY,
3779            None,
3780            include_bytes!("../../../testdata/camera720p.vyuy"),
3781        )
3782        .unwrap();
3783
3784        let mut rgba_dst = TensorImage::new(1280, 720, RGBA, None).unwrap();
3785        CPUProcessor::convert_format(&src, &mut rgba_dst).unwrap();
3786
3787        let mut bgra_dst = TensorImage::new(1280, 720, BGRA, None).unwrap();
3788        CPUProcessor::convert_format(&src, &mut bgra_dst).unwrap();
3789
3790        assert_bgra_matches_rgba(&bgra_dst, &rgba_dst);
3791    }
3792
3793    #[test]
3794    fn test_convert_nv16_to_bgra() {
3795        let src = load_bytes_to_tensor(
3796            1280,
3797            720,
3798            NV16,
3799            None,
3800            include_bytes!("../../../testdata/camera720p.nv16"),
3801        )
3802        .unwrap();
3803
3804        let mut rgba_dst = TensorImage::new(1280, 720, RGBA, None).unwrap();
3805        CPUProcessor::convert_format(&src, &mut rgba_dst).unwrap();
3806
3807        let mut bgra_dst = TensorImage::new(1280, 720, BGRA, None).unwrap();
3808        CPUProcessor::convert_format(&src, &mut bgra_dst).unwrap();
3809
3810        assert_bgra_matches_rgba(&bgra_dst, &rgba_dst);
3811    }
3812
3813    // ========================================================================
3814    // Tests for materialize_segmentations
3815    // ========================================================================
3816
3817    fn make_proto_data(
3818        proto_h: usize,
3819        proto_w: usize,
3820        num_protos: usize,
3821        coefficients: Vec<Vec<f32>>,
3822    ) -> crate::ProtoData {
3823        crate::ProtoData {
3824            mask_coefficients: coefficients,
3825            protos: edgefirst_decoder::ProtoTensor::Float(ndarray::Array3::<f32>::zeros((
3826                proto_h, proto_w, num_protos,
3827            ))),
3828        }
3829    }
3830
3831    fn make_detect_box(xmin: f32, ymin: f32, xmax: f32, ymax: f32) -> crate::DetectBox {
3832        crate::DetectBox {
3833            bbox: edgefirst_decoder::BoundingBox {
3834                xmin,
3835                ymin,
3836                xmax,
3837                ymax,
3838            },
3839            score: 0.9,
3840            label: 0,
3841        }
3842    }
3843
3844    #[test]
3845    fn test_materialize_empty_detections() {
3846        let cpu = CPUProcessor::new();
3847        let proto_data = make_proto_data(8, 8, 4, vec![vec![1.0; 4]]);
3848        let result = cpu.materialize_segmentations(&[], &proto_data);
3849        assert!(result.is_ok());
3850        assert!(result.unwrap().is_empty());
3851    }
3852
3853    #[test]
3854    fn test_materialize_empty_proto_data() {
3855        let cpu = CPUProcessor::new();
3856        let proto_data = make_proto_data(8, 8, 4, vec![]);
3857        let det = [make_detect_box(0.1, 0.1, 0.5, 0.5)];
3858        let result = cpu.materialize_segmentations(&det, &proto_data);
3859        assert!(result.is_ok());
3860        assert!(result.unwrap().is_empty());
3861    }
3862
3863    #[test]
3864    fn test_materialize_single_detection() {
3865        let cpu = CPUProcessor::new();
3866        let proto_data = make_proto_data(8, 8, 4, vec![vec![0.5; 4]]);
3867        let det = [make_detect_box(0.1, 0.1, 0.5, 0.5)];
3868        let result = cpu.materialize_segmentations(&det, &proto_data);
3869        assert!(result.is_ok());
3870        let segs = result.unwrap();
3871        assert_eq!(segs.len(), 1);
3872        // Segmentation should have shape (H, W, 1) with non-zero spatial dims
3873        assert!(segs[0].segmentation.shape()[0] > 0);
3874        assert!(segs[0].segmentation.shape()[1] > 0);
3875        assert_eq!(segs[0].segmentation.shape()[2], 1);
3876    }
3877
3878    #[test]
3879    fn test_materialize_bbox_edge_one() {
3880        let cpu = CPUProcessor::new();
3881        let proto_data = make_proto_data(8, 8, 4, vec![vec![0.5; 4]]);
3882        let det = [make_detect_box(0.5, 0.5, 1.0, 1.0)];
3883        let result = cpu.materialize_segmentations(&det, &proto_data);
3884        assert!(
3885            result.is_ok(),
3886            "bbox at exact boundary (1.0) should not panic"
3887        );
3888        let segs = result.unwrap();
3889        assert_eq!(segs.len(), 1);
3890    }
3891
3892    #[test]
3893    fn test_materialize_bbox_negative_clamp() {
3894        let cpu = CPUProcessor::new();
3895        let proto_data = make_proto_data(8, 8, 4, vec![vec![0.5; 4]]);
3896        let det = [make_detect_box(-0.5, -0.5, 0.5, 0.5)];
3897        let result = cpu.materialize_segmentations(&det, &proto_data);
3898        assert!(
3899            result.is_ok(),
3900            "negative coordinates should be clamped to 0"
3901        );
3902        let segs = result.unwrap();
3903        assert_eq!(segs.len(), 1);
3904        // xmin should be clamped to 0.0
3905        assert!((segs[0].xmin - 0.0).abs() < 0.01);
3906        assert!((segs[0].ymin - 0.0).abs() < 0.01);
3907    }
3908
3909    #[test]
3910    fn test_materialize_invalid_coeff_shape() {
3911        let cpu = CPUProcessor::new();
3912        // Proto has 4 channels but coefficients have 6 elements — mismatch
3913        let proto_data = make_proto_data(8, 8, 4, vec![vec![0.5; 6]]);
3914        let det = [make_detect_box(0.1, 0.1, 0.5, 0.5)];
3915        let result = cpu.materialize_segmentations(&det, &proto_data);
3916        assert!(
3917            result.is_err(),
3918            "mismatched coeff count vs proto channels should error"
3919        );
3920        let err = result.unwrap_err();
3921        assert!(
3922            matches!(&err, crate::Error::Internal(s) if s.contains("coeff")),
3923            "error should mention coefficient shape: {err:?}"
3924        );
3925    }
3926
3927    #[test]
3928    fn test_materialize_multiple_detections() {
3929        let cpu = CPUProcessor::new();
3930        let proto_data = make_proto_data(8, 8, 4, vec![vec![0.5; 4], vec![0.3; 4], vec![0.1; 4]]);
3931        let det = [
3932            make_detect_box(0.0, 0.0, 0.5, 0.5),
3933            make_detect_box(0.5, 0.0, 1.0, 0.5),
3934            make_detect_box(0.0, 0.5, 0.5, 1.0),
3935        ];
3936        let result = cpu.materialize_segmentations(&det, &proto_data);
3937        assert!(result.is_ok());
3938        assert_eq!(result.unwrap().len(), 3);
3939    }
3940
3941    #[test]
3942    fn test_materialize_zero_area_bbox() {
3943        let cpu = CPUProcessor::new();
3944        let proto_data = make_proto_data(8, 8, 4, vec![vec![0.5; 4]]);
3945        // xmin == xmax → zero-width bbox
3946        let det = [make_detect_box(0.5, 0.1, 0.5, 0.5)];
3947        let result = cpu.materialize_segmentations(&det, &proto_data);
3948        assert!(
3949            result.is_ok(),
3950            "zero-area bbox should return Ok with degenerate segmentation"
3951        );
3952        let segs = result.unwrap();
3953        assert_eq!(segs.len(), 1);
3954    }
3955}