Skip to main content

edgefirst_image/
cpu.rs

1// SPDX-FileCopyrightText: Copyright 2025 Au-Zone Technologies
2// SPDX-License-Identifier: Apache-2.0
3
4use crate::{
5    fourcc_is_int8, fourcc_uint8_equivalent, Crop, Error, Flip, FunctionTimer, ImageProcessorTrait,
6    Rect, Result, Rotation, TensorImage, TensorImageDst, TensorImageRef, GREY, NV12, NV16,
7    PLANAR_RGB, PLANAR_RGBA, RGB, RGBA, VYUY, YUYV,
8};
9use edgefirst_decoder::{DetectBox, ProtoData, Segmentation};
10use edgefirst_tensor::{TensorMapTrait, TensorTrait};
11use four_char_code::FourCharCode;
12use ndarray::{ArrayView3, ArrayViewMut3, Axis};
13use rayon::iter::{
14    IndexedParallelIterator, IntoParallelRefIterator, IntoParallelRefMutIterator, ParallelIterator,
15};
16use std::ops::Shr;
17
/// CPUConverter implements the ImageProcessor trait using the fallback CPU
/// implementation for image processing.
#[derive(Debug, Clone)]
pub struct CPUProcessor {
    // Reusable fast_image_resize engine; constructed once and kept so repeated
    // resizes do not re-allocate internal state.
    resizer: fast_image_resize::Resizer,
    // Resize algorithm/options fixed at construction time
    // (bilinear convolution or nearest neighbor; alpha not premultiplied).
    options: fast_image_resize::ResizeOptions,
    // 20 RGBA color entries, initialized from crate::DEFAULT_COLORS_U8.
    // NOTE(review): presumably a per-class palette for mask/box rendering —
    // confirm against the drawing methods elsewhere in this impl.
    colors: [[u8; 4]; 20],
}
26
// SAFETY: NOTE(review) — these manual impls assert that CPUProcessor (and in
// particular the third-party `fast_image_resize::Resizer` it owns) is safe to
// move and share across threads. Nothing in this file demonstrates why the
// auto-impls are missing; confirm the Resizer holds no thread-affine state
// before relying on these.
unsafe impl Send for CPUProcessor {}
unsafe impl Sync for CPUProcessor {}
29
/// Expands a limited-range (16..=240) sample to full range (0..=255),
/// rounding to nearest.
///
/// Out-of-range inputs are clamped: values below 16 map to 0 and values
/// above 240 map to 255. The previous expression underflowed `u16` for
/// inputs below 16 (panic in debug builds, garbage in release) and wrapped
/// on the final `as u8` cast for inputs above 240; in-range inputs produce
/// exactly the same results as before.
#[inline(always)]
fn limit_to_full(l: u8) -> u8 {
    // Limited-range span (240 - 16); RANGE / 2 implements round-to-nearest.
    const RANGE: u16 = 240 - 16;
    let v = (l as u16).saturating_sub(16).min(RANGE);
    ((v * 255 + RANGE / 2) / RANGE) as u8
}
34
/// Compresses a full-range (0..=255) sample into the limited range
/// (16..=240), rounding to nearest. Cannot overflow for any `u8` input.
#[inline(always)]
fn full_to_limit(l: u8) -> u8 {
    const RANGE: u16 = 240 - 16;
    let scaled = u16::from(l) * RANGE + 255 / 2;
    (scaled / 255 + 16) as u8
}
39
40impl Default for CPUProcessor {
41    fn default() -> Self {
42        Self::new_bilinear()
43    }
44}
45
46impl CPUProcessor {
    /// Creates a new CPUConverter with bilinear resizing.
    ///
    /// Equivalent to [`Default::default`]; use [`CPUProcessor::new_nearest`]
    /// when nearest-neighbor resampling is wanted instead.
    pub fn new() -> Self {
        Self::new_bilinear()
    }
51
52    /// Creates a new CPUConverter with bilinear resizing.
53    fn new_bilinear() -> Self {
54        let resizer = fast_image_resize::Resizer::new();
55        let options = fast_image_resize::ResizeOptions::new()
56            .resize_alg(fast_image_resize::ResizeAlg::Convolution(
57                fast_image_resize::FilterType::Bilinear,
58            ))
59            .use_alpha(false);
60
61        log::debug!("CPUConverter created");
62        Self {
63            resizer,
64            options,
65            colors: crate::DEFAULT_COLORS_U8,
66        }
67    }
68
69    /// Creates a new CPUConverter with nearest neighbor resizing.
70    pub fn new_nearest() -> Self {
71        let resizer = fast_image_resize::Resizer::new();
72        let options = fast_image_resize::ResizeOptions::new()
73            .resize_alg(fast_image_resize::ResizeAlg::Nearest)
74            .use_alpha(false);
75        log::debug!("CPUConverter created");
76        Self {
77            resizer,
78            options,
79            colors: crate::DEFAULT_COLORS_U8,
80        }
81    }
82
    /// Applies `flip` followed by `rotation` to `src_map`, writing the result
    /// into `dst_map` through `ndarray` views. `dst` supplies the output
    /// geometry (height, width, channels); for quarter-turn rotations the
    /// source buffer is interpreted with width and height transposed.
    ///
    /// NOTE(review): the flip inverts source axes BEFORE the rotation remaps
    /// them, so Vertical/Horizontal refer to the pre-rotation orientation —
    /// confirm callers expect this order.
    pub(crate) fn flip_rotate_ndarray(
        src_map: &[u8],
        dst_map: &mut [u8],
        dst: &TensorImage,
        rotation: Rotation,
        flip: Flip,
    ) -> Result<(), crate::Error> {
        let mut dst_view =
            ArrayViewMut3::from_shape((dst.height(), dst.width(), dst.channels()), dst_map)?;
        // The source view takes the destination's shape, transposed when the
        // rotation swaps rows and columns.
        let mut src_view = match rotation {
            Rotation::None | Rotation::Rotate180 => {
                ArrayView3::from_shape((dst.height(), dst.width(), dst.channels()), src_map)?
            }
            Rotation::Clockwise90 | Rotation::CounterClockwise90 => {
                ArrayView3::from_shape((dst.width(), dst.height(), dst.channels()), src_map)?
            }
        };

        // Mirroring is a zero-copy axis inversion on the view.
        match flip {
            Flip::None => {}
            Flip::Vertical => {
                src_view.invert_axis(Axis(0));
            }
            Flip::Horizontal => {
                src_view.invert_axis(Axis(1));
            }
        }

        // Rotations are expressed as transpose + one axis inversion;
        // 180 degrees is a double mirror with no transpose. Order of the two
        // operations within each arm is significant.
        match rotation {
            Rotation::None => {}
            Rotation::Clockwise90 => {
                src_view.swap_axes(0, 1);
                src_view.invert_axis(Axis(1));
            }
            Rotation::Rotate180 => {
                src_view.invert_axis(Axis(0));
                src_view.invert_axis(Axis(1));
            }
            Rotation::CounterClockwise90 => {
                src_view.swap_axes(0, 1);
                src_view.invert_axis(Axis(0));
            }
        }

        // Single element-wise copy through the (possibly permuted) view.
        dst_view.assign(&src_view);

        Ok(())
    }
131
132    fn convert_nv12_to_rgb(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
133        assert_eq!(src.fourcc(), NV12);
134        assert_eq!(dst.fourcc(), RGB);
135        let map = src.tensor.map()?;
136        let y_stride = src.width() as u32;
137        let uv_stride = src.width() as u32;
138        let slices = map.as_slice().split_at(y_stride as usize * src.height());
139
140        let src = yuv::YuvBiPlanarImage {
141            y_plane: slices.0,
142            y_stride,
143            uv_plane: slices.1,
144            uv_stride,
145            width: src.width() as u32,
146            height: src.height() as u32,
147        };
148
149        Ok(yuv::yuv_nv12_to_rgb(
150            &src,
151            dst.tensor.map()?.as_mut_slice(),
152            dst.row_stride() as u32,
153            yuv::YuvRange::Limited,
154            yuv::YuvStandardMatrix::Bt709,
155            yuv::YuvConversionMode::Balanced,
156        )?)
157    }
158
159    fn convert_nv12_to_rgba(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
160        assert_eq!(src.fourcc(), NV12);
161        assert_eq!(dst.fourcc(), RGBA);
162        let map = src.tensor.map()?;
163        let y_stride = src.width() as u32;
164        let uv_stride = src.width() as u32;
165        let slices = map.as_slice().split_at(y_stride as usize * src.height());
166
167        let src = yuv::YuvBiPlanarImage {
168            y_plane: slices.0,
169            y_stride,
170            uv_plane: slices.1,
171            uv_stride,
172            width: src.width() as u32,
173            height: src.height() as u32,
174        };
175
176        Ok(yuv::yuv_nv12_to_rgba(
177            &src,
178            dst.tensor.map()?.as_mut_slice(),
179            dst.row_stride() as u32,
180            yuv::YuvRange::Limited,
181            yuv::YuvStandardMatrix::Bt709,
182            yuv::YuvConversionMode::Balanced,
183        )?)
184    }
185
186    fn convert_nv12_to_grey(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
187        assert_eq!(src.fourcc(), NV12);
188        assert_eq!(dst.fourcc(), GREY);
189        let src_map = src.tensor.map()?;
190        let mut dst_map = dst.tensor.map()?;
191        let y_stride = src.width() as u32;
192        let y_slice = src_map
193            .as_slice()
194            .split_at(y_stride as usize * src.height())
195            .0;
196        let src_chunks = y_slice.as_chunks::<8>();
197        let dst_chunks = dst_map.as_chunks_mut::<8>();
198        for (s, d) in src_chunks.0.iter().zip(dst_chunks.0) {
199            s.iter().zip(d).for_each(|(s, d)| *d = limit_to_full(*s));
200        }
201
202        for (s, d) in src_chunks.1.iter().zip(dst_chunks.1) {
203            *d = limit_to_full(*s);
204        }
205
206        Ok(())
207    }
208
209    fn convert_yuyv_to_rgb(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
210        assert_eq!(src.fourcc(), YUYV);
211        assert_eq!(dst.fourcc(), RGB);
212        let src = yuv::YuvPackedImage::<u8> {
213            yuy: &src.tensor.map()?,
214            yuy_stride: src.row_stride() as u32, // we assume packed yuyv
215            width: src.width() as u32,
216            height: src.height() as u32,
217        };
218
219        Ok(yuv::yuyv422_to_rgb(
220            &src,
221            dst.tensor.map()?.as_mut_slice(),
222            dst.width() as u32 * 3,
223            yuv::YuvRange::Limited,
224            yuv::YuvStandardMatrix::Bt709,
225        )?)
226    }
227
228    fn convert_yuyv_to_rgba(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
229        assert_eq!(src.fourcc(), YUYV);
230        assert_eq!(dst.fourcc(), RGBA);
231        let src = yuv::YuvPackedImage::<u8> {
232            yuy: &src.tensor.map()?,
233            yuy_stride: src.row_stride() as u32, // we assume packed yuyv
234            width: src.width() as u32,
235            height: src.height() as u32,
236        };
237
238        Ok(yuv::yuyv422_to_rgba(
239            &src,
240            dst.tensor.map()?.as_mut_slice(),
241            dst.row_stride() as u32,
242            yuv::YuvRange::Limited,
243            yuv::YuvStandardMatrix::Bt709,
244        )?)
245    }
246
247    fn convert_yuyv_to_8bps(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
248        assert_eq!(src.fourcc(), YUYV);
249        assert_eq!(dst.fourcc(), PLANAR_RGB);
250        let mut tmp = TensorImage::new(src.width(), src.height(), RGB, None)?;
251        Self::convert_yuyv_to_rgb(src, &mut tmp)?;
252        Self::convert_rgb_to_8bps(&tmp, dst)
253    }
254
255    fn convert_yuyv_to_prgba(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
256        assert_eq!(src.fourcc(), YUYV);
257        assert_eq!(dst.fourcc(), PLANAR_RGBA);
258        let mut tmp = TensorImage::new(src.width(), src.height(), RGB, None)?;
259        Self::convert_yuyv_to_rgb(src, &mut tmp)?;
260        Self::convert_rgb_to_prgba(&tmp, dst)
261    }
262
263    fn convert_yuyv_to_grey(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
264        assert_eq!(src.fourcc(), YUYV);
265        assert_eq!(dst.fourcc(), GREY);
266        let src_map = src.tensor.map()?;
267        let mut dst_map = dst.tensor.map()?;
268        let src_chunks = src_map.as_chunks::<16>();
269        let dst_chunks = dst_map.as_chunks_mut::<8>();
270        for (s, d) in src_chunks.0.iter().zip(dst_chunks.0) {
271            s.iter()
272                .step_by(2)
273                .zip(d)
274                .for_each(|(s, d)| *d = limit_to_full(*s));
275        }
276
277        for (s, d) in src_chunks.1.iter().step_by(2).zip(dst_chunks.1) {
278            *d = limit_to_full(*s);
279        }
280
281        Ok(())
282    }
283
284    fn convert_yuyv_to_nv16(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
285        assert_eq!(src.fourcc(), YUYV);
286        assert_eq!(dst.fourcc(), NV16);
287        let src_map = src.tensor.map()?;
288        let mut dst_map = dst.tensor.map()?;
289
290        let src_chunks = src_map.as_chunks::<2>().0;
291        let (y_plane, uv_plane) = dst_map.split_at_mut(dst.row_stride() * dst.height());
292
293        for ((s, y), uv) in src_chunks.iter().zip(y_plane).zip(uv_plane) {
294            *y = s[0];
295            *uv = s[1];
296        }
297        Ok(())
298    }
299
300    fn convert_vyuy_to_rgb(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
301        assert_eq!(src.fourcc(), VYUY);
302        assert_eq!(dst.fourcc(), RGB);
303        let src = yuv::YuvPackedImage::<u8> {
304            yuy: &src.tensor.map()?,
305            yuy_stride: src.row_stride() as u32,
306            width: src.width() as u32,
307            height: src.height() as u32,
308        };
309
310        Ok(yuv::vyuy422_to_rgb(
311            &src,
312            dst.tensor.map()?.as_mut_slice(),
313            dst.width() as u32 * 3,
314            yuv::YuvRange::Limited,
315            yuv::YuvStandardMatrix::Bt709,
316        )?)
317    }
318
319    fn convert_vyuy_to_rgba(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
320        assert_eq!(src.fourcc(), VYUY);
321        assert_eq!(dst.fourcc(), RGBA);
322        let src = yuv::YuvPackedImage::<u8> {
323            yuy: &src.tensor.map()?,
324            yuy_stride: src.row_stride() as u32,
325            width: src.width() as u32,
326            height: src.height() as u32,
327        };
328
329        Ok(yuv::vyuy422_to_rgba(
330            &src,
331            dst.tensor.map()?.as_mut_slice(),
332            dst.row_stride() as u32,
333            yuv::YuvRange::Limited,
334            yuv::YuvStandardMatrix::Bt709,
335        )?)
336    }
337
338    fn convert_vyuy_to_8bps(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
339        assert_eq!(src.fourcc(), VYUY);
340        assert_eq!(dst.fourcc(), PLANAR_RGB);
341        let mut tmp = TensorImage::new(src.width(), src.height(), RGB, None)?;
342        Self::convert_vyuy_to_rgb(src, &mut tmp)?;
343        Self::convert_rgb_to_8bps(&tmp, dst)
344    }
345
346    fn convert_vyuy_to_prgba(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
347        assert_eq!(src.fourcc(), VYUY);
348        assert_eq!(dst.fourcc(), PLANAR_RGBA);
349        let mut tmp = TensorImage::new(src.width(), src.height(), RGB, None)?;
350        Self::convert_vyuy_to_rgb(src, &mut tmp)?;
351        Self::convert_rgb_to_prgba(&tmp, dst)
352    }
353
354    fn convert_vyuy_to_grey(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
355        assert_eq!(src.fourcc(), VYUY);
356        assert_eq!(dst.fourcc(), GREY);
357        let src_map = src.tensor.map()?;
358        let mut dst_map = dst.tensor.map()?;
359        // VYUY byte order: [V, Y0, U, Y1] — Y at offsets 1, 3
360        let src_chunks = src_map.as_chunks::<16>();
361        let dst_chunks = dst_map.as_chunks_mut::<8>();
362        for (s, d) in src_chunks.0.iter().zip(dst_chunks.0) {
363            for (di, si) in (1..16).step_by(2).enumerate() {
364                d[di] = limit_to_full(s[si]);
365            }
366        }
367
368        for (di, si) in (1..src_chunks.1.len()).step_by(2).enumerate() {
369            dst_chunks.1[di] = limit_to_full(src_chunks.1[si]);
370        }
371
372        Ok(())
373    }
374
375    fn convert_vyuy_to_nv16(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
376        assert_eq!(src.fourcc(), VYUY);
377        assert_eq!(dst.fourcc(), NV16);
378        let src_map = src.tensor.map()?;
379        let mut dst_map = dst.tensor.map()?;
380
381        // VYUY byte order: [V, Y0, U, Y1] — per 4-byte macropixel
382        let src_chunks = src_map.as_chunks::<4>().0;
383        let (y_plane, uv_plane) = dst_map.split_at_mut(dst.row_stride() * dst.height());
384        let y_pairs = y_plane.as_chunks_mut::<2>().0;
385        let uv_pairs = uv_plane.as_chunks_mut::<2>().0;
386
387        for ((s, y), uv) in src_chunks.iter().zip(y_pairs).zip(uv_pairs) {
388            y[0] = s[1]; // Y0
389            y[1] = s[3]; // Y1
390            uv[0] = s[2]; // U
391            uv[1] = s[0]; // V
392        }
393        Ok(())
394    }
395
396    fn convert_grey_to_rgb(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
397        assert_eq!(src.fourcc(), GREY);
398        assert_eq!(dst.fourcc(), RGB);
399        let src = yuv::YuvGrayImage::<u8> {
400            y_plane: &src.tensor.map()?,
401            y_stride: src.row_stride() as u32, // we assume packed Y
402            width: src.width() as u32,
403            height: src.height() as u32,
404        };
405        Ok(yuv::yuv400_to_rgb(
406            &src,
407            dst.tensor.map()?.as_mut_slice(),
408            dst.row_stride() as u32,
409            yuv::YuvRange::Full,
410            yuv::YuvStandardMatrix::Bt709,
411        )?)
412    }
413
414    fn convert_grey_to_rgba(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
415        assert_eq!(src.fourcc(), GREY);
416        assert_eq!(dst.fourcc(), RGBA);
417        let src = yuv::YuvGrayImage::<u8> {
418            y_plane: &src.tensor.map()?,
419            y_stride: src.row_stride() as u32,
420            width: src.width() as u32,
421            height: src.height() as u32,
422        };
423        Ok(yuv::yuv400_to_rgba(
424            &src,
425            dst.tensor.map()?.as_mut_slice(),
426            dst.row_stride() as u32,
427            yuv::YuvRange::Full,
428            yuv::YuvStandardMatrix::Bt709,
429        )?)
430    }
431
432    fn convert_grey_to_8bps(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
433        assert_eq!(src.fourcc(), GREY);
434        assert_eq!(dst.fourcc(), PLANAR_RGB);
435
436        let src = src.tensor().map()?;
437        let src = src.as_slice();
438
439        let mut dst_map = dst.tensor().map()?;
440        let dst_ = dst_map.as_mut_slice();
441
442        let (dst0, dst1) = dst_.split_at_mut(dst.width() * dst.height());
443        let (dst1, dst2) = dst1.split_at_mut(dst.width() * dst.height());
444
445        rayon::scope(|s| {
446            s.spawn(|_| dst0.copy_from_slice(src));
447            s.spawn(|_| dst1.copy_from_slice(src));
448            s.spawn(|_| dst2.copy_from_slice(src));
449        });
450        Ok(())
451    }
452
453    fn convert_grey_to_prgba(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
454        assert_eq!(src.fourcc(), GREY);
455        assert_eq!(dst.fourcc(), PLANAR_RGBA);
456
457        let src = src.tensor().map()?;
458        let src = src.as_slice();
459
460        let mut dst_map = dst.tensor().map()?;
461        let dst_ = dst_map.as_mut_slice();
462
463        let (dst0, dst1) = dst_.split_at_mut(dst.width() * dst.height());
464        let (dst1, dst2) = dst1.split_at_mut(dst.width() * dst.height());
465        let (dst2, dst3) = dst2.split_at_mut(dst.width() * dst.height());
466        rayon::scope(|s| {
467            s.spawn(|_| dst0.copy_from_slice(src));
468            s.spawn(|_| dst1.copy_from_slice(src));
469            s.spawn(|_| dst2.copy_from_slice(src));
470            s.spawn(|_| dst3.fill(255));
471        });
472        Ok(())
473    }
474
475    fn convert_grey_to_yuyv(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
476        assert_eq!(src.fourcc(), GREY);
477        assert_eq!(dst.fourcc(), YUYV);
478
479        let src = src.tensor().map()?;
480        let src = src.as_slice();
481
482        let mut dst = dst.tensor().map()?;
483        let dst = dst.as_mut_slice();
484        for (s, d) in src
485            .as_chunks::<2>()
486            .0
487            .iter()
488            .zip(dst.as_chunks_mut::<4>().0.iter_mut())
489        {
490            d[0] = full_to_limit(s[0]);
491            d[1] = 128;
492
493            d[2] = full_to_limit(s[1]);
494            d[3] = 128;
495        }
496        Ok(())
497    }
498
499    fn convert_grey_to_nv16(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
500        assert_eq!(src.fourcc(), GREY);
501        assert_eq!(dst.fourcc(), NV16);
502
503        let src = src.tensor().map()?;
504        let src = src.as_slice();
505
506        let mut dst = dst.tensor().map()?;
507        let dst = dst.as_mut_slice();
508
509        for (s, d) in src.iter().zip(dst[0..src.len()].iter_mut()) {
510            *d = full_to_limit(*s);
511        }
512        dst[src.len()..].fill(128);
513
514        Ok(())
515    }
516
517    fn convert_rgba_to_rgb(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
518        assert_eq!(src.fourcc(), RGBA);
519        assert_eq!(dst.fourcc(), RGB);
520
521        Ok(yuv::rgba_to_rgb(
522            src.tensor.map()?.as_slice(),
523            (src.width() * src.channels()) as u32,
524            dst.tensor.map()?.as_mut_slice(),
525            (dst.width() * dst.channels()) as u32,
526            src.width() as u32,
527            src.height() as u32,
528        )?)
529    }
530
531    fn convert_rgba_to_grey(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
532        assert_eq!(src.fourcc(), RGBA);
533        assert_eq!(dst.fourcc(), GREY);
534
535        let mut dst = yuv::YuvGrayImageMut::<u8> {
536            y_plane: yuv::BufferStoreMut::Borrowed(&mut dst.tensor.map()?),
537            y_stride: dst.row_stride() as u32,
538            width: dst.width() as u32,
539            height: dst.height() as u32,
540        };
541        Ok(yuv::rgba_to_yuv400(
542            &mut dst,
543            src.tensor.map()?.as_slice(),
544            src.row_stride() as u32,
545            yuv::YuvRange::Full,
546            yuv::YuvStandardMatrix::Bt709,
547        )?)
548    }
549
550    fn convert_rgba_to_8bps(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
551        assert_eq!(src.fourcc(), RGBA);
552        assert_eq!(dst.fourcc(), PLANAR_RGB);
553
554        let src = src.tensor().map()?;
555        let src = src.as_slice();
556        let src = src.as_chunks::<4>().0;
557
558        let mut dst_map = dst.tensor().map()?;
559        let dst_ = dst_map.as_mut_slice();
560
561        let (dst0, dst1) = dst_.split_at_mut(dst.width() * dst.height());
562        let (dst1, dst2) = dst1.split_at_mut(dst.width() * dst.height());
563
564        src.par_iter()
565            .zip_eq(dst0)
566            .zip_eq(dst1)
567            .zip_eq(dst2)
568            .for_each(|(((s, d0), d1), d2)| {
569                *d0 = s[0];
570                *d1 = s[1];
571                *d2 = s[2];
572            });
573        Ok(())
574    }
575
576    fn convert_rgba_to_prgba(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
577        assert_eq!(src.fourcc(), RGBA);
578        assert_eq!(dst.fourcc(), PLANAR_RGBA);
579
580        let src = src.tensor().map()?;
581        let src = src.as_slice();
582        let src = src.as_chunks::<4>().0;
583
584        let mut dst_map = dst.tensor().map()?;
585        let dst_ = dst_map.as_mut_slice();
586
587        let (dst0, dst1) = dst_.split_at_mut(dst.width() * dst.height());
588        let (dst1, dst2) = dst1.split_at_mut(dst.width() * dst.height());
589        let (dst2, dst3) = dst2.split_at_mut(dst.width() * dst.height());
590
591        src.par_iter()
592            .zip_eq(dst0)
593            .zip_eq(dst1)
594            .zip_eq(dst2)
595            .zip_eq(dst3)
596            .for_each(|((((s, d0), d1), d2), d3)| {
597                *d0 = s[0];
598                *d1 = s[1];
599                *d2 = s[2];
600                *d3 = s[3];
601            });
602        Ok(())
603    }
604
    /// RGBA -> YUYV using a compile-time fixed-point Bt.709 limited-range
    /// RGB->YUV matrix; the source alpha byte is ignored. Chroma is averaged
    /// over each horizontal pixel pair (4:2:2 subsampling).
    fn convert_rgba_to_yuyv(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
        assert_eq!(src.fourcc(), RGBA);
        assert_eq!(dst.fourcc(), YUYV);

        let src = src.tensor().map()?;
        let src = src.as_slice();

        let mut dst = dst.tensor().map()?;
        let dst = dst.as_mut_slice();

        // compute quantized Bt.709 limited range RGB to YUV matrix
        const KR: f64 = 0.2126f64;
        const KB: f64 = 0.0722f64;
        const KG: f64 = 1.0 - KR - KB;
        // Fixed-point scale: coefficients carry 2^BIAS fractional precision.
        const BIAS: i32 = 20;

        // Luma row, scaled to the 219-step limited range (16..=235).
        const Y_R: i32 = (KR * (219 << BIAS) as f64 / 255.0).round() as i32;
        const Y_G: i32 = (KG * (219 << BIAS) as f64 / 255.0).round() as i32;
        const Y_B: i32 = (KB * (219 << BIAS) as f64 / 255.0).round() as i32;

        // Chroma rows, scaled to the 224-step range (16..=240).
        // NOTE(review): U_B and V_R use ceil() where the others round() —
        // presumably deliberate to keep the positive coefficient dominant;
        // confirm before changing.
        const U_R: i32 = (-KR / (KR + KG) / 2.0 * (224 << BIAS) as f64 / 255.0).round() as i32;
        const U_G: i32 = (-KG / (KR + KG) / 2.0 * (224 << BIAS) as f64 / 255.0).round() as i32;
        const U_B: i32 = (0.5_f64 * (224 << BIAS) as f64 / 255.0).ceil() as i32;

        const V_R: i32 = (0.5_f64 * (224 << BIAS) as f64 / 255.0).ceil() as i32;
        const V_G: i32 = (-KG / (KG + KB) / 2.0 * (224 << BIAS) as f64 / 255.0).round() as i32;
        const V_B: i32 = (-KB / (KG + KB) / 2.0 * (224 << BIAS) as f64 / 255.0).round() as i32;
        // Rounding terms: half an LSB at BIAS and BIAS+1 shifts respectively.
        const ROUND: i32 = 1 << (BIAS - 1);
        const ROUND2: i32 = 1 << BIAS;
        // Converts two RGBA pixels (8 bytes) into one YUYV macropixel
        // (4 bytes); chroma sums both pixels then shifts by BIAS+1 to average.
        let process_rgba_to_yuyv = |s: &[u8; 8], d: &mut [u8; 4]| {
            let [r0, g0, b0, _, r1, g1, b1, _] = *s;
            let r0 = r0 as i32;
            let g0 = g0 as i32;
            let b0 = b0 as i32;
            let r1 = r1 as i32;
            let g1 = g1 as i32;
            let b1 = b1 as i32;
            d[0] = ((Y_R * r0 + Y_G * g0 + Y_B * b0 + ROUND).shr(BIAS) + 16) as u8;
            d[1] = ((U_R * r0 + U_G * g0 + U_B * b0 + U_R * r1 + U_G * g1 + U_B * b1 + ROUND2)
                .shr(BIAS + 1)
                + 128) as u8;
            d[2] = ((Y_R * r1 + Y_G * g1 + Y_B * b1 + ROUND).shr(BIAS) + 16) as u8;
            d[3] = ((V_R * r0 + V_G * g0 + V_B * b0 + V_R * r1 + V_G * g1 + V_B * b1 + ROUND2)
                .shr(BIAS + 1)
                + 128) as u8;
        };

        // Outer chunks of 32 macropixels keep the hot loop over fixed-size
        // arrays; the remainder is handled with the same closure below.
        let src = src.as_chunks::<{ 8 * 32 }>();
        let dst = dst.as_chunks_mut::<{ 4 * 32 }>();

        for (s, d) in src.0.iter().zip(dst.0.iter_mut()) {
            let s = s.as_chunks::<8>().0;
            let d = d.as_chunks_mut::<4>().0;
            for (s, d) in s.iter().zip(d.iter_mut()) {
                process_rgba_to_yuyv(s, d);
            }
        }

        let s = src.1.as_chunks::<8>().0;
        let d = dst.1.as_chunks_mut::<4>().0;
        for (s, d) in s.iter().zip(d.iter_mut()) {
            process_rgba_to_yuyv(s, d);
        }

        Ok(())
    }
671
672    fn convert_rgba_to_nv16(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
673        assert_eq!(src.fourcc(), RGBA);
674        assert_eq!(dst.fourcc(), NV16);
675
676        let mut dst_map = dst.tensor().map()?;
677
678        let (y_plane, uv_plane) = dst_map.split_at_mut(dst.width() * dst.height());
679        let mut bi_planar_image = yuv::YuvBiPlanarImageMut::<u8> {
680            y_plane: yuv::BufferStoreMut::Borrowed(y_plane),
681            y_stride: dst.width() as u32,
682            uv_plane: yuv::BufferStoreMut::Borrowed(uv_plane),
683            uv_stride: dst.width() as u32,
684            width: dst.width() as u32,
685            height: dst.height() as u32,
686        };
687
688        Ok(yuv::rgba_to_yuv_nv16(
689            &mut bi_planar_image,
690            src.tensor.map()?.as_slice(),
691            src.row_stride() as u32,
692            yuv::YuvRange::Limited,
693            yuv::YuvStandardMatrix::Bt709,
694            yuv::YuvConversionMode::Balanced,
695        )?)
696    }
697
698    fn convert_rgb_to_rgba(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
699        assert_eq!(src.fourcc(), RGB);
700        assert_eq!(dst.fourcc(), RGBA);
701
702        Ok(yuv::rgb_to_rgba(
703            src.tensor.map()?.as_slice(),
704            (src.width() * src.channels()) as u32,
705            dst.tensor.map()?.as_mut_slice(),
706            (dst.width() * dst.channels()) as u32,
707            src.width() as u32,
708            src.height() as u32,
709        )?)
710    }
711
712    fn convert_rgb_to_grey(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
713        assert_eq!(src.fourcc(), RGB);
714        assert_eq!(dst.fourcc(), GREY);
715
716        let mut dst = yuv::YuvGrayImageMut::<u8> {
717            y_plane: yuv::BufferStoreMut::Borrowed(&mut dst.tensor.map()?),
718            y_stride: dst.row_stride() as u32,
719            width: dst.width() as u32,
720            height: dst.height() as u32,
721        };
722        Ok(yuv::rgb_to_yuv400(
723            &mut dst,
724            src.tensor.map()?.as_slice(),
725            src.row_stride() as u32,
726            yuv::YuvRange::Full,
727            yuv::YuvStandardMatrix::Bt709,
728        )?)
729    }
730
731    fn convert_rgb_to_8bps(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
732        assert_eq!(src.fourcc(), RGB);
733        assert_eq!(dst.fourcc(), PLANAR_RGB);
734
735        let src = src.tensor().map()?;
736        let src = src.as_slice();
737        let src = src.as_chunks::<3>().0;
738
739        let mut dst_map = dst.tensor().map()?;
740        let dst_ = dst_map.as_mut_slice();
741
742        let (dst0, dst1) = dst_.split_at_mut(dst.width() * dst.height());
743        let (dst1, dst2) = dst1.split_at_mut(dst.width() * dst.height());
744
745        src.par_iter()
746            .zip_eq(dst0)
747            .zip_eq(dst1)
748            .zip_eq(dst2)
749            .for_each(|(((s, d0), d1), d2)| {
750                *d0 = s[0];
751                *d1 = s[1];
752                *d2 = s[2];
753            });
754        Ok(())
755    }
756
757    fn convert_rgb_to_prgba(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
758        assert_eq!(src.fourcc(), RGB);
759        assert_eq!(dst.fourcc(), PLANAR_RGBA);
760
761        let src = src.tensor().map()?;
762        let src = src.as_slice();
763        let src = src.as_chunks::<3>().0;
764
765        let mut dst_map = dst.tensor().map()?;
766        let dst_ = dst_map.as_mut_slice();
767
768        let (dst0, dst1) = dst_.split_at_mut(dst.width() * dst.height());
769        let (dst1, dst2) = dst1.split_at_mut(dst.width() * dst.height());
770        let (dst2, dst3) = dst2.split_at_mut(dst.width() * dst.height());
771
772        rayon::scope(|s| {
773            s.spawn(|_| {
774                src.par_iter()
775                    .zip_eq(dst0)
776                    .zip_eq(dst1)
777                    .zip_eq(dst2)
778                    .for_each(|(((s, d0), d1), d2)| {
779                        *d0 = s[0];
780                        *d1 = s[1];
781                        *d2 = s[2];
782                    })
783            });
784            s.spawn(|_| dst3.fill(255));
785        });
786        Ok(())
787    }
788
    /// RGB -> YUYV using a compile-time fixed-point Bt.709 limited-range
    /// RGB->YUV matrix. Chroma is averaged over each horizontal pixel pair
    /// (4:2:2 subsampling). Mirrors `convert_rgba_to_yuyv` minus the alpha
    /// byte.
    fn convert_rgb_to_yuyv(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
        assert_eq!(src.fourcc(), RGB);
        assert_eq!(dst.fourcc(), YUYV);

        let src = src.tensor().map()?;
        let src = src.as_slice();

        let mut dst = dst.tensor().map()?;
        let dst = dst.as_mut_slice();

        // compute quantized Bt.709 limited range RGB to YUV matrix
        // Fixed-point scale: coefficients carry 2^BIAS fractional precision.
        const BIAS: i32 = 20;
        const KR: f64 = 0.2126f64;
        const KB: f64 = 0.0722f64;
        const KG: f64 = 1.0 - KR - KB;
        // Luma row, scaled to the 219-step limited range (16..=235).
        const Y_R: i32 = (KR * (219 << BIAS) as f64 / 255.0).round() as i32;
        const Y_G: i32 = (KG * (219 << BIAS) as f64 / 255.0).round() as i32;
        const Y_B: i32 = (KB * (219 << BIAS) as f64 / 255.0).round() as i32;

        // Chroma rows, scaled to the 224-step range (16..=240).
        // NOTE(review): U_B and V_R use ceil() where the others round() —
        // matches convert_rgba_to_yuyv; confirm intent before changing.
        const U_R: i32 = (-KR / (KR + KG) / 2.0 * (224 << BIAS) as f64 / 255.0).round() as i32;
        const U_G: i32 = (-KG / (KR + KG) / 2.0 * (224 << BIAS) as f64 / 255.0).round() as i32;
        const U_B: i32 = (0.5_f64 * (224 << BIAS) as f64 / 255.0).ceil() as i32;

        const V_R: i32 = (0.5_f64 * (224 << BIAS) as f64 / 255.0).ceil() as i32;
        const V_G: i32 = (-KG / (KG + KB) / 2.0 * (224 << BIAS) as f64 / 255.0).round() as i32;
        const V_B: i32 = (-KB / (KG + KB) / 2.0 * (224 << BIAS) as f64 / 255.0).round() as i32;
        // Rounding terms: half an LSB at BIAS and BIAS+1 shifts respectively.
        const ROUND: i32 = 1 << (BIAS - 1);
        const ROUND2: i32 = 1 << BIAS;
        // Converts two RGB pixels (6 bytes) into one YUYV macropixel
        // (4 bytes); chroma sums both pixels then shifts by BIAS+1 to average.
        let process_rgb_to_yuyv = |s: &[u8; 6], d: &mut [u8; 4]| {
            let [r0, g0, b0, r1, g1, b1] = *s;
            let r0 = r0 as i32;
            let g0 = g0 as i32;
            let b0 = b0 as i32;
            let r1 = r1 as i32;
            let g1 = g1 as i32;
            let b1 = b1 as i32;
            d[0] = ((Y_R * r0 + Y_G * g0 + Y_B * b0 + ROUND).shr(BIAS) + 16) as u8;
            d[1] = ((U_R * r0 + U_G * g0 + U_B * b0 + U_R * r1 + U_G * g1 + U_B * b1 + ROUND2)
                .shr(BIAS + 1)
                + 128) as u8;
            d[2] = ((Y_R * r1 + Y_G * g1 + Y_B * b1 + ROUND).shr(BIAS) + 16) as u8;
            d[3] = ((V_R * r0 + V_G * g0 + V_B * b0 + V_R * r1 + V_G * g1 + V_B * b1 + ROUND2)
                .shr(BIAS + 1)
                + 128) as u8;
        };

        // Outer chunks of 32 macropixels keep the hot loop over fixed-size
        // arrays; the remainder is handled with the same closure below.
        let src = src.as_chunks::<{ 6 * 32 }>();
        let dst = dst.as_chunks_mut::<{ 4 * 32 }>();
        for (s, d) in src.0.iter().zip(dst.0.iter_mut()) {
            let s = s.as_chunks::<6>().0;
            let d = d.as_chunks_mut::<4>().0;
            for (s, d) in s.iter().zip(d.iter_mut()) {
                process_rgb_to_yuyv(s, d);
            }
        }

        let s = src.1.as_chunks::<6>().0;
        let d = dst.1.as_chunks_mut::<4>().0;
        for (s, d) in s.iter().zip(d.iter_mut()) {
            process_rgb_to_yuyv(s, d);
        }

        Ok(())
    }
853
854    fn convert_rgb_to_nv16(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
855        assert_eq!(src.fourcc(), RGB);
856        assert_eq!(dst.fourcc(), NV16);
857
858        let mut dst_map = dst.tensor().map()?;
859
860        let (y_plane, uv_plane) = dst_map.split_at_mut(dst.width() * dst.height());
861        let mut bi_planar_image = yuv::YuvBiPlanarImageMut::<u8> {
862            y_plane: yuv::BufferStoreMut::Borrowed(y_plane),
863            y_stride: dst.width() as u32,
864            uv_plane: yuv::BufferStoreMut::Borrowed(uv_plane),
865            uv_stride: dst.width() as u32,
866            width: dst.width() as u32,
867            height: dst.height() as u32,
868        };
869
870        Ok(yuv::rgb_to_yuv_nv16(
871            &mut bi_planar_image,
872            src.tensor.map()?.as_slice(),
873            src.row_stride() as u32,
874            yuv::YuvRange::Limited,
875            yuv::YuvStandardMatrix::Bt709,
876            yuv::YuvConversionMode::Balanced,
877        )?)
878    }
879
880    fn copy_image(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
881        assert_eq!(src.fourcc(), dst.fourcc());
882        dst.tensor().map()?.copy_from_slice(&src.tensor().map()?);
883        Ok(())
884    }
885
886    fn convert_nv16_to_rgb(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
887        assert_eq!(src.fourcc(), NV16);
888        assert_eq!(dst.fourcc(), RGB);
889        let map = src.tensor.map()?;
890        let y_stride = src.width() as u32;
891        let uv_stride = src.width() as u32;
892        let slices = map.as_slice().split_at(y_stride as usize * src.height());
893
894        let src = yuv::YuvBiPlanarImage {
895            y_plane: slices.0,
896            y_stride,
897            uv_plane: slices.1,
898            uv_stride,
899            width: src.width() as u32,
900            height: src.height() as u32,
901        };
902
903        Ok(yuv::yuv_nv16_to_rgb(
904            &src,
905            dst.tensor.map()?.as_mut_slice(),
906            dst.row_stride() as u32,
907            yuv::YuvRange::Limited,
908            yuv::YuvStandardMatrix::Bt709,
909            yuv::YuvConversionMode::Balanced,
910        )?)
911    }
912
913    fn convert_nv16_to_rgba(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
914        assert_eq!(src.fourcc(), NV16);
915        assert_eq!(dst.fourcc(), RGBA);
916        let map = src.tensor.map()?;
917        let y_stride = src.width() as u32;
918        let uv_stride = src.width() as u32;
919        let slices = map.as_slice().split_at(y_stride as usize * src.height());
920
921        let src = yuv::YuvBiPlanarImage {
922            y_plane: slices.0,
923            y_stride,
924            uv_plane: slices.1,
925            uv_stride,
926            width: src.width() as u32,
927            height: src.height() as u32,
928        };
929
930        Ok(yuv::yuv_nv16_to_rgba(
931            &src,
932            dst.tensor.map()?.as_mut_slice(),
933            dst.row_stride() as u32,
934            yuv::YuvRange::Limited,
935            yuv::YuvStandardMatrix::Bt709,
936            yuv::YuvConversionMode::Balanced,
937        )?)
938    }
939
940    fn convert_8bps_to_rgb(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
941        assert_eq!(src.fourcc(), PLANAR_RGB);
942        assert_eq!(dst.fourcc(), RGB);
943
944        let src_map = src.tensor().map()?;
945        let src_ = src_map.as_slice();
946
947        let (src0, src1) = src_.split_at(src.width() * src.height());
948        let (src1, src2) = src1.split_at(src.width() * src.height());
949
950        let mut dst_map = dst.tensor().map()?;
951        let dst_ = dst_map.as_mut_slice();
952
953        src0.par_iter()
954            .zip_eq(src1)
955            .zip_eq(src2)
956            .zip_eq(dst_.as_chunks_mut::<3>().0.par_iter_mut())
957            .for_each(|(((s0, s1), s2), d)| {
958                d[0] = *s0;
959                d[1] = *s1;
960                d[2] = *s2;
961            });
962        Ok(())
963    }
964
965    fn convert_8bps_to_rgba(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
966        assert_eq!(src.fourcc(), PLANAR_RGB);
967        assert_eq!(dst.fourcc(), RGBA);
968
969        let src_map = src.tensor().map()?;
970        let src_ = src_map.as_slice();
971
972        let (src0, src1) = src_.split_at(src.width() * src.height());
973        let (src1, src2) = src1.split_at(src.width() * src.height());
974
975        let mut dst_map = dst.tensor().map()?;
976        let dst_ = dst_map.as_mut_slice();
977
978        src0.par_iter()
979            .zip_eq(src1)
980            .zip_eq(src2)
981            .zip_eq(dst_.as_chunks_mut::<4>().0.par_iter_mut())
982            .for_each(|(((s0, s1), s2), d)| {
983                d[0] = *s0;
984                d[1] = *s1;
985                d[2] = *s2;
986                d[3] = 255;
987            });
988        Ok(())
989    }
990
991    fn convert_prgba_to_rgb(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
992        assert_eq!(src.fourcc(), PLANAR_RGBA);
993        assert_eq!(dst.fourcc(), RGB);
994
995        let src_map = src.tensor().map()?;
996        let src_ = src_map.as_slice();
997
998        let (src0, src1) = src_.split_at(src.width() * src.height());
999        let (src1, src2) = src1.split_at(src.width() * src.height());
1000        let (src2, _src3) = src2.split_at(src.width() * src.height());
1001
1002        let mut dst_map = dst.tensor().map()?;
1003        let dst_ = dst_map.as_mut_slice();
1004
1005        src0.par_iter()
1006            .zip_eq(src1)
1007            .zip_eq(src2)
1008            .zip_eq(dst_.as_chunks_mut::<3>().0.par_iter_mut())
1009            .for_each(|(((s0, s1), s2), d)| {
1010                d[0] = *s0;
1011                d[1] = *s1;
1012                d[2] = *s2;
1013            });
1014        Ok(())
1015    }
1016
1017    fn convert_prgba_to_rgba(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
1018        assert_eq!(src.fourcc(), PLANAR_RGBA);
1019        assert_eq!(dst.fourcc(), RGBA);
1020
1021        let src_map = src.tensor().map()?;
1022        let src_ = src_map.as_slice();
1023
1024        let (src0, src1) = src_.split_at(src.width() * src.height());
1025        let (src1, src2) = src1.split_at(src.width() * src.height());
1026        let (src2, src3) = src2.split_at(src.width() * src.height());
1027
1028        let mut dst_map = dst.tensor().map()?;
1029        let dst_ = dst_map.as_mut_slice();
1030
1031        src0.par_iter()
1032            .zip_eq(src1)
1033            .zip_eq(src2)
1034            .zip_eq(src3)
1035            .zip_eq(dst_.as_chunks_mut::<4>().0.par_iter_mut())
1036            .for_each(|((((s0, s1), s2), s3), d)| {
1037                d[0] = *s0;
1038                d[1] = *s1;
1039                d[2] = *s2;
1040                d[3] = *s3;
1041            });
1042        Ok(())
1043    }
1044
1045    pub(crate) fn support_conversion(src: FourCharCode, dst: FourCharCode) -> bool {
1046        matches!(
1047            (src, dst),
1048            (NV12, RGB)
1049                | (NV12, RGBA)
1050                | (NV12, GREY)
1051                | (NV16, RGB)
1052                | (NV16, RGBA)
1053                | (YUYV, RGB)
1054                | (YUYV, RGBA)
1055                | (YUYV, GREY)
1056                | (YUYV, YUYV)
1057                | (YUYV, PLANAR_RGB)
1058                | (YUYV, PLANAR_RGBA)
1059                | (YUYV, NV16)
1060                | (VYUY, RGB)
1061                | (VYUY, RGBA)
1062                | (VYUY, GREY)
1063                | (VYUY, VYUY)
1064                | (VYUY, PLANAR_RGB)
1065                | (VYUY, PLANAR_RGBA)
1066                | (VYUY, NV16)
1067                | (RGBA, RGB)
1068                | (RGBA, RGBA)
1069                | (RGBA, GREY)
1070                | (RGBA, YUYV)
1071                | (RGBA, PLANAR_RGB)
1072                | (RGBA, PLANAR_RGBA)
1073                | (RGBA, NV16)
1074                | (RGB, RGB)
1075                | (RGB, RGBA)
1076                | (RGB, GREY)
1077                | (RGB, YUYV)
1078                | (RGB, PLANAR_RGB)
1079                | (RGB, PLANAR_RGBA)
1080                | (RGB, NV16)
1081                | (GREY, RGB)
1082                | (GREY, RGBA)
1083                | (GREY, GREY)
1084                | (GREY, YUYV)
1085                | (GREY, PLANAR_RGB)
1086                | (GREY, PLANAR_RGBA)
1087                | (GREY, NV16)
1088        )
1089    }
1090
    /// Converts `src` into the pixel format of `dst` using the CPU converters
    /// in this file.
    ///
    /// Panics if the two images do not share the same width and height (only
    /// the pixel format may differ). Returns [`Error::NotSupported`] for any
    /// (src, dst) fourcc pair without a converter.
    pub(crate) fn convert_format(src: &TensorImage, dst: &mut TensorImage) -> Result<()> {
        let _timer = FunctionTimer::new(format!(
            "ImageProcessor::convert_format {} to {}",
            src.fourcc().display(),
            dst.fourcc().display()
        ));
        // shapes should be equal; only the format differs
        assert_eq!(src.height(), dst.height());
        assert_eq!(src.width(), dst.width());

        match (src.fourcc(), dst.fourcc()) {
            (NV12, RGB) => Self::convert_nv12_to_rgb(src, dst),
            (NV12, RGBA) => Self::convert_nv12_to_rgba(src, dst),
            (NV12, GREY) => Self::convert_nv12_to_grey(src, dst),
            (YUYV, RGB) => Self::convert_yuyv_to_rgb(src, dst),
            (YUYV, RGBA) => Self::convert_yuyv_to_rgba(src, dst),
            (YUYV, GREY) => Self::convert_yuyv_to_grey(src, dst),
            (YUYV, YUYV) => Self::copy_image(src, dst),
            (YUYV, PLANAR_RGB) => Self::convert_yuyv_to_8bps(src, dst),
            (YUYV, PLANAR_RGBA) => Self::convert_yuyv_to_prgba(src, dst),
            (YUYV, NV16) => Self::convert_yuyv_to_nv16(src, dst),
            (VYUY, RGB) => Self::convert_vyuy_to_rgb(src, dst),
            (VYUY, RGBA) => Self::convert_vyuy_to_rgba(src, dst),
            (VYUY, GREY) => Self::convert_vyuy_to_grey(src, dst),
            (VYUY, VYUY) => Self::copy_image(src, dst),
            (VYUY, PLANAR_RGB) => Self::convert_vyuy_to_8bps(src, dst),
            (VYUY, PLANAR_RGBA) => Self::convert_vyuy_to_prgba(src, dst),
            (VYUY, NV16) => Self::convert_vyuy_to_nv16(src, dst),
            (RGBA, RGB) => Self::convert_rgba_to_rgb(src, dst),
            (RGBA, RGBA) => Self::copy_image(src, dst),
            (RGBA, GREY) => Self::convert_rgba_to_grey(src, dst),
            (RGBA, YUYV) => Self::convert_rgba_to_yuyv(src, dst),
            (RGBA, PLANAR_RGB) => Self::convert_rgba_to_8bps(src, dst),
            (RGBA, PLANAR_RGBA) => Self::convert_rgba_to_prgba(src, dst),
            (RGBA, NV16) => Self::convert_rgba_to_nv16(src, dst),
            (RGB, RGB) => Self::copy_image(src, dst),
            (RGB, RGBA) => Self::convert_rgb_to_rgba(src, dst),
            (RGB, GREY) => Self::convert_rgb_to_grey(src, dst),
            (RGB, YUYV) => Self::convert_rgb_to_yuyv(src, dst),
            (RGB, PLANAR_RGB) => Self::convert_rgb_to_8bps(src, dst),
            (RGB, PLANAR_RGBA) => Self::convert_rgb_to_prgba(src, dst),
            (RGB, NV16) => Self::convert_rgb_to_nv16(src, dst),
            (GREY, RGB) => Self::convert_grey_to_rgb(src, dst),
            (GREY, RGBA) => Self::convert_grey_to_rgba(src, dst),
            (GREY, GREY) => Self::copy_image(src, dst),
            (GREY, YUYV) => Self::convert_grey_to_yuyv(src, dst),
            (GREY, PLANAR_RGB) => Self::convert_grey_to_8bps(src, dst),
            (GREY, PLANAR_RGBA) => Self::convert_grey_to_prgba(src, dst),
            (GREY, NV16) => Self::convert_grey_to_nv16(src, dst),

            // the following converts are added for use in testing
            // NOTE(review): support_conversion() advertises the NV16 source
            // pairs but not the PLANAR_RGB/PLANAR_RGBA ones below — confirm
            // the planar-source conversions are intentionally test-only.
            (NV16, RGB) => Self::convert_nv16_to_rgb(src, dst),
            (NV16, RGBA) => Self::convert_nv16_to_rgba(src, dst),
            (PLANAR_RGB, RGB) => Self::convert_8bps_to_rgb(src, dst),
            (PLANAR_RGB, RGBA) => Self::convert_8bps_to_rgba(src, dst),
            (PLANAR_RGBA, RGB) => Self::convert_prgba_to_rgb(src, dst),
            (PLANAR_RGBA, RGBA) => Self::convert_prgba_to_rgba(src, dst),
            (s, d) => Err(Error::NotSupported(format!(
                "Conversion from {} to {}",
                s.display(),
                d.display()
            ))),
        }
    }
1155
1156    /// Generic RGB to PLANAR_RGB conversion that works with any TensorImageDst.
1157    fn convert_rgb_to_planar_rgb_generic<D: TensorImageDst>(
1158        src: &TensorImage,
1159        dst: &mut D,
1160    ) -> Result<()> {
1161        assert_eq!(src.fourcc(), RGB);
1162        assert_eq!(dst.fourcc(), PLANAR_RGB);
1163
1164        let src = src.tensor().map()?;
1165        let src = src.as_slice();
1166        let src = src.as_chunks::<3>().0;
1167
1168        let mut dst_map = dst.tensor_mut().map()?;
1169        let dst_ = dst_map.as_mut_slice();
1170
1171        let (dst0, dst1) = dst_.split_at_mut(dst.width() * dst.height());
1172        let (dst1, dst2) = dst1.split_at_mut(dst.width() * dst.height());
1173
1174        src.par_iter()
1175            .zip_eq(dst0)
1176            .zip_eq(dst1)
1177            .zip_eq(dst2)
1178            .for_each(|(((s, d0), d1), d2)| {
1179                *d0 = s[0];
1180                *d1 = s[1];
1181                *d2 = s[2];
1182            });
1183        Ok(())
1184    }
1185
1186    /// Generic RGBA to PLANAR_RGB conversion that works with any
1187    /// TensorImageDst.
1188    fn convert_rgba_to_planar_rgb_generic<D: TensorImageDst>(
1189        src: &TensorImage,
1190        dst: &mut D,
1191    ) -> Result<()> {
1192        assert_eq!(src.fourcc(), RGBA);
1193        assert_eq!(dst.fourcc(), PLANAR_RGB);
1194
1195        let src = src.tensor().map()?;
1196        let src = src.as_slice();
1197        let src = src.as_chunks::<4>().0;
1198
1199        let mut dst_map = dst.tensor_mut().map()?;
1200        let dst_ = dst_map.as_mut_slice();
1201
1202        let (dst0, dst1) = dst_.split_at_mut(dst.width() * dst.height());
1203        let (dst1, dst2) = dst1.split_at_mut(dst.width() * dst.height());
1204
1205        src.par_iter()
1206            .zip_eq(dst0)
1207            .zip_eq(dst1)
1208            .zip_eq(dst2)
1209            .for_each(|(((s, d0), d1), d2)| {
1210                *d0 = s[0];
1211                *d1 = s[1];
1212                *d2 = s[2];
1213            });
1214        Ok(())
1215    }
1216
1217    /// Generic copy for same-format images that works with any TensorImageDst.
1218    fn copy_image_generic<D: TensorImageDst>(src: &TensorImage, dst: &mut D) -> Result<()> {
1219        assert_eq!(src.fourcc(), dst.fourcc());
1220        dst.tensor_mut()
1221            .map()?
1222            .copy_from_slice(&src.tensor().map()?);
1223        Ok(())
1224    }
1225
1226    /// Format conversion that writes to a generic TensorImageDst.
1227    /// Supports common zero-copy preprocessing cases.
1228    pub(crate) fn convert_format_generic<D: TensorImageDst>(
1229        src: &TensorImage,
1230        dst: &mut D,
1231    ) -> Result<()> {
1232        let _timer = FunctionTimer::new(format!(
1233            "ImageProcessor::convert_format_generic {} to {}",
1234            src.fourcc().display(),
1235            dst.fourcc().display()
1236        ));
1237        assert_eq!(src.height(), dst.height());
1238        assert_eq!(src.width(), dst.width());
1239
1240        match (src.fourcc(), dst.fourcc()) {
1241            (RGB, PLANAR_RGB) => Self::convert_rgb_to_planar_rgb_generic(src, dst),
1242            (RGBA, PLANAR_RGB) => Self::convert_rgba_to_planar_rgb_generic(src, dst),
1243            (f1, f2) if f1 == f2 => Self::copy_image_generic(src, dst),
1244            (s, d) => Err(Error::NotSupported(format!(
1245                "Generic conversion from {} to {} not supported",
1246                s.display(),
1247                d.display()
1248            ))),
1249        }
1250    }
1251
    /// Resizes `src` into `dst` while applying the requested rotation, flip,
    /// and crop.
    ///
    /// The images should be in a packed format (RGB/RGBA/GREY) for correct
    /// output; if the pixel is not 1, 3, or 4 channels an error is returned.
    /// The src and dest images must have the same fourcc, otherwise the
    /// function will panic.
    fn resize_flip_rotate(
        &mut self,
        src: &TensorImage,
        dst: &mut TensorImage,
        rotation: Rotation,
        flip: Flip,
        crop: Crop,
    ) -> Result<()> {
        let _timer = FunctionTimer::new(format!(
            "ImageProcessor::resize_flip_rotate {}x{} to {}x{} {}",
            src.width(),
            src.height(),
            dst.width(),
            dst.height(),
            dst.fourcc().display()
        ));
        assert_eq!(src.fourcc(), dst.fourcc());

        // Map the channel count onto a fast_image_resize pixel type.
        let src_type = match src.channels() {
            1 => fast_image_resize::PixelType::U8,
            3 => fast_image_resize::PixelType::U8x3,
            4 => fast_image_resize::PixelType::U8x4,
            _ => {
                return Err(Error::NotImplemented(
                    "Unsupported source image format".to_string(),
                ));
            }
        };

        let mut src_map = src.tensor().map()?;

        let mut dst_map = dst.tensor().map()?;

        // A source-side crop is applied through the resizer options.
        let options = if let Some(crop) = crop.src_rect {
            self.options.crop(
                crop.left as f64,
                crop.top as f64,
                crop.width as f64,
                crop.height as f64,
            )
        } else {
            self.options
        };

        // Destination rectangle defaults to the full destination image.
        let mut dst_rect = crop.dst_rect.unwrap_or_else(|| Rect {
            left: 0,
            top: 0,
            width: dst.width(),
            height: dst.height(),
        });

        // adjust crop box for rotation/flip so the resize below writes into
        // the region that maps back to the caller's requested rectangle
        Self::adjust_dest_rect_for_rotate_flip(&mut dst_rect, dst, rotation, flip);

        // Resizing is needed unless the dimensions match and neither crop
        // rectangle excludes any part of its image.
        let needs_resize = src.width() != dst.width()
            || src.height() != dst.height()
            || crop.src_rect.is_some_and(|crop| {
                crop != Rect {
                    left: 0,
                    top: 0,
                    width: src.width(),
                    height: src.height(),
                }
            })
            || crop.dst_rect.is_some_and(|crop| {
                crop != Rect {
                    left: 0,
                    top: 0,
                    width: dst.width(),
                    height: dst.height(),
                }
            });

        if needs_resize {
            let src_view = fast_image_resize::images::Image::from_slice_u8(
                src.width() as u32,
                src.height() as u32,
                &mut src_map,
                src_type,
            )?;

            match (rotation, flip) {
                // No orientation change: resize straight into the destination
                // crop rectangle.
                (Rotation::None, Flip::None) => {
                    let mut dst_view = fast_image_resize::images::Image::from_slice_u8(
                        dst.width() as u32,
                        dst.height() as u32,
                        &mut dst_map,
                        src_type,
                    )?;

                    let mut dst_view = fast_image_resize::images::CroppedImageMut::new(
                        &mut dst_view,
                        dst_rect.left as u32,
                        dst_rect.top as u32,
                        dst_rect.width as u32,
                        dst_rect.height as u32,
                    )?;

                    self.resizer.resize(&src_view, &mut dst_view, &options)?;
                }
                // Quarter-turn rotations: resize into a temporary image with
                // swapped width/height, then rotate/flip it into place.
                // NOTE(review): tmp is sized as dst.row_stride() * dst.height(),
                // which matches the transposed view only when rows carry no
                // padding — TODO confirm row_stride == width * channels here.
                (Rotation::Clockwise90, _) | (Rotation::CounterClockwise90, _) => {
                    let mut tmp = vec![0; dst.row_stride() * dst.height()];
                    let mut tmp_view = fast_image_resize::images::Image::from_slice_u8(
                        dst.height() as u32,
                        dst.width() as u32,
                        &mut tmp,
                        src_type,
                    )?;

                    let mut tmp_view = fast_image_resize::images::CroppedImageMut::new(
                        &mut tmp_view,
                        dst_rect.left as u32,
                        dst_rect.top as u32,
                        dst_rect.width as u32,
                        dst_rect.height as u32,
                    )?;

                    self.resizer.resize(&src_view, &mut tmp_view, &options)?;
                    Self::flip_rotate_ndarray(&tmp, &mut dst_map, dst, rotation, flip)?;
                }
                // Flip only, or 180 degree rotation: the temporary keeps the
                // destination orientation; resize into it, then flip/rotate
                // into the real destination. (Rotation::None with Flip::None
                // was handled by the first arm.)
                (Rotation::None, _) | (Rotation::Rotate180, _) => {
                    let mut tmp = vec![0; dst.row_stride() * dst.height()];
                    let mut tmp_view = fast_image_resize::images::Image::from_slice_u8(
                        dst.width() as u32,
                        dst.height() as u32,
                        &mut tmp,
                        src_type,
                    )?;

                    let mut tmp_view = fast_image_resize::images::CroppedImageMut::new(
                        &mut tmp_view,
                        dst_rect.left as u32,
                        dst_rect.top as u32,
                        dst_rect.width as u32,
                        dst_rect.height as u32,
                    )?;

                    self.resizer.resize(&src_view, &mut tmp_view, &options)?;
                    Self::flip_rotate_ndarray(&tmp, &mut dst_map, dst, rotation, flip)?;
                }
            }
        } else {
            // Geometry already matches: only the orientation changes.
            Self::flip_rotate_ndarray(&src_map, &mut dst_map, dst, rotation, flip)?;
        }
        Ok(())
    }
1402
1403    fn adjust_dest_rect_for_rotate_flip(
1404        crop: &mut Rect,
1405        dst: &TensorImage,
1406        rot: Rotation,
1407        flip: Flip,
1408    ) {
1409        match rot {
1410            Rotation::None => {}
1411            Rotation::Clockwise90 => {
1412                *crop = Rect {
1413                    left: crop.top,
1414                    top: dst.width() - crop.left - crop.width,
1415                    width: crop.height,
1416                    height: crop.width,
1417                }
1418            }
1419            Rotation::Rotate180 => {
1420                *crop = Rect {
1421                    left: dst.width() - crop.left - crop.width,
1422                    top: dst.height() - crop.top - crop.height,
1423                    width: crop.width,
1424                    height: crop.height,
1425                }
1426            }
1427            Rotation::CounterClockwise90 => {
1428                *crop = Rect {
1429                    left: dst.height() - crop.top - crop.height,
1430                    top: crop.left,
1431                    width: crop.height,
1432                    height: crop.width,
1433                }
1434            }
1435        }
1436
1437        match flip {
1438            Flip::None => {}
1439            Flip::Vertical => crop.top = dst.height() - crop.top - crop.height,
1440            Flip::Horizontal => crop.left = dst.width() - crop.left - crop.width,
1441        }
1442    }
1443
1444    /// Fills the area outside a crop rectangle with the specified color.
1445    pub fn fill_image_outside_crop(dst: &mut TensorImage, rgba: [u8; 4], crop: Rect) -> Result<()> {
1446        let dst_fourcc = dst.fourcc();
1447        let mut dst_map = dst.tensor().map()?;
1448        let dst = (dst_map.as_mut_slice(), dst.width(), dst.height());
1449        match dst_fourcc {
1450            RGBA => Self::fill_image_outside_crop_(dst, rgba, crop),
1451            RGB => Self::fill_image_outside_crop_(dst, Self::rgba_to_rgb(rgba), crop),
1452            GREY => Self::fill_image_outside_crop_(dst, Self::rgba_to_grey(rgba), crop),
1453            YUYV => Self::fill_image_outside_crop_(
1454                (dst.0, dst.1 / 2, dst.2),
1455                Self::rgba_to_yuyv(rgba),
1456                Rect::new(crop.left / 2, crop.top, crop.width.div_ceil(2), crop.height),
1457            ),
1458            PLANAR_RGB => Self::fill_image_outside_crop_planar(dst, Self::rgba_to_rgb(rgba), crop),
1459            PLANAR_RGBA => Self::fill_image_outside_crop_planar(dst, rgba, crop),
1460            NV16 => {
1461                let yuyv = Self::rgba_to_yuyv(rgba);
1462                Self::fill_image_outside_crop_yuv_semiplanar(dst, yuyv[0], [yuyv[1], yuyv[3]], crop)
1463            }
1464            _ => Err(Error::Internal(format!(
1465                "Found unexpected destination {}",
1466                dst_fourcc.display()
1467            ))),
1468        }
1469    }
1470
1471    /// Generic fill for TensorImageDst types.
1472    pub(crate) fn fill_image_outside_crop_generic<D: TensorImageDst>(
1473        dst: &mut D,
1474        rgba: [u8; 4],
1475        crop: Rect,
1476    ) -> Result<()> {
1477        let dst_fourcc = dst.fourcc();
1478        let dst_width = dst.width();
1479        let dst_height = dst.height();
1480        let mut dst_map = dst.tensor_mut().map()?;
1481        let dst = (dst_map.as_mut_slice(), dst_width, dst_height);
1482        match dst_fourcc {
1483            RGBA => Self::fill_image_outside_crop_(dst, rgba, crop),
1484            RGB => Self::fill_image_outside_crop_(dst, Self::rgba_to_rgb(rgba), crop),
1485            GREY => Self::fill_image_outside_crop_(dst, Self::rgba_to_grey(rgba), crop),
1486            YUYV => Self::fill_image_outside_crop_(
1487                (dst.0, dst.1 / 2, dst.2),
1488                Self::rgba_to_yuyv(rgba),
1489                Rect::new(crop.left / 2, crop.top, crop.width.div_ceil(2), crop.height),
1490            ),
1491            PLANAR_RGB => Self::fill_image_outside_crop_planar(dst, Self::rgba_to_rgb(rgba), crop),
1492            PLANAR_RGBA => Self::fill_image_outside_crop_planar(dst, rgba, crop),
1493            NV16 => {
1494                let yuyv = Self::rgba_to_yuyv(rgba);
1495                Self::fill_image_outside_crop_yuv_semiplanar(dst, yuyv[0], [yuyv[1], yuyv[3]], crop)
1496            }
1497            _ => Err(Error::Internal(format!(
1498                "Found unexpected destination {}",
1499                dst_fourcc.display()
1500            ))),
1501        }
1502    }
1503
1504    fn fill_image_outside_crop_<const N: usize>(
1505        (dst, dst_width, _dst_height): (&mut [u8], usize, usize),
1506        pix: [u8; N],
1507        crop: Rect,
1508    ) -> Result<()> {
1509        use rayon::{
1510            iter::{IntoParallelRefMutIterator, ParallelIterator},
1511            prelude::ParallelSliceMut,
1512        };
1513
1514        let s = dst.as_chunks_mut::<N>().0;
1515        // calculate the top/bottom
1516        let top_offset = (0, (crop.top * dst_width + crop.left));
1517        let bottom_offset = (
1518            ((crop.top + crop.height) * dst_width + crop.left).min(s.len()),
1519            s.len(),
1520        );
1521
1522        s[top_offset.0..top_offset.1]
1523            .par_iter_mut()
1524            .for_each(|x| *x = pix);
1525
1526        s[bottom_offset.0..bottom_offset.1]
1527            .par_iter_mut()
1528            .for_each(|x| *x = pix);
1529
1530        if dst_width == crop.width {
1531            return Ok(());
1532        }
1533
1534        // the middle part has a stride as well
1535        let middle_stride = dst_width - crop.width;
1536        let middle_offset = (
1537            (crop.top * dst_width + crop.left + crop.width),
1538            ((crop.top + crop.height) * dst_width + crop.left + crop.width).min(s.len()),
1539        );
1540
1541        s[middle_offset.0..middle_offset.1]
1542            .par_chunks_exact_mut(dst_width)
1543            .for_each(|row| {
1544                for p in &mut row[0..middle_stride] {
1545                    *p = pix;
1546                }
1547            });
1548
1549        Ok(())
1550    }
1551
1552    fn fill_image_outside_crop_planar<const N: usize>(
1553        (dst, dst_width, dst_height): (&mut [u8], usize, usize),
1554        pix: [u8; N],
1555        crop: Rect,
1556    ) -> Result<()> {
1557        use rayon::{
1558            iter::{IntoParallelRefMutIterator, ParallelIterator},
1559            prelude::ParallelSliceMut,
1560        };
1561
1562        // map.as_mut_slice().splitn_mut(n, pred)
1563        let s_rem = dst;
1564
1565        s_rem
1566            .par_chunks_exact_mut(dst_height * dst_width)
1567            .zip(pix)
1568            .for_each(|(s, p)| {
1569                let top_offset = (0, (crop.top * dst_width + crop.left));
1570                let bottom_offset = (
1571                    ((crop.top + crop.height) * dst_width + crop.left).min(s.len()),
1572                    s.len(),
1573                );
1574
1575                s[top_offset.0..top_offset.1]
1576                    .par_iter_mut()
1577                    .for_each(|x| *x = p);
1578
1579                s[bottom_offset.0..bottom_offset.1]
1580                    .par_iter_mut()
1581                    .for_each(|x| *x = p);
1582
1583                if dst_width == crop.width {
1584                    return;
1585                }
1586
1587                // the middle part has a stride as well
1588                let middle_stride = dst_width - crop.width;
1589                let middle_offset = (
1590                    (crop.top * dst_width + crop.left + crop.width),
1591                    ((crop.top + crop.height) * dst_width + crop.left + crop.width).min(s.len()),
1592                );
1593
1594                s[middle_offset.0..middle_offset.1]
1595                    .par_chunks_exact_mut(dst_width)
1596                    .for_each(|row| {
1597                        for x in &mut row[0..middle_stride] {
1598                            *x = p;
1599                        }
1600                    });
1601            });
1602        Ok(())
1603    }
1604
1605    fn fill_image_outside_crop_yuv_semiplanar(
1606        (dst, dst_width, dst_height): (&mut [u8], usize, usize),
1607        y: u8,
1608        uv: [u8; 2],
1609        mut crop: Rect,
1610    ) -> Result<()> {
1611        let (y_plane, uv_plane) = dst.split_at_mut(dst_width * dst_height);
1612        Self::fill_image_outside_crop_::<1>((y_plane, dst_width, dst_height), [y], crop)?;
1613        crop.left /= 2;
1614        crop.width /= 2;
1615        Self::fill_image_outside_crop_::<2>((uv_plane, dst_width / 2, dst_height), uv, crop)?;
1616        Ok(())
1617    }
1618
1619    fn rgba_to_rgb(rgba: [u8; 4]) -> [u8; 3] {
1620        let [r, g, b, _] = rgba;
1621        [r, g, b]
1622    }
1623
1624    fn rgba_to_grey(rgba: [u8; 4]) -> [u8; 1] {
1625        const BIAS: i32 = 20;
1626        const KR: f64 = 0.2126f64;
1627        const KB: f64 = 0.0722f64;
1628        const KG: f64 = 1.0 - KR - KB;
1629        const Y_R: i32 = (KR * (255 << BIAS) as f64 / 255.0).round() as i32;
1630        const Y_G: i32 = (KG * (255 << BIAS) as f64 / 255.0).round() as i32;
1631        const Y_B: i32 = (KB * (255 << BIAS) as f64 / 255.0).round() as i32;
1632
1633        const ROUND: i32 = 1 << (BIAS - 1);
1634
1635        let [r, g, b, _] = rgba;
1636        let y = ((Y_R * r as i32 + Y_G * g as i32 + Y_B * b as i32 + ROUND) >> BIAS) as u8;
1637        [y]
1638    }
1639
1640    fn rgba_to_yuyv(rgba: [u8; 4]) -> [u8; 4] {
1641        const KR: f64 = 0.2126f64;
1642        const KB: f64 = 0.0722f64;
1643        const KG: f64 = 1.0 - KR - KB;
1644        const BIAS: i32 = 20;
1645
1646        const Y_R: i32 = (KR * (219 << BIAS) as f64 / 255.0).round() as i32;
1647        const Y_G: i32 = (KG * (219 << BIAS) as f64 / 255.0).round() as i32;
1648        const Y_B: i32 = (KB * (219 << BIAS) as f64 / 255.0).round() as i32;
1649
1650        const U_R: i32 = (-KR / (KR + KG) / 2.0 * (224 << BIAS) as f64 / 255.0).round() as i32;
1651        const U_G: i32 = (-KG / (KR + KG) / 2.0 * (224 << BIAS) as f64 / 255.0).round() as i32;
1652        const U_B: i32 = (0.5_f64 * (224 << BIAS) as f64 / 255.0).ceil() as i32;
1653
1654        const V_R: i32 = (0.5_f64 * (224 << BIAS) as f64 / 255.0).ceil() as i32;
1655        const V_G: i32 = (-KG / (KG + KB) / 2.0 * (224 << BIAS) as f64 / 255.0).round() as i32;
1656        const V_B: i32 = (-KB / (KG + KB) / 2.0 * (224 << BIAS) as f64 / 255.0).round() as i32;
1657        const ROUND: i32 = 1 << (BIAS - 1);
1658
1659        let [r, g, b, _] = rgba;
1660        let r = r as i32;
1661        let g = g as i32;
1662        let b = b as i32;
1663        let y = (((Y_R * r + Y_G * g + Y_B * b + ROUND) >> BIAS) + 16) as u8;
1664        let u = (((U_R * r + U_G * g + U_B * b + ROUND) >> BIAS) + 128) as u8;
1665        let v = (((V_R * r + V_G * g + V_B * b + ROUND) >> BIAS) + 128) as u8;
1666
1667        [y, u, y, v]
1668    }
1669
    /// Alpha-blends a multi-class (semantic) segmentation map over `dst`.
    ///
    /// The map is stretched with nearest-neighbour sampling over the region
    /// of `dst` given by the segmentation's normalized bounds. Pixels whose
    /// argmax falls in the last class channel are skipped (assumed to be
    /// background — confirm against the model's class layout).
    /// `dst_slice` must be the mapped pixel data of `dst` (RGB or RGBA;
    /// only the first three channels are written).
    fn render_modelpack_segmentation(
        &mut self,
        dst: &TensorImage,
        dst_slice: &mut [u8],
        segmentation: &Segmentation,
    ) -> Result<()> {
        use ndarray_stats::QuantileExt;

        let seg = &segmentation.segmentation;
        let [seg_height, seg_width, seg_classes] = *seg.shape() else {
            unreachable!("Array3 did not have [usize; 3] as shape");
        };
        // Segmentation bounds in destination pixel coordinates.
        let start_y = (dst.height() as f32 * segmentation.ymin).round();
        let end_y = (dst.height() as f32 * segmentation.ymax).round();
        let start_x = (dst.width() as f32 * segmentation.xmin).round();
        let end_x = (dst.width() as f32 * segmentation.xmax).round();

        // Destination-pixel -> segmentation-map scale, aligning corner
        // samples (hence the -1 terms). NOTE(review): a one-pixel-wide span
        // makes the denominator zero — presumably never produced upstream;
        // confirm.
        let scale_x = (seg_width as f32 - 1.0) / ((end_x - start_x) - 1.0);
        let scale_y = (seg_height as f32 - 1.0) / ((end_y - start_y) - 1.0);

        // Clamp the iteration window to the destination image.
        let start_x_u = (start_x as usize).min(dst.width());
        let start_y_u = (start_y as usize).min(dst.height());
        let end_x_u = (end_x as usize).min(dst.width());
        let end_y_u = (end_y as usize).min(dst.height());

        // Pre-compute the winning class per segmentation cell.
        let argmax = seg.map_axis(Axis(2), |r| r.argmax().unwrap());
        // Nearest-neighbour lookup, clamped to the map's edges.
        let get_value_at_nearest = |x: f32, y: f32| -> usize {
            let x = x.round() as usize;
            let y = y.round() as usize;
            argmax
                .get([y.min(seg_height - 1), x.min(seg_width - 1)])
                .copied()
                .unwrap_or(0)
        };

        for y in start_y_u..end_y_u {
            for x in start_x_u..end_x_u {
                let seg_x = (x as f32 - start_x) * scale_x;
                let seg_y = (y as f32 - start_y) * scale_y;
                let label = get_value_at_nearest(seg_x, seg_y);

                // Last class is skipped, leaving the pixel untouched.
                if label == seg_classes - 1 {
                    continue;
                }

                let color = self.colors[label % self.colors.len()];

                let alpha = color[3] as u16;

                // Blend the class color over the existing pixel using the
                // palette entry's alpha.
                let dst_index = (y * dst.row_stride()) + (x * dst.channels());
                for c in 0..3 {
                    dst_slice[dst_index + c] = ((color[c] as u16 * alpha
                        + dst_slice[dst_index + c] as u16 * (255 - alpha))
                        / 255) as u8;
                }
            }
        }

        Ok(())
    }
1730
1731    fn render_yolo_segmentation(
1732        &mut self,
1733        dst: &TensorImage,
1734        dst_slice: &mut [u8],
1735        segmentation: &Segmentation,
1736        class: usize,
1737    ) -> Result<()> {
1738        let seg = &segmentation.segmentation;
1739        let [seg_height, seg_width, classes] = *seg.shape() else {
1740            unreachable!("Array3 did not have [usize;3] as shape");
1741        };
1742        debug_assert_eq!(classes, 1);
1743
1744        let start_y = (dst.height() as f32 * segmentation.ymin).round();
1745        let end_y = (dst.height() as f32 * segmentation.ymax).round();
1746        let start_x = (dst.width() as f32 * segmentation.xmin).round();
1747        let end_x = (dst.width() as f32 * segmentation.xmax).round();
1748
1749        let scale_x = (seg_width as f32 - 1.0) / ((end_x - start_x) - 1.0);
1750        let scale_y = (seg_height as f32 - 1.0) / ((end_y - start_y) - 1.0);
1751
1752        let start_x_u = (start_x as usize).min(dst.width());
1753        let start_y_u = (start_y as usize).min(dst.height());
1754        let end_x_u = (end_x as usize).min(dst.width());
1755        let end_y_u = (end_y as usize).min(dst.height());
1756
1757        for y in start_y_u..end_y_u {
1758            for x in start_x_u..end_x_u {
1759                let seg_x = ((x as f32 - start_x) * scale_x) as usize;
1760                let seg_y = ((y as f32 - start_y) * scale_y) as usize;
1761                let val = *seg.get([seg_y, seg_x, 0]).unwrap_or(&0);
1762
1763                if val < 127 {
1764                    continue;
1765                }
1766
1767                let color = self.colors[class % self.colors.len()];
1768
1769                let alpha = color[3] as u16;
1770
1771                let dst_index = (y * dst.row_stride()) + (x * dst.channels());
1772                for c in 0..3 {
1773                    dst_slice[dst_index + c] = ((color[c] as u16 * alpha
1774                        + dst_slice[dst_index + c] as u16 * (255 - alpha))
1775                        / 255) as u8;
1776                }
1777            }
1778        }
1779
1780        Ok(())
1781    }
1782
    /// Draws a rectangle outline (LINE_THICKNESS pixels thick) for each
    /// detection, colored by its class label.
    ///
    /// Boxes arrive in normalized [0, 1] coordinates and are clamped before
    /// rasterization. The outline grows outward from the box edges and is
    /// clipped to the image. `dst_slice` must be the mapped pixel data of
    /// `dst` (RGB or RGBA; only the first three channels are written).
    fn render_box(
        &mut self,
        dst: &TensorImage,
        dst_slice: &mut [u8],
        detect: &[DetectBox],
    ) -> Result<()> {
        const LINE_THICKNESS: usize = 3;
        for d in detect {
            use edgefirst_decoder::BoundingBox;

            // Palette color for this class; the alpha entry is unused here
            // (boxes are drawn opaque).
            let label = d.label;
            let [r, g, b, _] = self.colors[label % self.colors.len()];
            let bbox = d.bbox.to_canonical();
            let bbox = BoundingBox {
                xmin: bbox.xmin.clamp(0.0, 1.0),
                ymin: bbox.ymin.clamp(0.0, 1.0),
                xmax: bbox.xmax.clamp(0.0, 1.0),
                ymax: bbox.ymax.clamp(0.0, 1.0),
            };
            // Inner rectangle: box edges in pixel coordinates, nudged by
            // half a pixel outward. A negative rounded value saturates to 0
            // in the `as usize` cast.
            let inner = [
                ((dst.width() - 1) as f32 * bbox.xmin - 0.5).round() as usize,
                ((dst.height() - 1) as f32 * bbox.ymin - 0.5).round() as usize,
                ((dst.width() - 1) as f32 * bbox.xmax + 0.5).round() as usize,
                ((dst.height() - 1) as f32 * bbox.ymax + 0.5).round() as usize,
            ];

            // Outer rectangle: inner expanded by the line thickness and
            // clipped to the image bounds.
            let outer = [
                inner[0].saturating_sub(LINE_THICKNESS),
                inner[1].saturating_sub(LINE_THICKNESS),
                (inner[2] + LINE_THICKNESS).min(dst.width()),
                (inner[3] + LINE_THICKNESS).min(dst.height()),
            ];

            // top line
            for y in outer[1] + 1..=inner[1] {
                for x in outer[0] + 1..outer[2] {
                    let index = (y * dst.row_stride()) + (x * dst.channels());
                    dst_slice[index..(index + 3)].copy_from_slice(&[r, g, b]);
                }
            }

            // left and right lines
            for y in inner[1]..inner[3] {
                for x in outer[0] + 1..=inner[0] {
                    let index = (y * dst.row_stride()) + (x * dst.channels());
                    dst_slice[index..(index + 3)].copy_from_slice(&[r, g, b]);
                }

                for x in inner[2]..outer[2] {
                    let index = (y * dst.row_stride()) + (x * dst.channels());
                    dst_slice[index..(index + 3)].copy_from_slice(&[r, g, b]);
                }
            }

            // bottom line
            for y in inner[3]..outer[3] {
                for x in outer[0] + 1..outer[2] {
                    let index = (y * dst.row_stride()) + (x * dst.channels());
                    dst_slice[index..(index + 3)].copy_from_slice(&[r, g, b]);
                }
            }
        }
        Ok(())
    }
1847
1848    /// Renders per-instance grayscale masks from raw prototype data at full
1849    /// output resolution. Used internally by [`decode_masks_atlas`] to generate
1850    /// per-detection mask crops that are then packed into the atlas.
1851    fn render_masks_from_protos(
1852        &mut self,
1853        detect: &[crate::DetectBox],
1854        proto_data: crate::ProtoData,
1855        output_width: usize,
1856        output_height: usize,
1857    ) -> Result<Vec<crate::MaskResult>> {
1858        use crate::FunctionTimer;
1859
1860        let _timer = FunctionTimer::new("CPUProcessor::render_masks_from_protos");
1861
1862        if detect.is_empty() || proto_data.mask_coefficients.is_empty() {
1863            return Ok(Vec::new());
1864        }
1865
1866        let protos_cow = proto_data.protos.as_f32();
1867        let protos = protos_cow.as_ref();
1868        let proto_h = protos.shape()[0];
1869        let proto_w = protos.shape()[1];
1870        let num_protos = protos.shape()[2];
1871
1872        let mut results = Vec::with_capacity(detect.len());
1873
1874        for (det, coeff) in detect.iter().zip(proto_data.mask_coefficients.iter()) {
1875            let start_x = (output_width as f32 * det.bbox.xmin).round() as usize;
1876            let start_y = (output_height as f32 * det.bbox.ymin).round() as usize;
1877            // Use span-based rounding to match the numpy reference convention.
1878            let bbox_w = ((det.bbox.xmax - det.bbox.xmin) * output_width as f32)
1879                .round()
1880                .max(1.0) as usize;
1881            let bbox_h = ((det.bbox.ymax - det.bbox.ymin) * output_height as f32)
1882                .round()
1883                .max(1.0) as usize;
1884            let bbox_w = bbox_w.min(output_width.saturating_sub(start_x));
1885            let bbox_h = bbox_h.min(output_height.saturating_sub(start_y));
1886
1887            let mut pixels = vec![0u8; bbox_w * bbox_h];
1888
1889            for row in 0..bbox_h {
1890                let y = start_y + row;
1891                for col in 0..bbox_w {
1892                    let x = start_x + col;
1893                    let px = (x as f32 / output_width as f32) * proto_w as f32 - 0.5;
1894                    let py = (y as f32 / output_height as f32) * proto_h as f32 - 0.5;
1895                    let acc = bilinear_dot(protos, coeff, num_protos, px, py, proto_w, proto_h);
1896                    let mask = 1.0 / (1.0 + (-acc).exp());
1897                    pixels[row * bbox_w + col] = if mask > 0.5 { 255 } else { 0 };
1898                }
1899            }
1900
1901            results.push(crate::MaskResult {
1902                x: start_x,
1903                y: start_y,
1904                w: bbox_w,
1905                h: bbox_h,
1906                pixels,
1907            });
1908        }
1909
1910        Ok(results)
1911    }
1912}
1913
1914impl ImageProcessorTrait for CPUProcessor {
1915    fn convert(
1916        &mut self,
1917        src: &TensorImage,
1918        dst: &mut TensorImage,
1919        rotation: Rotation,
1920        flip: Flip,
1921        crop: Crop,
1922    ) -> Result<()> {
1923        // Int8 formats: convert directly into dst as uint8 (layouts are
1924        // identical), then XOR 0x80 in-place. Avoids a temporary allocation.
1925        if fourcc_is_int8(dst.fourcc()) {
1926            let int8_fourcc = dst.fourcc();
1927            dst.set_fourcc(fourcc_uint8_equivalent(int8_fourcc));
1928            self.convert(src, dst, rotation, flip, crop)?;
1929            dst.set_fourcc(int8_fourcc);
1930            let mut dst_map = dst.tensor().map()?;
1931            for byte in dst_map.iter_mut() {
1932                *byte ^= 0x80;
1933            }
1934            return Ok(());
1935        }
1936
1937        crop.check_crop(src, dst)?;
1938        // supported destinations and srcs:
1939        let intermediate = match (src.fourcc(), dst.fourcc()) {
1940            (NV12, RGB) => RGB,
1941            (NV12, RGBA) => RGBA,
1942            (NV12, GREY) => GREY,
1943            (NV12, YUYV) => RGBA, // RGBA intermediary for YUYV dest resize/convert/rotation/flip
1944            (NV12, NV16) => RGBA, // RGBA intermediary for YUYV dest resize/convert/rotation/flip
1945            (NV12, PLANAR_RGB) => RGB,
1946            (NV12, PLANAR_RGBA) => RGBA,
1947            (YUYV, RGB) => RGB,
1948            (YUYV, RGBA) => RGBA,
1949            (YUYV, GREY) => GREY,
1950            (YUYV, YUYV) => RGBA, // RGBA intermediary for YUYV dest resize/convert/rotation/flip
1951            (YUYV, PLANAR_RGB) => RGB,
1952            (YUYV, PLANAR_RGBA) => RGBA,
1953            (YUYV, NV16) => RGBA,
1954            (VYUY, RGB) => RGB,
1955            (VYUY, RGBA) => RGBA,
1956            (VYUY, GREY) => GREY,
1957            (VYUY, VYUY) => RGBA, // RGBA intermediary for VYUY dest resize/convert/rotation/flip
1958            (VYUY, PLANAR_RGB) => RGB,
1959            (VYUY, PLANAR_RGBA) => RGBA,
1960            (VYUY, NV16) => RGBA,
1961            (RGBA, RGB) => RGBA,
1962            (RGBA, RGBA) => RGBA,
1963            (RGBA, GREY) => GREY,
1964            (RGBA, YUYV) => RGBA, // RGBA intermediary for YUYV dest resize/convert/rotation/flip
1965            (RGBA, PLANAR_RGB) => RGBA,
1966            (RGBA, PLANAR_RGBA) => RGBA,
1967            (RGBA, NV16) => RGBA,
1968            (RGB, RGB) => RGB,
1969            (RGB, RGBA) => RGB,
1970            (RGB, GREY) => GREY,
1971            (RGB, YUYV) => RGB, // RGB intermediary for YUYV dest resize/convert/rotation/flip
1972            (RGB, PLANAR_RGB) => RGB,
1973            (RGB, PLANAR_RGBA) => RGB,
1974            (RGB, NV16) => RGB,
1975            (GREY, RGB) => RGB,
1976            (GREY, RGBA) => RGBA,
1977            (GREY, GREY) => GREY,
1978            (GREY, YUYV) => GREY,
1979            (GREY, PLANAR_RGB) => GREY,
1980            (GREY, PLANAR_RGBA) => GREY,
1981            (GREY, NV16) => GREY,
1982            (s, d) => {
1983                return Err(Error::NotSupported(format!(
1984                    "Conversion from {} to {}",
1985                    s.display(),
1986                    d.display()
1987                )));
1988            }
1989        };
1990
1991        // let crop = crop.src_rect;
1992
1993        let need_resize_flip_rotation = rotation != Rotation::None
1994            || flip != Flip::None
1995            || src.width() != dst.width()
1996            || src.height() != dst.height()
1997            || crop.src_rect.is_some_and(|crop| {
1998                crop != Rect {
1999                    left: 0,
2000                    top: 0,
2001                    width: src.width(),
2002                    height: src.height(),
2003                }
2004            })
2005            || crop.dst_rect.is_some_and(|crop| {
2006                crop != Rect {
2007                    left: 0,
2008                    top: 0,
2009                    width: dst.width(),
2010                    height: dst.height(),
2011                }
2012            });
2013
2014        // check if a direct conversion can be done
2015        if !need_resize_flip_rotation && Self::support_conversion(src.fourcc(), dst.fourcc()) {
2016            return Self::convert_format(src, dst);
2017        };
2018
2019        // any extra checks
2020        if dst.fourcc() == YUYV && !dst.width().is_multiple_of(2) {
2021            return Err(Error::NotSupported(format!(
2022                "{} destination must have width divisible by 2",
2023                dst.fourcc().display(),
2024            )));
2025        }
2026
2027        // create tmp buffer
2028        let mut tmp_buffer;
2029        let tmp;
2030        if intermediate != src.fourcc() {
2031            tmp_buffer = TensorImage::new(
2032                src.width(),
2033                src.height(),
2034                intermediate,
2035                Some(edgefirst_tensor::TensorMemory::Mem),
2036            )?;
2037
2038            Self::convert_format(src, &mut tmp_buffer)?;
2039            tmp = &tmp_buffer;
2040        } else {
2041            tmp = src;
2042        }
2043
2044        // format must be RGB/RGBA/GREY
2045        matches!(tmp.fourcc(), RGB | RGBA | GREY);
2046        if tmp.fourcc() == dst.fourcc() {
2047            self.resize_flip_rotate(tmp, dst, rotation, flip, crop)?;
2048        } else if !need_resize_flip_rotation {
2049            Self::convert_format(tmp, dst)?;
2050        } else {
2051            let mut tmp2 = TensorImage::new(
2052                dst.width(),
2053                dst.height(),
2054                tmp.fourcc(),
2055                Some(edgefirst_tensor::TensorMemory::Mem),
2056            )?;
2057            if crop.dst_rect.is_some_and(|crop| {
2058                crop != Rect {
2059                    left: 0,
2060                    top: 0,
2061                    width: dst.width(),
2062                    height: dst.height(),
2063                }
2064            }) && crop.dst_color.is_none()
2065            {
2066                // convert the dst into tmp2 when there is a dst crop
2067                // TODO: this could be optimized by changing convert_format to take a
2068                // destination crop?
2069
2070                Self::convert_format(dst, &mut tmp2)?;
2071            }
2072            self.resize_flip_rotate(tmp, &mut tmp2, rotation, flip, crop)?;
2073            Self::convert_format(&tmp2, dst)?;
2074        }
2075        if let (Some(dst_rect), Some(dst_color)) = (crop.dst_rect, crop.dst_color) {
2076            let full_rect = Rect {
2077                left: 0,
2078                top: 0,
2079                width: dst.width(),
2080                height: dst.height(),
2081            };
2082            if dst_rect != full_rect {
2083                Self::fill_image_outside_crop(dst, dst_color, dst_rect)?;
2084            }
2085        }
2086
2087        Ok(())
2088    }
2089
2090    fn convert_ref(
2091        &mut self,
2092        src: &TensorImage,
2093        dst: &mut TensorImageRef<'_>,
2094        rotation: Rotation,
2095        flip: Flip,
2096        crop: Crop,
2097    ) -> Result<()> {
2098        crop.check_crop_ref(src, dst)?;
2099
2100        // Determine intermediate format needed for conversion
2101        let intermediate = match (src.fourcc(), dst.fourcc()) {
2102            (NV12, RGB) => RGB,
2103            (NV12, RGBA) => RGBA,
2104            (NV12, GREY) => GREY,
2105            (NV12, PLANAR_RGB) => RGB,
2106            (NV12, PLANAR_RGBA) => RGBA,
2107            (YUYV, RGB) => RGB,
2108            (YUYV, RGBA) => RGBA,
2109            (YUYV, GREY) => GREY,
2110            (YUYV, PLANAR_RGB) => RGB,
2111            (YUYV, PLANAR_RGBA) => RGBA,
2112            (VYUY, RGB) => RGB,
2113            (VYUY, RGBA) => RGBA,
2114            (VYUY, GREY) => GREY,
2115            (VYUY, PLANAR_RGB) => RGB,
2116            (VYUY, PLANAR_RGBA) => RGBA,
2117            (RGBA, RGB) => RGBA,
2118            (RGBA, RGBA) => RGBA,
2119            (RGBA, GREY) => GREY,
2120            (RGBA, PLANAR_RGB) => RGBA,
2121            (RGBA, PLANAR_RGBA) => RGBA,
2122            (RGB, RGB) => RGB,
2123            (RGB, RGBA) => RGB,
2124            (RGB, GREY) => GREY,
2125            (RGB, PLANAR_RGB) => RGB,
2126            (RGB, PLANAR_RGBA) => RGB,
2127            (GREY, RGB) => RGB,
2128            (GREY, RGBA) => RGBA,
2129            (GREY, GREY) => GREY,
2130            (GREY, PLANAR_RGB) => GREY,
2131            (GREY, PLANAR_RGBA) => GREY,
2132            (s, d) => {
2133                return Err(Error::NotSupported(format!(
2134                    "Conversion from {} to {}",
2135                    s.display(),
2136                    d.display()
2137                )));
2138            }
2139        };
2140
2141        let need_resize_flip_rotation = rotation != Rotation::None
2142            || flip != Flip::None
2143            || src.width() != dst.width()
2144            || src.height() != dst.height()
2145            || crop.src_rect.is_some_and(|crop| {
2146                crop != Rect {
2147                    left: 0,
2148                    top: 0,
2149                    width: src.width(),
2150                    height: src.height(),
2151                }
2152            })
2153            || crop.dst_rect.is_some_and(|crop| {
2154                crop != Rect {
2155                    left: 0,
2156                    top: 0,
2157                    width: dst.width(),
2158                    height: dst.height(),
2159                }
2160            });
2161
2162        // Simple case: no resize/flip/rotation needed
2163        if !need_resize_flip_rotation {
2164            // Try direct generic conversion (zero-copy path)
2165            if let Ok(()) = Self::convert_format_generic(src, dst) {
2166                return Ok(());
2167            }
2168        }
2169
2170        // Complex case: need intermediate buffers
2171        // First, convert source to intermediate format if needed
2172        let mut tmp_buffer;
2173        let tmp: &TensorImage;
2174        if intermediate != src.fourcc() {
2175            tmp_buffer = TensorImage::new(
2176                src.width(),
2177                src.height(),
2178                intermediate,
2179                Some(edgefirst_tensor::TensorMemory::Mem),
2180            )?;
2181            Self::convert_format(src, &mut tmp_buffer)?;
2182            tmp = &tmp_buffer;
2183        } else {
2184            tmp = src;
2185        }
2186
2187        // Process resize/flip/rotation if needed
2188        if need_resize_flip_rotation {
2189            // Create intermediate buffer for resize output
2190            let mut tmp2 = TensorImage::new(
2191                dst.width(),
2192                dst.height(),
2193                tmp.fourcc(),
2194                Some(edgefirst_tensor::TensorMemory::Mem),
2195            )?;
2196            self.resize_flip_rotate(tmp, &mut tmp2, rotation, flip, crop)?;
2197
2198            // Final conversion to destination (zero-copy into dst)
2199            Self::convert_format_generic(&tmp2, dst)?;
2200        } else {
2201            // Direct conversion (already checked above, but handle edge cases)
2202            Self::convert_format_generic(tmp, dst)?;
2203        }
2204
2205        // Handle destination crop fill if needed
2206        if let (Some(dst_rect), Some(dst_color)) = (crop.dst_rect, crop.dst_color) {
2207            let full_rect = Rect {
2208                left: 0,
2209                top: 0,
2210                width: dst.width(),
2211                height: dst.height(),
2212            };
2213            if dst_rect != full_rect {
2214                Self::fill_image_outside_crop_generic(dst, dst_color, dst_rect)?;
2215            }
2216        }
2217
2218        Ok(())
2219    }
2220
2221    fn draw_masks(
2222        &mut self,
2223        dst: &mut TensorImage,
2224        detect: &[DetectBox],
2225        segmentation: &[Segmentation],
2226    ) -> Result<()> {
2227        if !matches!(dst.fourcc(), RGBA | RGB) {
2228            return Err(crate::Error::NotSupported(
2229                "CPU image rendering only supports RGBA or RGB images".to_string(),
2230            ));
2231        }
2232
2233        let _timer = FunctionTimer::new("CPUProcessor::draw_masks");
2234
2235        let mut map = dst.tensor.map()?;
2236        let dst_slice = map.as_mut_slice();
2237
2238        self.render_box(dst, dst_slice, detect)?;
2239
2240        if segmentation.is_empty() {
2241            return Ok(());
2242        }
2243
2244        // Semantic segmentation (e.g. ModelPack) has C > 1 (multi-class),
2245        // instance segmentation (e.g. YOLO) has C = 1 (binary per-instance).
2246        let is_semantic = segmentation[0].segmentation.shape()[2] > 1;
2247
2248        if is_semantic {
2249            self.render_modelpack_segmentation(dst, dst_slice, &segmentation[0])?;
2250        } else {
2251            for (seg, detect) in segmentation.iter().zip(detect) {
2252                self.render_yolo_segmentation(dst, dst_slice, seg, detect.label)?;
2253            }
2254        }
2255
2256        Ok(())
2257    }
2258
2259    fn draw_masks_proto(
2260        &mut self,
2261        dst: &mut TensorImage,
2262        detect: &[DetectBox],
2263        proto_data: &ProtoData,
2264    ) -> Result<()> {
2265        if !matches!(dst.fourcc(), RGBA | RGB) {
2266            return Err(crate::Error::NotSupported(
2267                "CPU image rendering only supports RGBA or RGB images".to_string(),
2268            ));
2269        }
2270
2271        let _timer = FunctionTimer::new("CPUProcessor::draw_masks_proto");
2272
2273        let mut map = dst.tensor.map()?;
2274        let dst_slice = map.as_mut_slice();
2275
2276        self.render_box(dst, dst_slice, detect)?;
2277
2278        if detect.is_empty() || proto_data.mask_coefficients.is_empty() {
2279            return Ok(());
2280        }
2281
2282        let protos_cow = proto_data.protos.as_f32();
2283        let protos = protos_cow.as_ref();
2284        let proto_h = protos.shape()[0];
2285        let proto_w = protos.shape()[1];
2286        let num_protos = protos.shape()[2];
2287        let dst_w = dst.width();
2288        let dst_h = dst.height();
2289        let row_stride = dst.row_stride();
2290        let channels = dst.channels();
2291
2292        for (det, coeff) in detect.iter().zip(proto_data.mask_coefficients.iter()) {
2293            let color = self.colors[det.label % self.colors.len()];
2294            let alpha = color[3] as u16;
2295
2296            // Pixel bounds of the detection in dst image space
2297            let start_x = (dst_w as f32 * det.bbox.xmin).round() as usize;
2298            let start_y = (dst_h as f32 * det.bbox.ymin).round() as usize;
2299            let end_x = ((dst_w as f32 * det.bbox.xmax).round() as usize).min(dst_w);
2300            let end_y = ((dst_h as f32 * det.bbox.ymax).round() as usize).min(dst_h);
2301
2302            for y in start_y..end_y {
2303                for x in start_x..end_x {
2304                    // Map pixel (x, y) to proto space
2305                    let px = (x as f32 / dst_w as f32) * proto_w as f32 - 0.5;
2306                    let py = (y as f32 / dst_h as f32) * proto_h as f32 - 0.5;
2307
2308                    // Bilinear interpolation + dot product
2309                    let acc = bilinear_dot(protos, coeff, num_protos, px, py, proto_w, proto_h);
2310
2311                    // Sigmoid threshold
2312                    let mask = 1.0 / (1.0 + (-acc).exp());
2313                    if mask < 0.5 {
2314                        continue;
2315                    }
2316
2317                    // Alpha blend
2318                    let dst_index = y * row_stride + x * channels;
2319                    for c in 0..3 {
2320                        dst_slice[dst_index + c] = ((color[c] as u16 * alpha
2321                            + dst_slice[dst_index + c] as u16 * (255 - alpha))
2322                            / 255) as u8;
2323                    }
2324                }
2325            }
2326        }
2327
2328        Ok(())
2329    }
2330
2331    fn decode_masks_atlas(
2332        &mut self,
2333        detect: &[crate::DetectBox],
2334        proto_data: crate::ProtoData,
2335        output_width: usize,
2336        output_height: usize,
2337    ) -> Result<(Vec<u8>, Vec<crate::MaskRegion>)> {
2338        use crate::FunctionTimer;
2339
2340        let _timer = FunctionTimer::new("CPUProcessor::decode_masks_atlas");
2341
2342        let padding = 4usize;
2343
2344        // Render per-detection masks via existing path
2345        let mask_results =
2346            self.render_masks_from_protos(detect, proto_data, output_width, output_height)?;
2347
2348        // Pack into compact atlas: each strip is padded bbox height
2349        let ow = output_width as i32;
2350        let oh = output_height as i32;
2351        let pad = padding as i32;
2352
2353        let mut regions = Vec::with_capacity(mask_results.len());
2354        let mut atlas_y = 0usize;
2355
2356        // Pre-compute regions
2357        for mr in &mask_results {
2358            let bx = mr.x as i32;
2359            let by = mr.y as i32;
2360            let bw = mr.w as i32;
2361            let bh = mr.h as i32;
2362            let padded_x = (bx - pad).max(0);
2363            let padded_y = (by - pad).max(0);
2364            let padded_w = ((bx + bw + pad).min(ow) - padded_x).max(1);
2365            let padded_h = ((by + bh + pad).min(oh) - padded_y).max(1);
2366            regions.push(crate::MaskRegion {
2367                atlas_y_offset: atlas_y,
2368                padded_x: padded_x as usize,
2369                padded_y: padded_y as usize,
2370                padded_w: padded_w as usize,
2371                padded_h: padded_h as usize,
2372                bbox_x: mr.x,
2373                bbox_y: mr.y,
2374                bbox_w: mr.w,
2375                bbox_h: mr.h,
2376            });
2377            atlas_y += padded_h as usize;
2378        }
2379
2380        let atlas_height = atlas_y;
2381        let mut atlas = vec![0u8; output_width * atlas_height];
2382
2383        for (mr, region) in mask_results.iter().zip(regions.iter()) {
2384            // Copy mask pixels into the atlas at the correct position
2385            for row in 0..mr.h {
2386                let dst_row = region.atlas_y_offset + (mr.y - region.padded_y) + row;
2387                let dst_start = dst_row * output_width + mr.x;
2388                let src_start = row * mr.w;
2389                if dst_start + mr.w <= atlas.len() && src_start + mr.w <= mr.pixels.len() {
2390                    atlas[dst_start..dst_start + mr.w]
2391                        .copy_from_slice(&mr.pixels[src_start..src_start + mr.w]);
2392                }
2393            }
2394        }
2395
2396        Ok((atlas, regions))
2397    }
2398
2399    fn set_class_colors(&mut self, colors: &[[u8; 4]]) -> Result<()> {
2400        for (c, new_c) in self.colors.iter_mut().zip(colors.iter()) {
2401            *c = *new_c;
2402        }
2403        Ok(())
2404    }
2405}
2406
2407/// Bilinear interpolation of proto values at `(px, py)` combined with dot
2408/// product against `coeff`. Returns the scalar accumulator before sigmoid.
2409///
2410/// Samples the four nearest proto texels, weights by bilinear coefficients,
2411/// and simultaneously computes the dot product with the mask coefficients.
2412#[inline]
2413fn bilinear_dot(
2414    protos: &ndarray::Array3<f32>,
2415    coeff: &[f32],
2416    num_protos: usize,
2417    px: f32,
2418    py: f32,
2419    proto_w: usize,
2420    proto_h: usize,
2421) -> f32 {
2422    let x0 = (px.floor() as isize).clamp(0, proto_w as isize - 1) as usize;
2423    let y0 = (py.floor() as isize).clamp(0, proto_h as isize - 1) as usize;
2424    let x1 = (x0 + 1).min(proto_w - 1);
2425    let y1 = (y0 + 1).min(proto_h - 1);
2426
2427    let fx = px - px.floor();
2428    let fy = py - py.floor();
2429
2430    let w00 = (1.0 - fx) * (1.0 - fy);
2431    let w10 = fx * (1.0 - fy);
2432    let w01 = (1.0 - fx) * fy;
2433    let w11 = fx * fy;
2434
2435    let mut acc = 0.0f32;
2436    for p in 0..num_protos {
2437        let val = w00 * protos[[y0, x0, p]]
2438            + w10 * protos[[y0, x1, p]]
2439            + w01 * protos[[y1, x0, p]]
2440            + w11 * protos[[y1, x1, p]];
2441        acc += coeff[p] * val;
2442    }
2443    acc
2444}
2445
2446#[cfg(test)]
2447#[cfg_attr(coverage_nightly, coverage(off))]
2448mod cpu_tests {
2449
2450    use super::*;
2451    use crate::{CPUProcessor, Rotation, TensorImageRef, RGBA};
2452    use edgefirst_tensor::{Tensor, TensorMapTrait, TensorMemory};
2453    use image::buffer::ConvertBuffer;
2454
2455    macro_rules! function {
2456        () => {{
2457            fn f() {}
2458            fn type_name_of<T>(_: T) -> &'static str {
2459                std::any::type_name::<T>()
2460            }
2461            let name = type_name_of(f);
2462
2463            // Find and cut the rest of the path
2464            match &name[..name.len() - 3].rfind(':') {
2465                Some(pos) => &name[pos + 1..name.len() - 3],
2466                None => &name[..name.len() - 3],
2467            }
2468        }};
2469    }
2470
2471    fn compare_images_convert_to_grey(
2472        img1: &TensorImage,
2473        img2: &TensorImage,
2474        threshold: f64,
2475        name: &str,
2476    ) {
2477        assert_eq!(img1.height(), img2.height(), "Heights differ");
2478        assert_eq!(img1.width(), img2.width(), "Widths differ");
2479
2480        let mut img_rgb1 = TensorImage::new(img1.width(), img1.height(), RGBA, None).unwrap();
2481        let mut img_rgb2 = TensorImage::new(img1.width(), img1.height(), RGBA, None).unwrap();
2482        CPUProcessor::convert_format(img1, &mut img_rgb1).unwrap();
2483        CPUProcessor::convert_format(img2, &mut img_rgb2).unwrap();
2484
2485        let image1 = image::RgbaImage::from_vec(
2486            img_rgb1.width() as u32,
2487            img_rgb1.height() as u32,
2488            img_rgb1.tensor().map().unwrap().to_vec(),
2489        )
2490        .unwrap();
2491
2492        let image2 = image::RgbaImage::from_vec(
2493            img_rgb2.width() as u32,
2494            img_rgb2.height() as u32,
2495            img_rgb2.tensor().map().unwrap().to_vec(),
2496        )
2497        .unwrap();
2498
2499        let similarity = image_compare::gray_similarity_structure(
2500            &image_compare::Algorithm::RootMeanSquared,
2501            &image1.convert(),
2502            &image2.convert(),
2503        )
2504        .expect("Image Comparison failed");
2505        if similarity.score < threshold {
2506            // image1.save(format!("{name}_1.png"));
2507            // image2.save(format!("{name}_2.png"));
2508            similarity
2509                .image
2510                .to_color_map()
2511                .save(format!("{name}.png"))
2512                .unwrap();
2513            panic!(
2514                "{name}: converted image and target image have similarity score too low: {} < {}",
2515                similarity.score, threshold
2516            )
2517        }
2518    }
2519
2520    fn compare_images_convert_to_rgb(
2521        img1: &TensorImage,
2522        img2: &TensorImage,
2523        threshold: f64,
2524        name: &str,
2525    ) {
2526        assert_eq!(img1.height(), img2.height(), "Heights differ");
2527        assert_eq!(img1.width(), img2.width(), "Widths differ");
2528
2529        let mut img_rgb1 = TensorImage::new(img1.width(), img1.height(), RGB, None).unwrap();
2530        let mut img_rgb2 = TensorImage::new(img1.width(), img1.height(), RGB, None).unwrap();
2531        CPUProcessor::convert_format(img1, &mut img_rgb1).unwrap();
2532        CPUProcessor::convert_format(img2, &mut img_rgb2).unwrap();
2533
2534        let image1 = image::RgbImage::from_vec(
2535            img_rgb1.width() as u32,
2536            img_rgb1.height() as u32,
2537            img_rgb1.tensor().map().unwrap().to_vec(),
2538        )
2539        .unwrap();
2540
2541        let image2 = image::RgbImage::from_vec(
2542            img_rgb2.width() as u32,
2543            img_rgb2.height() as u32,
2544            img_rgb2.tensor().map().unwrap().to_vec(),
2545        )
2546        .unwrap();
2547
2548        let similarity = image_compare::rgb_similarity_structure(
2549            &image_compare::Algorithm::RootMeanSquared,
2550            &image1,
2551            &image2,
2552        )
2553        .expect("Image Comparison failed");
2554        if similarity.score < threshold {
2555            // image1.save(format!("{name}_1.png"));
2556            // image2.save(format!("{name}_2.png"));
2557            similarity
2558                .image
2559                .to_color_map()
2560                .save(format!("{name}.png"))
2561                .unwrap();
2562            panic!(
2563                "{name}: converted image and target image have similarity score too low: {} < {}",
2564                similarity.score, threshold
2565            )
2566        }
2567    }
2568
2569    fn load_bytes_to_tensor(
2570        width: usize,
2571        height: usize,
2572        fourcc: FourCharCode,
2573        memory: Option<TensorMemory>,
2574        bytes: &[u8],
2575    ) -> Result<TensorImage, Error> {
2576        log::debug!("Current function is {}", function!());
2577        let src = TensorImage::new(width, height, fourcc, memory)?;
2578        src.tensor().map()?.as_mut_slice()[0..bytes.len()].copy_from_slice(bytes);
2579        Ok(src)
2580    }
2581
2582    macro_rules! generate_conversion_tests {
2583        (
2584        $src_fmt:ident,  $src_file:expr, $dst_fmt:ident, $dst_file:expr
2585    ) => {{
2586            // Load source
2587            let src = load_bytes_to_tensor(
2588                1280,
2589                720,
2590                $src_fmt,
2591                None,
2592                include_bytes!(concat!("../../../testdata/", $src_file)),
2593            )?;
2594
2595            // Load destination reference
2596            let dst = load_bytes_to_tensor(
2597                1280,
2598                720,
2599                $dst_fmt,
2600                None,
2601                include_bytes!(concat!("../../../testdata/", $dst_file)),
2602            )?;
2603
2604            let mut converter = CPUProcessor::default();
2605
2606            let mut converted = TensorImage::new(src.width(), src.height(), dst.fourcc(), None)?;
2607
2608            converter.convert(
2609                &src,
2610                &mut converted,
2611                Rotation::None,
2612                Flip::None,
2613                Crop::default(),
2614            )?;
2615
2616            compare_images_convert_to_rgb(&dst, &converted, 0.99, function!());
2617
2618            Ok(())
2619        }};
2620    }
2621
2622    macro_rules! generate_conversion_tests_greyscale {
2623        (
2624        $src_fmt:ident,  $src_file:expr, $dst_fmt:ident, $dst_file:expr
2625    ) => {{
2626            // Load source
2627            let src = load_bytes_to_tensor(
2628                1280,
2629                720,
2630                $src_fmt,
2631                None,
2632                include_bytes!(concat!("../../../testdata/", $src_file)),
2633            )?;
2634
2635            // Load destination reference
2636            let dst = load_bytes_to_tensor(
2637                1280,
2638                720,
2639                $dst_fmt,
2640                None,
2641                include_bytes!(concat!("../../../testdata/", $dst_file)),
2642            )?;
2643
2644            let mut converter = CPUProcessor::default();
2645
2646            let mut converted = TensorImage::new(src.width(), src.height(), dst.fourcc(), None)?;
2647
2648            converter.convert(
2649                &src,
2650                &mut converted,
2651                Rotation::None,
2652                Flip::None,
2653                Crop::default(),
2654            )?;
2655
2656            compare_images_convert_to_grey(&dst, &converted, 0.985, function!());
2657
2658            Ok(())
2659        }};
2660    }
2661
    // Conversion matrix: each source format below is tested against the
    // destination formats yuyv, rgb, rgba, grey, nv16, planar_rgb, planar_rgba.
2663
    // YUYV source conversions: convert the camera720p YUYV capture to each
    // destination format and compare against the pre-generated reference.
    #[test]
    fn test_cpu_yuyv_to_yuyv() -> Result<()> {
        generate_conversion_tests!(YUYV, "camera720p.yuyv", YUYV, "camera720p.yuyv")
    }

    #[test]
    fn test_cpu_yuyv_to_rgb() -> Result<()> {
        generate_conversion_tests!(YUYV, "camera720p.yuyv", RGB, "camera720p.rgb")
    }

    #[test]
    fn test_cpu_yuyv_to_rgba() -> Result<()> {
        generate_conversion_tests!(YUYV, "camera720p.yuyv", RGBA, "camera720p.rgba")
    }

    #[test]
    fn test_cpu_yuyv_to_grey() -> Result<()> {
        generate_conversion_tests!(YUYV, "camera720p.yuyv", GREY, "camera720p.y800")
    }

    #[test]
    fn test_cpu_yuyv_to_nv16() -> Result<()> {
        generate_conversion_tests!(YUYV, "camera720p.yuyv", NV16, "camera720p.nv16")
    }

    #[test]
    fn test_cpu_yuyv_to_planar_rgb() -> Result<()> {
        generate_conversion_tests!(YUYV, "camera720p.yuyv", PLANAR_RGB, "camera720p.8bps")
    }

    #[test]
    fn test_cpu_yuyv_to_planar_rgba() -> Result<()> {
        generate_conversion_tests!(YUYV, "camera720p.yuyv", PLANAR_RGBA, "camera720p.8bpa")
    }
2698
    // RGB source conversions against the same camera720p reference set.
    #[test]
    fn test_cpu_rgb_to_yuyv() -> Result<()> {
        generate_conversion_tests!(RGB, "camera720p.rgb", YUYV, "camera720p.yuyv")
    }

    #[test]
    fn test_cpu_rgb_to_rgb() -> Result<()> {
        generate_conversion_tests!(RGB, "camera720p.rgb", RGB, "camera720p.rgb")
    }

    #[test]
    fn test_cpu_rgb_to_rgba() -> Result<()> {
        generate_conversion_tests!(RGB, "camera720p.rgb", RGBA, "camera720p.rgba")
    }

    #[test]
    fn test_cpu_rgb_to_grey() -> Result<()> {
        generate_conversion_tests!(RGB, "camera720p.rgb", GREY, "camera720p.y800")
    }

    #[test]
    fn test_cpu_rgb_to_nv16() -> Result<()> {
        generate_conversion_tests!(RGB, "camera720p.rgb", NV16, "camera720p.nv16")
    }

    #[test]
    fn test_cpu_rgb_to_planar_rgb() -> Result<()> {
        generate_conversion_tests!(RGB, "camera720p.rgb", PLANAR_RGB, "camera720p.8bps")
    }

    #[test]
    fn test_cpu_rgb_to_planar_rgba() -> Result<()> {
        generate_conversion_tests!(RGB, "camera720p.rgb", PLANAR_RGBA, "camera720p.8bpa")
    }
2733
    // RGBA source conversions against the same camera720p reference set.
    #[test]
    fn test_cpu_rgba_to_yuyv() -> Result<()> {
        generate_conversion_tests!(RGBA, "camera720p.rgba", YUYV, "camera720p.yuyv")
    }

    #[test]
    fn test_cpu_rgba_to_rgb() -> Result<()> {
        generate_conversion_tests!(RGBA, "camera720p.rgba", RGB, "camera720p.rgb")
    }

    #[test]
    fn test_cpu_rgba_to_rgba() -> Result<()> {
        generate_conversion_tests!(RGBA, "camera720p.rgba", RGBA, "camera720p.rgba")
    }

    #[test]
    fn test_cpu_rgba_to_grey() -> Result<()> {
        generate_conversion_tests!(RGBA, "camera720p.rgba", GREY, "camera720p.y800")
    }

    #[test]
    fn test_cpu_rgba_to_nv16() -> Result<()> {
        generate_conversion_tests!(RGBA, "camera720p.rgba", NV16, "camera720p.nv16")
    }

    #[test]
    fn test_cpu_rgba_to_planar_rgb() -> Result<()> {
        generate_conversion_tests!(RGBA, "camera720p.rgba", PLANAR_RGB, "camera720p.8bps")
    }

    #[test]
    fn test_cpu_rgba_to_planar_rgba() -> Result<()> {
        generate_conversion_tests!(RGBA, "camera720p.rgba", PLANAR_RGBA, "camera720p.8bpa")
    }
2768
    // NV12 source conversions against the same camera720p reference set.
    #[test]
    fn test_cpu_nv12_to_rgb() -> Result<()> {
        generate_conversion_tests!(NV12, "camera720p.nv12", RGB, "camera720p.rgb")
    }

    #[test]
    fn test_cpu_nv12_to_yuyv() -> Result<()> {
        generate_conversion_tests!(NV12, "camera720p.nv12", YUYV, "camera720p.yuyv")
    }

    #[test]
    fn test_cpu_nv12_to_rgba() -> Result<()> {
        generate_conversion_tests!(NV12, "camera720p.nv12", RGBA, "camera720p.rgba")
    }

    #[test]
    fn test_cpu_nv12_to_grey() -> Result<()> {
        generate_conversion_tests!(NV12, "camera720p.nv12", GREY, "camera720p.y800")
    }

    #[test]
    fn test_cpu_nv12_to_nv16() -> Result<()> {
        generate_conversion_tests!(NV12, "camera720p.nv12", NV16, "camera720p.nv16")
    }

    #[test]
    fn test_cpu_nv12_to_planar_rgb() -> Result<()> {
        generate_conversion_tests!(NV12, "camera720p.nv12", PLANAR_RGB, "camera720p.8bps")
    }

    #[test]
    fn test_cpu_nv12_to_planar_rgba() -> Result<()> {
        generate_conversion_tests!(NV12, "camera720p.nv12", PLANAR_RGBA, "camera720p.8bpa")
    }
2803
    // GREY source conversions. These use the greyscale comparison variant
    // since the single-channel source carries no chroma information.
    #[test]
    fn test_cpu_grey_to_yuyv() -> Result<()> {
        generate_conversion_tests_greyscale!(GREY, "camera720p.y800", YUYV, "camera720p.yuyv")
    }

    #[test]
    fn test_cpu_grey_to_rgb() -> Result<()> {
        generate_conversion_tests_greyscale!(GREY, "camera720p.y800", RGB, "camera720p.rgb")
    }

    #[test]
    fn test_cpu_grey_to_rgba() -> Result<()> {
        generate_conversion_tests_greyscale!(GREY, "camera720p.y800", RGBA, "camera720p.rgba")
    }

    #[test]
    fn test_cpu_grey_to_grey() -> Result<()> {
        generate_conversion_tests_greyscale!(GREY, "camera720p.y800", GREY, "camera720p.y800")
    }

    #[test]
    fn test_cpu_grey_to_nv16() -> Result<()> {
        generate_conversion_tests_greyscale!(GREY, "camera720p.y800", NV16, "camera720p.nv16")
    }

    #[test]
    fn test_cpu_grey_to_planar_rgb() -> Result<()> {
        generate_conversion_tests_greyscale!(GREY, "camera720p.y800", PLANAR_RGB, "camera720p.8bps")
    }

    #[test]
    fn test_cpu_grey_to_planar_rgba() -> Result<()> {
        generate_conversion_tests_greyscale!(
            GREY,
            "camera720p.y800",
            PLANAR_RGBA,
            "camera720p.8bpa"
        )
    }
2843
2844    #[test]
2845    fn test_cpu_nearest() -> Result<()> {
2846        // Load source
2847        let src = load_bytes_to_tensor(2, 1, RGB, None, &[0, 0, 0, 255, 255, 255])?;
2848
2849        let mut converter = CPUProcessor::new_nearest();
2850
2851        let mut converted = TensorImage::new(4, 1, RGB, None)?;
2852
2853        converter.convert(
2854            &src,
2855            &mut converted,
2856            Rotation::None,
2857            Flip::None,
2858            Crop::default(),
2859        )?;
2860
2861        assert_eq!(
2862            &converted.tensor().map()?.as_slice(),
2863            &[0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 255, 255]
2864        );
2865
2866        Ok(())
2867    }
2868
2869    #[test]
2870    fn test_cpu_rotate_cw() -> Result<()> {
2871        // Load source
2872        let src = load_bytes_to_tensor(
2873            2,
2874            2,
2875            RGBA,
2876            None,
2877            &[0, 0, 0, 255, 1, 1, 1, 255, 2, 2, 2, 255, 3, 3, 3, 255],
2878        )?;
2879
2880        let mut converter = CPUProcessor::default();
2881
2882        let mut converted = TensorImage::new(4, 4, RGBA, None)?;
2883
2884        converter.convert(
2885            &src,
2886            &mut converted,
2887            Rotation::Clockwise90,
2888            Flip::None,
2889            Crop::default(),
2890        )?;
2891
2892        assert_eq!(&converted.tensor().map()?.as_slice()[0..4], &[2, 2, 2, 255]);
2893        assert_eq!(
2894            &converted.tensor().map()?.as_slice()[12..16],
2895            &[0, 0, 0, 255]
2896        );
2897        assert_eq!(
2898            &converted.tensor().map()?.as_slice()[48..52],
2899            &[3, 3, 3, 255]
2900        );
2901
2902        assert_eq!(
2903            &converted.tensor().map()?.as_slice()[60..64],
2904            &[1, 1, 1, 255]
2905        );
2906
2907        Ok(())
2908    }
2909
2910    #[test]
2911    fn test_cpu_rotate_ccw() -> Result<()> {
2912        // Load source
2913        let src = load_bytes_to_tensor(
2914            2,
2915            2,
2916            RGBA,
2917            None,
2918            &[0, 0, 0, 255, 1, 1, 1, 255, 2, 2, 2, 255, 3, 3, 3, 255],
2919        )?;
2920
2921        let mut converter = CPUProcessor::default();
2922
2923        let mut converted = TensorImage::new(4, 4, RGBA, None)?;
2924
2925        converter.convert(
2926            &src,
2927            &mut converted,
2928            Rotation::CounterClockwise90,
2929            Flip::None,
2930            Crop::default(),
2931        )?;
2932
2933        assert_eq!(&converted.tensor().map()?.as_slice()[0..4], &[1, 1, 1, 255]);
2934        assert_eq!(
2935            &converted.tensor().map()?.as_slice()[12..16],
2936            &[3, 3, 3, 255]
2937        );
2938        assert_eq!(
2939            &converted.tensor().map()?.as_slice()[48..52],
2940            &[0, 0, 0, 255]
2941        );
2942
2943        assert_eq!(
2944            &converted.tensor().map()?.as_slice()[60..64],
2945            &[2, 2, 2, 255]
2946        );
2947
2948        Ok(())
2949    }
2950
2951    #[test]
2952    fn test_cpu_rotate_180() -> Result<()> {
2953        // Load source
2954        let src = load_bytes_to_tensor(
2955            2,
2956            2,
2957            RGBA,
2958            None,
2959            &[0, 0, 0, 255, 1, 1, 1, 255, 2, 2, 2, 255, 3, 3, 3, 255],
2960        )?;
2961
2962        let mut converter = CPUProcessor::default();
2963
2964        let mut converted = TensorImage::new(4, 4, RGBA, None)?;
2965
2966        converter.convert(
2967            &src,
2968            &mut converted,
2969            Rotation::Rotate180,
2970            Flip::None,
2971            Crop::default(),
2972        )?;
2973
2974        assert_eq!(&converted.tensor().map()?.as_slice()[0..4], &[3, 3, 3, 255]);
2975        assert_eq!(
2976            &converted.tensor().map()?.as_slice()[12..16],
2977            &[2, 2, 2, 255]
2978        );
2979        assert_eq!(
2980            &converted.tensor().map()?.as_slice()[48..52],
2981            &[1, 1, 1, 255]
2982        );
2983
2984        assert_eq!(
2985            &converted.tensor().map()?.as_slice()[60..64],
2986            &[0, 0, 0, 255]
2987        );
2988
2989        Ok(())
2990    }
2991
2992    #[test]
2993    fn test_cpu_flip_v() -> Result<()> {
2994        // Load source
2995        let src = load_bytes_to_tensor(
2996            2,
2997            2,
2998            RGBA,
2999            None,
3000            &[0, 0, 0, 255, 1, 1, 1, 255, 2, 2, 2, 255, 3, 3, 3, 255],
3001        )?;
3002
3003        let mut converter = CPUProcessor::default();
3004
3005        let mut converted = TensorImage::new(4, 4, RGBA, None)?;
3006
3007        converter.convert(
3008            &src,
3009            &mut converted,
3010            Rotation::None,
3011            Flip::Vertical,
3012            Crop::default(),
3013        )?;
3014
3015        assert_eq!(&converted.tensor().map()?.as_slice()[0..4], &[2, 2, 2, 255]);
3016        assert_eq!(
3017            &converted.tensor().map()?.as_slice()[12..16],
3018            &[3, 3, 3, 255]
3019        );
3020        assert_eq!(
3021            &converted.tensor().map()?.as_slice()[48..52],
3022            &[0, 0, 0, 255]
3023        );
3024
3025        assert_eq!(
3026            &converted.tensor().map()?.as_slice()[60..64],
3027            &[1, 1, 1, 255]
3028        );
3029
3030        Ok(())
3031    }
3032
3033    #[test]
3034    fn test_cpu_flip_h() -> Result<()> {
3035        // Load source
3036        let src = load_bytes_to_tensor(
3037            2,
3038            2,
3039            RGBA,
3040            None,
3041            &[0, 0, 0, 255, 1, 1, 1, 255, 2, 2, 2, 255, 3, 3, 3, 255],
3042        )?;
3043
3044        let mut converter = CPUProcessor::default();
3045
3046        let mut converted = TensorImage::new(4, 4, RGBA, None)?;
3047
3048        converter.convert(
3049            &src,
3050            &mut converted,
3051            Rotation::None,
3052            Flip::Horizontal,
3053            Crop::default(),
3054        )?;
3055
3056        assert_eq!(&converted.tensor().map()?.as_slice()[0..4], &[1, 1, 1, 255]);
3057        assert_eq!(
3058            &converted.tensor().map()?.as_slice()[12..16],
3059            &[0, 0, 0, 255]
3060        );
3061        assert_eq!(
3062            &converted.tensor().map()?.as_slice()[48..52],
3063            &[3, 3, 3, 255]
3064        );
3065
3066        assert_eq!(
3067            &converted.tensor().map()?.as_slice()[60..64],
3068            &[2, 2, 2, 255]
3069        );
3070
3071        Ok(())
3072    }
3073
    // Source crop: only the left 1x2 column of the GREY source (values 10
    // and 30) is sampled into the 2x2 RGBA output.
    #[test]
    fn test_cpu_src_crop() -> Result<()> {
        // Load source
        let src = load_bytes_to_tensor(2, 2, GREY, None, &[10, 20, 30, 40])?;

        let mut converter = CPUProcessor::default();

        let mut converted = TensorImage::new(2, 2, RGBA, None)?;

        converter.convert(
            &src,
            &mut converted,
            Rotation::None,
            Flip::None,
            Crop::new().with_src_rect(Some(Rect::new(0, 0, 1, 2))),
        )?;

        // Grey expands to identical R/G/B with opaque alpha. The 13/33
        // values presumably come from the resampling filter at the crop
        // edge — TODO confirm against the resize algorithm.
        assert_eq!(
            converted.tensor().map()?.as_slice(),
            &[10, 10, 10, 255, 13, 13, 13, 255, 30, 30, 30, 255, 33, 33, 33, 255]
        );
        Ok(())
    }
3097
    // Destination crop: only the top row of the YUYV output is written; the
    // prefilled bottom row must remain untouched.
    #[test]
    fn test_cpu_dst_crop() -> Result<()> {
        // Load source
        let src = load_bytes_to_tensor(2, 2, GREY, None, &[2, 4, 6, 8])?;

        let mut converter = CPUProcessor::default();

        // Destination prefilled with a sentinel pattern so untouched bytes
        // are detectable after conversion.
        let mut converted =
            load_bytes_to_tensor(2, 2, YUYV, None, &[200, 128, 200, 128, 200, 128, 200, 128])?;

        converter.convert(
            &src,
            &mut converted,
            Rotation::None,
            Flip::None,
            Crop::new().with_dst_rect(Some(Rect::new(0, 0, 2, 1))),
        )?;

        // First row carries converted values; second row keeps the sentinel
        // 200/128 fill bytes.
        assert_eq!(
            converted.tensor().map()?.as_slice(),
            &[20, 128, 21, 128, 200, 128, 200, 128]
        );
        Ok(())
    }
3122
    // dst_color fill: pixels outside the 1x1 destination crop at (1,1) are
    // painted red; the crop cell receives the source pixel.
    #[test]
    fn test_cpu_fill_rgba() -> Result<()> {
        // Load source
        let src = load_bytes_to_tensor(1, 1, RGBA, None, &[3, 3, 3, 255])?;

        let mut converter = CPUProcessor::default();

        let mut converted = TensorImage::new(2, 2, RGBA, None)?;

        converter.convert(
            &src,
            &mut converted,
            Rotation::None,
            Flip::None,
            Crop {
                src_rect: None,
                dst_rect: Some(Rect {
                    left: 1,
                    top: 1,
                    width: 1,
                    height: 1,
                }),
                dst_color: Some([255, 0, 0, 255]),
            },
        )?;

        // Three fill pixels (red) and the source pixel in the bottom-right.
        assert_eq!(
            converted.tensor().map()?.as_slice(),
            &[255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 3, 3, 3, 255]
        );
        Ok(())
    }
3155
    // dst_color fill in a packed-YUV destination: the RGBA fill color must
    // be converted into YUYV bytes for rows outside the crop, while the
    // middle row gets the converted source pixels.
    #[test]
    fn test_cpu_fill_yuyv() -> Result<()> {
        // Load source
        let src = load_bytes_to_tensor(2, 1, RGBA, None, &[3, 3, 3, 255, 3, 3, 3, 255])?;

        let mut converter = CPUProcessor::default();

        let mut converted = TensorImage::new(2, 3, YUYV, None)?;

        converter.convert(
            &src,
            &mut converted,
            Rotation::None,
            Flip::None,
            Crop {
                src_rect: None,
                dst_rect: Some(Rect {
                    left: 0,
                    top: 1,
                    width: 2,
                    height: 1,
                }),
                dst_color: Some([255, 0, 0, 255]),
            },
        )?;

        // Rows 0 and 2 carry the YUYV-encoded red fill; row 1 carries the
        // converted near-black source pixels.
        assert_eq!(
            converted.tensor().map()?.as_slice(),
            &[63, 102, 63, 240, 19, 128, 19, 128, 63, 102, 63, 240]
        );
        Ok(())
    }
3188
    // dst_color fill in a single-channel destination: rows outside the crop
    // receive the grey fill value; the middle row gets the source pixels.
    #[test]
    fn test_cpu_fill_grey() -> Result<()> {
        // Load source
        let src = load_bytes_to_tensor(2, 1, RGBA, None, &[3, 3, 3, 255, 3, 3, 3, 255])?;

        let mut converter = CPUProcessor::default();

        let mut converted = TensorImage::new(2, 3, GREY, None)?;

        converter.convert(
            &src,
            &mut converted,
            Rotation::None,
            Flip::None,
            Crop {
                src_rect: None,
                dst_rect: Some(Rect {
                    left: 0,
                    top: 1,
                    width: 2,
                    height: 1,
                }),
                dst_color: Some([200, 200, 200, 255]),
            },
        )?;

        // Rows 0 and 2 carry the fill value 200; row 1 carries the source
        // pixel value 3.
        assert_eq!(
            converted.tensor().map()?.as_slice(),
            &[200, 200, 3, 3, 200, 200]
        );
        Ok(())
    }
3221
    // Smoke test: renders a 2-class ModelPack segmentation map over the
    // giraffe test image via draw_masks.
    // NOTE(review): this test writes test_segmentation.jpg into the working
    // directory and asserts nothing beyond "does not panic" — consider
    // comparing against a golden image as test_segmentation_yolo does.
    #[test]
    fn test_segmentation() {
        use edgefirst_decoder::Segmentation;
        use ndarray::Array3;

        let mut image = TensorImage::load(
            include_bytes!("../../../testdata/giraffe.jpg"),
            Some(RGBA),
            None,
        )
        .unwrap();

        // Reorder the stored (class, h, w) data into (h, w, class) layout
        // expected by the Segmentation struct.
        let mut segmentation = Array3::from_shape_vec(
            (2, 160, 160),
            include_bytes!("../../../testdata/modelpack_seg_2x160x160.bin").to_vec(),
        )
        .unwrap();
        segmentation.swap_axes(0, 1);
        segmentation.swap_axes(1, 2);
        let segmentation = segmentation.as_standard_layout().to_owned();

        // Full-frame segmentation (normalized bbox covers the whole image).
        let seg = Segmentation {
            segmentation,
            xmin: 0.0,
            ymin: 0.0,
            xmax: 1.0,
            ymax: 1.0,
        };

        let mut renderer = CPUProcessor::new();
        renderer.draw_masks(&mut image, &[], &[seg]).unwrap();

        image.save_jpeg("test_segmentation.jpg", 80).unwrap();
    }
3256
    // Renders a single YOLO-style instance mask (76x55 cropped proto output)
    // over the giraffe image using a custom two-color palette, then compares
    // against a golden rendering.
    #[test]
    fn test_segmentation_yolo() {
        use edgefirst_decoder::Segmentation;
        use ndarray::Array3;

        let mut image = TensorImage::load(
            include_bytes!("../../../testdata/giraffe.jpg"),
            Some(RGBA),
            None,
        )
        .unwrap();

        let segmentation = Array3::from_shape_vec(
            (76, 55, 1),
            include_bytes!("../../../testdata/yolov8_seg_crop_76x55.bin").to_vec(),
        )
        .unwrap();

        // Detection box matching the normalized crop region of the mask.
        let detect = DetectBox {
            bbox: [0.59375, 0.25, 0.9375, 0.725].into(),
            score: 0.99,
            label: 1,
        };

        let seg = Segmentation {
            segmentation,
            xmin: 0.59375,
            ymin: 0.25,
            xmax: 0.9375,
            ymax: 0.725,
        };

        let mut renderer = CPUProcessor::new();
        renderer
            .set_class_colors(&[[255, 255, 0, 233], [128, 128, 255, 100]])
            .unwrap();
        // set_class_colors must overwrite the leading palette entries.
        assert_eq!(renderer.colors[1], [128, 128, 255, 100]);
        renderer.draw_masks(&mut image, &[detect], &[seg]).unwrap();
        let expected = TensorImage::load(
            include_bytes!("../../../testdata/output_render_cpu.jpg"),
            Some(RGBA),
            None,
        )
        .unwrap();
        compare_images_convert_to_rgb(&image, &expected, 0.99, function!());
    }
3303
3304    // =========================================================================
3305    // Generic Conversion Tests (TensorImageRef support)
3306    // =========================================================================
3307
    // Packed-RGB to planar-RGB conversion through the generic TensorImageRef
    // destination path.
    #[test]
    fn test_convert_rgb_to_planar_rgb_generic() {
        // Create RGB source image
        let mut src = TensorImage::new(4, 4, RGB, None).unwrap();
        {
            let mut map = src.tensor_mut().map().unwrap();
            let data = map.as_mut_slice();
            // Fill with pattern: pixel i = [10*i, 10*i + 1, 10*i + 2], so
            // pixel 0 = [0, 1, 2], pixel 1 = [10, 11, 12], etc.
            for i in 0..16 {
                data[i * 3] = (i * 10) as u8;
                data[i * 3 + 1] = (i * 10 + 1) as u8;
                data[i * 3 + 2] = (i * 10 + 2) as u8;
            }
        }

        // Create planar RGB destination using TensorImageRef
        let mut tensor = Tensor::<u8>::new(&[3, 4, 4], None, None).unwrap();
        let mut dst = TensorImageRef::from_borrowed_tensor(&mut tensor, PLANAR_RGB).unwrap();

        CPUProcessor::convert_format_generic(&src, &mut dst).unwrap();

        // Verify the conversion - check first few pixels of each plane
        let map = dst.tensor().map().unwrap();
        let data = map.as_slice();

        // R plane starts at 0, G at 16, B at 32
        assert_eq!(data[0], 0); // R of pixel 0
        assert_eq!(data[16], 1); // G of pixel 0
        assert_eq!(data[32], 2); // B of pixel 0

        assert_eq!(data[1], 10); // R of pixel 1
        assert_eq!(data[17], 11); // G of pixel 1
        assert_eq!(data[33], 12); // B of pixel 1
    }
3342
    // RGBA to planar-RGB conversion through the generic TensorImageRef path;
    // the alpha channel must be dropped.
    #[test]
    fn test_convert_rgba_to_planar_rgb_generic() {
        // Create RGBA source image
        let mut src = TensorImage::new(4, 4, RGBA, None).unwrap();
        {
            let mut map = src.tensor_mut().map().unwrap();
            let data = map.as_mut_slice();
            // Fill with pattern: pixel i = [10*i, 10*i + 1, 10*i + 2, 255]
            for i in 0..16 {
                data[i * 4] = (i * 10) as u8; // R
                data[i * 4 + 1] = (i * 10 + 1) as u8; // G
                data[i * 4 + 2] = (i * 10 + 2) as u8; // B
                data[i * 4 + 3] = 255; // A (ignored)
            }
        }

        // Create planar RGB destination
        let mut tensor = Tensor::<u8>::new(&[3, 4, 4], None, None).unwrap();
        let mut dst = TensorImageRef::from_borrowed_tensor(&mut tensor, PLANAR_RGB).unwrap();

        CPUProcessor::convert_format_generic(&src, &mut dst).unwrap();

        // Verify the conversion
        let map = dst.tensor().map().unwrap();
        let data = map.as_slice();

        // R plane starts at 0, G at 16, B at 32
        assert_eq!(data[0], 0); // R of pixel 0
        assert_eq!(data[16], 1); // G of pixel 0
        assert_eq!(data[32], 2); // B of pixel 0
    }
3373
    // Same-format conversion through the generic path must degenerate to a
    // byte-for-byte copy.
    #[test]
    fn test_copy_image_generic_same_format() {
        // Create source image with data
        let mut src = TensorImage::new(4, 4, RGB, None).unwrap();
        {
            let mut map = src.tensor_mut().map().unwrap();
            let data = map.as_mut_slice();
            for (i, byte) in data.iter_mut().enumerate() {
                *byte = (i % 256) as u8;
            }
        }

        // Create destination tensor
        let mut tensor = Tensor::<u8>::new(&[4, 4, 3], None, None).unwrap();
        let mut dst = TensorImageRef::from_borrowed_tensor(&mut tensor, RGB).unwrap();

        CPUProcessor::convert_format_generic(&src, &mut dst).unwrap();

        // Verify data was copied
        let src_map = src.tensor().map().unwrap();
        let dst_map = dst.tensor().map().unwrap();
        assert_eq!(src_map.as_slice(), dst_map.as_slice());
    }
3397
3398    #[test]
3399    fn test_convert_format_generic_unsupported() {
3400        // Try unsupported conversion (NV12 to PLANAR_RGB)
3401        let src = TensorImage::new(8, 8, NV12, None).unwrap();
3402        let mut tensor = Tensor::<u8>::new(&[3, 8, 8], None, None).unwrap();
3403        let mut dst = TensorImageRef::from_borrowed_tensor(&mut tensor, PLANAR_RGB).unwrap();
3404
3405        let result = CPUProcessor::convert_format_generic(&src, &mut dst);
3406        assert!(result.is_err());
3407        assert!(matches!(result, Err(Error::NotSupported(_))));
3408    }
3409
3410    #[test]
3411    fn test_fill_image_outside_crop_generic_rgba() {
3412        let mut tensor = Tensor::<u8>::new(&[4, 4, 4], None, None).unwrap();
3413        // Initialize to zeros
3414        tensor.map().unwrap().as_mut_slice().fill(0);
3415
3416        let mut dst = TensorImageRef::from_borrowed_tensor(&mut tensor, RGBA).unwrap();
3417
3418        // Fill outside a 2x2 crop in the center with red
3419        let crop = Rect::new(1, 1, 2, 2);
3420        CPUProcessor::fill_image_outside_crop_generic(&mut dst, [255, 0, 0, 255], crop).unwrap();
3421
3422        let map = dst.tensor().map().unwrap();
3423        let data = map.as_slice();
3424
3425        // Top-left corner should be filled (red)
3426        assert_eq!(&data[0..4], &[255, 0, 0, 255]);
3427
3428        // Center pixel (1,1) should still be zero (inside crop)
3429        // row=1, col=1, width=4, bytes_per_pixel=4 -> offset = (1*4 + 1) * 4 = 20
3430        let center_offset = 20;
3431        assert_eq!(&data[center_offset..center_offset + 4], &[0, 0, 0, 0]);
3432    }
3433
3434    #[test]
3435    fn test_fill_image_outside_crop_generic_rgb() {
3436        let mut tensor = Tensor::<u8>::new(&[4, 4, 3], None, None).unwrap();
3437        tensor.map().unwrap().as_mut_slice().fill(0);
3438
3439        let mut dst = TensorImageRef::from_borrowed_tensor(&mut tensor, RGB).unwrap();
3440
3441        let crop = Rect::new(1, 1, 2, 2);
3442        CPUProcessor::fill_image_outside_crop_generic(&mut dst, [0, 255, 0, 255], crop).unwrap();
3443
3444        let map = dst.tensor().map().unwrap();
3445        let data = map.as_slice();
3446
3447        // Top-left corner should be green
3448        assert_eq!(&data[0..3], &[0, 255, 0]);
3449
3450        // Center pixel (1,1): row=1, col=1, width=4, bytes=3 -> offset = (1*4 + 1) * 3
3451        // = 15
3452        let center_offset = 15;
3453        assert_eq!(&data[center_offset..center_offset + 3], &[0, 0, 0]);
3454    }
3455
3456    #[test]
3457    fn test_fill_image_outside_crop_generic_planar_rgb() {
3458        let mut tensor = Tensor::<u8>::new(&[3, 4, 4], None, None).unwrap();
3459        tensor.map().unwrap().as_mut_slice().fill(0);
3460
3461        let mut dst = TensorImageRef::from_borrowed_tensor(&mut tensor, PLANAR_RGB).unwrap();
3462
3463        let crop = Rect::new(1, 1, 2, 2);
3464        CPUProcessor::fill_image_outside_crop_generic(&mut dst, [128, 64, 32, 255], crop).unwrap();
3465
3466        let map = dst.tensor().map().unwrap();
3467        let data = map.as_slice();
3468
3469        // For planar: R plane is [0..16], G plane is [16..32], B plane is [32..48]
3470        // Top-left pixel (0,0) should have R=128, G=64, B=32
3471        assert_eq!(data[0], 128); // R plane, pixel 0
3472        assert_eq!(data[16], 64); // G plane, pixel 0
3473        assert_eq!(data[32], 32); // B plane, pixel 0
3474
3475        // Center pixel (1,1): row=1, col=1, width=4 -> index = 1*4 + 1 = 5
3476        let center_idx = 5;
3477        assert_eq!(data[center_idx], 0); // R
3478        assert_eq!(data[16 + center_idx], 0); // G
3479        assert_eq!(data[32 + center_idx], 0); // B
3480    }
3481}