Skip to main content

edgefirst_image/cpu/
mod.rs

1// SPDX-FileCopyrightText: Copyright 2025 Au-Zone Technologies
2// SPDX-License-Identifier: Apache-2.0
3
4use crate::{Crop, Error, Flip, FunctionTimer, ImageProcessorTrait, Rect, Result, Rotation};
5use edgefirst_decoder::{DetectBox, ProtoData, Segmentation};
6use edgefirst_tensor::{
7    DType, PixelFormat, Tensor, TensorDyn, TensorMapTrait, TensorMemory, TensorTrait,
8};
9
10mod convert;
11mod masks;
12mod resize;
13mod tests;
14
15use masks::bilinear_dot;
16
/// CPUConverter implements the ImageProcessor trait using the fallback CPU
/// implementation for image processing.
#[derive(Debug, Clone)]
pub struct CPUProcessor {
    // Reusable fast_image_resize engine, kept across calls to avoid
    // per-conversion setup cost.
    resizer: fast_image_resize::Resizer,
    // Resize algorithm/options chosen at construction time
    // (bilinear convolution or nearest neighbor).
    options: fast_image_resize::ResizeOptions,
    // Per-class RGBA palette for mask/box overlays; indexed modulo its
    // length when rendering (see draw_proto_masks_impl).
    colors: [[u8; 4]; 20],
}
25
// SAFETY: NOTE(review) — these impls assert that every field of CPUProcessor
// (in particular fast_image_resize::Resizer and ResizeOptions) is safe to
// move to and share across threads. Confirm against the fast_image_resize
// version in use; if Resizer is already Send + Sync these impls are
// redundant and should be removed so the compiler can verify this instead.
unsafe impl Send for CPUProcessor {}
unsafe impl Sync for CPUProcessor {}
28
29impl Default for CPUProcessor {
30    fn default() -> Self {
31        Self::new_bilinear()
32    }
33}
34
35/// Compute row stride for a packed-format Tensor<u8> image given its format.
36fn row_stride_for(width: usize, fmt: PixelFormat) -> usize {
37    use edgefirst_tensor::PixelLayout;
38    match fmt.layout() {
39        PixelLayout::Packed => width * fmt.channels(),
40        PixelLayout::Planar | PixelLayout::SemiPlanar => width,
41        _ => width, // fallback for non-exhaustive
42    }
43}
44
45/// Apply XOR 0x80 bias to color channels only, preserving alpha.
46///
47/// Matches GL int8 shader behavior: `vec4(int8_bias(c.rgb), c.a)`.
48/// For formats without alpha, XORs every byte (fast path).
49pub(crate) fn apply_int8_xor_bias(data: &mut [u8], fmt: PixelFormat) {
50    use edgefirst_tensor::PixelLayout;
51    if !fmt.has_alpha() {
52        for b in data.iter_mut() {
53            *b ^= 0x80;
54        }
55    } else if fmt.layout() == PixelLayout::Planar {
56        // Planar with alpha (e.g. PlanarRgba): XOR color planes, skip alpha plane.
57        let channels = fmt.channels();
58        let plane_size = data.len() / channels;
59        for b in data[..plane_size * (channels - 1)].iter_mut() {
60            *b ^= 0x80;
61        }
62    } else {
63        // Packed with alpha (Rgba, Bgra): XOR color bytes, skip alpha byte.
64        let channels = fmt.channels();
65        for pixel in data.chunks_exact_mut(channels) {
66            for b in &mut pixel[..channels - 1] {
67                *b ^= 0x80;
68            }
69        }
70    }
71}
72
73impl CPUProcessor {
74    /// Creates a new CPUConverter with bilinear resizing.
75    pub fn new() -> Self {
76        Self::new_bilinear()
77    }
78
79    /// Creates a new CPUConverter with bilinear resizing.
80    fn new_bilinear() -> Self {
81        let resizer = fast_image_resize::Resizer::new();
82        let options = fast_image_resize::ResizeOptions::new()
83            .resize_alg(fast_image_resize::ResizeAlg::Convolution(
84                fast_image_resize::FilterType::Bilinear,
85            ))
86            .use_alpha(false);
87
88        log::debug!("CPUConverter created");
89        Self {
90            resizer,
91            options,
92            colors: crate::DEFAULT_COLORS_U8,
93        }
94    }
95
96    /// Creates a new CPUConverter with nearest neighbor resizing.
97    pub fn new_nearest() -> Self {
98        let resizer = fast_image_resize::Resizer::new();
99        let options = fast_image_resize::ResizeOptions::new()
100            .resize_alg(fast_image_resize::ResizeAlg::Nearest)
101            .use_alpha(false);
102        log::debug!("CPUConverter created");
103        Self {
104            resizer,
105            options,
106            colors: crate::DEFAULT_COLORS_U8,
107        }
108    }
109
110    pub(crate) fn support_conversion_pf(src: PixelFormat, dst: PixelFormat) -> bool {
111        use PixelFormat::*;
112        matches!(
113            (src, dst),
114            (Nv12, Rgb)
115                | (Nv12, Rgba)
116                | (Nv12, Grey)
117                | (Nv16, Rgb)
118                | (Nv16, Rgba)
119                | (Nv16, Bgra)
120                | (Yuyv, Rgb)
121                | (Yuyv, Rgba)
122                | (Yuyv, Grey)
123                | (Yuyv, Yuyv)
124                | (Yuyv, PlanarRgb)
125                | (Yuyv, PlanarRgba)
126                | (Yuyv, Nv16)
127                | (Vyuy, Rgb)
128                | (Vyuy, Rgba)
129                | (Vyuy, Grey)
130                | (Vyuy, Vyuy)
131                | (Vyuy, PlanarRgb)
132                | (Vyuy, PlanarRgba)
133                | (Vyuy, Nv16)
134                | (Rgba, Rgb)
135                | (Rgba, Rgba)
136                | (Rgba, Grey)
137                | (Rgba, Yuyv)
138                | (Rgba, PlanarRgb)
139                | (Rgba, PlanarRgba)
140                | (Rgba, Nv16)
141                | (Rgb, Rgb)
142                | (Rgb, Rgba)
143                | (Rgb, Grey)
144                | (Rgb, Yuyv)
145                | (Rgb, PlanarRgb)
146                | (Rgb, PlanarRgba)
147                | (Rgb, Nv16)
148                | (Grey, Rgb)
149                | (Grey, Rgba)
150                | (Grey, Grey)
151                | (Grey, Yuyv)
152                | (Grey, PlanarRgb)
153                | (Grey, PlanarRgba)
154                | (Grey, Nv16)
155                | (Nv12, Bgra)
156                | (Yuyv, Bgra)
157                | (Vyuy, Bgra)
158                | (Rgba, Bgra)
159                | (Rgb, Bgra)
160                | (Grey, Bgra)
161                | (Bgra, Bgra)
162                | (PlanarRgb, Rgb)
163                | (PlanarRgb, Rgba)
164                | (PlanarRgba, Rgb)
165                | (PlanarRgba, Rgba)
166                | (PlanarRgb, Bgra)
167                | (PlanarRgba, Bgra)
168        )
169    }
170
171    /// Format conversion dispatch for Tensor<u8> with PixelFormat metadata.
172    pub(crate) fn convert_format_pf(
173        src: &Tensor<u8>,
174        dst: &mut Tensor<u8>,
175        src_fmt: PixelFormat,
176        dst_fmt: PixelFormat,
177    ) -> Result<()> {
178        let _timer = FunctionTimer::new(format!(
179            "ImageProcessor::convert_format {} to {}",
180            src_fmt, dst_fmt,
181        ));
182
183        use PixelFormat::*;
184        match (src_fmt, dst_fmt) {
185            (Nv12, Rgb) => Self::convert_nv12_to_rgb(src, dst),
186            (Nv12, Rgba) => Self::convert_nv12_to_rgba(src, dst),
187            (Nv12, Grey) => Self::convert_nv12_to_grey(src, dst),
188            (Yuyv, Rgb) => Self::convert_yuyv_to_rgb(src, dst),
189            (Yuyv, Rgba) => Self::convert_yuyv_to_rgba(src, dst),
190            (Yuyv, Grey) => Self::convert_yuyv_to_grey(src, dst),
191            (Yuyv, Yuyv) => Self::copy_image(src, dst),
192            (Yuyv, PlanarRgb) => Self::convert_yuyv_to_8bps(src, dst),
193            (Yuyv, PlanarRgba) => Self::convert_yuyv_to_prgba(src, dst),
194            (Yuyv, Nv16) => Self::convert_yuyv_to_nv16(src, dst),
195            (Vyuy, Rgb) => Self::convert_vyuy_to_rgb(src, dst),
196            (Vyuy, Rgba) => Self::convert_vyuy_to_rgba(src, dst),
197            (Vyuy, Grey) => Self::convert_vyuy_to_grey(src, dst),
198            (Vyuy, Vyuy) => Self::copy_image(src, dst),
199            (Vyuy, PlanarRgb) => Self::convert_vyuy_to_8bps(src, dst),
200            (Vyuy, PlanarRgba) => Self::convert_vyuy_to_prgba(src, dst),
201            (Vyuy, Nv16) => Self::convert_vyuy_to_nv16(src, dst),
202            (Rgba, Rgb) => Self::convert_rgba_to_rgb(src, dst),
203            (Rgba, Rgba) => Self::copy_image(src, dst),
204            (Rgba, Grey) => Self::convert_rgba_to_grey(src, dst),
205            (Rgba, Yuyv) => Self::convert_rgba_to_yuyv(src, dst),
206            (Rgba, PlanarRgb) => Self::convert_rgba_to_8bps(src, dst),
207            (Rgba, PlanarRgba) => Self::convert_rgba_to_prgba(src, dst),
208            (Rgba, Nv16) => Self::convert_rgba_to_nv16(src, dst),
209            (Rgb, Rgb) => Self::copy_image(src, dst),
210            (Rgb, Rgba) => Self::convert_rgb_to_rgba(src, dst),
211            (Rgb, Grey) => Self::convert_rgb_to_grey(src, dst),
212            (Rgb, Yuyv) => Self::convert_rgb_to_yuyv(src, dst),
213            (Rgb, PlanarRgb) => Self::convert_rgb_to_8bps(src, dst),
214            (Rgb, PlanarRgba) => Self::convert_rgb_to_prgba(src, dst),
215            (Rgb, Nv16) => Self::convert_rgb_to_nv16(src, dst),
216            (Grey, Rgb) => Self::convert_grey_to_rgb(src, dst),
217            (Grey, Rgba) => Self::convert_grey_to_rgba(src, dst),
218            (Grey, Grey) => Self::copy_image(src, dst),
219            (Grey, Yuyv) => Self::convert_grey_to_yuyv(src, dst),
220            (Grey, PlanarRgb) => Self::convert_grey_to_8bps(src, dst),
221            (Grey, PlanarRgba) => Self::convert_grey_to_prgba(src, dst),
222            (Grey, Nv16) => Self::convert_grey_to_nv16(src, dst),
223
224            // the following converts are added for use in testing
225            (Nv16, Rgb) => Self::convert_nv16_to_rgb(src, dst),
226            (Nv16, Rgba) => Self::convert_nv16_to_rgba(src, dst),
227            (PlanarRgb, Rgb) => Self::convert_8bps_to_rgb(src, dst),
228            (PlanarRgb, Rgba) => Self::convert_8bps_to_rgba(src, dst),
229            (PlanarRgba, Rgb) => Self::convert_prgba_to_rgb(src, dst),
230            (PlanarRgba, Rgba) => Self::convert_prgba_to_rgba(src, dst),
231
232            // BGRA destination: convert to RGBA layout, then swap R and B
233            (Bgra, Bgra) => Self::copy_image(src, dst),
234            (Nv12, Bgra) => {
235                Self::convert_nv12_to_rgba(src, dst)?;
236                Self::swizzle_rb_4chan(dst)
237            }
238            (Nv16, Bgra) => {
239                Self::convert_nv16_to_rgba(src, dst)?;
240                Self::swizzle_rb_4chan(dst)
241            }
242            (Yuyv, Bgra) => {
243                Self::convert_yuyv_to_rgba(src, dst)?;
244                Self::swizzle_rb_4chan(dst)
245            }
246            (Vyuy, Bgra) => {
247                Self::convert_vyuy_to_rgba(src, dst)?;
248                Self::swizzle_rb_4chan(dst)
249            }
250            (Rgba, Bgra) => {
251                dst.map()?.copy_from_slice(&src.map()?);
252                Self::swizzle_rb_4chan(dst)
253            }
254            (Rgb, Bgra) => {
255                Self::convert_rgb_to_rgba(src, dst)?;
256                Self::swizzle_rb_4chan(dst)
257            }
258            (Grey, Bgra) => {
259                Self::convert_grey_to_rgba(src, dst)?;
260                Self::swizzle_rb_4chan(dst)
261            }
262            (PlanarRgb, Bgra) => {
263                Self::convert_8bps_to_rgba(src, dst)?;
264                Self::swizzle_rb_4chan(dst)
265            }
266            (PlanarRgba, Bgra) => {
267                Self::convert_prgba_to_rgba(src, dst)?;
268                Self::swizzle_rb_4chan(dst)
269            }
270
271            (s, d) => Err(Error::NotSupported(format!("Conversion from {s} to {d}",))),
272        }
273    }
274
275    /// Tensor<u8>-based fill_image_outside_crop.
276    pub(crate) fn fill_image_outside_crop_u8(
277        dst: &mut Tensor<u8>,
278        rgba: [u8; 4],
279        crop: Rect,
280    ) -> Result<()> {
281        let dst_fmt = dst.format().unwrap();
282        let dst_w = dst.width().unwrap();
283        let dst_h = dst.height().unwrap();
284        let mut dst_map = dst.map()?;
285        let dst_tup = (dst_map.as_mut_slice(), dst_w, dst_h);
286        Self::fill_outside_crop_dispatch(dst_tup, dst_fmt, rgba, crop)
287    }
288
289    /// Common fill dispatch by format.
290    fn fill_outside_crop_dispatch(
291        dst: (&mut [u8], usize, usize),
292        fmt: PixelFormat,
293        rgba: [u8; 4],
294        crop: Rect,
295    ) -> Result<()> {
296        use PixelFormat::*;
297        match fmt {
298            Rgba | Bgra => Self::fill_image_outside_crop_(dst, rgba, crop),
299            Rgb => Self::fill_image_outside_crop_(dst, Self::rgba_to_rgb(rgba), crop),
300            Grey => Self::fill_image_outside_crop_(dst, Self::rgba_to_grey(rgba), crop),
301            Yuyv => Self::fill_image_outside_crop_(
302                (dst.0, dst.1 / 2, dst.2),
303                Self::rgba_to_yuyv(rgba),
304                Rect::new(crop.left / 2, crop.top, crop.width.div_ceil(2), crop.height),
305            ),
306            PlanarRgb => Self::fill_image_outside_crop_planar(dst, Self::rgba_to_rgb(rgba), crop),
307            PlanarRgba => Self::fill_image_outside_crop_planar(dst, rgba, crop),
308            Nv16 => {
309                let yuyv = Self::rgba_to_yuyv(rgba);
310                Self::fill_image_outside_crop_yuv_semiplanar(dst, yuyv[0], [yuyv[1], yuyv[3]], crop)
311            }
312            _ => Err(Error::Internal(format!(
313                "Found unexpected destination {fmt}",
314            ))),
315        }
316    }
317}
318
319impl ImageProcessorTrait for CPUProcessor {
320    fn convert(
321        &mut self,
322        src: &TensorDyn,
323        dst: &mut TensorDyn,
324        rotation: Rotation,
325        flip: Flip,
326        crop: Crop,
327    ) -> Result<()> {
328        self.convert_impl(src, dst, rotation, flip, crop)
329    }
330
331    fn draw_decoded_masks(
332        &mut self,
333        dst: &mut TensorDyn,
334        detect: &[DetectBox],
335        segmentation: &[Segmentation],
336        overlay: crate::MaskOverlay<'_>,
337    ) -> Result<()> {
338        let dst = dst.as_u8_mut().ok_or(Error::NotAnImage)?;
339        self.draw_decoded_masks_impl(
340            dst,
341            detect,
342            segmentation,
343            overlay.opacity,
344            overlay.color_mode,
345        )
346    }
347
348    fn draw_proto_masks(
349        &mut self,
350        dst: &mut TensorDyn,
351        detect: &[DetectBox],
352        proto_data: &ProtoData,
353        overlay: crate::MaskOverlay<'_>,
354    ) -> Result<()> {
355        let dst = dst.as_u8_mut().ok_or(Error::NotAnImage)?;
356        self.draw_proto_masks_impl(
357            dst,
358            detect,
359            proto_data,
360            overlay.opacity,
361            overlay.letterbox,
362            overlay.color_mode,
363        )
364    }
365
366    fn set_class_colors(&mut self, colors: &[[u8; 4]]) -> Result<()> {
367        for (c, new_c) in self.colors.iter_mut().zip(colors.iter()) {
368            *c = *new_c;
369        }
370        Ok(())
371    }
372}
373
374// Internal methods — dtype-aware dispatch layer.
// Internal methods — dtype-aware dispatch layer.
impl CPUProcessor {
    /// Top-level conversion dispatcher: handles dtype combinations.
    ///
    /// Supports u8→u8 directly and u8→i8 by converting into the i8 buffer
    /// reinterpreted as u8 and then applying the XOR-0x80 bias; any other
    /// dtype pair returns `Error::NotSupported`.
    pub(crate) fn convert_impl(
        &mut self,
        src: &TensorDyn,
        dst: &mut TensorDyn,
        rotation: Rotation,
        flip: Flip,
        crop: Crop,
    ) -> Result<()> {
        let src_fmt = src.format().ok_or(Error::NotAnImage)?;
        let dst_fmt = dst.format().ok_or(Error::NotAnImage)?;

        match (src.dtype(), dst.dtype()) {
            (DType::U8, DType::U8) => {
                // dtype already verified above, so as_u8 cannot fail here.
                let src = src.as_u8().unwrap();
                let dst = dst.as_u8_mut().unwrap();
                self.convert_u8(src, dst, src_fmt, dst_fmt, rotation, flip, crop)
            }
            (DType::U8, DType::I8) => {
                // Int8 output: reinterpret the i8 destination as u8 (layout-
                // identical), convert directly into it, then XOR 0x80 in-place.
                let src_u8 = src.as_u8().unwrap();
                let dst_i8 = dst.as_i8_mut().unwrap();
                // SAFETY: Tensor<i8> and Tensor<u8> are layout-identical
                // (same element size, no T-dependent drop glue). Same
                // rationale as gl::processor::tensor_i8_as_u8_mut.
                let dst_u8 = unsafe { &mut *(dst_i8 as *mut Tensor<i8> as *mut Tensor<u8>) };
                self.convert_u8(src_u8, dst_u8, src_fmt, dst_fmt, rotation, flip, crop)?;
                // Apply XOR 0x80 bias in-place (u8 → i8 conversion)
                let mut map = dst_u8.map()?;
                apply_int8_xor_bias(map.as_mut_slice(), dst_fmt);
                Ok(())
            }
            (s, d) => Err(Error::NotSupported(format!("dtype {s} -> {d}",))),
        }
    }

    /// U8-to-U8 conversion: the full format conversion + resize pipeline.
    ///
    /// Pipeline: pick an RGB/RGBA/GREY intermediate format for the (src, dst)
    /// pair, take a direct-conversion fast path when no geometry change is
    /// needed, otherwise convert → resize/flip/rotate → convert, and finally
    /// fill outside the destination crop rectangle when a fill color is set.
    #[allow(clippy::too_many_arguments)]
    fn convert_u8(
        &mut self,
        src: &Tensor<u8>,
        dst: &mut Tensor<u8>,
        src_fmt: PixelFormat,
        dst_fmt: PixelFormat,
        rotation: Rotation,
        flip: Flip,
        crop: Crop,
    ) -> Result<()> {
        use PixelFormat::*;

        let src_w = src.width().unwrap();
        let src_h = src.height().unwrap();
        let dst_w = dst.width().unwrap();
        let dst_h = dst.height().unwrap();

        crop.check_crop_dims(src_w, src_h, dst_w, dst_h)?;

        // Determine intermediate format for the resize step
        // (must be one of Rgb/Rgba/Grey, which the resizer supports;
        // Bgra→Bgra is the one passthrough exception).
        let intermediate = match (src_fmt, dst_fmt) {
            (Nv12, Rgb) => Rgb,
            (Nv12, Rgba) => Rgba,
            (Nv12, Grey) => Grey,
            (Nv12, Yuyv) => Rgba,
            (Nv12, Nv16) => Rgba,
            (Nv12, PlanarRgb) => Rgb,
            (Nv12, PlanarRgba) => Rgba,
            (Yuyv, Rgb) => Rgb,
            (Yuyv, Rgba) => Rgba,
            (Yuyv, Grey) => Grey,
            (Yuyv, Yuyv) => Rgba,
            (Yuyv, PlanarRgb) => Rgb,
            (Yuyv, PlanarRgba) => Rgba,
            (Yuyv, Nv16) => Rgba,
            (Vyuy, Rgb) => Rgb,
            (Vyuy, Rgba) => Rgba,
            (Vyuy, Grey) => Grey,
            (Vyuy, Vyuy) => Rgba,
            (Vyuy, PlanarRgb) => Rgb,
            (Vyuy, PlanarRgba) => Rgba,
            (Vyuy, Nv16) => Rgba,
            (Rgba, Rgb) => Rgba,
            (Rgba, Rgba) => Rgba,
            (Rgba, Grey) => Grey,
            (Rgba, Yuyv) => Rgba,
            (Rgba, PlanarRgb) => Rgba,
            (Rgba, PlanarRgba) => Rgba,
            (Rgba, Nv16) => Rgba,
            (Rgb, Rgb) => Rgb,
            (Rgb, Rgba) => Rgb,
            (Rgb, Grey) => Grey,
            (Rgb, Yuyv) => Rgb,
            (Rgb, PlanarRgb) => Rgb,
            (Rgb, PlanarRgba) => Rgb,
            (Rgb, Nv16) => Rgb,
            (Grey, Rgb) => Rgb,
            (Grey, Rgba) => Rgba,
            (Grey, Grey) => Grey,
            (Grey, Yuyv) => Grey,
            (Grey, PlanarRgb) => Grey,
            (Grey, PlanarRgba) => Grey,
            (Grey, Nv16) => Grey,
            (Nv12, Bgra) => Rgba,
            (Yuyv, Bgra) => Rgba,
            (Vyuy, Bgra) => Rgba,
            (Rgba, Bgra) => Rgba,
            (Rgb, Bgra) => Rgb,
            (Grey, Bgra) => Grey,
            (Bgra, Bgra) => Bgra,
            (Nv16, Rgb) => Rgb,
            (Nv16, Rgba) => Rgba,
            (Nv16, Bgra) => Rgba,
            (PlanarRgb, Rgb) => Rgb,
            (PlanarRgb, Rgba) => Rgb,
            (PlanarRgb, Bgra) => Rgb,
            (PlanarRgba, Rgb) => Rgba,
            (PlanarRgba, Rgba) => Rgba,
            (PlanarRgba, Bgra) => Rgba,
            (s, d) => {
                return Err(Error::NotSupported(format!("Conversion from {s} to {d}",)));
            }
        };

        // Geometry work is needed when rotating, flipping, resizing, or when
        // either crop rectangle is present and differs from the full image.
        let need_resize_flip_rotation = rotation != Rotation::None
            || flip != Flip::None
            || src_w != dst_w
            || src_h != dst_h
            || crop.src_rect.is_some_and(|c| {
                c != Rect {
                    left: 0,
                    top: 0,
                    width: src_w,
                    height: src_h,
                }
            })
            || crop.dst_rect.is_some_and(|c| {
                c != Rect {
                    left: 0,
                    top: 0,
                    width: dst_w,
                    height: dst_h,
                }
            });

        // check if a direct conversion can be done
        if !need_resize_flip_rotation && Self::support_conversion_pf(src_fmt, dst_fmt) {
            return Self::convert_format_pf(src, dst, src_fmt, dst_fmt);
        }

        // any extra checks
        if dst_fmt == Yuyv && !dst_w.is_multiple_of(2) {
            return Err(Error::NotSupported(format!(
                "{} destination must have width divisible by 2",
                dst_fmt,
            )));
        }

        // create tmp buffer
        // (only when the source is not already in the intermediate format)
        let mut tmp_buffer;
        let tmp;
        let tmp_fmt;
        if intermediate != src_fmt {
            tmp_buffer = Tensor::<u8>::image(src_w, src_h, intermediate, Some(TensorMemory::Mem))?;

            Self::convert_format_pf(src, &mut tmp_buffer, src_fmt, intermediate)?;
            tmp = &tmp_buffer;
            tmp_fmt = intermediate;
        } else {
            tmp = src;
            tmp_fmt = src_fmt;
        }

        // format must be RGB/RGBA/GREY
        debug_assert!(matches!(tmp_fmt, Rgb | Rgba | Grey));
        if tmp_fmt == dst_fmt {
            // Resize writes directly into the destination.
            self.resize_flip_rotate_pf(tmp, dst, dst_fmt, rotation, flip, crop)?;
        } else if !need_resize_flip_rotation {
            Self::convert_format_pf(tmp, dst, tmp_fmt, dst_fmt)?;
        } else {
            // Resize into a second temp buffer in the intermediate format,
            // then convert into the destination format.
            let mut tmp2 = Tensor::<u8>::image(dst_w, dst_h, tmp_fmt, Some(TensorMemory::Mem))?;
            // When writing into a sub-rectangle of dst without a fill color,
            // pre-populate tmp2 from dst so the pixels outside the crop
            // survive the final conversion back.
            if crop.dst_rect.is_some_and(|c| {
                c != Rect {
                    left: 0,
                    top: 0,
                    width: dst_w,
                    height: dst_h,
                }
            }) && crop.dst_color.is_none()
            {
                Self::convert_format_pf(dst, &mut tmp2, dst_fmt, tmp_fmt)?;
            }
            self.resize_flip_rotate_pf(tmp, &mut tmp2, tmp_fmt, rotation, flip, crop)?;
            Self::convert_format_pf(&tmp2, dst, tmp_fmt, dst_fmt)?;
        }
        // Fill the region outside dst_rect with the requested color, if any.
        if let (Some(dst_rect), Some(dst_color)) = (crop.dst_rect, crop.dst_color) {
            let full_rect = Rect {
                left: 0,
                top: 0,
                width: dst_w,
                height: dst_h,
            };
            if dst_rect != full_rect {
                Self::fill_image_outside_crop_u8(dst, dst_color, dst_rect)?;
            }
        }

        Ok(())
    }

    /// Render detection boxes plus decoded segmentation masks onto an
    /// RGBA/RGB u8 image.
    ///
    /// Dispatches to the semantic (multi-class, single tensor) or instance
    /// (one binary mask per detection) renderer based on the mask channel
    /// count.
    fn draw_decoded_masks_impl(
        &mut self,
        dst: &mut Tensor<u8>,
        detect: &[DetectBox],
        segmentation: &[Segmentation],
        opacity: f32,
        color_mode: crate::ColorMode,
    ) -> Result<()> {
        let dst_fmt = dst.format().ok_or(Error::NotAnImage)?;
        if !matches!(dst_fmt, PixelFormat::Rgba | PixelFormat::Rgb) {
            return Err(crate::Error::NotSupported(
                "CPU image rendering only supports RGBA or RGB images".to_string(),
            ));
        }

        let _timer = FunctionTimer::new("CPUProcessor::draw_decoded_masks");

        let dst_w = dst.width().unwrap();
        let dst_h = dst.height().unwrap();
        let dst_rs = row_stride_for(dst_w, dst_fmt);
        let dst_c = dst_fmt.channels();

        let mut map = dst.map()?;
        let dst_slice = map.as_mut_slice();

        // Boxes are drawn first so masks overlay them where they intersect.
        self.render_box(dst_w, dst_h, dst_rs, dst_c, dst_slice, detect, color_mode)?;

        if segmentation.is_empty() {
            return Ok(());
        }

        // Semantic segmentation (e.g. ModelPack) has C > 1 (multi-class),
        // instance segmentation (e.g. YOLO) has C = 1 (binary per-instance).
        let is_semantic = segmentation[0].segmentation.shape()[2] > 1;

        if is_semantic {
            self.render_modelpack_segmentation(
                dst_w,
                dst_h,
                dst_rs,
                dst_c,
                dst_slice,
                &segmentation[0],
                opacity,
            )?;
        } else {
            // One mask per detection; color chosen per color_mode policy.
            for (idx, (seg, det)) in segmentation.iter().zip(detect).enumerate() {
                let color_index = color_mode.index(idx, det.label);
                self.render_yolo_segmentation(
                    dst_w,
                    dst_h,
                    dst_rs,
                    dst_c,
                    dst_slice,
                    seg,
                    color_index,
                    opacity,
                )?;
            }
        }

        Ok(())
    }

    /// Render detection boxes plus prototype-based (YOLO-style) instance
    /// masks onto an RGBA/RGB u8 image.
    ///
    /// Each detection's mask is the sigmoid of the dot product between its
    /// coefficient vector and the bilinearly-sampled prototype tensor,
    /// thresholded at 0.5 and alpha-blended inside the detection bbox.
    fn draw_proto_masks_impl(
        &mut self,
        dst: &mut Tensor<u8>,
        detect: &[DetectBox],
        proto_data: &ProtoData,
        opacity: f32,
        letterbox: Option<[f32; 4]>,
        color_mode: crate::ColorMode,
    ) -> Result<()> {
        let dst_fmt = dst.format().ok_or(Error::NotAnImage)?;
        if !matches!(dst_fmt, PixelFormat::Rgba | PixelFormat::Rgb) {
            return Err(crate::Error::NotSupported(
                "CPU image rendering only supports RGBA or RGB images".to_string(),
            ));
        }

        let _timer = FunctionTimer::new("CPUProcessor::draw_proto_masks");

        let dst_w = dst.width().unwrap();
        let dst_h = dst.height().unwrap();
        let dst_rs = row_stride_for(dst_w, dst_fmt);
        let channels = dst_fmt.channels();

        let mut map = dst.map()?;
        let dst_slice = map.as_mut_slice();

        self.render_box(
            dst_w, dst_h, dst_rs, channels, dst_slice, detect, color_mode,
        )?;

        if detect.is_empty() || proto_data.mask_coefficients.is_empty() {
            return Ok(());
        }

        // Prototype tensor is (H, W, num_protos), converted to f32 once.
        let protos_cow = proto_data.protos.as_f32();
        let protos = protos_cow.as_ref();
        let proto_h = protos.shape()[0];
        let proto_w = protos.shape()[1];
        let num_protos = protos.shape()[2];

        // Precompute letterbox scale/offset for output-pixel → proto-pixel mapping.
        // Without letterbox: proto_x = (x / dst_w) * proto_w
        // With letterbox [lx0,ly0,lx1,ly1]: proto_x = (lx0 + (x/dst_w)*(lx1-lx0)) * proto_w
        let (lx0, lx_range, ly0, ly_range) = match letterbox {
            Some([lx0, ly0, lx1, ly1]) => (lx0, lx1 - lx0, ly0, ly1 - ly0),
            None => (0.0_f32, 1.0_f32, 0.0_f32, 1.0_f32),
        };

        for (idx, (det, coeff)) in detect
            .iter()
            .zip(proto_data.mask_coefficients.iter())
            .enumerate()
        {
            let color_index = color_mode.index(idx, det.label);
            let color = self.colors[color_index % self.colors.len()];
            // Effective alpha = palette alpha scaled by opacity (skip the
            // multiply when opacity is exactly 1.0).
            let alpha = if opacity == 1.0 {
                color[3] as u16
            } else {
                (color[3] as f32 * opacity).round() as u16
            };

            // `detect` has already been un-letterboxed by the caller (lib.rs),
            // so bbox coords are in output-image-normalized space.
            let start_x = (dst_w as f32 * det.bbox.xmin).round() as usize;
            let start_y = (dst_h as f32 * det.bbox.ymin).round() as usize;
            let end_x = ((dst_w as f32 * det.bbox.xmax).round() as usize).min(dst_w);
            let end_y = ((dst_h as f32 * det.bbox.ymax).round() as usize).min(dst_h);

            for y in start_y..end_y {
                for x in start_x..end_x {
                    // Map output pixel (x, y) → model-input-normalized → proto pixel.
                    // When a letterbox was applied, output pixels map to a sub-region
                    // of the model input; lx0/lx_range re-introduce that mapping.
                    let px = (lx0 + (x as f32 / dst_w as f32) * lx_range) * proto_w as f32 - 0.5;
                    let py = (ly0 + (y as f32 / dst_h as f32) * ly_range) * proto_h as f32 - 0.5;

                    // Bilinear interpolation + dot product
                    let acc = bilinear_dot(protos, coeff, num_protos, px, py, proto_w, proto_h);

                    // Sigmoid threshold
                    let mask = 1.0 / (1.0 + (-acc).exp());
                    if mask < 0.5 {
                        continue;
                    }

                    // Alpha blend
                    let dst_index = y * dst_rs + x * channels;
                    for c in 0..3 {
                        dst_slice[dst_index + c] = ((color[c] as u16 * alpha
                            + dst_slice[dst_index + c] as u16 * (255 - alpha))
                            / 255) as u8;
                    }
                }
            }
        }

        Ok(())
    }
}
747}