Skip to main content

edgefirst_image/cpu/
mod.rs

1// SPDX-FileCopyrightText: Copyright 2025 Au-Zone Technologies
2// SPDX-License-Identifier: Apache-2.0
3
4use crate::{Crop, Error, Flip, FunctionTimer, ImageProcessorTrait, Rect, Result, Rotation};
5use edgefirst_decoder::{DetectBox, ProtoData, Segmentation};
6use edgefirst_tensor::{
7    DType, PixelFormat, Tensor, TensorDyn, TensorMapTrait, TensorMemory, TensorTrait,
8};
9
10mod convert;
11mod masks;
12mod resize;
13mod tests;
14
15use masks::bilinear_dot;
16
/// CPUConverter implements the ImageProcessor trait using the fallback CPU
/// implementation for image processing.
#[derive(Debug, Clone)]
pub struct CPUProcessor {
    // fast_image_resize engine, kept across calls so its internal buffers
    // can be reused between frames.
    resizer: fast_image_resize::Resizer,
    // Resize algorithm + options chosen at construction time
    // (bilinear via new()/new_bilinear(), nearest via new_nearest()).
    options: fast_image_resize::ResizeOptions,
    // Per-class RGBA colors used for box/mask rendering; indexed modulo
    // the table length, seeded from crate::DEFAULT_COLORS_U8.
    colors: [[u8; 4]; 20],
}
25
// SAFETY: NOTE(review) — these impls assert CPUProcessor may be sent to and
// shared between threads. That is only sound if fast_image_resize::Resizer
// holds no thread-affine state (raw pointers, Rc, etc.) — confirm against
// the fast_image_resize documentation. If Resizer is already Send + Sync,
// these manual impls are redundant and should be removed; if it is not,
// they must be justified here.
unsafe impl Send for CPUProcessor {}
unsafe impl Sync for CPUProcessor {}
28
29impl Default for CPUProcessor {
30    fn default() -> Self {
31        Self::new_bilinear()
32    }
33}
34
35/// Write the base layer of `dst` before mask rendering.
36///
37/// This is the terminal fallback: on CPU we have no 2D hardware, so a
38/// direct buffer write is the appropriate primitive. The invariant is that
39/// every call to the CPU draw_* entry points fully initialises dst — we
40/// never rely on "whatever was in the buffer" from the caller.
41///
42/// - `background == Some(bg)` → byte-for-byte copy bg → dst (after shape /
43///   format validation).
44/// - `background == None` → fill dst with 0x00 (transparent black).
45fn prepare_dst_base_cpu(dst: &mut TensorDyn, background: Option<&TensorDyn>) -> Result<()> {
46    match background {
47        Some(bg) => {
48            if bg.shape() != dst.shape() {
49                return Err(Error::InvalidShape(
50                    "background shape does not match dst".into(),
51                ));
52            }
53            if bg.format() != dst.format() {
54                return Err(Error::InvalidShape(
55                    "background pixel format does not match dst".into(),
56                ));
57            }
58            let bg_u8 = bg.as_u8().ok_or(Error::NotAnImage)?;
59            let dst_u8 = dst.as_u8_mut().ok_or(Error::NotAnImage)?;
60            let bg_map = bg_u8.map()?;
61            let mut dst_map = dst_u8.map()?;
62            let bg_slice = bg_map.as_slice();
63            let dst_slice = dst_map.as_mut_slice();
64            if bg_slice.len() != dst_slice.len() {
65                return Err(Error::InvalidShape(
66                    "background buffer size does not match dst".into(),
67                ));
68            }
69            dst_slice.copy_from_slice(bg_slice);
70        }
71        None => {
72            let dst_u8 = dst.as_u8_mut().ok_or(Error::NotAnImage)?;
73            let mut dst_map = dst_u8.map()?;
74            dst_map.as_mut_slice().fill(0);
75        }
76    }
77    Ok(())
78}
79
80/// Compute row stride for a packed-format Tensor<u8> image given its format.
81fn row_stride_for(width: usize, fmt: PixelFormat) -> usize {
82    use edgefirst_tensor::PixelLayout;
83    match fmt.layout() {
84        PixelLayout::Packed => width * fmt.channels(),
85        PixelLayout::Planar | PixelLayout::SemiPlanar => width,
86        _ => width, // fallback for non-exhaustive
87    }
88}
89
90/// Apply XOR 0x80 bias to color channels only, preserving alpha.
91///
92/// Matches GL int8 shader behavior: `vec4(int8_bias(c.rgb), c.a)`.
93/// For formats without alpha, XORs every byte (fast path).
94pub(crate) fn apply_int8_xor_bias(data: &mut [u8], fmt: PixelFormat) {
95    use edgefirst_tensor::PixelLayout;
96    if !fmt.has_alpha() {
97        for b in data.iter_mut() {
98            *b ^= 0x80;
99        }
100    } else if fmt.layout() == PixelLayout::Planar {
101        // Planar with alpha (e.g. PlanarRgba): XOR color planes, skip alpha plane.
102        let channels = fmt.channels();
103        let plane_size = data.len() / channels;
104        for b in data[..plane_size * (channels - 1)].iter_mut() {
105            *b ^= 0x80;
106        }
107    } else {
108        // Packed with alpha (Rgba, Bgra): XOR color bytes, skip alpha byte.
109        let channels = fmt.channels();
110        for pixel in data.chunks_exact_mut(channels) {
111            for b in &mut pixel[..channels - 1] {
112                *b ^= 0x80;
113            }
114        }
115    }
116}
117
118impl CPUProcessor {
119    /// Creates a new CPUConverter with bilinear resizing.
120    pub fn new() -> Self {
121        Self::new_bilinear()
122    }
123
124    /// Creates a new CPUConverter with bilinear resizing.
125    fn new_bilinear() -> Self {
126        let resizer = fast_image_resize::Resizer::new();
127        let options = fast_image_resize::ResizeOptions::new()
128            .resize_alg(fast_image_resize::ResizeAlg::Convolution(
129                fast_image_resize::FilterType::Bilinear,
130            ))
131            .use_alpha(false);
132
133        log::debug!("CPUConverter created");
134        Self {
135            resizer,
136            options,
137            colors: crate::DEFAULT_COLORS_U8,
138        }
139    }
140
141    /// Creates a new CPUConverter with nearest neighbor resizing.
142    pub fn new_nearest() -> Self {
143        let resizer = fast_image_resize::Resizer::new();
144        let options = fast_image_resize::ResizeOptions::new()
145            .resize_alg(fast_image_resize::ResizeAlg::Nearest)
146            .use_alpha(false);
147        log::debug!("CPUConverter created");
148        Self {
149            resizer,
150            options,
151            colors: crate::DEFAULT_COLORS_U8,
152        }
153    }
154
155    pub(crate) fn support_conversion_pf(src: PixelFormat, dst: PixelFormat) -> bool {
156        use PixelFormat::*;
157        matches!(
158            (src, dst),
159            (Nv12, Rgb)
160                | (Nv12, Rgba)
161                | (Nv12, Grey)
162                | (Nv16, Rgb)
163                | (Nv16, Rgba)
164                | (Nv16, Bgra)
165                | (Yuyv, Rgb)
166                | (Yuyv, Rgba)
167                | (Yuyv, Grey)
168                | (Yuyv, Yuyv)
169                | (Yuyv, PlanarRgb)
170                | (Yuyv, PlanarRgba)
171                | (Yuyv, Nv16)
172                | (Vyuy, Rgb)
173                | (Vyuy, Rgba)
174                | (Vyuy, Grey)
175                | (Vyuy, Vyuy)
176                | (Vyuy, PlanarRgb)
177                | (Vyuy, PlanarRgba)
178                | (Vyuy, Nv16)
179                | (Rgba, Rgb)
180                | (Rgba, Rgba)
181                | (Rgba, Grey)
182                | (Rgba, Yuyv)
183                | (Rgba, PlanarRgb)
184                | (Rgba, PlanarRgba)
185                | (Rgba, Nv16)
186                | (Rgb, Rgb)
187                | (Rgb, Rgba)
188                | (Rgb, Grey)
189                | (Rgb, Yuyv)
190                | (Rgb, PlanarRgb)
191                | (Rgb, PlanarRgba)
192                | (Rgb, Nv16)
193                | (Grey, Rgb)
194                | (Grey, Rgba)
195                | (Grey, Grey)
196                | (Grey, Yuyv)
197                | (Grey, PlanarRgb)
198                | (Grey, PlanarRgba)
199                | (Grey, Nv16)
200                | (Nv12, Bgra)
201                | (Yuyv, Bgra)
202                | (Vyuy, Bgra)
203                | (Rgba, Bgra)
204                | (Rgb, Bgra)
205                | (Grey, Bgra)
206                | (Bgra, Bgra)
207                | (PlanarRgb, Rgb)
208                | (PlanarRgb, Rgba)
209                | (PlanarRgba, Rgb)
210                | (PlanarRgba, Rgba)
211                | (PlanarRgb, Bgra)
212                | (PlanarRgba, Bgra)
213        )
214    }
215
216    /// Format conversion dispatch for Tensor<u8> with PixelFormat metadata.
217    pub(crate) fn convert_format_pf(
218        src: &Tensor<u8>,
219        dst: &mut Tensor<u8>,
220        src_fmt: PixelFormat,
221        dst_fmt: PixelFormat,
222    ) -> Result<()> {
223        let _timer = FunctionTimer::new(format!(
224            "ImageProcessor::convert_format {} to {}",
225            src_fmt, dst_fmt,
226        ));
227
228        use PixelFormat::*;
229        match (src_fmt, dst_fmt) {
230            (Nv12, Rgb) => Self::convert_nv12_to_rgb(src, dst),
231            (Nv12, Rgba) => Self::convert_nv12_to_rgba(src, dst),
232            (Nv12, Grey) => Self::convert_nv12_to_grey(src, dst),
233            (Yuyv, Rgb) => Self::convert_yuyv_to_rgb(src, dst),
234            (Yuyv, Rgba) => Self::convert_yuyv_to_rgba(src, dst),
235            (Yuyv, Grey) => Self::convert_yuyv_to_grey(src, dst),
236            (Yuyv, Yuyv) => Self::copy_image(src, dst),
237            (Yuyv, PlanarRgb) => Self::convert_yuyv_to_8bps(src, dst),
238            (Yuyv, PlanarRgba) => Self::convert_yuyv_to_prgba(src, dst),
239            (Yuyv, Nv16) => Self::convert_yuyv_to_nv16(src, dst),
240            (Vyuy, Rgb) => Self::convert_vyuy_to_rgb(src, dst),
241            (Vyuy, Rgba) => Self::convert_vyuy_to_rgba(src, dst),
242            (Vyuy, Grey) => Self::convert_vyuy_to_grey(src, dst),
243            (Vyuy, Vyuy) => Self::copy_image(src, dst),
244            (Vyuy, PlanarRgb) => Self::convert_vyuy_to_8bps(src, dst),
245            (Vyuy, PlanarRgba) => Self::convert_vyuy_to_prgba(src, dst),
246            (Vyuy, Nv16) => Self::convert_vyuy_to_nv16(src, dst),
247            (Rgba, Rgb) => Self::convert_rgba_to_rgb(src, dst),
248            (Rgba, Rgba) => Self::copy_image(src, dst),
249            (Rgba, Grey) => Self::convert_rgba_to_grey(src, dst),
250            (Rgba, Yuyv) => Self::convert_rgba_to_yuyv(src, dst),
251            (Rgba, PlanarRgb) => Self::convert_rgba_to_8bps(src, dst),
252            (Rgba, PlanarRgba) => Self::convert_rgba_to_prgba(src, dst),
253            (Rgba, Nv16) => Self::convert_rgba_to_nv16(src, dst),
254            (Rgb, Rgb) => Self::copy_image(src, dst),
255            (Rgb, Rgba) => Self::convert_rgb_to_rgba(src, dst),
256            (Rgb, Grey) => Self::convert_rgb_to_grey(src, dst),
257            (Rgb, Yuyv) => Self::convert_rgb_to_yuyv(src, dst),
258            (Rgb, PlanarRgb) => Self::convert_rgb_to_8bps(src, dst),
259            (Rgb, PlanarRgba) => Self::convert_rgb_to_prgba(src, dst),
260            (Rgb, Nv16) => Self::convert_rgb_to_nv16(src, dst),
261            (Grey, Rgb) => Self::convert_grey_to_rgb(src, dst),
262            (Grey, Rgba) => Self::convert_grey_to_rgba(src, dst),
263            (Grey, Grey) => Self::copy_image(src, dst),
264            (Grey, Yuyv) => Self::convert_grey_to_yuyv(src, dst),
265            (Grey, PlanarRgb) => Self::convert_grey_to_8bps(src, dst),
266            (Grey, PlanarRgba) => Self::convert_grey_to_prgba(src, dst),
267            (Grey, Nv16) => Self::convert_grey_to_nv16(src, dst),
268
269            // the following converts are added for use in testing
270            (Nv16, Rgb) => Self::convert_nv16_to_rgb(src, dst),
271            (Nv16, Rgba) => Self::convert_nv16_to_rgba(src, dst),
272            (PlanarRgb, Rgb) => Self::convert_8bps_to_rgb(src, dst),
273            (PlanarRgb, Rgba) => Self::convert_8bps_to_rgba(src, dst),
274            (PlanarRgba, Rgb) => Self::convert_prgba_to_rgb(src, dst),
275            (PlanarRgba, Rgba) => Self::convert_prgba_to_rgba(src, dst),
276
277            // BGRA destination: convert to RGBA layout, then swap R and B
278            (Bgra, Bgra) => Self::copy_image(src, dst),
279            (Nv12, Bgra) => {
280                Self::convert_nv12_to_rgba(src, dst)?;
281                Self::swizzle_rb_4chan(dst)
282            }
283            (Nv16, Bgra) => {
284                Self::convert_nv16_to_rgba(src, dst)?;
285                Self::swizzle_rb_4chan(dst)
286            }
287            (Yuyv, Bgra) => {
288                Self::convert_yuyv_to_rgba(src, dst)?;
289                Self::swizzle_rb_4chan(dst)
290            }
291            (Vyuy, Bgra) => {
292                Self::convert_vyuy_to_rgba(src, dst)?;
293                Self::swizzle_rb_4chan(dst)
294            }
295            (Rgba, Bgra) => {
296                dst.map()?.copy_from_slice(&src.map()?);
297                Self::swizzle_rb_4chan(dst)
298            }
299            (Rgb, Bgra) => {
300                Self::convert_rgb_to_rgba(src, dst)?;
301                Self::swizzle_rb_4chan(dst)
302            }
303            (Grey, Bgra) => {
304                Self::convert_grey_to_rgba(src, dst)?;
305                Self::swizzle_rb_4chan(dst)
306            }
307            (PlanarRgb, Bgra) => {
308                Self::convert_8bps_to_rgba(src, dst)?;
309                Self::swizzle_rb_4chan(dst)
310            }
311            (PlanarRgba, Bgra) => {
312                Self::convert_prgba_to_rgba(src, dst)?;
313                Self::swizzle_rb_4chan(dst)
314            }
315
316            (s, d) => Err(Error::NotSupported(format!("Conversion from {s} to {d}",))),
317        }
318    }
319
320    /// Tensor<u8>-based fill_image_outside_crop.
321    pub(crate) fn fill_image_outside_crop_u8(
322        dst: &mut Tensor<u8>,
323        rgba: [u8; 4],
324        crop: Rect,
325    ) -> Result<()> {
326        let dst_fmt = dst.format().unwrap();
327        let dst_w = dst.width().unwrap();
328        let dst_h = dst.height().unwrap();
329        let mut dst_map = dst.map()?;
330        let dst_tup = (dst_map.as_mut_slice(), dst_w, dst_h);
331        Self::fill_outside_crop_dispatch(dst_tup, dst_fmt, rgba, crop)
332    }
333
334    /// Common fill dispatch by format.
335    fn fill_outside_crop_dispatch(
336        dst: (&mut [u8], usize, usize),
337        fmt: PixelFormat,
338        rgba: [u8; 4],
339        crop: Rect,
340    ) -> Result<()> {
341        use PixelFormat::*;
342        match fmt {
343            Rgba | Bgra => Self::fill_image_outside_crop_(dst, rgba, crop),
344            Rgb => Self::fill_image_outside_crop_(dst, Self::rgba_to_rgb(rgba), crop),
345            Grey => Self::fill_image_outside_crop_(dst, Self::rgba_to_grey(rgba), crop),
346            Yuyv => Self::fill_image_outside_crop_(
347                (dst.0, dst.1 / 2, dst.2),
348                Self::rgba_to_yuyv(rgba),
349                Rect::new(crop.left / 2, crop.top, crop.width.div_ceil(2), crop.height),
350            ),
351            PlanarRgb => Self::fill_image_outside_crop_planar(dst, Self::rgba_to_rgb(rgba), crop),
352            PlanarRgba => Self::fill_image_outside_crop_planar(dst, rgba, crop),
353            Nv16 => {
354                let yuyv = Self::rgba_to_yuyv(rgba);
355                Self::fill_image_outside_crop_yuv_semiplanar(dst, yuyv[0], [yuyv[1], yuyv[3]], crop)
356            }
357            _ => Err(Error::Internal(format!(
358                "Found unexpected destination {fmt}",
359            ))),
360        }
361    }
362}
363
impl ImageProcessorTrait for CPUProcessor {
    /// Convert `src` into `dst` with the requested rotation, flip, and crop.
    /// Thin trait adapter over the dtype-aware `convert_impl` below.
    fn convert(
        &mut self,
        src: &TensorDyn,
        dst: &mut TensorDyn,
        rotation: Rotation,
        flip: Flip,
        crop: Crop,
    ) -> Result<()> {
        self.convert_impl(src, dst, rotation, flip, crop)
    }

    /// Render decoded (already-thresholded) segmentation masks and boxes
    /// into `dst`, on top of `overlay.background` (or transparent black).
    fn draw_decoded_masks(
        &mut self,
        dst: &mut TensorDyn,
        detect: &[DetectBox],
        segmentation: &[Segmentation],
        overlay: crate::MaskOverlay<'_>,
    ) -> Result<()> {
        // CPU is the terminal fallback — it must always produce the full
        // output, never assume the caller cleared dst. Every call writes
        // the base layer first (bg copy or zero fill) and then the masks.
        prepare_dst_base_cpu(dst, overlay.background)?;
        let dst = dst.as_u8_mut().ok_or(Error::NotAnImage)?;
        self.draw_decoded_masks_impl(
            dst,
            detect,
            segmentation,
            overlay.opacity,
            overlay.color_mode,
        )
    }

    /// Render YOLO-style prototype masks (coefficients × protos) and boxes
    /// into `dst`, on top of `overlay.background` (or transparent black).
    fn draw_proto_masks(
        &mut self,
        dst: &mut TensorDyn,
        detect: &[DetectBox],
        proto_data: &ProtoData,
        overlay: crate::MaskOverlay<'_>,
    ) -> Result<()> {
        // Same invariant as draw_decoded_masks: fully initialise dst first.
        prepare_dst_base_cpu(dst, overlay.background)?;
        let dst = dst.as_u8_mut().ok_or(Error::NotAnImage)?;
        self.draw_proto_masks_impl(
            dst,
            detect,
            proto_data,
            overlay.opacity,
            overlay.letterbox,
            overlay.color_mode,
        )
    }

    /// Install per-class mask/box colors.
    ///
    /// The zip stops at the shorter side: only the first
    /// `min(20, colors.len())` entries are stored, extra input colors are
    /// silently ignored, and table entries beyond `colors.len()` keep
    /// their previous values. Always returns `Ok(())`.
    fn set_class_colors(&mut self, colors: &[[u8; 4]]) -> Result<()> {
        for (c, new_c) in self.colors.iter_mut().zip(colors.iter()) {
            *c = *new_c;
        }
        Ok(())
    }
}
423
// Internal methods — dtype-aware dispatch layer.
impl CPUProcessor {
    /// Top-level conversion dispatcher: handles dtype combinations.
    ///
    /// - U8 → U8 runs the regular conversion pipeline.
    /// - U8 → I8 runs the same pipeline into the i8 buffer reinterpreted as
    ///   u8, then applies the XOR-0x80 bias in place.
    /// - Any other dtype pair is rejected with `NotSupported`.
    pub(crate) fn convert_impl(
        &mut self,
        src: &TensorDyn,
        dst: &mut TensorDyn,
        rotation: Rotation,
        flip: Flip,
        crop: Crop,
    ) -> Result<()> {
        let src_fmt = src.format().ok_or(Error::NotAnImage)?;
        let dst_fmt = dst.format().ok_or(Error::NotAnImage)?;

        match (src.dtype(), dst.dtype()) {
            (DType::U8, DType::U8) => {
                // unwraps cannot fail: dtypes were just matched above.
                let src = src.as_u8().unwrap();
                let dst = dst.as_u8_mut().unwrap();
                self.convert_u8(src, dst, src_fmt, dst_fmt, rotation, flip, crop)
            }
            (DType::U8, DType::I8) => {
                // Int8 output: reinterpret the i8 destination as u8 (layout-
                // identical), convert directly into it, then XOR 0x80 in-place.
                let src_u8 = src.as_u8().unwrap();
                let dst_i8 = dst.as_i8_mut().unwrap();
                // SAFETY: Tensor<i8> and Tensor<u8> are layout-identical
                // (same element size, no T-dependent drop glue). Same
                // rationale as gl::processor::tensor_i8_as_u8_mut.
                let dst_u8 = unsafe { &mut *(dst_i8 as *mut Tensor<i8> as *mut Tensor<u8>) };
                self.convert_u8(src_u8, dst_u8, src_fmt, dst_fmt, rotation, flip, crop)?;
                // Apply XOR 0x80 bias in-place (u8 → i8 conversion)
                let mut map = dst_u8.map()?;
                apply_int8_xor_bias(map.as_mut_slice(), dst_fmt);
                Ok(())
            }
            (s, d) => Err(Error::NotSupported(format!("dtype {s} -> {d}",))),
        }
    }

    /// U8-to-U8 conversion: the full format conversion + resize pipeline.
    ///
    /// Pipeline: (1) pick an intermediate format the resizer can work in
    /// (Rgb/Rgba/Grey, or Bgra for the identity case), (2) convert src to
    /// that intermediate if needed, (3) resize/flip/rotate, (4) convert to
    /// the destination format, (5) optionally fill outside the dst crop
    /// rect with crop.dst_color. Direct single-pass conversion is used when
    /// no geometric transform is required and a kernel exists.
    #[allow(clippy::too_many_arguments)]
    fn convert_u8(
        &mut self,
        src: &Tensor<u8>,
        dst: &mut Tensor<u8>,
        src_fmt: PixelFormat,
        dst_fmt: PixelFormat,
        rotation: Rotation,
        flip: Flip,
        crop: Crop,
    ) -> Result<()> {
        use PixelFormat::*;

        let src_w = src.width().unwrap();
        let src_h = src.height().unwrap();
        let dst_w = dst.width().unwrap();
        let dst_h = dst.height().unwrap();

        crop.check_crop_dims(src_w, src_h, dst_w, dst_h)?;

        // Determine intermediate format for the resize step
        let intermediate = match (src_fmt, dst_fmt) {
            (Nv12, Rgb) => Rgb,
            (Nv12, Rgba) => Rgba,
            (Nv12, Grey) => Grey,
            (Nv12, Yuyv) => Rgba,
            (Nv12, Nv16) => Rgba,
            (Nv12, PlanarRgb) => Rgb,
            (Nv12, PlanarRgba) => Rgba,
            (Yuyv, Rgb) => Rgb,
            (Yuyv, Rgba) => Rgba,
            (Yuyv, Grey) => Grey,
            (Yuyv, Yuyv) => Rgba,
            (Yuyv, PlanarRgb) => Rgb,
            (Yuyv, PlanarRgba) => Rgba,
            (Yuyv, Nv16) => Rgba,
            (Vyuy, Rgb) => Rgb,
            (Vyuy, Rgba) => Rgba,
            (Vyuy, Grey) => Grey,
            (Vyuy, Vyuy) => Rgba,
            (Vyuy, PlanarRgb) => Rgb,
            (Vyuy, PlanarRgba) => Rgba,
            (Vyuy, Nv16) => Rgba,
            (Rgba, Rgb) => Rgba,
            (Rgba, Rgba) => Rgba,
            (Rgba, Grey) => Grey,
            (Rgba, Yuyv) => Rgba,
            (Rgba, PlanarRgb) => Rgba,
            (Rgba, PlanarRgba) => Rgba,
            (Rgba, Nv16) => Rgba,
            (Rgb, Rgb) => Rgb,
            (Rgb, Rgba) => Rgb,
            (Rgb, Grey) => Grey,
            (Rgb, Yuyv) => Rgb,
            (Rgb, PlanarRgb) => Rgb,
            (Rgb, PlanarRgba) => Rgb,
            (Rgb, Nv16) => Rgb,
            (Grey, Rgb) => Rgb,
            (Grey, Rgba) => Rgba,
            (Grey, Grey) => Grey,
            (Grey, Yuyv) => Grey,
            (Grey, PlanarRgb) => Grey,
            (Grey, PlanarRgba) => Grey,
            (Grey, Nv16) => Grey,
            (Nv12, Bgra) => Rgba,
            (Yuyv, Bgra) => Rgba,
            (Vyuy, Bgra) => Rgba,
            (Rgba, Bgra) => Rgba,
            (Rgb, Bgra) => Rgb,
            (Grey, Bgra) => Grey,
            (Bgra, Bgra) => Bgra,
            (Nv16, Rgb) => Rgb,
            (Nv16, Rgba) => Rgba,
            (Nv16, Bgra) => Rgba,
            (PlanarRgb, Rgb) => Rgb,
            (PlanarRgb, Rgba) => Rgb,
            (PlanarRgb, Bgra) => Rgb,
            (PlanarRgba, Rgb) => Rgba,
            (PlanarRgba, Rgba) => Rgba,
            (PlanarRgba, Bgra) => Rgba,
            (s, d) => {
                return Err(Error::NotSupported(format!("Conversion from {s} to {d}",)));
            }
        };

        // A geometric pass is needed if any rotation/flip is requested, the
        // sizes differ, or a crop rect selects less than the full src/dst.
        let need_resize_flip_rotation = rotation != Rotation::None
            || flip != Flip::None
            || src_w != dst_w
            || src_h != dst_h
            || crop.src_rect.is_some_and(|c| {
                c != Rect {
                    left: 0,
                    top: 0,
                    width: src_w,
                    height: src_h,
                }
            })
            || crop.dst_rect.is_some_and(|c| {
                c != Rect {
                    left: 0,
                    top: 0,
                    width: dst_w,
                    height: dst_h,
                }
            });

        // check if a direct conversion can be done
        if !need_resize_flip_rotation && Self::support_conversion_pf(src_fmt, dst_fmt) {
            return Self::convert_format_pf(src, dst, src_fmt, dst_fmt);
        }

        // any extra checks
        if dst_fmt == Yuyv && !dst_w.is_multiple_of(2) {
            return Err(Error::NotSupported(format!(
                "{} destination must have width divisible by 2",
                dst_fmt,
            )));
        }

        // create tmp buffer
        let mut tmp_buffer;
        let tmp;
        let tmp_fmt;
        if intermediate != src_fmt {
            tmp_buffer = Tensor::<u8>::image(src_w, src_h, intermediate, Some(TensorMemory::Mem))?;

            Self::convert_format_pf(src, &mut tmp_buffer, src_fmt, intermediate)?;
            tmp = &tmp_buffer;
            tmp_fmt = intermediate;
        } else {
            tmp = src;
            tmp_fmt = src_fmt;
        }

        // format must be RGB/RGBA/GREY
        debug_assert!(matches!(tmp_fmt, Rgb | Rgba | Grey));
        if tmp_fmt == dst_fmt {
            self.resize_flip_rotate_pf(tmp, dst, dst_fmt, rotation, flip, crop)?;
        } else if !need_resize_flip_rotation {
            Self::convert_format_pf(tmp, dst, tmp_fmt, dst_fmt)?;
        } else {
            let mut tmp2 = Tensor::<u8>::image(dst_w, dst_h, tmp_fmt, Some(TensorMemory::Mem))?;
            // When resizing into a sub-rect of dst without a fill color, the
            // pixels outside the rect must survive; pre-convert the current
            // dst content into tmp2 so they round-trip unchanged.
            if crop.dst_rect.is_some_and(|c| {
                c != Rect {
                    left: 0,
                    top: 0,
                    width: dst_w,
                    height: dst_h,
                }
            }) && crop.dst_color.is_none()
            {
                Self::convert_format_pf(dst, &mut tmp2, dst_fmt, tmp_fmt)?;
            }
            self.resize_flip_rotate_pf(tmp, &mut tmp2, tmp_fmt, rotation, flip, crop)?;
            Self::convert_format_pf(&tmp2, dst, tmp_fmt, dst_fmt)?;
        }
        // With both a dst rect and a fill color, paint the border region.
        if let (Some(dst_rect), Some(dst_color)) = (crop.dst_rect, crop.dst_color) {
            let full_rect = Rect {
                left: 0,
                top: 0,
                width: dst_w,
                height: dst_h,
            };
            if dst_rect != full_rect {
                Self::fill_image_outside_crop_u8(dst, dst_color, dst_rect)?;
            }
        }

        Ok(())
    }

    /// Render boxes plus already-decoded segmentation masks into `dst`
    /// (Rgba/Rgb only). Dispatches to semantic (multi-class) or instance
    /// (per-detection binary) mask rendering based on the mask channel count.
    fn draw_decoded_masks_impl(
        &mut self,
        dst: &mut Tensor<u8>,
        detect: &[DetectBox],
        segmentation: &[Segmentation],
        opacity: f32,
        color_mode: crate::ColorMode,
    ) -> Result<()> {
        let dst_fmt = dst.format().ok_or(Error::NotAnImage)?;
        if !matches!(dst_fmt, PixelFormat::Rgba | PixelFormat::Rgb) {
            return Err(crate::Error::NotSupported(
                "CPU image rendering only supports RGBA or RGB images".to_string(),
            ));
        }

        let _timer = FunctionTimer::new("CPUProcessor::draw_decoded_masks");

        let dst_w = dst.width().unwrap();
        let dst_h = dst.height().unwrap();
        let dst_rs = row_stride_for(dst_w, dst_fmt);
        let dst_c = dst_fmt.channels();

        let mut map = dst.map()?;
        let dst_slice = map.as_mut_slice();

        self.render_box(dst_w, dst_h, dst_rs, dst_c, dst_slice, detect, color_mode)?;

        if segmentation.is_empty() {
            return Ok(());
        }

        // Semantic segmentation (e.g. ModelPack) has C > 1 (multi-class),
        // instance segmentation (e.g. YOLO) has C = 1 (binary per-instance).
        let is_semantic = segmentation[0].segmentation.shape()[2] > 1;

        if is_semantic {
            self.render_modelpack_segmentation(
                dst_w,
                dst_h,
                dst_rs,
                dst_c,
                dst_slice,
                &segmentation[0],
                opacity,
            )?;
        } else {
            // Instance masks pair 1:1 with detections; zip stops at the
            // shorter of the two lists.
            for (idx, (seg, det)) in segmentation.iter().zip(detect).enumerate() {
                let color_index = color_mode.index(idx, det.label);
                self.render_yolo_segmentation(
                    dst_w,
                    dst_h,
                    dst_rs,
                    dst_c,
                    dst_slice,
                    seg,
                    color_index,
                    opacity,
                )?;
            }
        }

        Ok(())
    }

    /// Render boxes plus YOLO prototype masks into `dst` (Rgba/Rgb only).
    ///
    /// Each detection's mask is coeff · protos, bilinearly sampled at every
    /// output pixel inside the detection bbox, sigmoid-thresholded at 0.5,
    /// then alpha-blended with the class color.
    fn draw_proto_masks_impl(
        &mut self,
        dst: &mut Tensor<u8>,
        detect: &[DetectBox],
        proto_data: &ProtoData,
        opacity: f32,
        letterbox: Option<[f32; 4]>,
        color_mode: crate::ColorMode,
    ) -> Result<()> {
        let dst_fmt = dst.format().ok_or(Error::NotAnImage)?;
        if !matches!(dst_fmt, PixelFormat::Rgba | PixelFormat::Rgb) {
            return Err(crate::Error::NotSupported(
                "CPU image rendering only supports RGBA or RGB images".to_string(),
            ));
        }

        let _timer = FunctionTimer::new("CPUProcessor::draw_proto_masks");

        let dst_w = dst.width().unwrap();
        let dst_h = dst.height().unwrap();
        let dst_rs = row_stride_for(dst_w, dst_fmt);
        let channels = dst_fmt.channels();

        let mut map = dst.map()?;
        let dst_slice = map.as_mut_slice();

        self.render_box(
            dst_w, dst_h, dst_rs, channels, dst_slice, detect, color_mode,
        )?;

        if detect.is_empty() || proto_data.mask_coefficients.is_empty() {
            return Ok(());
        }

        // Protos tensor layout per the indexing below: (H, W, num_protos).
        let protos_cow = proto_data.protos.as_f32();
        let protos = protos_cow.as_ref();
        let proto_h = protos.shape()[0];
        let proto_w = protos.shape()[1];
        let num_protos = protos.shape()[2];

        // Precompute letterbox scale/offset for output-pixel → proto-pixel mapping.
        // Without letterbox: proto_x = (x / dst_w) * proto_w
        // With letterbox [lx0,ly0,lx1,ly1]: proto_x = (lx0 + (x/dst_w)*(lx1-lx0)) * proto_w
        let (lx0, lx_range, ly0, ly_range) = match letterbox {
            Some([lx0, ly0, lx1, ly1]) => (lx0, lx1 - lx0, ly0, ly1 - ly0),
            None => (0.0_f32, 1.0_f32, 0.0_f32, 1.0_f32),
        };

        for (idx, (det, coeff)) in detect
            .iter()
            .zip(proto_data.mask_coefficients.iter())
            .enumerate()
        {
            let color_index = color_mode.index(idx, det.label);
            let color = self.colors[color_index % self.colors.len()];
            // Effective alpha = color alpha scaled by the requested opacity
            // (fast path avoids the float round-trip at full opacity).
            let alpha = if opacity == 1.0 {
                color[3] as u16
            } else {
                (color[3] as f32 * opacity).round() as u16
            };

            // `detect` has already been un-letterboxed by the caller (lib.rs),
            // so bbox coords are in output-image-normalized space.
            // (float→usize `as` casts saturate, so negative coords clamp to 0.)
            let start_x = (dst_w as f32 * det.bbox.xmin).round() as usize;
            let start_y = (dst_h as f32 * det.bbox.ymin).round() as usize;
            let end_x = ((dst_w as f32 * det.bbox.xmax).round() as usize).min(dst_w);
            let end_y = ((dst_h as f32 * det.bbox.ymax).round() as usize).min(dst_h);

            for y in start_y..end_y {
                for x in start_x..end_x {
                    // Map output pixel (x, y) → model-input-normalized → proto pixel.
                    // When a letterbox was applied, output pixels map to a sub-region
                    // of the model input; lx0/lx_range re-introduce that mapping.
                    let px = (lx0 + (x as f32 / dst_w as f32) * lx_range) * proto_w as f32 - 0.5;
                    let py = (ly0 + (y as f32 / dst_h as f32) * ly_range) * proto_h as f32 - 0.5;

                    // Bilinear interpolation + dot product
                    let acc = bilinear_dot(protos, coeff, num_protos, px, py, proto_w, proto_h);

                    // Sigmoid threshold
                    let mask = 1.0 / (1.0 + (-acc).exp());
                    if mask < 0.5 {
                        continue;
                    }

                    // Alpha blend over the color channels only (the fourth
                    // byte, when present, is left untouched).
                    let dst_index = y * dst_rs + x * channels;
                    for c in 0..3 {
                        dst_slice[dst_index + c] = ((color[c] as u16 * alpha
                            + dst_slice[dst_index + c] as u16 * (255 - alpha))
                            / 255) as u8;
                    }
                }
            }
        }

        Ok(())
    }
}
797}