// edgefirst_image/cpu/mod.rs — CPU fallback image-processor implementation.
1// SPDX-FileCopyrightText: Copyright 2025 Au-Zone Technologies
2// SPDX-License-Identifier: Apache-2.0
3
4use crate::{Crop, Error, Flip, FunctionTimer, ImageProcessorTrait, Rect, Result, Rotation};
5use edgefirst_decoder::{DetectBox, ProtoData, Segmentation};
6use edgefirst_tensor::{
7    DType, PixelFormat, Tensor, TensorDyn, TensorMapTrait, TensorMemory, TensorTrait,
8};
9
10mod convert;
11mod masks;
12mod resize;
13mod tests;
14
15use masks::bilinear_dot;
16
/// CPUProcessor implements the ImageProcessor trait using the fallback CPU
/// implementation for image processing.
///
/// Holds a reusable `fast_image_resize` resizer plus the resize options
/// selected at construction (bilinear by default, nearest via
/// [`CPUProcessor::new_nearest`]).
#[derive(Debug, Clone)]
pub struct CPUProcessor {
    // Reusable resizer state from the fast_image_resize crate.
    resizer: fast_image_resize::Resizer,
    // Resize algorithm/filter options applied on every resize call.
    options: fast_image_resize::ResizeOptions,
    // Per-class RGBA overlay colors; indexed as `label % colors.len()`
    // when rendering masks.
    colors: [[u8; 4]; 20],
}
25
// SAFETY: manually asserted because the compiler does not auto-derive
// Send/Sync for CPUProcessor (presumably because fast_image_resize's
// Resizer is not Send/Sync itself).
// NOTE(review): soundness here depends on Resizer holding no thread-affine
// or aliased state — confirm against the fast_image_resize version in use;
// an unsound assertion would be UB under concurrent access.
unsafe impl Send for CPUProcessor {}
unsafe impl Sync for CPUProcessor {}
28
29impl Default for CPUProcessor {
30    fn default() -> Self {
31        Self::new_bilinear()
32    }
33}
34
35/// Compute row stride for a packed-format Tensor<u8> image given its format.
36fn row_stride_for(width: usize, fmt: PixelFormat) -> usize {
37    use edgefirst_tensor::PixelLayout;
38    match fmt.layout() {
39        PixelLayout::Packed => width * fmt.channels(),
40        PixelLayout::Planar | PixelLayout::SemiPlanar => width,
41        _ => width, // fallback for non-exhaustive
42    }
43}
44
45/// Apply XOR 0x80 bias to color channels only, preserving alpha.
46///
47/// Matches GL int8 shader behavior: `vec4(int8_bias(c.rgb), c.a)`.
48/// For formats without alpha, XORs every byte (fast path).
49pub(crate) fn apply_int8_xor_bias(data: &mut [u8], fmt: PixelFormat) {
50    use edgefirst_tensor::PixelLayout;
51    if !fmt.has_alpha() {
52        for b in data.iter_mut() {
53            *b ^= 0x80;
54        }
55    } else if fmt.layout() == PixelLayout::Planar {
56        // Planar with alpha (e.g. PlanarRgba): XOR color planes, skip alpha plane.
57        let channels = fmt.channels();
58        let plane_size = data.len() / channels;
59        for b in data[..plane_size * (channels - 1)].iter_mut() {
60            *b ^= 0x80;
61        }
62    } else {
63        // Packed with alpha (Rgba, Bgra): XOR color bytes, skip alpha byte.
64        let channels = fmt.channels();
65        for pixel in data.chunks_exact_mut(channels) {
66            for b in &mut pixel[..channels - 1] {
67                *b ^= 0x80;
68            }
69        }
70    }
71}
72
73impl CPUProcessor {
    /// Creates a new CPUProcessor with the default bilinear resizing
    /// algorithm (same configuration as [`Default`]).
    pub fn new() -> Self {
        Self::new_bilinear()
    }
78
79    /// Creates a new CPUConverter with bilinear resizing.
80    fn new_bilinear() -> Self {
81        let resizer = fast_image_resize::Resizer::new();
82        let options = fast_image_resize::ResizeOptions::new()
83            .resize_alg(fast_image_resize::ResizeAlg::Convolution(
84                fast_image_resize::FilterType::Bilinear,
85            ))
86            .use_alpha(false);
87
88        log::debug!("CPUConverter created");
89        Self {
90            resizer,
91            options,
92            colors: crate::DEFAULT_COLORS_U8,
93        }
94    }
95
96    /// Creates a new CPUConverter with nearest neighbor resizing.
97    pub fn new_nearest() -> Self {
98        let resizer = fast_image_resize::Resizer::new();
99        let options = fast_image_resize::ResizeOptions::new()
100            .resize_alg(fast_image_resize::ResizeAlg::Nearest)
101            .use_alpha(false);
102        log::debug!("CPUConverter created");
103        Self {
104            resizer,
105            options,
106            colors: crate::DEFAULT_COLORS_U8,
107        }
108    }
109
110    pub(crate) fn support_conversion_pf(src: PixelFormat, dst: PixelFormat) -> bool {
111        use PixelFormat::*;
112        matches!(
113            (src, dst),
114            (Nv12, Rgb)
115                | (Nv12, Rgba)
116                | (Nv12, Grey)
117                | (Nv16, Rgb)
118                | (Nv16, Rgba)
119                | (Nv16, Bgra)
120                | (Yuyv, Rgb)
121                | (Yuyv, Rgba)
122                | (Yuyv, Grey)
123                | (Yuyv, Yuyv)
124                | (Yuyv, PlanarRgb)
125                | (Yuyv, PlanarRgba)
126                | (Yuyv, Nv16)
127                | (Vyuy, Rgb)
128                | (Vyuy, Rgba)
129                | (Vyuy, Grey)
130                | (Vyuy, Vyuy)
131                | (Vyuy, PlanarRgb)
132                | (Vyuy, PlanarRgba)
133                | (Vyuy, Nv16)
134                | (Rgba, Rgb)
135                | (Rgba, Rgba)
136                | (Rgba, Grey)
137                | (Rgba, Yuyv)
138                | (Rgba, PlanarRgb)
139                | (Rgba, PlanarRgba)
140                | (Rgba, Nv16)
141                | (Rgb, Rgb)
142                | (Rgb, Rgba)
143                | (Rgb, Grey)
144                | (Rgb, Yuyv)
145                | (Rgb, PlanarRgb)
146                | (Rgb, PlanarRgba)
147                | (Rgb, Nv16)
148                | (Grey, Rgb)
149                | (Grey, Rgba)
150                | (Grey, Grey)
151                | (Grey, Yuyv)
152                | (Grey, PlanarRgb)
153                | (Grey, PlanarRgba)
154                | (Grey, Nv16)
155                | (Nv12, Bgra)
156                | (Yuyv, Bgra)
157                | (Vyuy, Bgra)
158                | (Rgba, Bgra)
159                | (Rgb, Bgra)
160                | (Grey, Bgra)
161                | (Bgra, Bgra)
162                | (PlanarRgb, Rgb)
163                | (PlanarRgb, Rgba)
164                | (PlanarRgba, Rgb)
165                | (PlanarRgba, Rgba)
166                | (PlanarRgb, Bgra)
167                | (PlanarRgba, Bgra)
168        )
169    }
170
    /// Format conversion dispatch for Tensor<u8> with PixelFormat metadata.
    ///
    /// Converts `src` into `dst` in a single pass, assuming both tensors
    /// already have the correct geometry for their formats. The supported
    /// pairs mirror `support_conversion_pf`, plus a handful of extra
    /// conversions used only by tests.
    ///
    /// # Errors
    /// Returns [`Error::NotSupported`] for any unhandled (src, dst) pair
    /// and propagates errors from the individual converters.
    pub(crate) fn convert_format_pf(
        src: &Tensor<u8>,
        dst: &mut Tensor<u8>,
        src_fmt: PixelFormat,
        dst_fmt: PixelFormat,
    ) -> Result<()> {
        let _timer = FunctionTimer::new(format!(
            "ImageProcessor::convert_format {} to {}",
            src_fmt, dst_fmt,
        ));

        use PixelFormat::*;
        match (src_fmt, dst_fmt) {
            (Nv12, Rgb) => Self::convert_nv12_to_rgb(src, dst),
            (Nv12, Rgba) => Self::convert_nv12_to_rgba(src, dst),
            (Nv12, Grey) => Self::convert_nv12_to_grey(src, dst),
            (Yuyv, Rgb) => Self::convert_yuyv_to_rgb(src, dst),
            (Yuyv, Rgba) => Self::convert_yuyv_to_rgba(src, dst),
            (Yuyv, Grey) => Self::convert_yuyv_to_grey(src, dst),
            (Yuyv, Yuyv) => Self::copy_image(src, dst),
            (Yuyv, PlanarRgb) => Self::convert_yuyv_to_8bps(src, dst),
            (Yuyv, PlanarRgba) => Self::convert_yuyv_to_prgba(src, dst),
            (Yuyv, Nv16) => Self::convert_yuyv_to_nv16(src, dst),
            (Vyuy, Rgb) => Self::convert_vyuy_to_rgb(src, dst),
            (Vyuy, Rgba) => Self::convert_vyuy_to_rgba(src, dst),
            (Vyuy, Grey) => Self::convert_vyuy_to_grey(src, dst),
            (Vyuy, Vyuy) => Self::copy_image(src, dst),
            (Vyuy, PlanarRgb) => Self::convert_vyuy_to_8bps(src, dst),
            (Vyuy, PlanarRgba) => Self::convert_vyuy_to_prgba(src, dst),
            (Vyuy, Nv16) => Self::convert_vyuy_to_nv16(src, dst),
            (Rgba, Rgb) => Self::convert_rgba_to_rgb(src, dst),
            (Rgba, Rgba) => Self::copy_image(src, dst),
            (Rgba, Grey) => Self::convert_rgba_to_grey(src, dst),
            (Rgba, Yuyv) => Self::convert_rgba_to_yuyv(src, dst),
            (Rgba, PlanarRgb) => Self::convert_rgba_to_8bps(src, dst),
            (Rgba, PlanarRgba) => Self::convert_rgba_to_prgba(src, dst),
            (Rgba, Nv16) => Self::convert_rgba_to_nv16(src, dst),
            (Rgb, Rgb) => Self::copy_image(src, dst),
            (Rgb, Rgba) => Self::convert_rgb_to_rgba(src, dst),
            (Rgb, Grey) => Self::convert_rgb_to_grey(src, dst),
            (Rgb, Yuyv) => Self::convert_rgb_to_yuyv(src, dst),
            (Rgb, PlanarRgb) => Self::convert_rgb_to_8bps(src, dst),
            (Rgb, PlanarRgba) => Self::convert_rgb_to_prgba(src, dst),
            (Rgb, Nv16) => Self::convert_rgb_to_nv16(src, dst),
            (Grey, Rgb) => Self::convert_grey_to_rgb(src, dst),
            (Grey, Rgba) => Self::convert_grey_to_rgba(src, dst),
            (Grey, Grey) => Self::copy_image(src, dst),
            (Grey, Yuyv) => Self::convert_grey_to_yuyv(src, dst),
            (Grey, PlanarRgb) => Self::convert_grey_to_8bps(src, dst),
            (Grey, PlanarRgba) => Self::convert_grey_to_prgba(src, dst),
            (Grey, Nv16) => Self::convert_grey_to_nv16(src, dst),

            // the following converts are added for use in testing
            (Nv16, Rgb) => Self::convert_nv16_to_rgb(src, dst),
            (Nv16, Rgba) => Self::convert_nv16_to_rgba(src, dst),
            (PlanarRgb, Rgb) => Self::convert_8bps_to_rgb(src, dst),
            (PlanarRgb, Rgba) => Self::convert_8bps_to_rgba(src, dst),
            (PlanarRgba, Rgb) => Self::convert_prgba_to_rgb(src, dst),
            (PlanarRgba, Rgba) => Self::convert_prgba_to_rgba(src, dst),

            // BGRA destination: convert to RGBA layout, then swap R and B
            // in-place with swizzle_rb_4chan.
            (Bgra, Bgra) => Self::copy_image(src, dst),
            (Nv12, Bgra) => {
                Self::convert_nv12_to_rgba(src, dst)?;
                Self::swizzle_rb_4chan(dst)
            }
            (Nv16, Bgra) => {
                Self::convert_nv16_to_rgba(src, dst)?;
                Self::swizzle_rb_4chan(dst)
            }
            (Yuyv, Bgra) => {
                Self::convert_yuyv_to_rgba(src, dst)?;
                Self::swizzle_rb_4chan(dst)
            }
            (Vyuy, Bgra) => {
                Self::convert_vyuy_to_rgba(src, dst)?;
                Self::swizzle_rb_4chan(dst)
            }
            (Rgba, Bgra) => {
                // Straight byte copy, then swap R/B in place.
                dst.map()?.copy_from_slice(&src.map()?);
                Self::swizzle_rb_4chan(dst)
            }
            (Rgb, Bgra) => {
                Self::convert_rgb_to_rgba(src, dst)?;
                Self::swizzle_rb_4chan(dst)
            }
            (Grey, Bgra) => {
                Self::convert_grey_to_rgba(src, dst)?;
                Self::swizzle_rb_4chan(dst)
            }
            (PlanarRgb, Bgra) => {
                Self::convert_8bps_to_rgba(src, dst)?;
                Self::swizzle_rb_4chan(dst)
            }
            (PlanarRgba, Bgra) => {
                Self::convert_prgba_to_rgba(src, dst)?;
                Self::swizzle_rb_4chan(dst)
            }

            (s, d) => Err(Error::NotSupported(format!("Conversion from {s} to {d}",))),
        }
    }
274
275    /// Tensor<u8>-based fill_image_outside_crop.
276    pub(crate) fn fill_image_outside_crop_u8(
277        dst: &mut Tensor<u8>,
278        rgba: [u8; 4],
279        crop: Rect,
280    ) -> Result<()> {
281        let dst_fmt = dst.format().unwrap();
282        let dst_w = dst.width().unwrap();
283        let dst_h = dst.height().unwrap();
284        let mut dst_map = dst.map()?;
285        let dst_tup = (dst_map.as_mut_slice(), dst_w, dst_h);
286        Self::fill_outside_crop_dispatch(dst_tup, dst_fmt, rgba, crop)
287    }
288
    /// Common fill dispatch by format.
    ///
    /// `dst` is (pixel bytes, width, height). The RGBA fill color is
    /// converted to each format's native representation before painting
    /// everything outside `crop`.
    fn fill_outside_crop_dispatch(
        dst: (&mut [u8], usize, usize),
        fmt: PixelFormat,
        rgba: [u8; 4],
        crop: Rect,
    ) -> Result<()> {
        use PixelFormat::*;
        match fmt {
            // NOTE(review): Bgra reuses the RGBA byte order for the fill
            // value; if the destination truly stores B,G,R,A this paints R
            // and B swapped — confirm whether that is intended.
            Rgba | Bgra => Self::fill_image_outside_crop_(dst, rgba, crop),
            Rgb => Self::fill_image_outside_crop_(dst, Self::rgba_to_rgb(rgba), crop),
            Grey => Self::fill_image_outside_crop_(dst, Self::rgba_to_grey(rgba), crop),
            // YUYV packs two pixels into one 4-byte macropixel, so the fill
            // runs on a half-width view; the crop is halved to match, with
            // width rounded up so partially covered macropixels stay inside.
            Yuyv => Self::fill_image_outside_crop_(
                (dst.0, dst.1 / 2, dst.2),
                Self::rgba_to_yuyv(rgba),
                Rect::new(crop.left / 2, crop.top, crop.width.div_ceil(2), crop.height),
            ),
            PlanarRgb => Self::fill_image_outside_crop_planar(dst, Self::rgba_to_rgb(rgba), crop),
            PlanarRgba => Self::fill_image_outside_crop_planar(dst, rgba, crop),
            // NV16: luma plane gets Y; the interleaved chroma plane gets
            // the (U, V) pair extracted from the YUYV macropixel.
            Nv16 => {
                let yuyv = Self::rgba_to_yuyv(rgba);
                Self::fill_image_outside_crop_yuv_semiplanar(dst, yuyv[0], [yuyv[1], yuyv[3]], crop)
            }
            _ => Err(Error::Internal(format!(
                "Found unexpected destination {fmt}",
            ))),
        }
    }
317}
318
319impl ImageProcessorTrait for CPUProcessor {
    /// Full conversion pipeline (format conversion, resize, rotation, flip,
    /// crop), dispatched on the src/dst dtypes by `convert_impl`.
    fn convert(
        &mut self,
        src: &TensorDyn,
        dst: &mut TensorDyn,
        rotation: Rotation,
        flip: Flip,
        crop: Crop,
    ) -> Result<()> {
        self.convert_impl(src, dst, rotation, flip, crop)
    }
330
    /// Renders detection boxes and segmentation masks onto `dst`.
    ///
    /// # Errors
    /// [`Error::NotAnImage`] when `dst` is not a u8 tensor; further format
    /// restrictions are checked by `draw_masks_impl`.
    fn draw_masks(
        &mut self,
        dst: &mut TensorDyn,
        detect: &[DetectBox],
        segmentation: &[Segmentation],
    ) -> Result<()> {
        let dst = dst.as_u8_mut().ok_or(Error::NotAnImage)?;
        self.draw_masks_impl(dst, detect, segmentation)
    }
340
    /// Renders detection boxes and prototype-based instance masks onto
    /// `dst`.
    ///
    /// # Errors
    /// [`Error::NotAnImage`] when `dst` is not a u8 tensor; further format
    /// restrictions are checked by `draw_masks_proto_impl`.
    fn draw_masks_proto(
        &mut self,
        dst: &mut TensorDyn,
        detect: &[DetectBox],
        proto_data: &ProtoData,
    ) -> Result<()> {
        let dst = dst.as_u8_mut().ok_or(Error::NotAnImage)?;
        self.draw_masks_proto_impl(dst, detect, proto_data)
    }
350
    /// Renders per-detection masks and packs them into a single-channel
    /// byte atlas: one vertical strip per detection, each `output_width`
    /// wide and "padded bbox" high, stacked top to bottom.
    ///
    /// Returns the atlas bytes plus one `MaskRegion` per detection
    /// describing its strip offset and padded/bbox geometry.
    fn decode_masks_atlas(
        &mut self,
        detect: &[crate::DetectBox],
        proto_data: crate::ProtoData,
        output_width: usize,
        output_height: usize,
    ) -> Result<(Vec<u8>, Vec<crate::MaskRegion>)> {
        use crate::FunctionTimer;

        let _timer = FunctionTimer::new("CPUProcessor::decode_masks_atlas");

        // Extra pixels kept around each bbox so consumers can sample
        // slightly outside the box without clipping.
        let padding = 4usize;

        // Render per-detection masks via existing path
        let mask_results =
            self.render_masks_from_protos(detect, proto_data, output_width, output_height)?;

        // Pack into compact atlas: each strip is padded bbox height
        let ow = output_width as i32;
        let oh = output_height as i32;
        let pad = padding as i32;

        let mut regions = Vec::with_capacity(mask_results.len());
        let mut atlas_y = 0usize;

        // Pre-compute regions: padded bboxes clamped to the output image,
        // with a minimum 1-pixel extent so no region collapses to zero.
        for mr in &mask_results {
            let bx = mr.x as i32;
            let by = mr.y as i32;
            let bw = mr.w as i32;
            let bh = mr.h as i32;
            let padded_x = (bx - pad).max(0);
            let padded_y = (by - pad).max(0);
            let padded_w = ((bx + bw + pad).min(ow) - padded_x).max(1);
            let padded_h = ((by + bh + pad).min(oh) - padded_y).max(1);
            regions.push(crate::MaskRegion {
                atlas_y_offset: atlas_y,
                padded_x: padded_x as usize,
                padded_y: padded_y as usize,
                padded_w: padded_w as usize,
                padded_h: padded_h as usize,
                bbox_x: mr.x,
                bbox_y: mr.y,
                bbox_w: mr.w,
                bbox_h: mr.h,
            });
            atlas_y += padded_h as usize;
        }

        let atlas_height = atlas_y;
        let mut atlas = vec![0u8; output_width * atlas_height];

        for (mr, region) in mask_results.iter().zip(regions.iter()) {
            // Copy mask pixels into the atlas at the correct position.
            // NOTE(review): rows are bounds-checked only against the whole
            // atlas, not the strip; if (mr.y - padded_y) + mr.h ever
            // exceeded padded_h a row would land in the next strip —
            // confirm the clamping above rules that out.
            for row in 0..mr.h {
                let dst_row = region.atlas_y_offset + (mr.y - region.padded_y) + row;
                let dst_start = dst_row * output_width + mr.x;
                let src_start = row * mr.w;
                if dst_start + mr.w <= atlas.len() && src_start + mr.w <= mr.pixels.len() {
                    atlas[dst_start..dst_start + mr.w]
                        .copy_from_slice(&mr.pixels[src_start..src_start + mr.w]);
                }
            }
        }

        Ok((atlas, regions))
    }
418
419    fn set_class_colors(&mut self, colors: &[[u8; 4]]) -> Result<()> {
420        for (c, new_c) in self.colors.iter_mut().zip(colors.iter()) {
421            *c = *new_c;
422        }
423        Ok(())
424    }
425}
426
427// Internal methods — dtype-aware dispatch layer.
428impl CPUProcessor {
    /// Top-level conversion dispatcher: handles dtype combinations.
    ///
    /// * (U8, U8): run the regular u8 pipeline via `convert_u8`.
    /// * (U8, I8): run the u8 pipeline directly into the i8 buffer (same
    ///   byte layout), then XOR color bytes with 0x80 in place to re-bias
    ///   into signed range, matching the GL int8 shader path.
    ///
    /// # Errors
    /// [`Error::NotAnImage`] when either tensor lacks format metadata;
    /// [`Error::NotSupported`] for any other dtype pair.
    pub(crate) fn convert_impl(
        &mut self,
        src: &TensorDyn,
        dst: &mut TensorDyn,
        rotation: Rotation,
        flip: Flip,
        crop: Crop,
    ) -> Result<()> {
        let src_fmt = src.format().ok_or(Error::NotAnImage)?;
        let dst_fmt = dst.format().ok_or(Error::NotAnImage)?;

        match (src.dtype(), dst.dtype()) {
            (DType::U8, DType::U8) => {
                // Dtype already checked above, so these casts cannot fail.
                let src = src.as_u8().unwrap();
                let dst = dst.as_u8_mut().unwrap();
                self.convert_u8(src, dst, src_fmt, dst_fmt, rotation, flip, crop)
            }
            (DType::U8, DType::I8) => {
                // Int8 output: reinterpret the i8 destination as u8 (layout-
                // identical), convert directly into it, then XOR 0x80 in-place.
                let src_u8 = src.as_u8().unwrap();
                let dst_i8 = dst.as_i8_mut().unwrap();
                // SAFETY: Tensor<i8> and Tensor<u8> are layout-identical
                // (same element size, no T-dependent drop glue). Same
                // rationale as gl::processor::tensor_i8_as_u8_mut.
                let dst_u8 = unsafe { &mut *(dst_i8 as *mut Tensor<i8> as *mut Tensor<u8>) };
                self.convert_u8(src_u8, dst_u8, src_fmt, dst_fmt, rotation, flip, crop)?;
                // Apply XOR 0x80 bias in-place (u8 → i8 conversion);
                // alpha bytes are preserved by apply_int8_xor_bias.
                let mut map = dst_u8.map()?;
                apply_int8_xor_bias(map.as_mut_slice(), dst_fmt);
                Ok(())
            }
            (s, d) => Err(Error::NotSupported(format!("dtype {s} -> {d}",))),
        }
    }
465
    /// U8-to-U8 conversion: the full format conversion + resize pipeline.
    ///
    /// Pipeline:
    /// 1. Pick an intermediate packed format (RGB/RGBA/GREY/BGRA) that the
    ///    resizer can operate on for this (src, dst) pair.
    /// 2. If no resize/flip/rotate/crop is needed and a direct conversion
    ///    exists, convert in one step and return.
    /// 3. Otherwise convert src → intermediate (when they differ),
    ///    resize/flip/rotate into destination geometry, then convert the
    ///    result to `dst_fmt`.
    /// 4. If a `dst_rect` plus `dst_color` was requested, paint the area
    ///    outside the rect with that color as a final step.
    #[allow(clippy::too_many_arguments)]
    fn convert_u8(
        &mut self,
        src: &Tensor<u8>,
        dst: &mut Tensor<u8>,
        src_fmt: PixelFormat,
        dst_fmt: PixelFormat,
        rotation: Rotation,
        flip: Flip,
        crop: Crop,
    ) -> Result<()> {
        use PixelFormat::*;

        let src_w = src.width().unwrap();
        let src_h = src.height().unwrap();
        let dst_w = dst.width().unwrap();
        let dst_h = dst.height().unwrap();

        crop.check_crop_dims(src_w, src_h, dst_w, dst_h)?;

        // Determine intermediate format for the resize step
        let intermediate = match (src_fmt, dst_fmt) {
            (Nv12, Rgb) => Rgb,
            (Nv12, Rgba) => Rgba,
            (Nv12, Grey) => Grey,
            (Nv12, Yuyv) => Rgba,
            (Nv12, Nv16) => Rgba,
            (Nv12, PlanarRgb) => Rgb,
            (Nv12, PlanarRgba) => Rgba,
            (Yuyv, Rgb) => Rgb,
            (Yuyv, Rgba) => Rgba,
            (Yuyv, Grey) => Grey,
            (Yuyv, Yuyv) => Rgba,
            (Yuyv, PlanarRgb) => Rgb,
            (Yuyv, PlanarRgba) => Rgba,
            (Yuyv, Nv16) => Rgba,
            (Vyuy, Rgb) => Rgb,
            (Vyuy, Rgba) => Rgba,
            (Vyuy, Grey) => Grey,
            (Vyuy, Vyuy) => Rgba,
            (Vyuy, PlanarRgb) => Rgb,
            (Vyuy, PlanarRgba) => Rgba,
            (Vyuy, Nv16) => Rgba,
            (Rgba, Rgb) => Rgba,
            (Rgba, Rgba) => Rgba,
            (Rgba, Grey) => Grey,
            (Rgba, Yuyv) => Rgba,
            (Rgba, PlanarRgb) => Rgba,
            (Rgba, PlanarRgba) => Rgba,
            (Rgba, Nv16) => Rgba,
            (Rgb, Rgb) => Rgb,
            (Rgb, Rgba) => Rgb,
            (Rgb, Grey) => Grey,
            (Rgb, Yuyv) => Rgb,
            (Rgb, PlanarRgb) => Rgb,
            (Rgb, PlanarRgba) => Rgb,
            (Rgb, Nv16) => Rgb,
            (Grey, Rgb) => Rgb,
            (Grey, Rgba) => Rgba,
            (Grey, Grey) => Grey,
            (Grey, Yuyv) => Grey,
            (Grey, PlanarRgb) => Grey,
            (Grey, PlanarRgba) => Grey,
            (Grey, Nv16) => Grey,
            (Nv12, Bgra) => Rgba,
            (Yuyv, Bgra) => Rgba,
            (Vyuy, Bgra) => Rgba,
            (Rgba, Bgra) => Rgba,
            (Rgb, Bgra) => Rgb,
            (Grey, Bgra) => Grey,
            (Bgra, Bgra) => Bgra,
            (Nv16, Rgb) => Rgb,
            (Nv16, Rgba) => Rgba,
            (Nv16, Bgra) => Rgba,
            (PlanarRgb, Rgb) => Rgb,
            (PlanarRgb, Rgba) => Rgb,
            (PlanarRgb, Bgra) => Rgb,
            (PlanarRgba, Rgb) => Rgba,
            (PlanarRgba, Rgba) => Rgba,
            (PlanarRgba, Bgra) => Rgba,
            (s, d) => {
                return Err(Error::NotSupported(format!("Conversion from {s} to {d}",)));
            }
        };

        // Geometry work is needed whenever there is a rotation/flip, a size
        // change, or a src/dst crop rect that is not the full image.
        let need_resize_flip_rotation = rotation != Rotation::None
            || flip != Flip::None
            || src_w != dst_w
            || src_h != dst_h
            || crop.src_rect.is_some_and(|c| {
                c != Rect {
                    left: 0,
                    top: 0,
                    width: src_w,
                    height: src_h,
                }
            })
            || crop.dst_rect.is_some_and(|c| {
                c != Rect {
                    left: 0,
                    top: 0,
                    width: dst_w,
                    height: dst_h,
                }
            });

        // check if a direct conversion can be done
        if !need_resize_flip_rotation && Self::support_conversion_pf(src_fmt, dst_fmt) {
            return Self::convert_format_pf(src, dst, src_fmt, dst_fmt);
        }

        // any extra checks: YUYV stores two pixels per macropixel, so an
        // odd-width destination cannot be represented.
        if dst_fmt == Yuyv && !dst_w.is_multiple_of(2) {
            return Err(Error::NotSupported(format!(
                "{} destination must have width divisible by 2",
                dst_fmt,
            )));
        }

        // Create the tmp buffer in the intermediate format (or borrow src
        // directly when it already is the intermediate format).
        let mut tmp_buffer;
        let tmp;
        let tmp_fmt;
        if intermediate != src_fmt {
            tmp_buffer = Tensor::<u8>::image(src_w, src_h, intermediate, Some(TensorMemory::Mem))?;

            Self::convert_format_pf(src, &mut tmp_buffer, src_fmt, intermediate)?;
            tmp = &tmp_buffer;
            tmp_fmt = intermediate;
        } else {
            tmp = src;
            tmp_fmt = src_fmt;
        }

        // format must be RGB/RGBA/GREY
        debug_assert!(matches!(tmp_fmt, Rgb | Rgba | Grey));
        if tmp_fmt == dst_fmt {
            // Resize directly into the destination.
            self.resize_flip_rotate_pf(tmp, dst, dst_fmt, rotation, flip, crop)?;
        } else if !need_resize_flip_rotation {
            // Only a format change remains.
            Self::convert_format_pf(tmp, dst, tmp_fmt, dst_fmt)?;
        } else {
            // Resize into a second temp in tmp_fmt, then convert to dst_fmt.
            let mut tmp2 = Tensor::<u8>::image(dst_w, dst_h, tmp_fmt, Some(TensorMemory::Mem))?;
            // When writing into a partial dst_rect without a fill color, the
            // existing destination content must survive outside the rect, so
            // pre-populate tmp2 from dst before resizing into it.
            if crop.dst_rect.is_some_and(|c| {
                c != Rect {
                    left: 0,
                    top: 0,
                    width: dst_w,
                    height: dst_h,
                }
            }) && crop.dst_color.is_none()
            {
                Self::convert_format_pf(dst, &mut tmp2, dst_fmt, tmp_fmt)?;
            }
            self.resize_flip_rotate_pf(tmp, &mut tmp2, tmp_fmt, rotation, flip, crop)?;
            Self::convert_format_pf(&tmp2, dst, tmp_fmt, dst_fmt)?;
        }
        // With both a dst_rect and a fill color, paint everything outside
        // the rect with that color.
        if let (Some(dst_rect), Some(dst_color)) = (crop.dst_rect, crop.dst_color) {
            let full_rect = Rect {
                left: 0,
                top: 0,
                width: dst_w,
                height: dst_h,
            };
            if dst_rect != full_rect {
                Self::fill_image_outside_crop_u8(dst, dst_color, dst_rect)?;
            }
        }

        Ok(())
    }
637
    /// Renders detection boxes plus segmentation overlays into an RGBA or
    /// RGB u8 image tensor.
    ///
    /// # Errors
    /// [`Error::NotAnImage`] when `dst` has no format metadata;
    /// [`Error::NotSupported`] for formats other than RGBA/RGB.
    fn draw_masks_impl(
        &mut self,
        dst: &mut Tensor<u8>,
        detect: &[DetectBox],
        segmentation: &[Segmentation],
    ) -> Result<()> {
        let dst_fmt = dst.format().ok_or(Error::NotAnImage)?;
        if !matches!(dst_fmt, PixelFormat::Rgba | PixelFormat::Rgb) {
            return Err(crate::Error::NotSupported(
                "CPU image rendering only supports RGBA or RGB images".to_string(),
            ));
        }

        let _timer = FunctionTimer::new("CPUProcessor::draw_masks");

        // Format was validated above, so width/height metadata is present.
        let dst_w = dst.width().unwrap();
        let dst_h = dst.height().unwrap();
        let dst_rs = row_stride_for(dst_w, dst_fmt);
        let dst_c = dst_fmt.channels();

        let mut map = dst.map()?;
        let dst_slice = map.as_mut_slice();

        // Boxes are drawn first so masks blend over them.
        self.render_box(dst_w, dst_h, dst_rs, dst_c, dst_slice, detect)?;

        if segmentation.is_empty() {
            return Ok(());
        }

        // Semantic segmentation (e.g. ModelPack) has C > 1 (multi-class),
        // instance segmentation (e.g. YOLO) has C = 1 (binary per-instance).
        let is_semantic = segmentation[0].segmentation.shape()[2] > 1;

        if is_semantic {
            // One multi-class map covering the whole image.
            self.render_modelpack_segmentation(
                dst_w,
                dst_h,
                dst_rs,
                dst_c,
                dst_slice,
                &segmentation[0],
            )?;
        } else {
            // One binary mask per detection, colored by the box label.
            for (seg, detect) in segmentation.iter().zip(detect) {
                self.render_yolo_segmentation(
                    dst_w,
                    dst_h,
                    dst_rs,
                    dst_c,
                    dst_slice,
                    seg,
                    detect.label,
                )?;
            }
        }

        Ok(())
    }
696
697    fn draw_masks_proto_impl(
698        &mut self,
699        dst: &mut Tensor<u8>,
700        detect: &[DetectBox],
701        proto_data: &ProtoData,
702    ) -> Result<()> {
703        let dst_fmt = dst.format().ok_or(Error::NotAnImage)?;
704        if !matches!(dst_fmt, PixelFormat::Rgba | PixelFormat::Rgb) {
705            return Err(crate::Error::NotSupported(
706                "CPU image rendering only supports RGBA or RGB images".to_string(),
707            ));
708        }
709
710        let _timer = FunctionTimer::new("CPUProcessor::draw_masks_proto");
711
712        let dst_w = dst.width().unwrap();
713        let dst_h = dst.height().unwrap();
714        let dst_rs = row_stride_for(dst_w, dst_fmt);
715        let channels = dst_fmt.channels();
716
717        let mut map = dst.map()?;
718        let dst_slice = map.as_mut_slice();
719
720        self.render_box(dst_w, dst_h, dst_rs, channels, dst_slice, detect)?;
721
722        if detect.is_empty() || proto_data.mask_coefficients.is_empty() {
723            return Ok(());
724        }
725
726        let protos_cow = proto_data.protos.as_f32();
727        let protos = protos_cow.as_ref();
728        let proto_h = protos.shape()[0];
729        let proto_w = protos.shape()[1];
730        let num_protos = protos.shape()[2];
731
732        for (det, coeff) in detect.iter().zip(proto_data.mask_coefficients.iter()) {
733            let color = self.colors[det.label % self.colors.len()];
734            let alpha = color[3] as u16;
735
736            // Pixel bounds of the detection in dst image space
737            let start_x = (dst_w as f32 * det.bbox.xmin).round() as usize;
738            let start_y = (dst_h as f32 * det.bbox.ymin).round() as usize;
739            let end_x = ((dst_w as f32 * det.bbox.xmax).round() as usize).min(dst_w);
740            let end_y = ((dst_h as f32 * det.bbox.ymax).round() as usize).min(dst_h);
741
742            for y in start_y..end_y {
743                for x in start_x..end_x {
744                    // Map pixel (x, y) to proto space
745                    let px = (x as f32 / dst_w as f32) * proto_w as f32 - 0.5;
746                    let py = (y as f32 / dst_h as f32) * proto_h as f32 - 0.5;
747
748                    // Bilinear interpolation + dot product
749                    let acc = bilinear_dot(protos, coeff, num_protos, px, py, proto_w, proto_h);
750
751                    // Sigmoid threshold
752                    let mask = 1.0 / (1.0 + (-acc).exp());
753                    if mask < 0.5 {
754                        continue;
755                    }
756
757                    // Alpha blend
758                    let dst_index = y * dst_rs + x * channels;
759                    for c in 0..3 {
760                        dst_slice[dst_index + c] = ((color[c] as u16 * alpha
761                            + dst_slice[dst_index + c] as u16 * (255 - alpha))
762                            / 255) as u8;
763                    }
764                }
765            }
766        }
767
768        Ok(())
769    }
770}