1use crate::{Crop, Error, Flip, FunctionTimer, ImageProcessorTrait, Rect, Result, Rotation};
5use edgefirst_decoder::{DetectBox, ProtoData, Segmentation};
6use edgefirst_tensor::{
7 DType, PixelFormat, Tensor, TensorDyn, TensorMapTrait, TensorMemory, TensorTrait,
8};
9
10mod convert;
11mod masks;
12mod resize;
13mod tests;
14
/// Software (CPU-only) image processor: pixel-format conversion, resizing,
/// rotation/flip, cropping, and detection/segmentation mask rendering.
#[derive(Debug, Clone)]
pub struct CPUProcessor {
    // Resizer from `fast_image_resize`, reused across calls.
    resizer: fast_image_resize::Resizer,
    // Resize options (bilinear convolution or nearest) fixed at construction.
    options: fast_image_resize::ResizeOptions,
    // Per-class RGBA palette used when rendering boxes and masks; seeded from
    // `crate::DEFAULT_COLORS_U8` and updatable via `set_class_colors`.
    colors: [[u8; 4]; 20],
}
26
// SAFETY: presumably `fast_image_resize::Resizer` (the only non-trivially
// Send/Sync field candidate) holds no thread-affine state, making the
// processor safe to move and share across threads. NOTE(review): confirm
// against the fast_image_resize docs — if `Resizer` is already `Send + Sync`
// these manual impls are redundant and should be removed.
unsafe impl Send for CPUProcessor {}
unsafe impl Sync for CPUProcessor {}
29
impl Default for CPUProcessor {
    /// Defaults to the bilinear-filtering configuration (same as `new`).
    fn default() -> Self {
        Self::new_bilinear()
    }
}
35
36fn prepare_dst_base_cpu(dst: &mut TensorDyn, background: Option<&TensorDyn>) -> Result<()> {
47 match background {
48 Some(bg) => {
49 if bg.shape() != dst.shape() {
50 return Err(Error::InvalidShape(
51 "background shape does not match dst".into(),
52 ));
53 }
54 if bg.format() != dst.format() {
55 return Err(Error::InvalidShape(
56 "background pixel format does not match dst".into(),
57 ));
58 }
59 let bg_u8 = bg.as_u8().ok_or(Error::NotAnImage)?;
60 let dst_u8 = dst.as_u8_mut().ok_or(Error::NotAnImage)?;
61 let bg_map = bg_u8.map()?;
62 let mut dst_map = dst_u8.map()?;
63 let bg_slice = bg_map.as_slice();
64 let dst_slice = dst_map.as_mut_slice();
65 if bg_slice.len() != dst_slice.len() {
66 return Err(Error::InvalidShape(
67 "background buffer size does not match dst".into(),
68 ));
69 }
70 dst_slice.copy_from_slice(bg_slice);
71 }
72 None => {
73 let dst_u8 = dst.as_u8_mut().ok_or(Error::NotAnImage)?;
74 let mut dst_map = dst_u8.map()?;
75 dst_map.as_mut_slice().fill(0);
76 }
77 }
78 Ok(())
79}
80
81fn row_stride_for(width: usize, fmt: PixelFormat) -> usize {
83 use edgefirst_tensor::PixelLayout;
84 match fmt.layout() {
85 PixelLayout::Packed => width * fmt.channels(),
86 PixelLayout::Planar | PixelLayout::SemiPlanar => width,
87 _ => width, }
89}
90
91pub(crate) fn apply_int8_xor_bias(data: &mut [u8], fmt: PixelFormat) {
96 use edgefirst_tensor::PixelLayout;
97 if !fmt.has_alpha() {
98 for b in data.iter_mut() {
99 *b ^= 0x80;
100 }
101 } else if fmt.layout() == PixelLayout::Planar {
102 let channels = fmt.channels();
104 let plane_size = data.len() / channels;
105 for b in data[..plane_size * (channels - 1)].iter_mut() {
106 *b ^= 0x80;
107 }
108 } else {
109 let channels = fmt.channels();
111 for pixel in data.chunks_exact_mut(channels) {
112 for b in &mut pixel[..channels - 1] {
113 *b ^= 0x80;
114 }
115 }
116 }
117}
118
119impl CPUProcessor {
120 pub fn new() -> Self {
122 Self::new_bilinear()
123 }
124
125 fn new_bilinear() -> Self {
127 let resizer = fast_image_resize::Resizer::new();
128 let options = fast_image_resize::ResizeOptions::new()
129 .resize_alg(fast_image_resize::ResizeAlg::Convolution(
130 fast_image_resize::FilterType::Bilinear,
131 ))
132 .use_alpha(false);
133
134 log::debug!("CPUConverter created");
135 Self {
136 resizer,
137 options,
138 colors: crate::DEFAULT_COLORS_U8,
139 }
140 }
141
142 pub fn new_nearest() -> Self {
144 let resizer = fast_image_resize::Resizer::new();
145 let options = fast_image_resize::ResizeOptions::new()
146 .resize_alg(fast_image_resize::ResizeAlg::Nearest)
147 .use_alpha(false);
148 log::debug!("CPUConverter created");
149 Self {
150 resizer,
151 options,
152 colors: crate::DEFAULT_COLORS_U8,
153 }
154 }
155
156 pub(crate) fn support_conversion_pf(src: PixelFormat, dst: PixelFormat) -> bool {
157 use PixelFormat::*;
158 matches!(
159 (src, dst),
160 (Nv12, Rgb)
161 | (Nv12, Rgba)
162 | (Nv12, Grey)
163 | (Nv16, Rgb)
164 | (Nv16, Rgba)
165 | (Nv16, Bgra)
166 | (Yuyv, Rgb)
167 | (Yuyv, Rgba)
168 | (Yuyv, Grey)
169 | (Yuyv, Yuyv)
170 | (Yuyv, PlanarRgb)
171 | (Yuyv, PlanarRgba)
172 | (Yuyv, Nv16)
173 | (Vyuy, Rgb)
174 | (Vyuy, Rgba)
175 | (Vyuy, Grey)
176 | (Vyuy, Vyuy)
177 | (Vyuy, PlanarRgb)
178 | (Vyuy, PlanarRgba)
179 | (Vyuy, Nv16)
180 | (Rgba, Rgb)
181 | (Rgba, Rgba)
182 | (Rgba, Grey)
183 | (Rgba, Yuyv)
184 | (Rgba, PlanarRgb)
185 | (Rgba, PlanarRgba)
186 | (Rgba, Nv16)
187 | (Rgb, Rgb)
188 | (Rgb, Rgba)
189 | (Rgb, Grey)
190 | (Rgb, Yuyv)
191 | (Rgb, PlanarRgb)
192 | (Rgb, PlanarRgba)
193 | (Rgb, Nv16)
194 | (Grey, Rgb)
195 | (Grey, Rgba)
196 | (Grey, Grey)
197 | (Grey, Yuyv)
198 | (Grey, PlanarRgb)
199 | (Grey, PlanarRgba)
200 | (Grey, Nv16)
201 | (Nv12, Bgra)
202 | (Yuyv, Bgra)
203 | (Vyuy, Bgra)
204 | (Rgba, Bgra)
205 | (Rgb, Bgra)
206 | (Grey, Bgra)
207 | (Bgra, Bgra)
208 | (PlanarRgb, Rgb)
209 | (PlanarRgb, Rgba)
210 | (PlanarRgba, Rgb)
211 | (PlanarRgba, Rgba)
212 | (PlanarRgb, Bgra)
213 | (PlanarRgba, Bgra)
214 )
215 }
216
217 pub(crate) fn convert_format_pf(
219 src: &Tensor<u8>,
220 dst: &mut Tensor<u8>,
221 src_fmt: PixelFormat,
222 dst_fmt: PixelFormat,
223 ) -> Result<()> {
224 let _timer = FunctionTimer::new(format!(
225 "ImageProcessor::convert_format {} to {}",
226 src_fmt, dst_fmt,
227 ));
228
229 use PixelFormat::*;
230 match (src_fmt, dst_fmt) {
231 (Nv12, Rgb) => Self::convert_nv12_to_rgb(src, dst),
232 (Nv12, Rgba) => Self::convert_nv12_to_rgba(src, dst),
233 (Nv12, Grey) => Self::convert_nv12_to_grey(src, dst),
234 (Yuyv, Rgb) => Self::convert_yuyv_to_rgb(src, dst),
235 (Yuyv, Rgba) => Self::convert_yuyv_to_rgba(src, dst),
236 (Yuyv, Grey) => Self::convert_yuyv_to_grey(src, dst),
237 (Yuyv, Yuyv) => Self::copy_image(src, dst),
238 (Yuyv, PlanarRgb) => Self::convert_yuyv_to_8bps(src, dst),
239 (Yuyv, PlanarRgba) => Self::convert_yuyv_to_prgba(src, dst),
240 (Yuyv, Nv16) => Self::convert_yuyv_to_nv16(src, dst),
241 (Vyuy, Rgb) => Self::convert_vyuy_to_rgb(src, dst),
242 (Vyuy, Rgba) => Self::convert_vyuy_to_rgba(src, dst),
243 (Vyuy, Grey) => Self::convert_vyuy_to_grey(src, dst),
244 (Vyuy, Vyuy) => Self::copy_image(src, dst),
245 (Vyuy, PlanarRgb) => Self::convert_vyuy_to_8bps(src, dst),
246 (Vyuy, PlanarRgba) => Self::convert_vyuy_to_prgba(src, dst),
247 (Vyuy, Nv16) => Self::convert_vyuy_to_nv16(src, dst),
248 (Rgba, Rgb) => Self::convert_rgba_to_rgb(src, dst),
249 (Rgba, Rgba) => Self::copy_image(src, dst),
250 (Rgba, Grey) => Self::convert_rgba_to_grey(src, dst),
251 (Rgba, Yuyv) => Self::convert_rgba_to_yuyv(src, dst),
252 (Rgba, PlanarRgb) => Self::convert_rgba_to_8bps(src, dst),
253 (Rgba, PlanarRgba) => Self::convert_rgba_to_prgba(src, dst),
254 (Rgba, Nv16) => Self::convert_rgba_to_nv16(src, dst),
255 (Rgb, Rgb) => Self::copy_image(src, dst),
256 (Rgb, Rgba) => Self::convert_rgb_to_rgba(src, dst),
257 (Rgb, Grey) => Self::convert_rgb_to_grey(src, dst),
258 (Rgb, Yuyv) => Self::convert_rgb_to_yuyv(src, dst),
259 (Rgb, PlanarRgb) => Self::convert_rgb_to_8bps(src, dst),
260 (Rgb, PlanarRgba) => Self::convert_rgb_to_prgba(src, dst),
261 (Rgb, Nv16) => Self::convert_rgb_to_nv16(src, dst),
262 (Grey, Rgb) => Self::convert_grey_to_rgb(src, dst),
263 (Grey, Rgba) => Self::convert_grey_to_rgba(src, dst),
264 (Grey, Grey) => Self::copy_image(src, dst),
265 (Grey, Yuyv) => Self::convert_grey_to_yuyv(src, dst),
266 (Grey, PlanarRgb) => Self::convert_grey_to_8bps(src, dst),
267 (Grey, PlanarRgba) => Self::convert_grey_to_prgba(src, dst),
268 (Grey, Nv16) => Self::convert_grey_to_nv16(src, dst),
269
270 (Nv16, Rgb) => Self::convert_nv16_to_rgb(src, dst),
272 (Nv16, Rgba) => Self::convert_nv16_to_rgba(src, dst),
273 (PlanarRgb, Rgb) => Self::convert_8bps_to_rgb(src, dst),
274 (PlanarRgb, Rgba) => Self::convert_8bps_to_rgba(src, dst),
275 (PlanarRgba, Rgb) => Self::convert_prgba_to_rgb(src, dst),
276 (PlanarRgba, Rgba) => Self::convert_prgba_to_rgba(src, dst),
277
278 (Bgra, Bgra) => Self::copy_image(src, dst),
280 (Nv12, Bgra) => {
281 Self::convert_nv12_to_rgba(src, dst)?;
282 Self::swizzle_rb_4chan(dst)
283 }
284 (Nv16, Bgra) => {
285 Self::convert_nv16_to_rgba(src, dst)?;
286 Self::swizzle_rb_4chan(dst)
287 }
288 (Yuyv, Bgra) => {
289 Self::convert_yuyv_to_rgba(src, dst)?;
290 Self::swizzle_rb_4chan(dst)
291 }
292 (Vyuy, Bgra) => {
293 Self::convert_vyuy_to_rgba(src, dst)?;
294 Self::swizzle_rb_4chan(dst)
295 }
296 (Rgba, Bgra) => {
297 dst.map()?.copy_from_slice(&src.map()?);
298 Self::swizzle_rb_4chan(dst)
299 }
300 (Rgb, Bgra) => {
301 Self::convert_rgb_to_rgba(src, dst)?;
302 Self::swizzle_rb_4chan(dst)
303 }
304 (Grey, Bgra) => {
305 Self::convert_grey_to_rgba(src, dst)?;
306 Self::swizzle_rb_4chan(dst)
307 }
308 (PlanarRgb, Bgra) => {
309 Self::convert_8bps_to_rgba(src, dst)?;
310 Self::swizzle_rb_4chan(dst)
311 }
312 (PlanarRgba, Bgra) => {
313 Self::convert_prgba_to_rgba(src, dst)?;
314 Self::swizzle_rb_4chan(dst)
315 }
316
317 (s, d) => Err(Error::NotSupported(format!("Conversion from {s} to {d}",))),
318 }
319 }
320
321 pub(crate) fn fill_image_outside_crop_u8(
323 dst: &mut Tensor<u8>,
324 rgba: [u8; 4],
325 crop: Rect,
326 ) -> Result<()> {
327 let dst_fmt = dst.format().unwrap();
328 let dst_w = dst.width().unwrap();
329 let dst_h = dst.height().unwrap();
330 let mut dst_map = dst.map()?;
331 let dst_tup = (dst_map.as_mut_slice(), dst_w, dst_h);
332 Self::fill_outside_crop_dispatch(dst_tup, dst_fmt, rgba, crop)
333 }
334
335 fn fill_outside_crop_dispatch(
337 dst: (&mut [u8], usize, usize),
338 fmt: PixelFormat,
339 rgba: [u8; 4],
340 crop: Rect,
341 ) -> Result<()> {
342 use PixelFormat::*;
343 match fmt {
344 Rgba | Bgra => Self::fill_image_outside_crop_(dst, rgba, crop),
345 Rgb => Self::fill_image_outside_crop_(dst, Self::rgba_to_rgb(rgba), crop),
346 Grey => Self::fill_image_outside_crop_(dst, Self::rgba_to_grey(rgba), crop),
347 Yuyv => Self::fill_image_outside_crop_(
348 (dst.0, dst.1 / 2, dst.2),
349 Self::rgba_to_yuyv(rgba),
350 Rect::new(crop.left / 2, crop.top, crop.width.div_ceil(2), crop.height),
351 ),
352 PlanarRgb => Self::fill_image_outside_crop_planar(dst, Self::rgba_to_rgb(rgba), crop),
353 PlanarRgba => Self::fill_image_outside_crop_planar(dst, rgba, crop),
354 Nv16 => {
355 let yuyv = Self::rgba_to_yuyv(rgba);
356 Self::fill_image_outside_crop_yuv_semiplanar(dst, yuyv[0], [yuyv[1], yuyv[3]], crop)
357 }
358 _ => Err(Error::Internal(format!(
359 "Found unexpected destination {fmt}",
360 ))),
361 }
362 }
363}
364
impl ImageProcessorTrait for CPUProcessor {
    /// Converts `src` into `dst` applying rotation, flip, and crop.
    /// Thin wrapper around [`CPUProcessor::convert_impl`].
    fn convert(
        &mut self,
        src: &TensorDyn,
        dst: &mut TensorDyn,
        rotation: Rotation,
        flip: Flip,
        crop: Crop,
    ) -> Result<()> {
        self.convert_impl(src, dst, rotation, flip, crop)
    }

    /// Renders already-decoded segmentation masks (plus detection boxes) into
    /// `dst`, after seeding `dst` from `overlay.background` (or zero-fill).
    fn draw_decoded_masks(
        &mut self,
        dst: &mut TensorDyn,
        detect: &[DetectBox],
        segmentation: &[Segmentation],
        overlay: crate::MaskOverlay<'_>,
    ) -> Result<()> {
        prepare_dst_base_cpu(dst, overlay.background)?;
        let dst = dst.as_u8_mut().ok_or(Error::NotAnImage)?;
        self.draw_decoded_masks_impl(
            dst,
            detect,
            segmentation,
            overlay.opacity,
            overlay.color_mode,
        )
    }

    /// Renders prototype-coefficient instance masks into `dst`, after seeding
    /// `dst` from `overlay.background` (or zero-fill).
    fn draw_proto_masks(
        &mut self,
        dst: &mut TensorDyn,
        detect: &[DetectBox],
        proto_data: &ProtoData,
        overlay: crate::MaskOverlay<'_>,
    ) -> Result<()> {
        prepare_dst_base_cpu(dst, overlay.background)?;
        let dst = dst.as_u8_mut().ok_or(Error::NotAnImage)?;
        self.draw_proto_masks_impl(
            dst,
            detect,
            proto_data,
            overlay.opacity,
            overlay.letterbox,
            overlay.color_mode,
        )
    }

    /// Replaces the class-color palette. Only as many entries as the fixed
    /// palette holds (20) are copied; extra input colors are silently
    /// ignored, and unset entries keep their previous value.
    fn set_class_colors(&mut self, colors: &[[u8; 4]]) -> Result<()> {
        for (c, new_c) in self.colors.iter_mut().zip(colors.iter()) {
            *c = *new_c;
        }
        Ok(())
    }
}
424
impl CPUProcessor {
    /// Core conversion entry point: dispatches on the (src, dst) dtype pair.
    ///
    /// Supported pairs: u8 -> u8 directly, and u8 -> i8 by converting as u8
    /// first and then XOR-ing 0x80 into every color byte (x ^ 0x80 == x - 128
    /// for u8, i.e. a signed reinterpretation). Other pairs return
    /// `Error::NotSupported`.
    pub(crate) fn convert_impl(
        &mut self,
        src: &TensorDyn,
        dst: &mut TensorDyn,
        rotation: Rotation,
        flip: Flip,
        crop: Crop,
    ) -> Result<()> {
        let src_fmt = src.format().ok_or(Error::NotAnImage)?;
        let dst_fmt = dst.format().ok_or(Error::NotAnImage)?;

        match (src.dtype(), dst.dtype()) {
            (DType::U8, DType::U8) => {
                // Dtypes were just matched, so these casts cannot fail.
                let src = src.as_u8().unwrap();
                let dst = dst.as_u8_mut().unwrap();
                self.convert_u8(src, dst, src_fmt, dst_fmt, rotation, flip, crop)
            }
            (DType::U8, DType::I8) => {
                let src_u8 = src.as_u8().unwrap();
                let dst_i8 = dst.as_i8_mut().unwrap();
                // SAFETY: views the i8 tensor as u8 for the intermediate
                // conversion; i8 and u8 have identical size/alignment, and
                // `Tensor<T>` is assumed layout-compatible across the two.
                // NOTE(review): confirm `Tensor<T>` carries no T-dependent
                // state that this cast would invalidate.
                let dst_u8 = unsafe { &mut *(dst_i8 as *mut Tensor<i8> as *mut Tensor<u8>) };
                self.convert_u8(src_u8, dst_u8, src_fmt, dst_fmt, rotation, flip, crop)?;
                let mut map = dst_u8.map()?;
                // Shift into signed range, leaving alpha bytes untouched.
                apply_int8_xor_bias(map.as_mut_slice(), dst_fmt);
                Ok(())
            }
            (s, d) => Err(Error::NotSupported(format!("dtype {s} -> {d}",))),
        }
    }

    /// u8 -> u8 conversion pipeline.
    ///
    /// Strategy: pick a packed "intermediate" format that both the format
    /// converters and the resizer understand, then:
    ///   1. convert src -> intermediate (skipped when src already is it),
    ///   2. resize/flip/rotate/crop in the intermediate format (if needed),
    ///   3. convert intermediate -> dst format (if needed),
    ///   4. fill outside `crop.dst_rect` with `crop.dst_color` (if given).
    /// A direct format conversion short-circuits the whole pipeline when no
    /// geometric transform is required.
    #[allow(clippy::too_many_arguments)]
    fn convert_u8(
        &mut self,
        src: &Tensor<u8>,
        dst: &mut Tensor<u8>,
        src_fmt: PixelFormat,
        dst_fmt: PixelFormat,
        rotation: Rotation,
        flip: Flip,
        crop: Crop,
    ) -> Result<()> {
        use PixelFormat::*;

        let src_w = src.width().unwrap();
        let src_h = src.height().unwrap();
        let dst_w = dst.width().unwrap();
        let dst_h = dst.height().unwrap();

        crop.check_crop_dims(src_w, src_h, dst_w, dst_h)?;

        // Intermediate format table: for each supported (src, dst) pair, the
        // packed format used between conversion and resizing.
        let intermediate = match (src_fmt, dst_fmt) {
            (Nv12, Rgb) => Rgb,
            (Nv12, Rgba) => Rgba,
            (Nv12, Grey) => Grey,
            (Nv12, Yuyv) => Rgba,
            (Nv12, Nv16) => Rgba,
            (Nv12, PlanarRgb) => Rgb,
            (Nv12, PlanarRgba) => Rgba,
            (Yuyv, Rgb) => Rgb,
            (Yuyv, Rgba) => Rgba,
            (Yuyv, Grey) => Grey,
            (Yuyv, Yuyv) => Rgba,
            (Yuyv, PlanarRgb) => Rgb,
            (Yuyv, PlanarRgba) => Rgba,
            (Yuyv, Nv16) => Rgba,
            (Vyuy, Rgb) => Rgb,
            (Vyuy, Rgba) => Rgba,
            (Vyuy, Grey) => Grey,
            (Vyuy, Vyuy) => Rgba,
            (Vyuy, PlanarRgb) => Rgb,
            (Vyuy, PlanarRgba) => Rgba,
            (Vyuy, Nv16) => Rgba,
            (Rgba, Rgb) => Rgba,
            (Rgba, Rgba) => Rgba,
            (Rgba, Grey) => Grey,
            (Rgba, Yuyv) => Rgba,
            (Rgba, PlanarRgb) => Rgba,
            (Rgba, PlanarRgba) => Rgba,
            (Rgba, Nv16) => Rgba,
            (Rgb, Rgb) => Rgb,
            (Rgb, Rgba) => Rgb,
            (Rgb, Grey) => Grey,
            (Rgb, Yuyv) => Rgb,
            (Rgb, PlanarRgb) => Rgb,
            (Rgb, PlanarRgba) => Rgb,
            (Rgb, Nv16) => Rgb,
            (Grey, Rgb) => Rgb,
            (Grey, Rgba) => Rgba,
            (Grey, Grey) => Grey,
            (Grey, Yuyv) => Grey,
            (Grey, PlanarRgb) => Grey,
            (Grey, PlanarRgba) => Grey,
            (Grey, Nv16) => Grey,
            (Nv12, Bgra) => Rgba,
            (Yuyv, Bgra) => Rgba,
            (Vyuy, Bgra) => Rgba,
            (Rgba, Bgra) => Rgba,
            (Rgb, Bgra) => Rgb,
            (Grey, Bgra) => Grey,
            (Bgra, Bgra) => Bgra,
            (Nv16, Rgb) => Rgb,
            (Nv16, Rgba) => Rgba,
            (Nv16, Bgra) => Rgba,
            (PlanarRgb, Rgb) => Rgb,
            (PlanarRgb, Rgba) => Rgb,
            (PlanarRgb, Bgra) => Rgb,
            (PlanarRgba, Rgb) => Rgba,
            (PlanarRgba, Rgba) => Rgba,
            (PlanarRgba, Bgra) => Rgba,
            (s, d) => {
                return Err(Error::NotSupported(format!("Conversion from {s} to {d}",)));
            }
        };

        // Any rotation, flip, size change, or non-full crop rect forces the
        // full resize pipeline.
        let need_resize_flip_rotation = rotation != Rotation::None
            || flip != Flip::None
            || src_w != dst_w
            || src_h != dst_h
            || crop.src_rect.is_some_and(|c| {
                c != Rect {
                    left: 0,
                    top: 0,
                    width: src_w,
                    height: src_h,
                }
            })
            || crop.dst_rect.is_some_and(|c| {
                c != Rect {
                    left: 0,
                    top: 0,
                    width: dst_w,
                    height: dst_h,
                }
            });

        // Fast path: pure format conversion with no geometry change.
        if !need_resize_flip_rotation && Self::support_conversion_pf(src_fmt, dst_fmt) {
            return Self::convert_format_pf(src, dst, src_fmt, dst_fmt);
        }

        // YUYV packs pixel pairs, so an odd-width destination can't be
        // represented.
        if dst_fmt == Yuyv && !dst_w.is_multiple_of(2) {
            return Err(Error::NotSupported(format!(
                "{} destination must have width divisible by 2",
                dst_fmt,
            )));
        }

        // Step 1: bring the source into the intermediate format (borrow the
        // source directly when it already matches).
        let mut tmp_buffer;
        let tmp;
        let tmp_fmt;
        if intermediate != src_fmt {
            tmp_buffer = Tensor::<u8>::image(src_w, src_h, intermediate, Some(TensorMemory::Mem))?;

            Self::convert_format_pf(src, &mut tmp_buffer, src_fmt, intermediate)?;
            tmp = &tmp_buffer;
            tmp_fmt = intermediate;
        } else {
            tmp = src;
            tmp_fmt = src_fmt;
        }

        // NOTE(review): (Bgra, Bgra) selects Bgra as the intermediate, so a
        // Bgra->Bgra call that needs a resize reaches this assert with
        // tmp_fmt == Bgra and would fire in debug builds — confirm whether
        // that path is intended to be unreachable.
        debug_assert!(matches!(tmp_fmt, Rgb | Rgba | Grey));
        if tmp_fmt == dst_fmt {
            // Steps 2+3 collapse: resize directly into the destination.
            self.resize_flip_rotate_pf(tmp, dst, dst_fmt, rotation, flip, crop)?;
        } else if !need_resize_flip_rotation {
            // Step 2 not needed: just finish the format conversion.
            Self::convert_format_pf(tmp, dst, tmp_fmt, dst_fmt)?;
        } else {
            // General path: resize into a scratch image, then convert to the
            // destination format.
            let mut tmp2 = Tensor::<u8>::image(dst_w, dst_h, tmp_fmt, Some(TensorMemory::Mem))?;
            // When writing into a sub-rect without a fill color, preserve the
            // destination's existing content by pre-converting it into the
            // scratch buffer.
            if crop.dst_rect.is_some_and(|c| {
                c != Rect {
                    left: 0,
                    top: 0,
                    width: dst_w,
                    height: dst_h,
                }
            }) && crop.dst_color.is_none()
            {
                Self::convert_format_pf(dst, &mut tmp2, dst_fmt, tmp_fmt)?;
            }
            self.resize_flip_rotate_pf(tmp, &mut tmp2, tmp_fmt, rotation, flip, crop)?;
            Self::convert_format_pf(&tmp2, dst, tmp_fmt, dst_fmt)?;
        }
        // Step 4: paint the border outside the destination rect, if a fill
        // color was requested and the rect doesn't cover the whole image.
        if let (Some(dst_rect), Some(dst_color)) = (crop.dst_rect, crop.dst_color) {
            let full_rect = Rect {
                left: 0,
                top: 0,
                width: dst_w,
                height: dst_h,
            };
            if dst_rect != full_rect {
                Self::fill_image_outside_crop_u8(dst, dst_color, dst_rect)?;
            }
        }

        Ok(())
    }

    /// Rasterizes detection boxes and decoded segmentation masks into `dst`
    /// (RGBA or RGB destinations only).
    fn draw_decoded_masks_impl(
        &mut self,
        dst: &mut Tensor<u8>,
        detect: &[DetectBox],
        segmentation: &[Segmentation],
        opacity: f32,
        color_mode: crate::ColorMode,
    ) -> Result<()> {
        let dst_fmt = dst.format().ok_or(Error::NotAnImage)?;
        if !matches!(dst_fmt, PixelFormat::Rgba | PixelFormat::Rgb) {
            return Err(crate::Error::NotSupported(
                "CPU image rendering only supports RGBA or RGB images".to_string(),
            ));
        }

        let _timer = FunctionTimer::new("CPUProcessor::draw_decoded_masks");

        let dst_w = dst.width().unwrap();
        let dst_h = dst.height().unwrap();
        let dst_rs = row_stride_for(dst_w, dst_fmt);
        let dst_c = dst_fmt.channels();

        let mut map = dst.map()?;
        let dst_slice = map.as_mut_slice();

        // Boxes are drawn first so masks composite over them.
        self.render_box(dst_w, dst_h, dst_rs, dst_c, dst_slice, detect, color_mode)?;

        if segmentation.is_empty() {
            return Ok(());
        }

        // Heuristic: >1 channel in dim 2 of the first mask tensor means one
        // semantic map; otherwise each entry is a per-detection instance
        // mask. NOTE(review): assumes the mask tensor is at least rank-3 —
        // this indexes shape()[2] unchecked.
        let is_semantic = segmentation[0].segmentation.shape()[2] > 1;

        if is_semantic {
            self.render_modelpack_segmentation(
                dst_w,
                dst_h,
                dst_rs,
                dst_c,
                dst_slice,
                &segmentation[0],
                opacity,
            )?;
        } else {
            // Instance masks: pair each mask with its detection for coloring.
            for (idx, (seg, det)) in segmentation.iter().zip(detect).enumerate() {
                let color_index = color_mode.index(idx, det.label);
                self.render_yolo_segmentation(
                    dst_w,
                    dst_h,
                    dst_rs,
                    dst_c,
                    dst_slice,
                    seg,
                    color_index,
                    opacity,
                )?;
            }
        }

        Ok(())
    }

    /// Rasterizes prototype-coefficient instance masks (YOLO-style proto
    /// masks) into `dst` (RGBA or RGB only).
    ///
    /// Validates proto/coefficient tensor shapes, normalizes coefficients to
    /// f32, then dispatches on the proto dtype (F32/F16/I8-quantized) into
    /// the shared generic inner loop.
    fn draw_proto_masks_impl(
        &mut self,
        dst: &mut Tensor<u8>,
        detect: &[DetectBox],
        proto_data: &ProtoData,
        opacity: f32,
        letterbox: Option<[f32; 4]>,
        color_mode: crate::ColorMode,
    ) -> Result<()> {
        let dst_fmt = dst.format().ok_or(Error::NotAnImage)?;
        if !matches!(dst_fmt, PixelFormat::Rgba | PixelFormat::Rgb) {
            return Err(crate::Error::NotSupported(
                "CPU image rendering only supports RGBA or RGB images".to_string(),
            ));
        }

        let _timer = FunctionTimer::new("CPUProcessor::draw_proto_masks");

        let dst_w = dst.width().unwrap();
        let dst_h = dst.height().unwrap();
        let dst_rs = row_stride_for(dst_w, dst_fmt);
        let channels = dst_fmt.channels();

        let mut map = dst.map()?;
        let dst_slice = map.as_mut_slice();

        self.render_box(
            dst_w, dst_h, dst_rs, channels, dst_slice, detect, color_mode,
        )?;

        if detect.is_empty() {
            return Ok(());
        }
        // Protos are expected as (proto_h, proto_w, num_protos).
        let proto_shape = proto_data.protos.shape();
        if proto_shape.len() != 3 {
            return Err(Error::InvalidShape(format!(
                "protos tensor must be rank-3, got {proto_shape:?}"
            )));
        }
        let proto_h = proto_shape[0];
        let proto_w = proto_shape[1];
        let num_protos = proto_shape[2];
        // Coefficients are expected as (num_detections, num_protos).
        let coeff_shape = proto_data.mask_coefficients.shape();
        if coeff_shape.len() != 2 {
            return Err(Error::InvalidShape(format!(
                "mask_coefficients tensor must be rank-2, got {coeff_shape:?}"
            )));
        }
        if coeff_shape[0] == 0 {
            return Ok(());
        }
        if coeff_shape[1] != num_protos {
            return Err(Error::InvalidShape(format!(
                "mask_coefficients second dimension must match num_protos \
                ({num_protos}), got {coeff_shape:?}"
            )));
        }

        // Normalize coefficients to a flat f32 vec so the inner loop is
        // generic only over the proto element type.
        let coeff_f32: Vec<f32> = match proto_data.mask_coefficients.dtype() {
            DType::F32 => {
                let t = proto_data.mask_coefficients.as_f32().expect("F32");
                let m = t.map()?;
                m.as_slice().to_vec()
            }
            DType::F16 => {
                let t = proto_data.mask_coefficients.as_f16().expect("F16");
                let m = t.map()?;
                m.as_slice().iter().map(|v| v.to_f32()).collect()
            }
            other => {
                return Err(Error::InvalidShape(format!(
                    "mask_coefficients dtype {other:?} not supported"
                )));
            }
        };

        // Letterbox [x0, y0, x1, y1] maps destination coordinates back into
        // the model's (un-padded) input space; identity when absent.
        let (lx0, lx_range, ly0, ly_range) = match letterbox {
            Some([lx0, ly0, lx1, ly1]) => (lx0, lx1 - lx0, ly0, ly1 - ly0),
            None => (0.0_f32, 1.0_f32, 0.0_f32, 1.0_f32),
        };

        match proto_data.protos.dtype() {
            DType::F32 => {
                let t = proto_data.protos.as_f32().expect("F32");
                let m = t.map()?;
                self.draw_proto_masks_inner(
                    dst_slice,
                    dst_w,
                    dst_h,
                    dst_rs,
                    channels,
                    detect,
                    m.as_slice(),
                    &coeff_f32,
                    proto_h,
                    proto_w,
                    num_protos,
                    opacity,
                    (lx0, lx_range, ly0, ly_range),
                    color_mode,
                    // acc_scale == 0.0 is the "no dequantization" sentinel.
                    0.0_f32,
                    |p: &f32, _| *p,
                );
            }
            DType::F16 => {
                let t = proto_data.protos.as_f16().expect("F16");
                let m = t.map()?;
                self.draw_proto_masks_inner(
                    dst_slice,
                    dst_w,
                    dst_h,
                    dst_rs,
                    channels,
                    detect,
                    m.as_slice(),
                    &coeff_f32,
                    proto_h,
                    proto_w,
                    num_protos,
                    opacity,
                    (lx0, lx_range, ly0, ly_range),
                    color_mode,
                    0.0_f32,
                    |p: &half::f16, _| p.to_f32(),
                );
            }
            DType::I8 => {
                use edgefirst_tensor::QuantMode;
                let t = proto_data.protos.as_i8().expect("I8");
                let m = t.map()?;
                let quant = t.quantization().ok_or_else(|| {
                    Error::InvalidShape("I8 protos require quantization metadata".into())
                })?;
                // Per-tensor quantization only: dequantize as
                // (value - zero_point) * scale; the zero-point is subtracted
                // per element and the scale applied once to the accumulated
                // sum (acc_scale below).
                let (scale, zp) = match quant.mode() {
                    QuantMode::PerTensor { scale, zero_point } => (scale, zero_point as f32),
                    QuantMode::PerTensorSymmetric { scale } => (scale, 0.0),
                    QuantMode::PerChannel { axis, .. }
                    | QuantMode::PerChannelSymmetric { axis, .. } => {
                        return Err(Error::NotSupported(format!(
                            "per-channel quantization (axis={axis}) in draw_proto_masks \
                            CPU path not yet supported"
                        )));
                    }
                };
                self.draw_proto_masks_inner(
                    dst_slice,
                    dst_w,
                    dst_h,
                    dst_rs,
                    channels,
                    detect,
                    m.as_slice(),
                    &coeff_f32,
                    proto_h,
                    proto_w,
                    num_protos,
                    opacity,
                    (lx0, lx_range, ly0, ly_range),
                    color_mode,
                    scale,
                    move |p: &i8, _| (*p as f32) - zp,
                );
            }
            other => {
                return Err(Error::InvalidShape(format!(
                    "proto tensor dtype {other:?} not supported"
                )));
            }
        }

        Ok(())
    }

    /// Generic inner loop for proto-mask rendering.
    ///
    /// For each detection's bounding box, every destination pixel is mapped
    /// (through the letterbox) into proto space, the proto stack is sampled
    /// bilinearly, dotted with the detection's coefficients, passed through a
    /// sigmoid, thresholded at 0.5, and alpha-blended into `dst_slice` using
    /// the detection's palette color.
    ///
    /// `acc_scale == 0.0` means "no scaling" (float protos); otherwise the
    /// accumulated dot product is multiplied by it (I8 dequantization scale).
    /// `load_f32` converts one proto element to f32 (its second argument is
    /// unused by current callers).
    #[allow(clippy::too_many_arguments)]
    fn draw_proto_masks_inner<P: Copy>(
        &self,
        dst_slice: &mut [u8],
        dst_w: usize,
        dst_h: usize,
        dst_rs: usize,
        channels: usize,
        detect: &[DetectBox],
        protos: &[P],
        coeff_all_f32: &[f32],
        proto_h: usize,
        proto_w: usize,
        num_protos: usize,
        opacity: f32,
        letterbox_xy: (f32, f32, f32, f32),
        color_mode: crate::ColorMode,
        acc_scale: f32,
        load_f32: impl Fn(&P, f32) -> f32 + Copy,
    ) {
        let (lx0, lx_range, ly0, ly_range) = letterbox_xy;
        // Protos are laid out (h, w, p): one row advances by w * p elements.
        let stride_y = proto_w * num_protos;
        for (idx, det) in detect.iter().enumerate() {
            let coeff = &coeff_all_f32[idx * num_protos..(idx + 1) * num_protos];
            let color_index = color_mode.index(idx, det.label);
            let color = self.colors[color_index % self.colors.len()];
            // Effective blend alpha = palette alpha scaled by opacity.
            let alpha = if opacity == 1.0 {
                color[3] as u16
            } else {
                (color[3] as f32 * opacity).round() as u16
            };

            // Bounding box in destination pixels; bbox coordinates are
            // treated as normalized to [0, 1] — NOTE(review): confirm with
            // the decoder's DetectBox contract.
            let start_x = (dst_w as f32 * det.bbox.xmin).round() as usize;
            let start_y = (dst_h as f32 * det.bbox.ymin).round() as usize;
            let end_x = ((dst_w as f32 * det.bbox.xmax).round() as usize).min(dst_w);
            let end_y = ((dst_h as f32 * det.bbox.ymax).round() as usize).min(dst_h);

            for y in start_y..end_y {
                for x in start_x..end_x {
                    // Map the destination pixel center into proto space
                    // (the -0.5 aligns to sample centers for bilinear).
                    let px = (lx0 + (x as f32 / dst_w as f32) * lx_range) * proto_w as f32 - 0.5;
                    let py = (ly0 + (y as f32 / dst_h as f32) * ly_range) * proto_h as f32 - 0.5;

                    // Clamped bilinear neighborhood and weights.
                    let x0 = (px.floor() as isize).clamp(0, proto_w as isize - 1) as usize;
                    let y0 = (py.floor() as isize).clamp(0, proto_h as isize - 1) as usize;
                    let x1 = (x0 + 1).min(proto_w - 1);
                    let y1 = (y0 + 1).min(proto_h - 1);
                    let fx = px - px.floor();
                    let fy = py - py.floor();
                    let w00 = (1.0 - fx) * (1.0 - fy);
                    let w10 = fx * (1.0 - fy);
                    let w01 = (1.0 - fx) * fy;
                    let w11 = fx * fy;
                    let b00 = y0 * stride_y + x0 * num_protos;
                    let b10 = y0 * stride_y + x1 * num_protos;
                    let b01 = y1 * stride_y + x0 * num_protos;
                    let b11 = y1 * stride_y + x1 * num_protos;
                    // Dot product of the detection's coefficients with the
                    // bilinearly-sampled proto vector.
                    let mut acc = 0.0_f32;
                    for p in 0..num_protos {
                        let v00 = load_f32(&protos[b00 + p], 0.0);
                        let v10 = load_f32(&protos[b10 + p], 0.0);
                        let v01 = load_f32(&protos[b01 + p], 0.0);
                        let v11 = load_f32(&protos[b11 + p], 0.0);
                        let val = w00 * v00 + w10 * v10 + w01 * v01 + w11 * v11;
                        acc += coeff[p] * val;
                    }
                    // Apply dequantization scale if requested (0.0 sentinel
                    // means the protos were already in float units).
                    let final_acc = if acc_scale == 0.0 {
                        acc
                    } else {
                        acc_scale * acc
                    };
                    // Sigmoid + 0.5 threshold: skip pixels outside the mask.
                    let mask = 1.0 / (1.0 + (-final_acc).exp());
                    if mask < 0.5 {
                        continue;
                    }
                    let dst_index = y * dst_rs + x * channels;
                    // Alpha-blend the class color into the RGB channels
                    // (alpha channel, when present, is left as-is).
                    for c in 0..3 {
                        dst_slice[dst_index + c] = ((color[c] as u16 * alpha
                            + dst_slice[dst_index + c] as u16 * (255 - alpha))
                            / 255) as u8;
                    }
                }
            }
        }
    }
}