1use crate::{Crop, Error, Flip, FunctionTimer, ImageProcessorTrait, Rect, Result, Rotation};
5use edgefirst_decoder::{DetectBox, ProtoData, Segmentation};
6use edgefirst_tensor::{
7 DType, PixelFormat, Tensor, TensorDyn, TensorMapTrait, TensorMemory, TensorTrait,
8};
9
10mod convert;
11mod masks;
12mod resize;
13mod tests;
14
/// CPU-backed image processor: pixel-format conversion, geometric transforms
/// (resize / rotate / flip / crop) via `fast_image_resize`, and rasterization
/// of detection/segmentation overlays.
#[derive(Debug, Clone)]
pub struct CPUProcessor {
    // Resizer instance reused across calls.
    resizer: fast_image_resize::Resizer,
    // Resize algorithm/options applied on every resize (bilinear or nearest).
    options: fast_image_resize::ResizeOptions,
    // RGBA class palette for overlays; entries replaceable via `set_class_colors`.
    colors: [[u8; 4]; 20],
}
26
// SAFETY(review): these impls assert CPUProcessor may be moved/shared across
// threads even though `fast_image_resize::Resizer` does not derive Send/Sync
// here automatically. This is sound only if Resizer holds no thread-affine or
// aliased state — TODO confirm against the fast_image_resize version in use.
unsafe impl Send for CPUProcessor {}
unsafe impl Sync for CPUProcessor {}
29
impl Default for CPUProcessor {
    /// Defaults to the bilinear-filtering configuration (same as [`CPUProcessor::new`]).
    fn default() -> Self {
        Self::new_bilinear()
    }
}
35
36fn prepare_dst_base_cpu(dst: &mut TensorDyn, background: Option<&TensorDyn>) -> Result<()> {
47 match background {
48 Some(bg) => {
49 if bg.shape() != dst.shape() {
50 return Err(Error::InvalidShape(
51 "background shape does not match dst".into(),
52 ));
53 }
54 if bg.format() != dst.format() {
55 return Err(Error::InvalidShape(
56 "background pixel format does not match dst".into(),
57 ));
58 }
59 let bg_u8 = bg.as_u8().ok_or(Error::NotAnImage)?;
60 let dst_u8 = dst.as_u8_mut().ok_or(Error::NotAnImage)?;
61 let bg_map = bg_u8.map()?;
62 let mut dst_map = dst_u8.map()?;
63 let bg_slice = bg_map.as_slice();
64 let dst_slice = dst_map.as_mut_slice();
65 if bg_slice.len() != dst_slice.len() {
66 return Err(Error::InvalidShape(
67 "background buffer size does not match dst".into(),
68 ));
69 }
70 dst_slice.copy_from_slice(bg_slice);
71 }
72 None => {
73 let dst_u8 = dst.as_u8_mut().ok_or(Error::NotAnImage)?;
74 let mut dst_map = dst_u8.map()?;
75 dst_map.as_mut_slice().fill(0);
76 }
77 }
78 Ok(())
79}
80
81fn row_stride_for(width: usize, fmt: PixelFormat) -> usize {
83 use edgefirst_tensor::PixelLayout;
84 match fmt.layout() {
85 PixelLayout::Packed => width * fmt.channels(),
86 PixelLayout::Planar | PixelLayout::SemiPlanar => width,
87 _ => width, }
89}
90
91pub(crate) fn apply_int8_xor_bias(data: &mut [u8], fmt: PixelFormat) {
96 use edgefirst_tensor::PixelLayout;
97 if !fmt.has_alpha() {
98 for b in data.iter_mut() {
99 *b ^= 0x80;
100 }
101 } else if fmt.layout() == PixelLayout::Planar {
102 let channels = fmt.channels();
104 let plane_size = data.len() / channels;
105 for b in data[..plane_size * (channels - 1)].iter_mut() {
106 *b ^= 0x80;
107 }
108 } else {
109 let channels = fmt.channels();
111 for pixel in data.chunks_exact_mut(channels) {
112 for b in &mut pixel[..channels - 1] {
113 *b ^= 0x80;
114 }
115 }
116 }
117}
118
119impl CPUProcessor {
    /// Create a processor with the default (bilinear) resize configuration.
    pub fn new() -> Self {
        Self::new_bilinear()
    }
124
125 fn new_bilinear() -> Self {
127 let resizer = fast_image_resize::Resizer::new();
128 let options = fast_image_resize::ResizeOptions::new()
129 .resize_alg(fast_image_resize::ResizeAlg::Convolution(
130 fast_image_resize::FilterType::Bilinear,
131 ))
132 .use_alpha(false);
133
134 log::debug!("CPUConverter created");
135 Self {
136 resizer,
137 options,
138 colors: crate::DEFAULT_COLORS_U8,
139 }
140 }
141
142 pub fn new_nearest() -> Self {
144 let resizer = fast_image_resize::Resizer::new();
145 let options = fast_image_resize::ResizeOptions::new()
146 .resize_alg(fast_image_resize::ResizeAlg::Nearest)
147 .use_alpha(false);
148 log::debug!("CPUConverter created");
149 Self {
150 resizer,
151 options,
152 colors: crate::DEFAULT_COLORS_U8,
153 }
154 }
155
156 pub(crate) fn support_conversion_pf(src: PixelFormat, dst: PixelFormat) -> bool {
157 use PixelFormat::*;
158 matches!(
159 (src, dst),
160 (Nv12, Rgb)
161 | (Nv12, Rgba)
162 | (Nv12, Grey)
163 | (Nv16, Rgb)
164 | (Nv16, Rgba)
165 | (Nv16, Bgra)
166 | (Yuyv, Rgb)
167 | (Yuyv, Rgba)
168 | (Yuyv, Grey)
169 | (Yuyv, Yuyv)
170 | (Yuyv, PlanarRgb)
171 | (Yuyv, PlanarRgba)
172 | (Yuyv, Nv16)
173 | (Vyuy, Rgb)
174 | (Vyuy, Rgba)
175 | (Vyuy, Grey)
176 | (Vyuy, Vyuy)
177 | (Vyuy, PlanarRgb)
178 | (Vyuy, PlanarRgba)
179 | (Vyuy, Nv16)
180 | (Rgba, Rgb)
181 | (Rgba, Rgba)
182 | (Rgba, Grey)
183 | (Rgba, Yuyv)
184 | (Rgba, PlanarRgb)
185 | (Rgba, PlanarRgba)
186 | (Rgba, Nv16)
187 | (Rgb, Rgb)
188 | (Rgb, Rgba)
189 | (Rgb, Grey)
190 | (Rgb, Yuyv)
191 | (Rgb, PlanarRgb)
192 | (Rgb, PlanarRgba)
193 | (Rgb, Nv16)
194 | (Grey, Rgb)
195 | (Grey, Rgba)
196 | (Grey, Grey)
197 | (Grey, Yuyv)
198 | (Grey, PlanarRgb)
199 | (Grey, PlanarRgba)
200 | (Grey, Nv16)
201 | (Nv12, Bgra)
202 | (Yuyv, Bgra)
203 | (Vyuy, Bgra)
204 | (Rgba, Bgra)
205 | (Rgb, Bgra)
206 | (Grey, Bgra)
207 | (Bgra, Bgra)
208 | (PlanarRgb, Rgb)
209 | (PlanarRgb, Rgba)
210 | (PlanarRgba, Rgb)
211 | (PlanarRgba, Rgba)
212 | (PlanarRgb, Bgra)
213 | (PlanarRgba, Bgra)
214 )
215 }
216
    /// Pure pixel-format conversion between same-sized images (no resize,
    /// rotation, flip, or crop). Dispatches to the per-format kernels defined
    /// in the sibling `convert` module.
    ///
    /// BGRA destinations are produced by converting to RGBA first and then
    /// swapping the R/B channels in place with `swizzle_rb_4chan`.
    ///
    /// Returns `Error::NotSupported` for any pair not covered by
    /// `support_conversion_pf`.
    pub(crate) fn convert_format_pf(
        src: &Tensor<u8>,
        dst: &mut Tensor<u8>,
        src_fmt: PixelFormat,
        dst_fmt: PixelFormat,
    ) -> Result<()> {
        let _timer = FunctionTimer::new(format!(
            "ImageProcessor::convert_format {} to {}",
            src_fmt, dst_fmt,
        ));

        use PixelFormat::*;
        match (src_fmt, dst_fmt) {
            (Nv12, Rgb) => Self::convert_nv12_to_rgb(src, dst),
            (Nv12, Rgba) => Self::convert_nv12_to_rgba(src, dst),
            (Nv12, Grey) => Self::convert_nv12_to_grey(src, dst),
            (Yuyv, Rgb) => Self::convert_yuyv_to_rgb(src, dst),
            (Yuyv, Rgba) => Self::convert_yuyv_to_rgba(src, dst),
            (Yuyv, Grey) => Self::convert_yuyv_to_grey(src, dst),
            // Identity pairs degrade to a plain buffer copy.
            (Yuyv, Yuyv) => Self::copy_image(src, dst),
            (Yuyv, PlanarRgb) => Self::convert_yuyv_to_8bps(src, dst),
            (Yuyv, PlanarRgba) => Self::convert_yuyv_to_prgba(src, dst),
            (Yuyv, Nv16) => Self::convert_yuyv_to_nv16(src, dst),
            (Vyuy, Rgb) => Self::convert_vyuy_to_rgb(src, dst),
            (Vyuy, Rgba) => Self::convert_vyuy_to_rgba(src, dst),
            (Vyuy, Grey) => Self::convert_vyuy_to_grey(src, dst),
            (Vyuy, Vyuy) => Self::copy_image(src, dst),
            (Vyuy, PlanarRgb) => Self::convert_vyuy_to_8bps(src, dst),
            (Vyuy, PlanarRgba) => Self::convert_vyuy_to_prgba(src, dst),
            (Vyuy, Nv16) => Self::convert_vyuy_to_nv16(src, dst),
            (Rgba, Rgb) => Self::convert_rgba_to_rgb(src, dst),
            (Rgba, Rgba) => Self::copy_image(src, dst),
            (Rgba, Grey) => Self::convert_rgba_to_grey(src, dst),
            (Rgba, Yuyv) => Self::convert_rgba_to_yuyv(src, dst),
            (Rgba, PlanarRgb) => Self::convert_rgba_to_8bps(src, dst),
            (Rgba, PlanarRgba) => Self::convert_rgba_to_prgba(src, dst),
            (Rgba, Nv16) => Self::convert_rgba_to_nv16(src, dst),
            (Rgb, Rgb) => Self::copy_image(src, dst),
            (Rgb, Rgba) => Self::convert_rgb_to_rgba(src, dst),
            (Rgb, Grey) => Self::convert_rgb_to_grey(src, dst),
            (Rgb, Yuyv) => Self::convert_rgb_to_yuyv(src, dst),
            (Rgb, PlanarRgb) => Self::convert_rgb_to_8bps(src, dst),
            (Rgb, PlanarRgba) => Self::convert_rgb_to_prgba(src, dst),
            (Rgb, Nv16) => Self::convert_rgb_to_nv16(src, dst),
            (Grey, Rgb) => Self::convert_grey_to_rgb(src, dst),
            (Grey, Rgba) => Self::convert_grey_to_rgba(src, dst),
            (Grey, Grey) => Self::copy_image(src, dst),
            (Grey, Yuyv) => Self::convert_grey_to_yuyv(src, dst),
            (Grey, PlanarRgb) => Self::convert_grey_to_8bps(src, dst),
            (Grey, PlanarRgba) => Self::convert_grey_to_prgba(src, dst),
            (Grey, Nv16) => Self::convert_grey_to_nv16(src, dst),

            (Nv16, Rgb) => Self::convert_nv16_to_rgb(src, dst),
            (Nv16, Rgba) => Self::convert_nv16_to_rgba(src, dst),
            (PlanarRgb, Rgb) => Self::convert_8bps_to_rgb(src, dst),
            (PlanarRgb, Rgba) => Self::convert_8bps_to_rgba(src, dst),
            (PlanarRgba, Rgb) => Self::convert_prgba_to_rgb(src, dst),
            (PlanarRgba, Rgba) => Self::convert_prgba_to_rgba(src, dst),

            (Bgra, Bgra) => Self::copy_image(src, dst),
            // BGRA destinations: produce RGBA in `dst`, then swap R/B in place.
            (Nv12, Bgra) => {
                Self::convert_nv12_to_rgba(src, dst)?;
                Self::swizzle_rb_4chan(dst)
            }
            (Nv16, Bgra) => {
                Self::convert_nv16_to_rgba(src, dst)?;
                Self::swizzle_rb_4chan(dst)
            }
            (Yuyv, Bgra) => {
                Self::convert_yuyv_to_rgba(src, dst)?;
                Self::swizzle_rb_4chan(dst)
            }
            (Vyuy, Bgra) => {
                Self::convert_vyuy_to_rgba(src, dst)?;
                Self::swizzle_rb_4chan(dst)
            }
            (Rgba, Bgra) => {
                dst.map()?.copy_from_slice(&src.map()?);
                Self::swizzle_rb_4chan(dst)
            }
            (Rgb, Bgra) => {
                Self::convert_rgb_to_rgba(src, dst)?;
                Self::swizzle_rb_4chan(dst)
            }
            (Grey, Bgra) => {
                Self::convert_grey_to_rgba(src, dst)?;
                Self::swizzle_rb_4chan(dst)
            }
            (PlanarRgb, Bgra) => {
                Self::convert_8bps_to_rgba(src, dst)?;
                Self::swizzle_rb_4chan(dst)
            }
            (PlanarRgba, Bgra) => {
                Self::convert_prgba_to_rgba(src, dst)?;
                Self::swizzle_rb_4chan(dst)
            }

            (s, d) => Err(Error::NotSupported(format!("Conversion from {s} to {d}",))),
        }
    }
320
321 pub(crate) fn fill_image_outside_crop_u8(
323 dst: &mut Tensor<u8>,
324 rgba: [u8; 4],
325 crop: Rect,
326 ) -> Result<()> {
327 let dst_fmt = dst.format().unwrap();
328 let dst_w = dst.width().unwrap();
329 let dst_h = dst.height().unwrap();
330 let mut dst_map = dst.map()?;
331 let dst_tup = (dst_map.as_mut_slice(), dst_w, dst_h);
332 Self::fill_outside_crop_dispatch(dst_tup, dst_fmt, rgba, crop)
333 }
334
    /// Route an "outside-crop" fill to the format-specific helper, first
    /// converting the RGBA fill color into the destination's color space.
    ///
    /// `dst` is (pixel buffer, width in pixels, height in pixels); `crop` is
    /// the rectangle to leave untouched.
    fn fill_outside_crop_dispatch(
        dst: (&mut [u8], usize, usize),
        fmt: PixelFormat,
        rgba: [u8; 4],
        crop: Rect,
    ) -> Result<()> {
        use PixelFormat::*;
        match fmt {
            // NOTE(review): Bgra reuses `rgba` unswizzled — confirm callers
            // supply the color pre-ordered for BGRA destinations.
            Rgba | Bgra => Self::fill_image_outside_crop_(dst, rgba, crop),
            Rgb => Self::fill_image_outside_crop_(dst, Self::rgba_to_rgb(rgba), crop),
            Grey => Self::fill_image_outside_crop_(dst, Self::rgba_to_grey(rgba), crop),
            // YUYV packs two pixels into one 4-byte unit, so work in units of
            // pixel pairs: halve the width and left edge, round the crop width
            // up so a partially covered pair stays inside the crop.
            Yuyv => Self::fill_image_outside_crop_(
                (dst.0, dst.1 / 2, dst.2),
                Self::rgba_to_yuyv(rgba),
                Rect::new(crop.left / 2, crop.top, crop.width.div_ceil(2), crop.height),
            ),
            PlanarRgb => Self::fill_image_outside_crop_planar(dst, Self::rgba_to_rgb(rgba), crop),
            PlanarRgba => Self::fill_image_outside_crop_planar(dst, rgba, crop),
            Nv16 => {
                // Semi-planar YUV: split the packed YUYV color into the Y value
                // and the (U, V) pair for the chroma plane.
                let yuyv = Self::rgba_to_yuyv(rgba);
                Self::fill_image_outside_crop_yuv_semiplanar(dst, yuyv[0], [yuyv[1], yuyv[3]], crop)
            }
            _ => Err(Error::Internal(format!(
                "Found unexpected destination {fmt}",
            ))),
        }
    }
363}
364
365impl ImageProcessorTrait for CPUProcessor {
366 fn convert(
367 &mut self,
368 src: &TensorDyn,
369 dst: &mut TensorDyn,
370 rotation: Rotation,
371 flip: Flip,
372 crop: Crop,
373 ) -> Result<()> {
374 self.convert_impl(src, dst, rotation, flip, crop)
375 }
376
377 fn draw_decoded_masks(
378 &mut self,
379 dst: &mut TensorDyn,
380 detect: &[DetectBox],
381 segmentation: &[Segmentation],
382 overlay: crate::MaskOverlay<'_>,
383 ) -> Result<()> {
384 prepare_dst_base_cpu(dst, overlay.background)?;
388 let dst = dst.as_u8_mut().ok_or(Error::NotAnImage)?;
389 self.draw_decoded_masks_impl(
390 dst,
391 detect,
392 segmentation,
393 overlay.opacity,
394 overlay.color_mode,
395 )
396 }
397
398 fn draw_proto_masks(
399 &mut self,
400 dst: &mut TensorDyn,
401 detect: &[DetectBox],
402 proto_data: &ProtoData,
403 overlay: crate::MaskOverlay<'_>,
404 ) -> Result<()> {
405 prepare_dst_base_cpu(dst, overlay.background)?;
406 let dst = dst.as_u8_mut().ok_or(Error::NotAnImage)?;
407 self.draw_proto_masks_impl(
408 dst,
409 detect,
410 proto_data,
411 overlay.opacity,
412 overlay.letterbox,
413 overlay.color_mode,
414 )
415 }
416
417 fn set_class_colors(&mut self, colors: &[[u8; 4]]) -> Result<()> {
418 for (c, new_c) in self.colors.iter_mut().zip(colors.iter()) {
419 *c = *new_c;
420 }
421 Ok(())
422 }
423}
424
425impl CPUProcessor {
    /// Dtype-dispatching entry point behind [`ImageProcessorTrait::convert`].
    ///
    /// Supports u8 → u8 directly, and u8 → i8 by running the u8 conversion
    /// into the destination buffer and then XOR-ing 0x80 into every color
    /// byte (`apply_int8_xor_bias`), which maps u8 [0, 255] onto the i8 bit
    /// pattern for [-128, 127].
    pub(crate) fn convert_impl(
        &mut self,
        src: &TensorDyn,
        dst: &mut TensorDyn,
        rotation: Rotation,
        flip: Flip,
        crop: Crop,
    ) -> Result<()> {
        // Both tensors must carry image metadata (a pixel format).
        let src_fmt = src.format().ok_or(Error::NotAnImage)?;
        let dst_fmt = dst.format().ok_or(Error::NotAnImage)?;

        match (src.dtype(), dst.dtype()) {
            (DType::U8, DType::U8) => {
                // unwrap is safe: dtype was just matched as U8.
                let src = src.as_u8().unwrap();
                let dst = dst.as_u8_mut().unwrap();
                self.convert_u8(src, dst, src_fmt, dst_fmt, rotation, flip, crop)
            }
            (DType::U8, DType::I8) => {
                let src_u8 = src.as_u8().unwrap();
                let dst_i8 = dst.as_i8_mut().unwrap();
                // SAFETY(review): reinterprets `Tensor<i8>` as `Tensor<u8>` so the
                // u8 pipeline can write into it. Sound only if Tensor<T>'s layout
                // is independent of T beyond the element type (i8 and u8 have
                // identical size/alignment) — TODO confirm against the Tensor
                // definition.
                let dst_u8 = unsafe { &mut *(dst_i8 as *mut Tensor<i8> as *mut Tensor<u8>) };
                self.convert_u8(src_u8, dst_u8, src_fmt, dst_fmt, rotation, flip, crop)?;
                // Shift the unsigned result into signed range (alpha bytes excluded).
                let mut map = dst_u8.map()?;
                apply_int8_xor_bias(map.as_mut_slice(), dst_fmt);
                Ok(())
            }
            (s, d) => Err(Error::NotSupported(format!("dtype {s} -> {d}",))),
        }
    }
463
464 #[allow(clippy::too_many_arguments)]
466 fn convert_u8(
467 &mut self,
468 src: &Tensor<u8>,
469 dst: &mut Tensor<u8>,
470 src_fmt: PixelFormat,
471 dst_fmt: PixelFormat,
472 rotation: Rotation,
473 flip: Flip,
474 crop: Crop,
475 ) -> Result<()> {
476 use PixelFormat::*;
477
478 let src_w = src.width().unwrap();
479 let src_h = src.height().unwrap();
480 let dst_w = dst.width().unwrap();
481 let dst_h = dst.height().unwrap();
482
483 crop.check_crop_dims(src_w, src_h, dst_w, dst_h)?;
484
485 let intermediate = match (src_fmt, dst_fmt) {
487 (Nv12, Rgb) => Rgb,
488 (Nv12, Rgba) => Rgba,
489 (Nv12, Grey) => Grey,
490 (Nv12, Yuyv) => Rgba,
491 (Nv12, Nv16) => Rgba,
492 (Nv12, PlanarRgb) => Rgb,
493 (Nv12, PlanarRgba) => Rgba,
494 (Yuyv, Rgb) => Rgb,
495 (Yuyv, Rgba) => Rgba,
496 (Yuyv, Grey) => Grey,
497 (Yuyv, Yuyv) => Rgba,
498 (Yuyv, PlanarRgb) => Rgb,
499 (Yuyv, PlanarRgba) => Rgba,
500 (Yuyv, Nv16) => Rgba,
501 (Vyuy, Rgb) => Rgb,
502 (Vyuy, Rgba) => Rgba,
503 (Vyuy, Grey) => Grey,
504 (Vyuy, Vyuy) => Rgba,
505 (Vyuy, PlanarRgb) => Rgb,
506 (Vyuy, PlanarRgba) => Rgba,
507 (Vyuy, Nv16) => Rgba,
508 (Rgba, Rgb) => Rgba,
509 (Rgba, Rgba) => Rgba,
510 (Rgba, Grey) => Grey,
511 (Rgba, Yuyv) => Rgba,
512 (Rgba, PlanarRgb) => Rgba,
513 (Rgba, PlanarRgba) => Rgba,
514 (Rgba, Nv16) => Rgba,
515 (Rgb, Rgb) => Rgb,
516 (Rgb, Rgba) => Rgb,
517 (Rgb, Grey) => Grey,
518 (Rgb, Yuyv) => Rgb,
519 (Rgb, PlanarRgb) => Rgb,
520 (Rgb, PlanarRgba) => Rgb,
521 (Rgb, Nv16) => Rgb,
522 (Grey, Rgb) => Rgb,
523 (Grey, Rgba) => Rgba,
524 (Grey, Grey) => Grey,
525 (Grey, Yuyv) => Grey,
526 (Grey, PlanarRgb) => Grey,
527 (Grey, PlanarRgba) => Grey,
528 (Grey, Nv16) => Grey,
529 (Nv12, Bgra) => Rgba,
530 (Yuyv, Bgra) => Rgba,
531 (Vyuy, Bgra) => Rgba,
532 (Rgba, Bgra) => Rgba,
533 (Rgb, Bgra) => Rgb,
534 (Grey, Bgra) => Grey,
535 (Bgra, Bgra) => Bgra,
536 (Nv16, Rgb) => Rgb,
537 (Nv16, Rgba) => Rgba,
538 (Nv16, Bgra) => Rgba,
539 (PlanarRgb, Rgb) => Rgb,
540 (PlanarRgb, Rgba) => Rgb,
541 (PlanarRgb, Bgra) => Rgb,
542 (PlanarRgba, Rgb) => Rgba,
543 (PlanarRgba, Rgba) => Rgba,
544 (PlanarRgba, Bgra) => Rgba,
545 (s, d) => {
546 return Err(Error::NotSupported(format!("Conversion from {s} to {d}",)));
547 }
548 };
549
550 let need_resize_flip_rotation = rotation != Rotation::None
551 || flip != Flip::None
552 || src_w != dst_w
553 || src_h != dst_h
554 || crop.src_rect.is_some_and(|c| {
555 c != Rect {
556 left: 0,
557 top: 0,
558 width: src_w,
559 height: src_h,
560 }
561 })
562 || crop.dst_rect.is_some_and(|c| {
563 c != Rect {
564 left: 0,
565 top: 0,
566 width: dst_w,
567 height: dst_h,
568 }
569 });
570
571 if !need_resize_flip_rotation && Self::support_conversion_pf(src_fmt, dst_fmt) {
573 return Self::convert_format_pf(src, dst, src_fmt, dst_fmt);
574 }
575
576 if dst_fmt == Yuyv && !dst_w.is_multiple_of(2) {
578 return Err(Error::NotSupported(format!(
579 "{} destination must have width divisible by 2",
580 dst_fmt,
581 )));
582 }
583
584 let mut tmp_buffer;
586 let tmp;
587 let tmp_fmt;
588 if intermediate != src_fmt {
589 let _s = tracing::trace_span!(
590 "cpu_format_convert",
591 from = ?src_fmt,
592 to = ?intermediate,
593 pass = "pre_resize",
594 )
595 .entered();
596 tmp_buffer = Tensor::<u8>::image(src_w, src_h, intermediate, Some(TensorMemory::Mem))?;
597
598 Self::convert_format_pf(src, &mut tmp_buffer, src_fmt, intermediate)?;
599 tmp = &tmp_buffer;
600 tmp_fmt = intermediate;
601 } else {
602 tmp = src;
603 tmp_fmt = src_fmt;
604 }
605
606 debug_assert!(matches!(tmp_fmt, Rgb | Rgba | Grey));
608 if tmp_fmt == dst_fmt {
609 let _s = tracing::trace_span!("cpu_resize").entered();
610 self.resize_flip_rotate_pf(tmp, dst, dst_fmt, rotation, flip, crop)?;
611 } else if !need_resize_flip_rotation {
612 let _s = tracing::trace_span!(
613 "cpu_format_convert",
614 from = ?tmp_fmt,
615 to = ?dst_fmt,
616 pass = "direct",
617 )
618 .entered();
619 Self::convert_format_pf(tmp, dst, tmp_fmt, dst_fmt)?;
620 } else {
621 let mut tmp2 = Tensor::<u8>::image(dst_w, dst_h, tmp_fmt, Some(TensorMemory::Mem))?;
622 if crop.dst_rect.is_some_and(|c| {
623 c != Rect {
624 left: 0,
625 top: 0,
626 width: dst_w,
627 height: dst_h,
628 }
629 }) && crop.dst_color.is_none()
630 {
631 Self::convert_format_pf(dst, &mut tmp2, dst_fmt, tmp_fmt)?;
632 }
633 {
634 let _s = tracing::trace_span!("cpu_resize").entered();
635 self.resize_flip_rotate_pf(tmp, &mut tmp2, tmp_fmt, rotation, flip, crop)?;
636 }
637 {
638 let _s = tracing::trace_span!(
639 "cpu_format_convert",
640 from = ?tmp_fmt,
641 to = ?dst_fmt,
642 pass = "post_resize",
643 )
644 .entered();
645 Self::convert_format_pf(&tmp2, dst, tmp_fmt, dst_fmt)?;
646 }
647 }
648 if let (Some(dst_rect), Some(dst_color)) = (crop.dst_rect, crop.dst_color) {
649 let full_rect = Rect {
650 left: 0,
651 top: 0,
652 width: dst_w,
653 height: dst_h,
654 };
655 if dst_rect != full_rect {
656 Self::fill_image_outside_crop_u8(dst, dst_color, dst_rect)?;
657 }
658 }
659
660 Ok(())
661 }
662
    /// Render detection boxes and decoded segmentation masks directly into an
    /// RGB/RGBA `dst` image.
    ///
    /// Treats `segmentation` as semantic (one multi-channel class map for the
    /// whole frame) when the first mask has more than one channel; otherwise
    /// each entry is an instance mask paired positionally with `detect`.
    fn draw_decoded_masks_impl(
        &mut self,
        dst: &mut Tensor<u8>,
        detect: &[DetectBox],
        segmentation: &[Segmentation],
        opacity: f32,
        color_mode: crate::ColorMode,
    ) -> Result<()> {
        let dst_fmt = dst.format().ok_or(Error::NotAnImage)?;
        if !matches!(dst_fmt, PixelFormat::Rgba | PixelFormat::Rgb) {
            return Err(crate::Error::NotSupported(
                "CPU image rendering only supports RGBA or RGB images".to_string(),
            ));
        }

        let _timer = FunctionTimer::new("CPUProcessor::draw_decoded_masks");

        // unwrap is safe: format() above proved this is an image tensor.
        let dst_w = dst.width().unwrap();
        let dst_h = dst.height().unwrap();
        let dst_rs = row_stride_for(dst_w, dst_fmt);
        let dst_c = dst_fmt.channels();

        let mut map = dst.map()?;
        let dst_slice = map.as_mut_slice();

        // Boxes first, so masks blend on top of them.
        self.render_box(dst_w, dst_h, dst_rs, dst_c, dst_slice, detect, color_mode)?;

        if segmentation.is_empty() {
            return Ok(());
        }

        // NOTE(review): indexes shape()[2] on the first mask; assumes the mask
        // tensor is at least rank-3 — confirm the decoder guarantees this.
        let is_semantic = segmentation[0].segmentation.shape()[2] > 1;

        if is_semantic {
            // Semantic: a single class map covers the full image.
            self.render_modelpack_segmentation(
                dst_w,
                dst_h,
                dst_rs,
                dst_c,
                dst_slice,
                &segmentation[0],
                opacity,
            )?;
        } else {
            // Instance: one mask per detection, colored per `color_mode`.
            for (idx, (seg, det)) in segmentation.iter().zip(detect).enumerate() {
                let color_index = color_mode.index(idx, det.label);
                self.render_yolo_segmentation(
                    dst_w,
                    dst_h,
                    dst_rs,
                    dst_c,
                    dst_slice,
                    seg,
                    color_index,
                    opacity,
                )?;
            }
        }

        Ok(())
    }
726
    /// Render detection boxes and prototype-based (YOLO-style) instance masks
    /// into an RGB/RGBA `dst` image.
    ///
    /// Per-detection masks are the sigmoid of the dot product between that
    /// detection's coefficient row and the bilinearly-sampled prototype
    /// tensor. Coefficients are dequantized up front into `coeff_f32`; the
    /// prototype tensor is dispatched by dtype into the generic
    /// `draw_proto_masks_inner`, with I8 protos dequantized on the fly.
    ///
    /// `letterbox` (when given) is the normalized [x0, y0, x1, y1] sub-window
    /// of the proto grid that corresponds to the full destination image.
    fn draw_proto_masks_impl(
        &mut self,
        dst: &mut Tensor<u8>,
        detect: &[DetectBox],
        proto_data: &ProtoData,
        opacity: f32,
        letterbox: Option<[f32; 4]>,
        color_mode: crate::ColorMode,
    ) -> Result<()> {
        let dst_fmt = dst.format().ok_or(Error::NotAnImage)?;
        if !matches!(dst_fmt, PixelFormat::Rgba | PixelFormat::Rgb) {
            return Err(crate::Error::NotSupported(
                "CPU image rendering only supports RGBA or RGB images".to_string(),
            ));
        }

        let _timer = FunctionTimer::new("CPUProcessor::draw_proto_masks");

        // unwrap is safe: format() above proved this is an image tensor.
        let dst_w = dst.width().unwrap();
        let dst_h = dst.height().unwrap();
        let dst_rs = row_stride_for(dst_w, dst_fmt);
        let channels = dst_fmt.channels();

        let mut map = dst.map()?;
        let dst_slice = map.as_mut_slice();

        // Boxes first, so masks blend on top of them.
        self.render_box(
            dst_w, dst_h, dst_rs, channels, dst_slice, detect, color_mode,
        )?;

        if detect.is_empty() {
            return Ok(());
        }
        // Validate tensor ranks and the coeff/proto dimension agreement
        // before touching any data.
        let proto_shape = proto_data.protos.shape();
        if proto_shape.len() != 3 {
            return Err(Error::InvalidShape(format!(
                "protos tensor must be rank-3, got {proto_shape:?}"
            )));
        }
        // Proto layout is (height, width, num_protos).
        let proto_h = proto_shape[0];
        let proto_w = proto_shape[1];
        let num_protos = proto_shape[2];
        let coeff_shape = proto_data.mask_coefficients.shape();
        if coeff_shape.len() != 2 {
            return Err(Error::InvalidShape(format!(
                "mask_coefficients tensor must be rank-2, got {coeff_shape:?}"
            )));
        }
        if coeff_shape[0] == 0 {
            // No coefficient rows: nothing to composite.
            return Ok(());
        }
        if coeff_shape[1] != num_protos {
            return Err(Error::InvalidShape(format!(
                "mask_coefficients second dimension must match num_protos \
                 ({num_protos}), got {coeff_shape:?}"
            )));
        }

        // Materialize the coefficients as f32, dequantizing I8 input when
        // quantization metadata is present (raw cast otherwise).
        let coeff_f32: Vec<f32> = match proto_data.mask_coefficients.dtype() {
            DType::F32 => {
                let t = proto_data.mask_coefficients.as_f32().expect("F32");
                let m = t.map()?;
                m.as_slice().to_vec()
            }
            DType::F16 => {
                let t = proto_data.mask_coefficients.as_f16().expect("F16");
                let m = t.map()?;
                m.as_slice().iter().map(|v| v.to_f32()).collect()
            }
            DType::I8 => {
                let t = proto_data.mask_coefficients.as_i8().expect("I8");
                let m = t.map()?;
                if let Some(q) = t.quantization() {
                    use edgefirst_tensor::QuantMode;
                    let (scale, zp) = match q.mode() {
                        QuantMode::PerTensor { scale, zero_point } => (scale, zero_point as f32),
                        QuantMode::PerTensorSymmetric { scale } => (scale, 0.0),
                        other => {
                            return Err(Error::NotSupported(format!(
                                "I8 mask_coefficients quantization mode {other:?} not supported"
                            )));
                        }
                    };
                    m.as_slice()
                        .iter()
                        .map(|&v| (v as f32 - zp) * scale)
                        .collect()
                } else {
                    m.as_slice().iter().map(|&v| v as f32).collect()
                }
            }
            other => {
                return Err(Error::InvalidShape(format!(
                    "mask_coefficients dtype {other:?} not supported"
                )));
            }
        };

        // Decompose the letterbox window into origin + extent per axis;
        // without a letterbox the whole proto grid maps to the image.
        let (lx0, lx_range, ly0, ly_range) = match letterbox {
            Some([lx0, ly0, lx1, ly1]) => (lx0, lx1 - lx0, ly0, ly1 - ly0),
            None => (0.0_f32, 1.0_f32, 0.0_f32, 1.0_f32),
        };

        // Dispatch on the proto dtype; `load_f32` converts one element and the
        // I8 path defers the scale multiply to `acc_scale` (applied once per
        // accumulated sum instead of per element).
        match proto_data.protos.dtype() {
            DType::F32 => {
                let t = proto_data.protos.as_f32().expect("F32");
                let m = t.map()?;
                self.draw_proto_masks_inner(
                    dst_slice,
                    dst_w,
                    dst_h,
                    dst_rs,
                    channels,
                    detect,
                    m.as_slice(),
                    &coeff_f32,
                    proto_h,
                    proto_w,
                    num_protos,
                    opacity,
                    (lx0, lx_range, ly0, ly_range),
                    color_mode,
                    0.0_f32,
                    |p: &f32, _| *p,
                );
            }
            DType::F16 => {
                let t = proto_data.protos.as_f16().expect("F16");
                let m = t.map()?;
                self.draw_proto_masks_inner(
                    dst_slice,
                    dst_w,
                    dst_h,
                    dst_rs,
                    channels,
                    detect,
                    m.as_slice(),
                    &coeff_f32,
                    proto_h,
                    proto_w,
                    num_protos,
                    opacity,
                    (lx0, lx_range, ly0, ly_range),
                    color_mode,
                    0.0_f32,
                    |p: &half::f16, _| p.to_f32(),
                );
            }
            DType::I8 => {
                use edgefirst_tensor::QuantMode;
                let t = proto_data.protos.as_i8().expect("I8");
                let m = t.map()?;
                let quant = t.quantization().ok_or_else(|| {
                    Error::InvalidShape("I8 protos require quantization metadata".into())
                })?;
                let (scale, zp) = match quant.mode() {
                    QuantMode::PerTensor { scale, zero_point } => (scale, zero_point as f32),
                    QuantMode::PerTensorSymmetric { scale } => (scale, 0.0),
                    QuantMode::PerChannel { axis, .. }
                    | QuantMode::PerChannelSymmetric { axis, .. } => {
                        return Err(Error::NotSupported(format!(
                            "per-channel quantization (axis={axis}) in draw_proto_masks \
                             CPU path not yet supported"
                        )));
                    }
                };
                self.draw_proto_masks_inner(
                    dst_slice,
                    dst_w,
                    dst_h,
                    dst_rs,
                    channels,
                    detect,
                    m.as_slice(),
                    &coeff_f32,
                    proto_h,
                    proto_w,
                    num_protos,
                    opacity,
                    (lx0, lx_range, ly0, ly_range),
                    color_mode,
                    scale,
                    move |p: &i8, _| (*p as f32) - zp,
                );
            }
            other => {
                return Err(Error::InvalidShape(format!(
                    "proto tensor dtype {other:?} not supported"
                )));
            }
        }

        Ok(())
    }
926
    /// Composite prototype-based instance masks into `dst_slice`, generic over
    /// the proto element type `P`.
    ///
    /// For each detection, every pixel inside its (normalized) bounding box is
    /// evaluated as `sigmoid(acc_scale? * Σ coeff[p] * bilinear(protos, p))`;
    /// pixels with mask ≥ 0.5 are alpha-blended with the detection's class
    /// color. `protos` is flattened (h, w, num_protos) row-major.
    ///
    /// `acc_scale == 0.0` is a sentinel meaning "no extra scaling" (used by
    /// the float paths); the I8 path passes its quantization scale here and
    /// `load_f32` subtracts the zero point per element.
    #[allow(clippy::too_many_arguments)]
    fn draw_proto_masks_inner<P: Copy>(
        &self,
        dst_slice: &mut [u8],
        dst_w: usize,
        dst_h: usize,
        dst_rs: usize,
        channels: usize,
        detect: &[DetectBox],
        protos: &[P],
        coeff_all_f32: &[f32],
        proto_h: usize,
        proto_w: usize,
        num_protos: usize,
        opacity: f32,
        letterbox_xy: (f32, f32, f32, f32),
        color_mode: crate::ColorMode,
        acc_scale: f32,
        load_f32: impl Fn(&P, f32) -> f32 + Copy,
    ) {
        let (lx0, lx_range, ly0, ly_range) = letterbox_xy;
        // Elements per proto-grid row.
        let stride_y = proto_w * num_protos;
        for (idx, det) in detect.iter().enumerate() {
            // Coefficient row for this detection.
            let coeff = &coeff_all_f32[idx * num_protos..(idx + 1) * num_protos];
            let color_index = color_mode.index(idx, det.label);
            let color = self.colors[color_index % self.colors.len()];
            // Effective alpha = palette alpha scaled by the overlay opacity.
            let alpha = if opacity == 1.0 {
                color[3] as u16
            } else {
                (color[3] as f32 * opacity).round() as u16
            };

            // Bounding box in destination pixels (bbox coords are normalized).
            let start_x = (dst_w as f32 * det.bbox.xmin).round() as usize;
            let start_y = (dst_h as f32 * det.bbox.ymin).round() as usize;
            let end_x = ((dst_w as f32 * det.bbox.xmax).round() as usize).min(dst_w);
            let end_y = ((dst_h as f32 * det.bbox.ymax).round() as usize).min(dst_h);

            for y in start_y..end_y {
                for x in start_x..end_x {
                    // Map the destination pixel through the letterbox window
                    // into proto-grid coordinates (0.5 = texel center).
                    let px = (lx0 + (x as f32 / dst_w as f32) * lx_range) * proto_w as f32 - 0.5;
                    let py = (ly0 + (y as f32 / dst_h as f32) * ly_range) * proto_h as f32 - 0.5;

                    // Bilinear corners, clamped to the grid. Note the weights
                    // below use the unclamped fractional part, so coordinates
                    // falling outside the grid reuse the edge texels.
                    let x0 = (px.floor() as isize).clamp(0, proto_w as isize - 1) as usize;
                    let y0 = (py.floor() as isize).clamp(0, proto_h as isize - 1) as usize;
                    let x1 = (x0 + 1).min(proto_w - 1);
                    let y1 = (y0 + 1).min(proto_h - 1);
                    let fx = px - px.floor();
                    let fy = py - py.floor();
                    let w00 = (1.0 - fx) * (1.0 - fy);
                    let w10 = fx * (1.0 - fy);
                    let w01 = (1.0 - fx) * fy;
                    let w11 = fx * fy;
                    // Flat offsets of the four corner texels.
                    let b00 = y0 * stride_y + x0 * num_protos;
                    let b10 = y0 * stride_y + x1 * num_protos;
                    let b01 = y1 * stride_y + x0 * num_protos;
                    let b11 = y1 * stride_y + x1 * num_protos;
                    // Dot product of the coefficient row with the interpolated
                    // proto vector at this pixel.
                    let mut acc = 0.0_f32;
                    for p in 0..num_protos {
                        let v00 = load_f32(&protos[b00 + p], 0.0);
                        let v10 = load_f32(&protos[b10 + p], 0.0);
                        let v01 = load_f32(&protos[b01 + p], 0.0);
                        let v11 = load_f32(&protos[b11 + p], 0.0);
                        let val = w00 * v00 + w10 * v10 + w01 * v01 + w11 * v11;
                        acc += coeff[p] * val;
                    }
                    // Apply the deferred quantization scale (0.0 = no scaling).
                    let final_acc = if acc_scale == 0.0 {
                        acc
                    } else {
                        acc_scale * acc
                    };
                    // Sigmoid, then threshold at 0.5.
                    let mask = 1.0 / (1.0 + (-final_acc).exp());
                    if mask < 0.5 {
                        continue;
                    }
                    // Alpha-blend the class color into the first three (color)
                    // channels; a fourth (alpha) channel is left untouched.
                    let dst_index = y * dst_rs + x * channels;
                    for c in 0..3 {
                        dst_slice[dst_index + c] = ((color[c] as u16 * alpha
                            + dst_slice[dst_index + c] as u16 * (255 - alpha))
                            / 255) as u8;
                    }
                }
            }
        }
    }
1017}