1use crate::{Crop, Error, Flip, FunctionTimer, ImageProcessorTrait, Rect, Result, Rotation};
5use edgefirst_decoder::{DetectBox, ProtoData, Segmentation};
6use edgefirst_tensor::{
7 DType, PixelFormat, Tensor, TensorDyn, TensorMapTrait, TensorMemory, TensorTrait,
8};
9
10mod convert;
11mod masks;
12mod resize;
13mod tests;
14
/// CPU-backed implementation of [`ImageProcessorTrait`].
///
/// Holds a `fast_image_resize` resizer together with the resize options selected by
/// the constructor, plus the colour palette used when rendering masks.
#[derive(Debug, Clone)]
pub struct CPUProcessor {
    /// Resizer instance from `fast_image_resize`.
    resizer: fast_image_resize::Resizer,
    /// Resize options (algorithm and alpha handling) fixed at construction time.
    options: fast_image_resize::ResizeOptions,
    /// Palette of 20 four-byte colours. Proto-mask rendering indexes it modulo its
    /// length, blends bytes 0..3 and uses byte 3 as the blend alpha.
    colors: [[u8; 4]; 20],
}
26
// NOTE(review): these impls assert that `CPUProcessor` may be moved to, and shared
// between, threads. Nothing visible in this file shows why the auto traits are not
// derived automatically — presumably a field of `fast_image_resize::Resizer` is not
// auto-`Send`/`Sync`. Confirm the invariant and record it as a `SAFETY:` comment.
unsafe impl Send for CPUProcessor {}
unsafe impl Sync for CPUProcessor {}
29
30impl Default for CPUProcessor {
31 fn default() -> Self {
32 Self::new_bilinear()
33 }
34}
35
36fn prepare_dst_base_cpu(dst: &mut TensorDyn, background: Option<&TensorDyn>) -> Result<()> {
47 match background {
48 Some(bg) => {
49 if bg.shape() != dst.shape() {
50 return Err(Error::InvalidShape(
51 "background shape does not match dst".into(),
52 ));
53 }
54 if bg.format() != dst.format() {
55 return Err(Error::InvalidShape(
56 "background pixel format does not match dst".into(),
57 ));
58 }
59 let bg_u8 = bg.as_u8().ok_or(Error::NotAnImage)?;
60 let dst_u8 = dst.as_u8_mut().ok_or(Error::NotAnImage)?;
61 let bg_map = bg_u8.map()?;
62 let mut dst_map = dst_u8.map()?;
63 let bg_slice = bg_map.as_slice();
64 let dst_slice = dst_map.as_mut_slice();
65 if bg_slice.len() != dst_slice.len() {
66 return Err(Error::InvalidShape(
67 "background buffer size does not match dst".into(),
68 ));
69 }
70 dst_slice.copy_from_slice(bg_slice);
71 }
72 None => {
73 let dst_u8 = dst.as_u8_mut().ok_or(Error::NotAnImage)?;
74 let mut dst_map = dst_u8.map()?;
75 dst_map.as_mut_slice().fill(0);
76 }
77 }
78 Ok(())
79}
80
81fn row_stride_for(width: usize, fmt: PixelFormat) -> usize {
83 use edgefirst_tensor::PixelLayout;
84 match fmt.layout() {
85 PixelLayout::Packed => width * fmt.channels(),
86 PixelLayout::Planar | PixelLayout::SemiPlanar => width,
87 _ => width, }
89}
90
91fn tensor_row_stride(tensor: &Tensor<u8>) -> usize {
96 tensor.effective_row_stride().unwrap_or_else(|| {
97 let w = tensor.width().unwrap_or(0);
98 let fmt = tensor.format().unwrap_or(PixelFormat::Rgb);
99 row_stride_for(w, fmt)
100 })
101}
102
103pub(crate) fn apply_int8_xor_bias(data: &mut [u8], fmt: PixelFormat) {
108 use edgefirst_tensor::PixelLayout;
109 if !fmt.has_alpha() {
110 for b in data.iter_mut() {
111 *b ^= 0x80;
112 }
113 } else if fmt.layout() == PixelLayout::Planar {
114 let channels = fmt.channels();
116 let plane_size = data.len() / channels;
117 for b in data[..plane_size * (channels - 1)].iter_mut() {
118 *b ^= 0x80;
119 }
120 } else {
121 let channels = fmt.channels();
123 for pixel in data.chunks_exact_mut(channels) {
124 for b in &mut pixel[..channels - 1] {
125 *b ^= 0x80;
126 }
127 }
128 }
129}
130
131impl CPUProcessor {
132 pub fn new() -> Self {
134 Self::new_bilinear()
135 }
136
137 fn new_bilinear() -> Self {
139 let resizer = fast_image_resize::Resizer::new();
140 let options = fast_image_resize::ResizeOptions::new()
141 .resize_alg(fast_image_resize::ResizeAlg::Convolution(
142 fast_image_resize::FilterType::Bilinear,
143 ))
144 .use_alpha(false);
145
146 log::debug!("CPUConverter created");
147 Self {
148 resizer,
149 options,
150 colors: crate::DEFAULT_COLORS_U8,
151 }
152 }
153
154 pub fn new_nearest() -> Self {
156 let resizer = fast_image_resize::Resizer::new();
157 let options = fast_image_resize::ResizeOptions::new()
158 .resize_alg(fast_image_resize::ResizeAlg::Nearest)
159 .use_alpha(false);
160 log::debug!("CPUConverter created");
161 Self {
162 resizer,
163 options,
164 colors: crate::DEFAULT_COLORS_U8,
165 }
166 }
167
168 pub(crate) fn support_conversion_pf(src: PixelFormat, dst: PixelFormat) -> bool {
169 use PixelFormat::*;
170 matches!(
171 (src, dst),
172 (Nv12, Rgb)
173 | (Nv12, Rgba)
174 | (Nv12, Grey)
175 | (Nv16, Rgb)
176 | (Nv16, Rgba)
177 | (Nv16, Bgra)
178 | (Yuyv, Rgb)
179 | (Yuyv, Rgba)
180 | (Yuyv, Grey)
181 | (Yuyv, Yuyv)
182 | (Yuyv, PlanarRgb)
183 | (Yuyv, PlanarRgba)
184 | (Yuyv, Nv16)
185 | (Vyuy, Rgb)
186 | (Vyuy, Rgba)
187 | (Vyuy, Grey)
188 | (Vyuy, Vyuy)
189 | (Vyuy, PlanarRgb)
190 | (Vyuy, PlanarRgba)
191 | (Vyuy, Nv16)
192 | (Rgba, Rgb)
193 | (Rgba, Rgba)
194 | (Rgba, Grey)
195 | (Rgba, Yuyv)
196 | (Rgba, PlanarRgb)
197 | (Rgba, PlanarRgba)
198 | (Rgba, Nv16)
199 | (Rgb, Rgb)
200 | (Rgb, Rgba)
201 | (Rgb, Grey)
202 | (Rgb, Yuyv)
203 | (Rgb, PlanarRgb)
204 | (Rgb, PlanarRgba)
205 | (Rgb, Nv16)
206 | (Grey, Rgb)
207 | (Grey, Rgba)
208 | (Grey, Grey)
209 | (Grey, Yuyv)
210 | (Grey, PlanarRgb)
211 | (Grey, PlanarRgba)
212 | (Grey, Nv16)
213 | (Nv12, Bgra)
214 | (Yuyv, Bgra)
215 | (Vyuy, Bgra)
216 | (Rgba, Bgra)
217 | (Rgb, Bgra)
218 | (Grey, Bgra)
219 | (Bgra, Bgra)
220 | (PlanarRgb, Rgb)
221 | (PlanarRgb, Rgba)
222 | (PlanarRgba, Rgb)
223 | (PlanarRgba, Rgba)
224 | (PlanarRgb, Bgra)
225 | (PlanarRgba, Bgra)
226 )
227 }
228
/// Converts `src` into `dst`, dispatching on `(src_fmt, dst_fmt)` to the matching
/// conversion routine.
///
/// Same-format pairs are handled by `copy_image`. Every `* -> Bgra` pair other than
/// `Bgra -> Bgra` first writes RGBA data into `dst` and then calls
/// `swizzle_rb_4chan` on it.
///
/// # Errors
/// Returns [`Error::NotSupported`] for any pair without a match arm, and propagates
/// errors from the selected conversion routine.
pub(crate) fn convert_format_pf(
    src: &Tensor<u8>,
    dst: &mut Tensor<u8>,
    src_fmt: PixelFormat,
    dst_fmt: PixelFormat,
) -> Result<()> {
    // Held until the function returns; presumably `FunctionTimer` reports the elapsed
    // time when dropped — confirm against its definition.
    let _timer = FunctionTimer::new(format!(
        "ImageProcessor::convert_format {} to {}",
        src_fmt, dst_fmt,
    ));

    use PixelFormat::*;
    match (src_fmt, dst_fmt) {
        (Nv12, Rgb) => Self::convert_nv12_to_rgb(src, dst),
        (Nv12, Rgba) => Self::convert_nv12_to_rgba(src, dst),
        (Nv12, Grey) => Self::convert_nv12_to_grey(src, dst),
        (Yuyv, Rgb) => Self::convert_yuyv_to_rgb(src, dst),
        (Yuyv, Rgba) => Self::convert_yuyv_to_rgba(src, dst),
        (Yuyv, Grey) => Self::convert_yuyv_to_grey(src, dst),
        (Yuyv, Yuyv) => Self::copy_image(src, dst),
        (Yuyv, PlanarRgb) => Self::convert_yuyv_to_8bps(src, dst),
        (Yuyv, PlanarRgba) => Self::convert_yuyv_to_prgba(src, dst),
        (Yuyv, Nv16) => Self::convert_yuyv_to_nv16(src, dst),
        (Vyuy, Rgb) => Self::convert_vyuy_to_rgb(src, dst),
        (Vyuy, Rgba) => Self::convert_vyuy_to_rgba(src, dst),
        (Vyuy, Grey) => Self::convert_vyuy_to_grey(src, dst),
        (Vyuy, Vyuy) => Self::copy_image(src, dst),
        (Vyuy, PlanarRgb) => Self::convert_vyuy_to_8bps(src, dst),
        (Vyuy, PlanarRgba) => Self::convert_vyuy_to_prgba(src, dst),
        (Vyuy, Nv16) => Self::convert_vyuy_to_nv16(src, dst),
        (Rgba, Rgb) => Self::convert_rgba_to_rgb(src, dst),
        (Rgba, Rgba) => Self::copy_image(src, dst),
        (Rgba, Grey) => Self::convert_rgba_to_grey(src, dst),
        (Rgba, Yuyv) => Self::convert_rgba_to_yuyv(src, dst),
        (Rgba, PlanarRgb) => Self::convert_rgba_to_8bps(src, dst),
        (Rgba, PlanarRgba) => Self::convert_rgba_to_prgba(src, dst),
        (Rgba, Nv16) => Self::convert_rgba_to_nv16(src, dst),
        (Rgb, Rgb) => Self::copy_image(src, dst),
        (Rgb, Rgba) => Self::convert_rgb_to_rgba(src, dst),
        (Rgb, Grey) => Self::convert_rgb_to_grey(src, dst),
        (Rgb, Yuyv) => Self::convert_rgb_to_yuyv(src, dst),
        (Rgb, PlanarRgb) => Self::convert_rgb_to_8bps(src, dst),
        (Rgb, PlanarRgba) => Self::convert_rgb_to_prgba(src, dst),
        (Rgb, Nv16) => Self::convert_rgb_to_nv16(src, dst),
        (Grey, Rgb) => Self::convert_grey_to_rgb(src, dst),
        (Grey, Rgba) => Self::convert_grey_to_rgba(src, dst),
        (Grey, Grey) => Self::copy_image(src, dst),
        (Grey, Yuyv) => Self::convert_grey_to_yuyv(src, dst),
        (Grey, PlanarRgb) => Self::convert_grey_to_8bps(src, dst),
        (Grey, PlanarRgba) => Self::convert_grey_to_prgba(src, dst),
        (Grey, Nv16) => Self::convert_grey_to_nv16(src, dst),

        // Semi-planar and planar sources.
        (Nv16, Rgb) => Self::convert_nv16_to_rgb(src, dst),
        (Nv16, Rgba) => Self::convert_nv16_to_rgba(src, dst),
        (PlanarRgb, Rgb) => Self::convert_8bps_to_rgb(src, dst),
        (PlanarRgb, Rgba) => Self::convert_8bps_to_rgba(src, dst),
        (PlanarRgba, Rgb) => Self::convert_prgba_to_rgb(src, dst),
        (PlanarRgba, Rgba) => Self::convert_prgba_to_rgba(src, dst),

        // BGRA destinations: produce RGBA in `dst`, then run `swizzle_rb_4chan` over it
        // (presumably an in-place swap of the R and B channels — confirm).
        (Bgra, Bgra) => Self::copy_image(src, dst),
        (Nv12, Bgra) => {
            Self::convert_nv12_to_rgba(src, dst)?;
            Self::swizzle_rb_4chan(dst)
        }
        (Nv16, Bgra) => {
            Self::convert_nv16_to_rgba(src, dst)?;
            Self::swizzle_rb_4chan(dst)
        }
        (Yuyv, Bgra) => {
            Self::convert_yuyv_to_rgba(src, dst)?;
            Self::swizzle_rb_4chan(dst)
        }
        (Vyuy, Bgra) => {
            Self::convert_vyuy_to_rgba(src, dst)?;
            Self::swizzle_rb_4chan(dst)
        }
        (Rgba, Bgra) => {
            // The source is already RGBA: copy the mapped bytes verbatim before swizzling.
            dst.map()?.copy_from_slice(&src.map()?);
            Self::swizzle_rb_4chan(dst)
        }
        (Rgb, Bgra) => {
            Self::convert_rgb_to_rgba(src, dst)?;
            Self::swizzle_rb_4chan(dst)
        }
        (Grey, Bgra) => {
            Self::convert_grey_to_rgba(src, dst)?;
            Self::swizzle_rb_4chan(dst)
        }
        (PlanarRgb, Bgra) => {
            Self::convert_8bps_to_rgba(src, dst)?;
            Self::swizzle_rb_4chan(dst)
        }
        (PlanarRgba, Bgra) => {
            Self::convert_prgba_to_rgba(src, dst)?;
            Self::swizzle_rb_4chan(dst)
        }

        (s, d) => Err(Error::NotSupported(format!("Conversion from {s} to {d}",))),
    }
}
332
333 pub(crate) fn fill_image_outside_crop_u8(
335 dst: &mut Tensor<u8>,
336 rgba: [u8; 4],
337 crop: Rect,
338 ) -> Result<()> {
339 let dst_fmt = dst.format().unwrap();
340 let dst_w = dst.width().unwrap();
341 let dst_h = dst.height().unwrap();
342 let mut dst_map = dst.map()?;
343 let dst_tup = (dst_map.as_mut_slice(), dst_w, dst_h);
344 Self::fill_outside_crop_dispatch(dst_tup, dst_fmt, rgba, crop)
345 }
346
347 fn fill_outside_crop_dispatch(
349 dst: (&mut [u8], usize, usize),
350 fmt: PixelFormat,
351 rgba: [u8; 4],
352 crop: Rect,
353 ) -> Result<()> {
354 use PixelFormat::*;
355 match fmt {
356 Rgba | Bgra => Self::fill_image_outside_crop_(dst, rgba, crop),
357 Rgb => Self::fill_image_outside_crop_(dst, Self::rgba_to_rgb(rgba), crop),
358 Grey => Self::fill_image_outside_crop_(dst, Self::rgba_to_grey(rgba), crop),
359 Yuyv => Self::fill_image_outside_crop_(
360 (dst.0, dst.1 / 2, dst.2),
361 Self::rgba_to_yuyv(rgba),
362 Rect::new(crop.left / 2, crop.top, crop.width.div_ceil(2), crop.height),
363 ),
364 PlanarRgb => Self::fill_image_outside_crop_planar(dst, Self::rgba_to_rgb(rgba), crop),
365 PlanarRgba => Self::fill_image_outside_crop_planar(dst, rgba, crop),
366 Nv16 => {
367 let yuyv = Self::rgba_to_yuyv(rgba);
368 Self::fill_image_outside_crop_yuv_semiplanar(dst, yuyv[0], [yuyv[1], yuyv[3]], crop)
369 }
370 _ => Err(Error::Internal(format!(
371 "Found unexpected destination {fmt}",
372 ))),
373 }
374 }
375}
376
377impl ImageProcessorTrait for CPUProcessor {
378 fn convert(
379 &mut self,
380 src: &TensorDyn,
381 dst: &mut TensorDyn,
382 rotation: Rotation,
383 flip: Flip,
384 crop: Crop,
385 ) -> Result<()> {
386 self.convert_impl(src, dst, rotation, flip, crop)
387 }
388
389 fn draw_decoded_masks(
390 &mut self,
391 dst: &mut TensorDyn,
392 detect: &[DetectBox],
393 segmentation: &[Segmentation],
394 overlay: crate::MaskOverlay<'_>,
395 ) -> Result<()> {
396 prepare_dst_base_cpu(dst, overlay.background)?;
400 let dst = dst.as_u8_mut().ok_or(Error::NotAnImage)?;
401 self.draw_decoded_masks_impl(
402 dst,
403 detect,
404 segmentation,
405 overlay.opacity,
406 overlay.color_mode,
407 )
408 }
409
410 fn draw_proto_masks(
411 &mut self,
412 dst: &mut TensorDyn,
413 detect: &[DetectBox],
414 proto_data: &ProtoData,
415 overlay: crate::MaskOverlay<'_>,
416 ) -> Result<()> {
417 prepare_dst_base_cpu(dst, overlay.background)?;
418 let dst = dst.as_u8_mut().ok_or(Error::NotAnImage)?;
419 self.draw_proto_masks_impl(
420 dst,
421 detect,
422 proto_data,
423 overlay.opacity,
424 overlay.letterbox,
425 overlay.color_mode,
426 )
427 }
428
429 fn set_class_colors(&mut self, colors: &[[u8; 4]]) -> Result<()> {
430 for (c, new_c) in self.colors.iter_mut().zip(colors.iter()) {
431 *c = *new_c;
432 }
433 Ok(())
434 }
435}
436
437impl CPUProcessor {
439 pub(crate) fn convert_impl(
441 &mut self,
442 src: &TensorDyn,
443 dst: &mut TensorDyn,
444 rotation: Rotation,
445 flip: Flip,
446 crop: Crop,
447 ) -> Result<()> {
448 let src_fmt = src.format().ok_or(Error::NotAnImage)?;
449 let dst_fmt = dst.format().ok_or(Error::NotAnImage)?;
450
451 match (src.dtype(), dst.dtype()) {
452 (DType::U8, DType::U8) => {
453 let src = src.as_u8().unwrap();
454 let dst = dst.as_u8_mut().unwrap();
455 self.convert_u8(src, dst, src_fmt, dst_fmt, rotation, flip, crop)
456 }
457 (DType::U8, DType::I8) => {
458 let src_u8 = src.as_u8().unwrap();
461 let dst_i8 = dst.as_i8_mut().unwrap();
462 let dst_u8 = unsafe { &mut *(dst_i8 as *mut Tensor<i8> as *mut Tensor<u8>) };
466 self.convert_u8(src_u8, dst_u8, src_fmt, dst_fmt, rotation, flip, crop)?;
467 let mut map = dst_u8.map()?;
469 apply_int8_xor_bias(map.as_mut_slice(), dst_fmt);
470 Ok(())
471 }
472 (s, d) => Err(Error::NotSupported(format!("dtype {s} -> {d}",))),
473 }
474 }
475
476 #[allow(clippy::too_many_arguments)]
478 fn convert_u8(
479 &mut self,
480 src: &Tensor<u8>,
481 dst: &mut Tensor<u8>,
482 src_fmt: PixelFormat,
483 dst_fmt: PixelFormat,
484 rotation: Rotation,
485 flip: Flip,
486 crop: Crop,
487 ) -> Result<()> {
488 use PixelFormat::*;
489
490 let src_w = src.width().unwrap();
491 let src_h = src.height().unwrap();
492 let dst_w = dst.width().unwrap();
493 let dst_h = dst.height().unwrap();
494
495 crop.check_crop_dims(src_w, src_h, dst_w, dst_h)?;
496
497 let intermediate = match (src_fmt, dst_fmt) {
499 (Nv12, Rgb) => Rgb,
500 (Nv12, Rgba) => Rgba,
501 (Nv12, Grey) => Grey,
502 (Nv12, Yuyv) => Rgba,
503 (Nv12, Nv16) => Rgba,
504 (Nv12, PlanarRgb) => Rgb,
505 (Nv12, PlanarRgba) => Rgba,
506 (Yuyv, Rgb) => Rgb,
507 (Yuyv, Rgba) => Rgba,
508 (Yuyv, Grey) => Grey,
509 (Yuyv, Yuyv) => Rgba,
510 (Yuyv, PlanarRgb) => Rgb,
511 (Yuyv, PlanarRgba) => Rgba,
512 (Yuyv, Nv16) => Rgba,
513 (Vyuy, Rgb) => Rgb,
514 (Vyuy, Rgba) => Rgba,
515 (Vyuy, Grey) => Grey,
516 (Vyuy, Vyuy) => Rgba,
517 (Vyuy, PlanarRgb) => Rgb,
518 (Vyuy, PlanarRgba) => Rgba,
519 (Vyuy, Nv16) => Rgba,
520 (Rgba, Rgb) => Rgba,
521 (Rgba, Rgba) => Rgba,
522 (Rgba, Grey) => Grey,
523 (Rgba, Yuyv) => Rgba,
524 (Rgba, PlanarRgb) => Rgba,
525 (Rgba, PlanarRgba) => Rgba,
526 (Rgba, Nv16) => Rgba,
527 (Rgb, Rgb) => Rgb,
528 (Rgb, Rgba) => Rgb,
529 (Rgb, Grey) => Grey,
530 (Rgb, Yuyv) => Rgb,
531 (Rgb, PlanarRgb) => Rgb,
532 (Rgb, PlanarRgba) => Rgb,
533 (Rgb, Nv16) => Rgb,
534 (Grey, Rgb) => Rgb,
535 (Grey, Rgba) => Rgba,
536 (Grey, Grey) => Grey,
537 (Grey, Yuyv) => Grey,
538 (Grey, PlanarRgb) => Grey,
539 (Grey, PlanarRgba) => Grey,
540 (Grey, Nv16) => Grey,
541 (Nv12, Bgra) => Rgba,
542 (Yuyv, Bgra) => Rgba,
543 (Vyuy, Bgra) => Rgba,
544 (Rgba, Bgra) => Rgba,
545 (Rgb, Bgra) => Rgb,
546 (Grey, Bgra) => Grey,
547 (Bgra, Bgra) => Bgra,
548 (Nv16, Rgb) => Rgb,
549 (Nv16, Rgba) => Rgba,
550 (Nv16, Bgra) => Rgba,
551 (PlanarRgb, Rgb) => Rgb,
552 (PlanarRgb, Rgba) => Rgb,
553 (PlanarRgb, Bgra) => Rgb,
554 (PlanarRgba, Rgb) => Rgba,
555 (PlanarRgba, Rgba) => Rgba,
556 (PlanarRgba, Bgra) => Rgba,
557 (s, d) => {
558 return Err(Error::NotSupported(format!("Conversion from {s} to {d}",)));
559 }
560 };
561
562 let need_resize_flip_rotation = rotation != Rotation::None
563 || flip != Flip::None
564 || src_w != dst_w
565 || src_h != dst_h
566 || crop.src_rect.is_some_and(|c| {
567 c != Rect {
568 left: 0,
569 top: 0,
570 width: src_w,
571 height: src_h,
572 }
573 })
574 || crop.dst_rect.is_some_and(|c| {
575 c != Rect {
576 left: 0,
577 top: 0,
578 width: dst_w,
579 height: dst_h,
580 }
581 });
582
583 if !need_resize_flip_rotation && Self::support_conversion_pf(src_fmt, dst_fmt) {
585 return Self::convert_format_pf(src, dst, src_fmt, dst_fmt);
586 }
587
588 if dst_fmt == Yuyv && !dst_w.is_multiple_of(2) {
590 return Err(Error::NotSupported(format!(
591 "{} destination must have width divisible by 2",
592 dst_fmt,
593 )));
594 }
595
596 let mut tmp_buffer;
598 let tmp;
599 let tmp_fmt;
600 if intermediate != src_fmt {
601 let _s = tracing::trace_span!(
602 "image.convert.cpu.format_convert",
603 from = ?src_fmt,
604 to = ?intermediate,
605 pass = "pre_resize",
606 )
607 .entered();
608 tmp_buffer = Tensor::<u8>::image(src_w, src_h, intermediate, Some(TensorMemory::Mem))?;
609
610 Self::convert_format_pf(src, &mut tmp_buffer, src_fmt, intermediate)?;
611 tmp = &tmp_buffer;
612 tmp_fmt = intermediate;
613 } else {
614 tmp = src;
615 tmp_fmt = src_fmt;
616 }
617
618 debug_assert!(matches!(tmp_fmt, Rgb | Rgba | Grey));
620 if tmp_fmt == dst_fmt {
621 let _s = tracing::trace_span!("image.convert.cpu.resize_flip_rotate").entered();
622 self.resize_flip_rotate_pf(tmp, dst, dst_fmt, rotation, flip, crop)?;
623 } else if !need_resize_flip_rotation {
624 let _s = tracing::trace_span!(
625 "image.convert.cpu.format_convert",
626 from = ?tmp_fmt,
627 to = ?dst_fmt,
628 pass = "direct",
629 )
630 .entered();
631 Self::convert_format_pf(tmp, dst, tmp_fmt, dst_fmt)?;
632 } else {
633 let mut tmp2 = Tensor::<u8>::image(dst_w, dst_h, tmp_fmt, Some(TensorMemory::Mem))?;
634 if crop.dst_rect.is_some_and(|c| {
635 c != Rect {
636 left: 0,
637 top: 0,
638 width: dst_w,
639 height: dst_h,
640 }
641 }) && crop.dst_color.is_none()
642 {
643 Self::convert_format_pf(dst, &mut tmp2, dst_fmt, tmp_fmt)?;
644 }
645 {
646 let _s = tracing::trace_span!("image.convert.cpu.resize_flip_rotate").entered();
647 self.resize_flip_rotate_pf(tmp, &mut tmp2, tmp_fmt, rotation, flip, crop)?;
648 }
649 {
650 let _s = tracing::trace_span!(
651 "image.convert.cpu.format_convert",
652 from = ?tmp_fmt,
653 to = ?dst_fmt,
654 pass = "post_resize",
655 )
656 .entered();
657 Self::convert_format_pf(&tmp2, dst, tmp_fmt, dst_fmt)?;
658 }
659 }
660 if let (Some(dst_rect), Some(dst_color)) = (crop.dst_rect, crop.dst_color) {
661 let full_rect = Rect {
662 left: 0,
663 top: 0,
664 width: dst_w,
665 height: dst_h,
666 };
667 if dst_rect != full_rect {
668 Self::fill_image_outside_crop_u8(dst, dst_color, dst_rect)?;
669 }
670 }
671
672 Ok(())
673 }
674
675 fn draw_decoded_masks_impl(
676 &mut self,
677 dst: &mut Tensor<u8>,
678 detect: &[DetectBox],
679 segmentation: &[Segmentation],
680 opacity: f32,
681 color_mode: crate::ColorMode,
682 ) -> Result<()> {
683 let dst_fmt = dst.format().ok_or(Error::NotAnImage)?;
684 if !matches!(dst_fmt, PixelFormat::Rgba | PixelFormat::Rgb) {
685 return Err(crate::Error::NotSupported(
686 "CPU image rendering only supports RGBA or RGB images".to_string(),
687 ));
688 }
689
690 let _timer = FunctionTimer::new("CPUProcessor::draw_decoded_masks");
691
692 let dst_w = dst.width().unwrap();
693 let dst_h = dst.height().unwrap();
694 let dst_rs = tensor_row_stride(dst);
695 let dst_c = dst_fmt.channels();
696
697 let mut map = dst.map()?;
698 let dst_slice = map.as_mut_slice();
699
700 self.render_box(dst_w, dst_h, dst_rs, dst_c, dst_slice, detect, color_mode)?;
701
702 if segmentation.is_empty() {
703 return Ok(());
704 }
705
706 let is_semantic = segmentation[0].segmentation.shape()[2] > 1;
709
710 if is_semantic {
711 self.render_modelpack_segmentation(
712 dst_w,
713 dst_h,
714 dst_rs,
715 dst_c,
716 dst_slice,
717 &segmentation[0],
718 opacity,
719 )?;
720 } else {
721 for (idx, (seg, det)) in segmentation.iter().zip(detect).enumerate() {
722 let color_index = color_mode.index(idx, det.label);
723 self.render_yolo_segmentation(
724 dst_w,
725 dst_h,
726 dst_rs,
727 dst_c,
728 dst_slice,
729 seg,
730 color_index,
731 opacity,
732 )?;
733 }
734 }
735
736 Ok(())
737 }
738
739 fn draw_proto_masks_impl(
740 &mut self,
741 dst: &mut Tensor<u8>,
742 detect: &[DetectBox],
743 proto_data: &ProtoData,
744 opacity: f32,
745 letterbox: Option<[f32; 4]>,
746 color_mode: crate::ColorMode,
747 ) -> Result<()> {
748 let dst_fmt = dst.format().ok_or(Error::NotAnImage)?;
749 if !matches!(dst_fmt, PixelFormat::Rgba | PixelFormat::Rgb) {
750 return Err(crate::Error::NotSupported(
751 "CPU image rendering only supports RGBA or RGB images".to_string(),
752 ));
753 }
754
755 let _timer = FunctionTimer::new("CPUProcessor::draw_proto_masks");
756
757 let dst_w = dst.width().unwrap();
758 let dst_h = dst.height().unwrap();
759 let dst_rs = tensor_row_stride(dst);
760 let channels = dst_fmt.channels();
761
762 let mut map = dst.map()?;
763 let dst_slice = map.as_mut_slice();
764
765 self.render_box(
766 dst_w, dst_h, dst_rs, channels, dst_slice, detect, color_mode,
767 )?;
768
769 if detect.is_empty() {
770 return Ok(());
771 }
772 let proto_shape = proto_data.protos.shape();
773 if proto_shape.len() != 3 {
774 return Err(Error::InvalidShape(format!(
775 "protos tensor must be rank-3, got {proto_shape:?}"
776 )));
777 }
778 let proto_h = proto_shape[0];
779 let proto_w = proto_shape[1];
780 let num_protos = proto_shape[2];
781 let coeff_shape = proto_data.mask_coefficients.shape();
782 if coeff_shape.len() != 2 {
783 return Err(Error::InvalidShape(format!(
784 "mask_coefficients tensor must be rank-2, got {coeff_shape:?}"
785 )));
786 }
787 if coeff_shape[0] == 0 {
789 return Ok(());
790 }
791 if coeff_shape[1] != num_protos {
792 return Err(Error::InvalidShape(format!(
793 "mask_coefficients second dimension must match num_protos \
794 ({num_protos}), got {coeff_shape:?}"
795 )));
796 }
797
798 let coeff_f32: Vec<f32> = match proto_data.mask_coefficients.dtype() {
800 DType::F32 => {
801 let t = proto_data.mask_coefficients.as_f32().expect("F32");
802 let m = t.map()?;
803 m.as_slice().to_vec()
804 }
805 DType::F16 => {
806 let t = proto_data.mask_coefficients.as_f16().expect("F16");
807 let m = t.map()?;
808 m.as_slice().iter().map(|v| v.to_f32()).collect()
809 }
810 DType::I8 => {
811 let t = proto_data.mask_coefficients.as_i8().expect("I8");
812 let m = t.map()?;
813 if let Some(q) = t.quantization() {
814 use edgefirst_tensor::QuantMode;
815 let (scale, zp) = match q.mode() {
816 QuantMode::PerTensor { scale, zero_point } => (scale, zero_point as f32),
817 QuantMode::PerTensorSymmetric { scale } => (scale, 0.0),
818 other => {
819 return Err(Error::NotSupported(format!(
820 "I8 mask_coefficients quantization mode {other:?} not supported"
821 )));
822 }
823 };
824 m.as_slice()
825 .iter()
826 .map(|&v| (v as f32 - zp) * scale)
827 .collect()
828 } else {
829 m.as_slice().iter().map(|&v| v as f32).collect()
830 }
831 }
832 DType::I16 => {
833 let t = proto_data.mask_coefficients.as_i16().expect("I16");
834 let m = t.map()?;
835 if let Some(q) = t.quantization() {
836 use edgefirst_tensor::QuantMode;
837 let (scale, zp) = match q.mode() {
838 QuantMode::PerTensor { scale, zero_point } => (scale, zero_point as f32),
839 QuantMode::PerTensorSymmetric { scale } => (scale, 0.0),
840 other => {
841 return Err(Error::NotSupported(format!(
842 "I16 mask_coefficients quantization mode {other:?} not supported"
843 )));
844 }
845 };
846 m.as_slice()
847 .iter()
848 .map(|&v| (v as f32 - zp) * scale)
849 .collect()
850 } else {
851 m.as_slice().iter().map(|&v| v as f32).collect()
852 }
853 }
854 other => {
855 return Err(Error::InvalidShape(format!(
856 "mask_coefficients dtype {other:?} not supported"
857 )));
858 }
859 };
860
861 let (lx0, lx_range, ly0, ly_range) = match letterbox {
863 Some([lx0, ly0, lx1, ly1]) => (lx0, lx1 - lx0, ly0, ly1 - ly0),
864 None => (0.0_f32, 1.0_f32, 0.0_f32, 1.0_f32),
865 };
866
867 match proto_data.protos.dtype() {
870 DType::F32 => {
871 let t = proto_data.protos.as_f32().expect("F32");
872 let m = t.map()?;
873 self.draw_proto_masks_inner(
874 dst_slice,
875 dst_w,
876 dst_h,
877 dst_rs,
878 channels,
879 detect,
880 m.as_slice(),
881 &coeff_f32,
882 proto_h,
883 proto_w,
884 num_protos,
885 opacity,
886 (lx0, lx_range, ly0, ly_range),
887 color_mode,
888 0.0_f32,
889 |p: &f32, _| *p,
890 );
891 }
892 DType::F16 => {
893 let t = proto_data.protos.as_f16().expect("F16");
894 let m = t.map()?;
895 self.draw_proto_masks_inner(
896 dst_slice,
897 dst_w,
898 dst_h,
899 dst_rs,
900 channels,
901 detect,
902 m.as_slice(),
903 &coeff_f32,
904 proto_h,
905 proto_w,
906 num_protos,
907 opacity,
908 (lx0, lx_range, ly0, ly_range),
909 color_mode,
910 0.0_f32,
911 |p: &half::f16, _| p.to_f32(),
912 );
913 }
914 DType::I8 => {
915 use edgefirst_tensor::QuantMode;
916 let t = proto_data.protos.as_i8().expect("I8");
917 let m = t.map()?;
918 let quant = t.quantization().ok_or_else(|| {
919 Error::InvalidShape("I8 protos require quantization metadata".into())
920 })?;
921 let (scale, zp) = match quant.mode() {
922 QuantMode::PerTensor { scale, zero_point } => (scale, zero_point as f32),
923 QuantMode::PerTensorSymmetric { scale } => (scale, 0.0),
924 QuantMode::PerChannel { axis, .. }
925 | QuantMode::PerChannelSymmetric { axis, .. } => {
926 return Err(Error::NotSupported(format!(
927 "per-channel quantization (axis={axis}) in draw_proto_masks \
928 CPU path not yet supported"
929 )));
930 }
931 };
932 self.draw_proto_masks_inner(
933 dst_slice,
934 dst_w,
935 dst_h,
936 dst_rs,
937 channels,
938 detect,
939 m.as_slice(),
940 &coeff_f32,
941 proto_h,
942 proto_w,
943 num_protos,
944 opacity,
945 (lx0, lx_range, ly0, ly_range),
946 color_mode,
947 scale,
948 move |p: &i8, _| (*p as f32) - zp,
949 );
950 }
951 other => {
952 return Err(Error::InvalidShape(format!(
953 "proto tensor dtype {other:?} not supported"
954 )));
955 }
956 }
957
958 Ok(())
959 }
960
961 #[allow(clippy::too_many_arguments)]
962 fn draw_proto_masks_inner<P: Copy>(
963 &self,
964 dst_slice: &mut [u8],
965 dst_w: usize,
966 dst_h: usize,
967 dst_rs: usize,
968 channels: usize,
969 detect: &[DetectBox],
970 protos: &[P],
971 coeff_all_f32: &[f32],
972 proto_h: usize,
973 proto_w: usize,
974 num_protos: usize,
975 opacity: f32,
976 letterbox_xy: (f32, f32, f32, f32),
977 color_mode: crate::ColorMode,
978 acc_scale: f32,
979 load_f32: impl Fn(&P, f32) -> f32 + Copy,
980 ) {
981 let (lx0, lx_range, ly0, ly_range) = letterbox_xy;
982 let stride_y = proto_w * num_protos;
983 for (idx, det) in detect.iter().enumerate() {
984 let coeff = &coeff_all_f32[idx * num_protos..(idx + 1) * num_protos];
985 let color_index = color_mode.index(idx, det.label);
986 let color = self.colors[color_index % self.colors.len()];
987 let alpha = if opacity == 1.0 {
988 color[3] as u16
989 } else {
990 (color[3] as f32 * opacity).round() as u16
991 };
992
993 let start_x = (dst_w as f32 * det.bbox.xmin).round() as usize;
994 let start_y = (dst_h as f32 * det.bbox.ymin).round() as usize;
995 let end_x = ((dst_w as f32 * det.bbox.xmax).round() as usize).min(dst_w);
996 let end_y = ((dst_h as f32 * det.bbox.ymax).round() as usize).min(dst_h);
997
998 for y in start_y..end_y {
999 for x in start_x..end_x {
1000 let px = (lx0 + (x as f32 / dst_w as f32) * lx_range) * proto_w as f32 - 0.5;
1001 let py = (ly0 + (y as f32 / dst_h as f32) * ly_range) * proto_h as f32 - 0.5;
1002
1003 let x0 = (px.floor() as isize).clamp(0, proto_w as isize - 1) as usize;
1007 let y0 = (py.floor() as isize).clamp(0, proto_h as isize - 1) as usize;
1008 let x1 = (x0 + 1).min(proto_w - 1);
1009 let y1 = (y0 + 1).min(proto_h - 1);
1010 let fx = px - px.floor();
1011 let fy = py - py.floor();
1012 let w00 = (1.0 - fx) * (1.0 - fy);
1013 let w10 = fx * (1.0 - fy);
1014 let w01 = (1.0 - fx) * fy;
1015 let w11 = fx * fy;
1016 let b00 = y0 * stride_y + x0 * num_protos;
1017 let b10 = y0 * stride_y + x1 * num_protos;
1018 let b01 = y1 * stride_y + x0 * num_protos;
1019 let b11 = y1 * stride_y + x1 * num_protos;
1020 let mut acc = 0.0_f32;
1021 for p in 0..num_protos {
1022 let v00 = load_f32(&protos[b00 + p], 0.0);
1023 let v10 = load_f32(&protos[b10 + p], 0.0);
1024 let v01 = load_f32(&protos[b01 + p], 0.0);
1025 let v11 = load_f32(&protos[b11 + p], 0.0);
1026 let val = w00 * v00 + w10 * v10 + w01 * v01 + w11 * v11;
1027 acc += coeff[p] * val;
1028 }
1029 let final_acc = if acc_scale == 0.0 {
1030 acc
1031 } else {
1032 acc_scale * acc
1033 };
1034 let mask = 1.0 / (1.0 + (-final_acc).exp());
1038 if mask < 0.5 {
1039 continue;
1040 }
1041 let dst_index = y * dst_rs + x * channels;
1042 for c in 0..3 {
1043 dst_slice[dst_index + c] = ((color[c] as u16 * alpha
1044 + dst_slice[dst_index + c] as u16 * (255 - alpha))
1045 / 255) as u8;
1046 }
1047 }
1048 }
1049 }
1050 }
1051}