1use crate::edge_mode::{clamp_edge, reflect_index, reflect_index_101};
30use crate::filter1d::arena_roi::copy_roi;
31use crate::filter1d::filter_element::KernelShape;
32use crate::img_size::ImageSize;
33use crate::primitives::PrimitiveCast;
34use crate::util::check_slice_size;
35use crate::{BlurError, BlurImage, EdgeMode, EdgeMode2D, Scalar};
36use num_traits::AsPrimitive;
37use std::fmt::Debug;
38
/// Geometry of a padded working buffer (an "arena").
///
/// `make_arena` returns one of these next to the padded pixel buffer so that
/// callers know the padded dimensions, the padding offsets and the number of
/// channels per pixel.
#[derive(Copy, Clone)]
pub struct Arena {
    /// Arena width in pixels, padding included.
    pub width: usize,
    /// Arena height in pixels, padding included.
    #[allow(dead_code)]
    pub height: usize,
    /// Horizontal padding recorded for the arena, in pixels.
    pub pad_w: usize,
    /// Vertical padding recorded for the arena, in pixels.
    pub pad_h: usize,
    /// Number of channels stored per pixel.
    pub components: usize,
}

impl Arena {
    /// Bundles the given arena dimensions, paddings and channel count.
    ///
    /// The values are stored verbatim; nothing is validated here.
    pub fn new(
        arena_width: usize,
        arena_height: usize,
        arena_pad_w: usize,
        arena_pad_h: usize,
        components: usize,
    ) -> Arena {
        Self {
            components,
            pad_h: arena_pad_h,
            pad_w: arena_pad_w,
            height: arena_height,
            width: arena_width,
        }
    }
}
66
67#[derive(Copy, Clone)]
68pub(crate) struct ArenaPads {
69 pub pad_left: usize,
70 pub pad_top: usize,
71 pub pad_right: usize,
72 pub pad_bottom: usize,
73}
74
75impl ArenaPads {
76 pub(crate) fn constant(v: usize) -> ArenaPads {
77 ArenaPads::new(v, v, v, v)
78 }
79
80 pub(crate) fn new(
81 pad_left: usize,
82 pad_top: usize,
83 pad_right: usize,
84 pad_bottom: usize,
85 ) -> ArenaPads {
86 ArenaPads {
87 pad_left,
88 pad_top,
89 pad_right,
90 pad_bottom,
91 }
92 }
93
94 pub(crate) fn from_kernel_shape(kernel_shape: KernelShape) -> ArenaPads {
95 let pad_w = kernel_shape.width / 2;
96 let pad_h = kernel_shape.height / 2;
97 ArenaPads::new(pad_w, pad_h, pad_w, pad_h)
98 }
99}
100
101pub(crate) fn make_arena<T, const CN: usize>(
103 image: &[T],
104 image_stride: usize,
105 image_size: ImageSize,
106 pads: ArenaPads,
107 edge_modes: EdgeMode2D,
108 scalar: Scalar,
109) -> Result<(Vec<T>, Arena), BlurError>
110where
111 T: Default + Copy + Send + Sync + 'static,
112 f64: AsPrimitive<T>,
113{
114 #[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "avx"))]
115 {
116 if std::arch::is_x86_feature_detected!("avx2") {
117 return unsafe {
118 make_arena_avx2::<T, CN>(image, image_stride, image_size, pads, edge_modes, scalar)
119 };
120 }
121 }
122 #[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "sse"))]
123 {
124 if std::arch::is_x86_feature_detected!("sse4.1") {
125 return unsafe {
126 make_arena_sse4_1::<T, CN>(
127 image,
128 image_stride,
129 image_size,
130 pads,
131 edge_modes,
132 scalar,
133 )
134 };
135 }
136 }
137 make_arena_exec::<T, CN>(image, image_stride, image_size, pads, edge_modes, scalar)
138}
139
/// AVX2-enabled entry point for arena construction.
///
/// Contains no AVX2-specific code of its own: it forwards every argument to
/// the `#[inline(always)]` `make_arena_exec`, so that body is compiled with
/// AVX2 enabled when inlined here.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2.
#[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "avx"))]
#[target_feature(enable = "avx2")]
unsafe fn make_arena_avx2<T, const CN: usize>(
    image: &[T],
    image_stride: usize,
    image_size: ImageSize,
    pads: ArenaPads,
    edge_modes: EdgeMode2D,
    scalar: Scalar,
) -> Result<(Vec<T>, Arena), BlurError>
where
    T: Default + Copy + Send + Sync + 'static,
    f64: AsPrimitive<T>,
{
    make_arena_exec::<T, CN>(
        image,
        image_stride,
        image_size,
        pads,
        edge_modes,
        scalar,
    )
}
156
/// SSE4.1-enabled entry point for arena construction.
///
/// Contains no SSE-specific code of its own: it forwards every argument to
/// the `#[inline(always)]` `make_arena_exec`, so that body is compiled with
/// SSE4.1 enabled when inlined here.
///
/// # Safety
///
/// The caller must ensure the running CPU supports SSE4.1.
#[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "sse"))]
#[target_feature(enable = "sse4.1")]
unsafe fn make_arena_sse4_1<T, const CN: usize>(
    image: &[T],
    image_stride: usize,
    image_size: ImageSize,
    pads: ArenaPads,
    edge_modes: EdgeMode2D,
    scalar: Scalar,
) -> Result<(Vec<T>, Arena), BlurError>
where
    T: Default + Copy + Send + Sync + 'static,
    f64: AsPrimitive<T>,
{
    make_arena_exec::<T, CN>(
        image,
        image_stride,
        image_size,
        pads,
        edge_modes,
        scalar,
    )
}
173
174#[inline(always)]
176fn make_arena_exec<T, const CN: usize>(
177 image: &[T],
178 image_stride: usize,
179 image_size: ImageSize,
180 pads: ArenaPads,
181 edge_modes: EdgeMode2D,
182 scalar: Scalar,
183) -> Result<(Vec<T>, Arena), BlurError>
184where
185 T: Default + Copy + Send + Sync + 'static,
186 f64: AsPrimitive<T>,
187{
188 check_slice_size(image, image_stride, image_size.width, image_size.height, CN)?;
189
190 let new_height = image_size.height + pads.pad_top + pads.pad_bottom;
191 let new_width = image_size.width + pads.pad_left + pads.pad_right;
192
193 let height = image_size.height;
194 let width = image_size.width;
195
196 let mut padded_image = vec![T::default(); new_height * new_width * CN];
197
198 let old_stride = image_stride;
199 let new_stride = new_width * CN;
200
201 let offset = pads.pad_top * new_stride + pads.pad_left * CN;
202 copy_roi(&mut padded_image[offset..], image, new_stride, old_stride);
203
204 let filling_ranges = [
205 (0..pads.pad_top, 0..new_width), (
207 pads.pad_top..(new_height - pads.pad_bottom),
208 0..pads.pad_left,
209 ), ((height + pads.pad_top)..new_height, 0..new_width), (
212 pads.pad_top..(new_height - pads.pad_bottom),
213 (width + pads.pad_left)..new_width,
214 ), ];
216
217 let pad_w = pads.pad_left;
218 let pad_h = pads.pad_top;
219
220 if edge_modes.horizontal != EdgeMode::Constant && edge_modes.vertical != EdgeMode::Constant {
221 for ranges in filling_ranges.iter() {
222 for (i, dst) in ranges.0.clone().zip(
223 padded_image
224 .chunks_exact_mut(new_stride)
225 .skip(ranges.0.start),
226 ) {
227 for (j, dst) in ranges
228 .1
229 .clone()
230 .zip(dst.chunks_exact_mut(CN).skip(ranges.1.start))
231 {
232 let y = clamp_edge!(
233 edge_modes.vertical,
234 i as i64 - pad_h as i64,
235 0,
236 height as i64
237 );
238 let x = clamp_edge!(
239 edge_modes.horizontal,
240 j as i64 - pad_w as i64,
241 0,
242 width as i64
243 );
244
245 let v_src = y * old_stride + x * CN;
246 let src_iter = &image[v_src..(v_src + CN)];
247 for (dst, src) in dst.iter_mut().zip(src_iter.iter()) {
248 *dst = *src;
249 }
250 }
251 }
252 }
253 } else if edge_modes.vertical != EdgeMode::Constant
254 && edge_modes.horizontal == EdgeMode::Constant
255 {
256 for ranges in filling_ranges.iter() {
258 for (i, dst) in ranges.0.clone().zip(
259 padded_image
260 .chunks_exact_mut(new_stride)
261 .skip(ranges.0.start),
262 ) {
263 for (j, dst) in ranges
264 .1
265 .clone()
266 .zip(dst.chunks_exact_mut(CN).skip(ranges.1.start))
267 {
268 let y = clamp_edge!(
269 edge_modes.vertical,
270 i as i64 - pad_h as i64,
271 0,
272 height as i64
273 );
274 let x = j as i64 - pad_w as i64;
275 if x < 0 || x >= width as i64 {
276 for (y, dst) in dst.iter_mut().enumerate() {
277 *dst = scalar[y].as_();
278 }
279 } else {
280 let v_src = y * old_stride + (x as usize) * CN;
281 let src_iter = &image[v_src..(v_src + CN)];
282 for (dst, src) in dst.iter_mut().zip(src_iter.iter()) {
283 *dst = *src;
284 }
285 }
286 }
287 }
288 }
289 } else if edge_modes.vertical == EdgeMode::Constant
290 && edge_modes.horizontal != EdgeMode::Constant
291 {
292 for ranges in filling_ranges.iter() {
294 for (i, dst) in ranges.0.clone().zip(
295 padded_image
296 .chunks_exact_mut(new_stride)
297 .skip(ranges.0.start),
298 ) {
299 for (j, dst) in ranges
300 .1
301 .clone()
302 .zip(dst.chunks_exact_mut(CN).skip(ranges.1.start))
303 {
304 let y = i as i64 - pad_h as i64;
305 let x = clamp_edge!(
306 edge_modes.horizontal,
307 j as i64 - pad_w as i64,
308 0,
309 width as i64
310 );
311
312 if y < 0 || y >= height as i64 {
313 for (y, dst) in dst.iter_mut().enumerate() {
314 *dst = scalar[y].as_();
315 }
316 } else {
317 let v_src = (y as usize) * old_stride + x * CN;
318 let src_iter = &image[v_src..(v_src + CN)];
319 for (dst, src) in dst.iter_mut().zip(src_iter.iter()) {
320 *dst = *src;
321 }
322 }
323 }
324 }
325 }
326 } else {
327 for ranges in filling_ranges.iter() {
329 for (_, dst) in ranges.0.clone().zip(
330 padded_image
331 .chunks_exact_mut(new_stride)
332 .skip(ranges.0.start),
333 ) {
334 for (_, dst) in ranges
335 .1
336 .clone()
337 .zip(dst.chunks_exact_mut(CN).skip(ranges.1.start))
338 {
339 for (y, dst) in dst.iter_mut().enumerate() {
340 *dst = scalar[y].as_();
341 }
342 }
343 }
344 }
345 }
346 Ok((
347 padded_image,
348 Arena::new(new_width, new_height, pad_w, pad_h, CN),
349 ))
350}
351
352pub fn make_arena_row<T, const CN: usize>(
354 image: &BlurImage<T>,
355 source_y: usize,
356 kernel_size: KernelShape,
357 border_mode: EdgeMode,
358 scalar: Scalar,
359) -> Result<(Vec<T>, usize), BlurError>
360where
361 T: Default + Copy + Send + Sync + 'static + Debug,
362 f64: PrimitiveCast<T>,
363{
364 image.check_layout()?;
365 let pad_w = kernel_size.width / 2;
366
367 let image_size = image.size();
368
369 let arena_width = image_size.width * CN + pad_w * 2 * CN;
370 let mut row = vec![T::default(); arena_width];
371 write_arena_row::<T, CN>(&mut row, image, source_y, kernel_size, border_mode, scalar)?;
372 Ok((row, image_size.width + pad_w * 2))
373}
374
375pub(crate) fn write_arena_row<T, const CN: usize>(
376 row: &mut [T],
377 image: &BlurImage<T>,
378 source_y: usize,
379 kernel_size: KernelShape,
380 border_mode: EdgeMode,
381 scalar: Scalar,
382) -> Result<(), BlurError>
383where
384 T: Default + Copy + Send + Sync + 'static + Debug,
385 f64: PrimitiveCast<T>,
386{
387 image.check_layout()?;
388 let pad_w = kernel_size.width / 2;
389
390 let image_size = image.size();
391
392 let arena_width = image_size.width * CN + pad_w * 2 * CN;
393 if row.len() < arena_width {
394 return Err(BlurError::ImagesMustMatch);
395 }
396
397 let source_offset = source_y * image.row_stride() as usize;
398
399 let source_row = &image.data.as_ref()[source_offset..(source_offset + image_size.width * CN)];
400
401 let row_dst = &mut row[pad_w * CN..(pad_w * CN + image_size.width * CN)];
402
403 for (dst, src) in row_dst.iter_mut().zip(source_row.iter()) {
404 *dst = *src;
405 }
406
407 for (x, dst) in (0..pad_w).zip(row.chunks_exact_mut(CN)) {
408 match border_mode {
409 EdgeMode::Clamp => {
410 let old_x = x.saturating_sub(pad_w).min(image_size.width - 1);
411 let old_px = old_x * CN;
412 let src_iter = &source_row[old_px..(old_px + CN)];
413 for (dst, src) in dst.iter_mut().zip(src_iter.iter()) {
414 *dst = *src;
415 }
416 }
417 EdgeMode::Wrap => {
418 let old_x = (x as i64 - pad_w as i64).rem_euclid(image_size.width as i64) as usize;
419 let old_px = old_x * CN;
420 let src_iter = &source_row[old_px..(old_px + CN)];
421 for (dst, src) in dst.iter_mut().zip(src_iter.iter()) {
422 *dst = *src;
423 }
424 }
425 EdgeMode::Reflect => {
426 let old_x = reflect_index(x as isize - pad_w as isize, image_size.width as isize);
427 let old_px = old_x * CN;
428 let src_iter = &source_row[old_px..(old_px + CN)];
429 for (dst, src) in dst.iter_mut().zip(src_iter.iter()) {
430 *dst = *src;
431 }
432 }
433 EdgeMode::Reflect101 => {
434 let old_x =
435 reflect_index_101(x as isize - pad_w as isize, image_size.width as isize);
436 let old_px = old_x * CN;
437 let src_iter = &source_row[old_px..(old_px + CN)];
438 for (dst, src) in dst.iter_mut().zip(src_iter.iter()) {
439 *dst = *src;
440 }
441 }
442 EdgeMode::Constant => {
443 for (i, dst) in dst.iter_mut().enumerate() {
444 *dst = scalar[i].cast_();
445 }
446 }
447 }
448 }
449
450 for (x, dst) in
451 (image_size.width..(image_size.width + pad_w)).zip(row.chunks_exact_mut(CN).rev())
452 {
453 match border_mode {
454 EdgeMode::Clamp => {
455 let old_x = x.max(0).min(image_size.width - 1);
456 let old_px = old_x * CN;
457 let src_iter = &source_row[old_px..(old_px + CN)];
458 for (dst, src) in dst.iter_mut().zip(src_iter.iter()) {
459 *dst = *src;
460 }
461 }
462 EdgeMode::Wrap => {
463 let old_x = (x as i64).rem_euclid(image_size.width as i64) as usize;
464 let old_px = old_x * CN;
465 let src_iter = &source_row[old_px..(old_px + CN)];
466 for (dst, src) in dst.iter_mut().zip(src_iter.iter()) {
467 *dst = *src;
468 }
469 }
470 EdgeMode::Reflect => {
471 let old_x = reflect_index(x as isize, image_size.width as isize);
472 let old_px = old_x * CN;
473 let src_iter = &source_row[old_px..(old_px + CN)];
474 for (dst, src) in dst.iter_mut().zip(src_iter.iter()) {
475 *dst = *src;
476 }
477 }
478 EdgeMode::Reflect101 => {
479 let old_x = reflect_index_101(x as isize, image_size.width as isize);
480 let old_px = old_x * CN;
481 let src_iter = &source_row[old_px..(old_px + CN)];
482 for (dst, src) in dst.iter_mut().zip(src_iter.iter()) {
483 *dst = *src;
484 }
485 }
486 EdgeMode::Constant => {
487 for (i, dst) in dst.iter_mut().enumerate() {
488 *dst = scalar[i].cast_();
489 }
490 }
491 }
492 }
493
494 Ok(())
495}
496
/// The rows of border pixels that go above and below an image.
///
/// `make_arena_columns` produces one of these instead of a fully padded copy
/// of the image.
#[derive(Clone)]
pub struct ArenaColumns<T>
where
    T: Copy,
{
    /// Border rows placed above the image.
    pub top_pad: Vec<T>,
    /// Border rows placed below the image.
    pub bottom_pad: Vec<T>,
}

impl<T> ArenaColumns<T>
where
    T: Copy,
{
    /// Wraps the two already-filled border buffers.
    pub fn new(top_pad: Vec<T>, bottom_pad: Vec<T>) -> ArenaColumns<T> {
        Self {
            bottom_pad,
            top_pad,
        }
    }
}
517
518pub(crate) fn make_arena_columns<T, const CN: usize>(
520 image: &[T],
521 image_size: ImageSize,
522 kernel_size: KernelShape,
523 border_mode: EdgeMode,
524 scalar_projection: [T; CN],
525) -> Result<ArenaColumns<T>, BlurError>
526where
527 T: Default + Copy + Send + Sync + 'static,
528{
529 #[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "avx"))]
530 {
531 if std::arch::is_x86_feature_detected!("avx2") {
532 return unsafe {
533 mac_avx2::<T, CN>(
534 image,
535 image_size,
536 kernel_size,
537 border_mode,
538 scalar_projection,
539 )
540 };
541 }
542 }
543 #[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "sse"))]
544 {
545 if std::arch::is_x86_feature_detected!("sse4.1") {
546 return unsafe {
547 mac_sse_4_1::<T, CN>(
548 image,
549 image_size,
550 kernel_size,
551 border_mode,
552 scalar_projection,
553 )
554 };
555 }
556 }
557 make_arena_columns_exec::<T, CN>(
558 image,
559 image_size,
560 kernel_size,
561 border_mode,
562 scalar_projection,
563 )
564}
565
/// AVX2-enabled entry point for building the top/bottom border rows.
///
/// Contains no AVX2-specific code of its own: it forwards every argument to
/// the `#[inline(always)]` `make_arena_columns_exec`, so that body is
/// compiled with AVX2 enabled when inlined here.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2.
#[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "avx"))]
#[target_feature(enable = "avx2")]
unsafe fn mac_avx2<T, const CN: usize>(
    image: &[T],
    image_size: ImageSize,
    kernel_size: KernelShape,
    border_mode: EdgeMode,
    scalar_projection: [T; CN],
) -> Result<ArenaColumns<T>, BlurError>
where
    T: Default + Copy + Send + Sync + 'static,
{
    make_arena_columns_exec::<T, CN>(image, image_size, kernel_size, border_mode, scalar_projection)
}
586
/// SSE4.1-enabled entry point for building the top/bottom border rows.
///
/// Contains no SSE-specific code of its own: it forwards every argument to
/// the `#[inline(always)]` `make_arena_columns_exec`, so that body is
/// compiled with SSE4.1 enabled when inlined here.
///
/// # Safety
///
/// The caller must ensure the running CPU supports SSE4.1.
#[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "sse"))]
#[target_feature(enable = "sse4.1")]
unsafe fn mac_sse_4_1<T, const CN: usize>(
    image: &[T],
    image_size: ImageSize,
    kernel_size: KernelShape,
    border_mode: EdgeMode,
    scalar_projection: [T; CN],
) -> Result<ArenaColumns<T>, BlurError>
where
    T: Default + Copy + Send + Sync + 'static,
{
    make_arena_columns_exec::<T, CN>(image, image_size, kernel_size, border_mode, scalar_projection)
}
607
608#[inline(always)]
610fn make_arena_columns_exec<T, const CN: usize>(
611 image: &[T],
612 image_size: ImageSize,
613 kernel_size: KernelShape,
614 border_mode: EdgeMode,
615 scalar_projection: [T; CN],
616) -> Result<ArenaColumns<T>, BlurError>
617where
618 T: Default + Copy + Send + Sync + 'static,
619{
620 check_slice_size(
621 image,
622 image_size.width * CN,
623 image_size.width,
624 image_size.height,
625 CN,
626 )?;
627 let pad_h = kernel_size.height / 2;
628
629 let mut top_pad = vec![T::default(); pad_h * image_size.width * CN];
630 let mut bottom_pad = vec![T::default(); pad_h * image_size.width * CN];
631
632 let top_pad_stride = image_size.width * CN;
633
634 for (ky, dst) in (0..pad_h).zip(top_pad.chunks_exact_mut(top_pad_stride)) {
635 for (kx, dst) in (0..image_size.width).zip(dst.chunks_exact_mut(CN)) {
636 match border_mode {
637 EdgeMode::Clamp => {
638 let y = ky.saturating_sub(pad_h).min(image_size.height - 1);
639 let v_src = y * top_pad_stride + kx * CN;
640
641 let src_iter = &image[v_src..(v_src + CN)];
642 for (dst, src) in dst.iter_mut().zip(src_iter.iter()) {
643 *dst = *src;
644 }
645 }
646 EdgeMode::Wrap => {
647 let y =
648 (ky as i64 - pad_h as i64).rem_euclid(image_size.height as i64) as usize;
649 let v_src = y * top_pad_stride + kx * CN;
650 let src_iter = &image[v_src..(v_src + CN)];
651 for (dst, src) in dst.iter_mut().zip(src_iter.iter()) {
652 *dst = *src;
653 }
654 }
655 EdgeMode::Reflect => {
656 let y = reflect_index(ky as isize - pad_h as isize, image_size.height as isize);
657 let v_src = y * top_pad_stride + kx * CN;
658 let src_iter = &image[v_src..(v_src + CN)];
659 for (dst, src) in dst.iter_mut().zip(src_iter.iter()) {
660 *dst = *src;
661 }
662 }
663 EdgeMode::Reflect101 => {
664 let y =
665 reflect_index_101(ky as isize - pad_h as isize, image_size.height as isize);
666 let v_src = y * top_pad_stride + kx * CN;
667 let src_iter = &image[v_src..(v_src + CN)];
668 for (dst, src) in dst.iter_mut().zip(src_iter.iter()) {
669 *dst = *src;
670 }
671 }
672 EdgeMode::Constant => {
673 for (i, dst) in dst.iter_mut().enumerate() {
674 *dst = scalar_projection[i];
675 }
676 }
677 }
678 }
679 }
680
681 let bottom_iter_dst = bottom_pad.chunks_exact_mut(top_pad_stride);
682
683 for (ky, dst) in (0..pad_h).zip(bottom_iter_dst) {
684 for (kx, dst) in (0..image_size.width).zip(dst.chunks_exact_mut(CN)) {
685 match border_mode {
686 EdgeMode::Clamp => {
687 let y = (ky + image_size.height).min(image_size.height - 1);
688 let v_src = y * top_pad_stride + kx * CN;
689 let src_iter = &image[v_src..(v_src + CN)];
690 for (dst, src) in dst.iter_mut().zip(src_iter.iter()) {
691 *dst = *src;
692 }
693 }
694 EdgeMode::Wrap => {
695 let y = (ky as i64 + image_size.height as i64)
696 .rem_euclid(image_size.height as i64) as usize;
697 let v_src = y * top_pad_stride + kx * CN;
698 let src_iter = &image[v_src..(v_src + CN)];
699 for (dst, src) in dst.iter_mut().zip(src_iter.iter()) {
700 *dst = *src;
701 }
702 }
703 EdgeMode::Reflect => {
704 let y = reflect_index(
705 ky as isize + image_size.height as isize,
706 image_size.height as isize,
707 );
708 let v_src = y * top_pad_stride + kx * CN;
709 let src_iter = &image[v_src..(v_src + CN)];
710 for (dst, src) in dst.iter_mut().zip(src_iter.iter()) {
711 *dst = *src;
712 }
713 }
714 EdgeMode::Reflect101 => {
715 let y = reflect_index_101(
716 ky as isize + image_size.height as isize,
717 image_size.height as isize,
718 );
719 let v_src = y * top_pad_stride + kx * CN;
720 let src_iter = &image[v_src..(v_src + CN)];
721 for (dst, src) in dst.iter_mut().zip(src_iter.iter()) {
722 *dst = *src;
723 }
724 }
725 EdgeMode::Constant => {
726 for (i, dst) in dst.iter_mut().enumerate() {
727 *dst = scalar_projection[i];
728 }
729 }
730 }
731 }
732 }
733
734 Ok(ArenaColumns::new(top_pad, bottom_pad))
735}