1#![allow(unsafe_code, clippy::cast_sign_loss)]
11use crate::config::DetectorConfig;
12use crate::image::ImageView;
13use bumpalo::Bump;
14use bumpalo::collections::Vec as BumpVec;
15use multiversion::multiversion;
16use rayon::prelude::*;
17
18#[derive(Clone, Copy, Debug, Default)]
20pub struct TileStats {
21 pub min: u8,
23 pub max: u8,
25}
26
27pub struct ThresholdEngine {
29 pub tile_size: usize,
31 pub min_range: u8,
33}
34
35impl Default for ThresholdEngine {
36 fn default() -> Self {
37 Self::new()
38 }
39}
40
41impl ThresholdEngine {
42 #[must_use]
44 pub fn new() -> Self {
45 Self {
46 tile_size: 8, min_range: 10, }
49 }
50
51 #[must_use]
53 pub fn from_config(config: &DetectorConfig) -> Self {
54 Self {
55 tile_size: config.threshold_tile_size,
56 min_range: config.threshold_min_range,
57 }
58 }
59
60 #[must_use]
63 #[tracing::instrument(skip_all, name = "pipeline::threshold_compute_stats")]
64 pub fn compute_tile_stats<'a>(
65 &self,
66 arena: &'a Bump,
67 img: &ImageView,
68 ) -> BumpVec<'a, TileStats> {
69 let ts = self.tile_size;
70 let tiles_wide = img.width / ts;
71 let tiles_high = img.height / ts;
72 let mut stats = BumpVec::with_capacity_in(tiles_wide * tiles_high, arena);
73 stats.resize(tiles_wide * tiles_high, TileStats { min: 255, max: 0 });
74
75 stats
76 .par_chunks_mut(tiles_wide)
77 .enumerate()
78 .for_each(|(ty, stats_row)| {
79 for dy in 0..ts {
82 let py = ty * ts + dy;
83 let src_row = img.get_row(py);
84
85 compute_row_tile_stats_simd(src_row, stats_row, ts);
87 }
88 });
89 stats
90 }
91
92 #[allow(clippy::too_many_lines, clippy::needless_range_loop)]
95 #[allow(dead_code)]
96 pub fn apply_threshold(
97 &self,
98 arena: &Bump,
99 img: &ImageView,
100 stats: &[TileStats],
101 output: &mut [u8],
102 ) {
103 let ts = self.tile_size;
104 let tiles_wide = img.width / ts;
105 let tiles_high = img.height / ts;
106
107 let mut tile_thresholds = BumpVec::with_capacity_in(tiles_wide * tiles_high, arena);
108 tile_thresholds.resize(tiles_wide * tiles_high, 0u8);
109 let mut tile_valid = BumpVec::with_capacity_in(tiles_wide * tiles_high, arena);
110 tile_valid.resize(tiles_wide * tiles_high, 0u8);
111
112 tile_thresholds
113 .par_chunks_mut(tiles_wide)
114 .enumerate()
115 .for_each(|(ty, t_row)| {
116 let y_start = ty.saturating_sub(1);
117 let y_end = (ty + 1).min(tiles_high - 1);
118
119 for tx in 0..tiles_wide {
120 let mut nmin = 255u8;
121 let mut nmax = 0u8;
122
123 let x_start = tx.saturating_sub(1);
124 let x_end = (tx + 1).min(tiles_wide - 1);
125
126 for ny in y_start..=y_end {
127 let row_off = ny * tiles_wide;
128 for nx in x_start..=x_end {
129 let s = stats[row_off + nx];
130 if s.min < nmin {
131 nmin = s.min;
132 }
133 if s.max > nmax {
134 nmax = s.max;
135 }
136 }
137 }
138
139 let t_idx = tx;
140 let res = ((u16::from(nmin) + u16::from(nmax)) >> 1) as u8;
141
142 t_row[t_idx] = res;
145 }
146 });
147
148 for ty in 0..tiles_high {
150 for tx in 0..tiles_wide {
151 let mut nmin = 255;
152 let mut nmax = 0;
153 let y_start = ty.saturating_sub(1);
154 let y_end = (ty + 1).min(tiles_high - 1);
155 let x_start = tx.saturating_sub(1);
156 let x_end = (tx + 1).min(tiles_wide - 1);
157
158 for ny in y_start..=y_end {
159 let row_off = ny * tiles_wide;
160 for nx in x_start..=x_end {
161 let s = stats[row_off + nx];
162 if s.min < nmin {
163 nmin = s.min;
164 }
165 if s.max > nmax {
166 nmax = s.max;
167 }
168 }
169 }
170 let idx = ty * tiles_wide + tx;
171 tile_valid[idx] = if nmax.saturating_sub(nmin) < self.min_range {
172 0
173 } else {
174 255
175 };
176 }
177 }
178
179 for _ in 0..2 {
183 for ty in 0..tiles_high {
185 for tx in 0..tiles_wide {
186 let idx = ty * tiles_wide + tx;
187 if tile_valid[idx] == 0 {
188 let mut sum_thresh = 0u32;
189 let mut count = 0u32;
190
191 let y_start = ty.saturating_sub(1);
192 let y_end = (ty + 1).min(tiles_high - 1);
193 let x_start = tx.saturating_sub(1);
194 let x_end = (tx + 1).min(tiles_wide - 1);
195
196 for ny in y_start..=y_end {
197 let row_off = ny * tiles_wide;
198 for nx in x_start..=x_end {
199 let n_idx = row_off + nx;
200 if tile_valid[n_idx] > 0 {
201 sum_thresh += u32::from(tile_thresholds[n_idx]);
202 count += 1;
203 }
204 }
205 }
206
207 if count > 0 {
208 tile_thresholds[idx] = (sum_thresh / count) as u8;
209 tile_valid[idx] = 128; }
211 }
212 }
213 }
214 }
215
216 let thresholds_slice = tile_thresholds.as_slice();
217 let valid_slice = tile_valid.as_slice();
218
219 output
220 .par_chunks_mut(ts * img.width)
221 .enumerate()
222 .for_each_init(
223 || (vec![0u8; img.width], vec![0u8; img.width]),
224 |(row_thresholds, row_valid), (ty, output_tile_rows)| {
225 if ty >= tiles_high {
226 return;
227 }
228 row_thresholds.fill(0);
250 row_valid.fill(0);
251
252 for tx in 0..tiles_wide {
254 let idx = ty * tiles_wide + tx;
255 let thresh = thresholds_slice[idx];
256 let valid = valid_slice[idx];
257 for i in 0..ts {
258 row_thresholds[tx * ts + i] = thresh;
259 row_valid[tx * ts + i] = valid;
260 }
261 }
262
263 for dy in 0..ts {
264 let py = ty * ts + dy;
265 let src_row = img.get_row(py);
266 let dst_row = &mut output_tile_rows[dy * img.width..(dy + 1) * img.width];
267
268 threshold_row_simd(src_row, dst_row, row_thresholds, row_valid);
269 }
270 },
271 );
272 }
273
274 #[allow(clippy::needless_range_loop)]
279 #[tracing::instrument(skip_all, name = "pipeline::threshold_apply_map")]
280 pub fn apply_threshold_with_map(
281 &self,
282 arena: &Bump,
283 img: &ImageView,
284 stats: &[TileStats],
285 binary_output: &mut [u8],
286 threshold_output: &mut [u8],
287 ) {
288 let ts = self.tile_size;
289 let tiles_wide = img.width / ts;
290 let tiles_high = img.height / ts;
291
292 let mut tile_thresholds = BumpVec::with_capacity_in(tiles_wide * tiles_high, arena);
293 tile_thresholds.resize(tiles_wide * tiles_high, 0u8);
294 let mut tile_valid = BumpVec::with_capacity_in(tiles_wide * tiles_high, arena);
295 tile_valid.resize(tiles_wide * tiles_high, 0u8);
296
297 tile_thresholds
298 .par_chunks_mut(tiles_wide)
299 .enumerate()
300 .for_each(|(ty, t_row)| {
301 let y_start = ty.saturating_sub(1);
302 let y_end = (ty + 1).min(tiles_high - 1);
303
304 for tx in 0..tiles_wide {
305 let mut nmin = 255u8;
306 let mut nmax = 0u8;
307
308 let x_start = tx.saturating_sub(1);
309 let x_end = (tx + 1).min(tiles_wide - 1);
310
311 for ny in y_start..=y_end {
312 let row_off = ny * tiles_wide;
313 for nx in x_start..=x_end {
314 let s = stats[row_off + nx];
315 if s.min < nmin {
316 nmin = s.min;
317 }
318 if s.max > nmax {
319 nmax = s.max;
320 }
321 }
322 }
323
324 t_row[tx] = ((u16::from(nmin) + u16::from(nmax)) >> 1) as u8;
325 }
326 });
327
328 for ty in 0..tiles_high {
330 for tx in 0..tiles_wide {
331 let mut nmin = 255;
332 let mut nmax = 0;
333 let y_start = ty.saturating_sub(1);
334 let y_end = (ty + 1).min(tiles_high - 1);
335 let x_start = tx.saturating_sub(1);
336 let x_end = (tx + 1).min(tiles_wide - 1);
337
338 for ny in y_start..=y_end {
339 let row_off = ny * tiles_wide;
340 for nx in x_start..=x_end {
341 let s = stats[row_off + nx];
342 if s.min < nmin {
343 nmin = s.min;
344 }
345 if s.max > nmax {
346 nmax = s.max;
347 }
348 }
349 }
350 let idx = ty * tiles_wide + tx;
351 tile_valid[idx] = if nmax.saturating_sub(nmin) < self.min_range {
352 0
353 } else {
354 255
355 };
356 }
357 }
358
359 let thresholds_slice = tile_thresholds.as_slice();
397 let valid_slice = tile_valid.as_slice();
398
399 binary_output
400 .par_chunks_mut(ts * img.width)
401 .enumerate()
402 .for_each_init(
403 || (vec![0u8; img.width], vec![0u8; img.width]),
404 |(row_thresholds, row_valid), (ty, bin_tile_rows)| {
405 if ty >= tiles_high {
406 return;
407 }
408 let thresh_tile_rows = unsafe {
410 let ptr = threshold_output.as_ptr().cast_mut();
411 std::slice::from_raw_parts_mut(ptr.add(ty * ts * img.width), ts * img.width)
412 };
413
414 row_thresholds.fill(0);
415 row_valid.fill(0);
416
417 for tx in 0..tiles_wide {
418 let idx = ty * tiles_wide + tx;
419 let thresh = thresholds_slice[idx];
420 let valid = valid_slice[idx];
421 for i in 0..ts {
422 row_thresholds[tx * ts + i] = thresh;
423 row_valid[tx * ts + i] = valid;
424 }
425 }
426
427 for dy in 0..ts {
428 let py = ty * ts + dy;
429 let src_row = img.get_row(py);
430
431 let bin_row = &mut bin_tile_rows[dy * img.width..(dy + 1) * img.width];
433 threshold_row_simd(src_row, bin_row, row_thresholds, row_valid);
434
435 thresh_tile_rows[dy * img.width..(dy + 1) * img.width]
437 .copy_from_slice(row_thresholds);
438 }
439 },
440 );
441 }
442}
443
444#[multiversion(targets(
445 "x86_64+avx2+bmi1+bmi2+popcnt+lzcnt",
446 "x86_64+avx512f+avx512bw+avx512dq+avx512vl",
447 "aarch64+neon"
448))]
449fn compute_row_tile_stats_simd(src_row: &[u8], stats: &mut [TileStats], tile_size: usize) {
450 let chunks = src_row.chunks_exact(tile_size);
451 for (chunk, stat) in chunks.zip(stats.iter_mut()) {
452 let mut rmin = stat.min;
453 let mut rmax = stat.max;
454
455 for &p in chunk {
457 rmin = rmin.min(p);
458 rmax = rmax.max(p);
459 }
460
461 stat.min = rmin;
462 stat.max = rmax;
463 }
464}
465
466#[multiversion(targets(
467 "x86_64+avx2+bmi1+bmi2+popcnt+lzcnt",
468 "x86_64+avx512f+avx512bw+avx512dq+avx512vl",
469 "aarch64+neon"
470))]
471fn threshold_row_simd(src: &[u8], dst: &mut [u8], thresholds: &[u8], valid_mask: &[u8]) {
472 let len = src.len();
473 let src_chunks = src.chunks_exact(16);
475 let dst_chunks = dst.chunks_exact_mut(16);
476 let thresh_chunks = thresholds.chunks_exact(16);
477 let mask_chunks = valid_mask.chunks_exact(16);
478
479 for (((s_c, d_c), t_c), m_c) in src_chunks
480 .zip(dst_chunks)
481 .zip(thresh_chunks)
482 .zip(mask_chunks)
483 {
484 for i in 0..16 {
485 let s = s_c[i];
486 let t = t_c[i];
487 let v = m_c[i];
488 d_c[i] = if v > 0 {
490 if s < t { 0 } else { 255 }
491 } else {
492 255
493 };
494 }
495 }
496
497 let processed = (len / 16) * 16;
499 for i in processed..len {
500 let s = src[i];
501 let t = thresholds[i];
502 let v = valid_mask[i];
503 dst[i] = if v > 0 {
504 if s < t { 0 } else { 255 }
505 } else {
506 255
507 };
508 }
509}
510
511#[cfg(test)]
512#[allow(
513 clippy::unwrap_used,
514 clippy::naive_bytecount,
515 clippy::cast_possible_truncation,
516 clippy::cast_sign_loss
517)]
518mod tests {
519 use super::*;
520 use proptest::prelude::*;
521
522 proptest! {
523 #[test]
524 fn test_threshold_invariants(data in prop::collection::vec(0..=255u8, 16)) {
525 let mut min = 255u8;
526 let mut max = 0u8;
527 for &b in &data {
528 if b < min { min = b; }
529 if b > max { max = b; }
530 }
531 let (rmin, rmax) = compute_min_max_simd(&data);
532 assert_eq!(rmin, min);
533 assert_eq!(rmax, max);
534 }
535
536 #[test]
537 fn test_binarization_invariants(src in prop::collection::vec(0..=255u8, 16), thresh in 0..=255u8) {
538 let mut dst = vec![0u8; 16];
539 let valid = vec![255u8; 16];
540 threshold_row_simd(&src, &mut dst, &[thresh; 16], &valid);
541
542 for (i, &s) in src.iter().enumerate() {
543 if s >= thresh {
544 assert_eq!(dst[i], 255);
545 } else {
546 assert_eq!(dst[i], 0);
547 }
548 }
549
550 let invalid = vec![0u8; 16];
552 threshold_row_simd(&src, &mut dst, &[thresh; 16], &invalid);
553 for d in dst {
554 assert_eq!(d, 255);
555 }
556 }
557 }
558
559 #[test]
560 fn test_threshold_engine_e2e() {
561 let width = 16;
562 let height = 16;
563 let mut data = vec![128u8; width * height];
564 for y in 4..12 {
566 for x in 4..12 {
567 data[y * width + x] = 50;
568 }
569 }
570 for y in 0..height {
571 for x in 0..width {
572 if !(2..=14).contains(&x) || !(2..=14).contains(&y) {
573 data[y * width + x] = 200;
574 }
575 }
576 }
577
578 let img = ImageView::new(&data, width, height, width).unwrap();
579 let engine = ThresholdEngine::new();
580 let arena = Bump::new();
581 let stats = engine.compute_tile_stats(&arena, &img);
582 let mut output = vec![0u8; width * height];
583 engine.apply_threshold(&arena, &img, &stats, &mut output);
584
585 assert_eq!(output[8 * width + 8], 0);
587 assert_eq!(output[width + 1], 255);
589 }
590
591 #[test]
592 fn test_threshold_with_decimation() {
593 let width = 32;
594 let height = 32;
595 let mut data = vec![200u8; width * height];
596 for y in 8..24 {
598 for x in 8..24 {
599 data[y * width + x] = 50;
600 }
601 }
602
603 let img = ImageView::new(&data, width, height, width).unwrap();
604
605 let mut decimated_data = vec![0u8; 16 * 16];
607 let decimated_img = img
608 .decimate_to(2, &mut decimated_data)
609 .expect("decimation failed");
610
611 assert_eq!(decimated_img.width, 16);
612 assert_eq!(decimated_img.height, 16);
613 let arena = Bump::new();
614 let engine = ThresholdEngine::new();
615 let stats = engine.compute_tile_stats(&arena, &decimated_img);
616 let mut output = vec![0u8; 16 * 16];
617 engine.apply_threshold(&arena, &decimated_img, &stats, &mut output);
618
619 assert_eq!(output[4 * 16 + 4], 0);
621 assert_eq!(output[0], 255);
623 }
624
625 use crate::config::TagFamily;
630 use crate::test_utils::{
631 TestImageParams, generate_test_image_with_params, measure_border_integrity,
632 };
633
634 #[test]
637 fn test_threshold_preserves_tag_structure_at_varying_sizes() {
638 let canvas_size = 640;
639 let tag_sizes = [32, 48, 64, 100, 150, 200, 300];
641
642 for tag_size in tag_sizes {
643 let params = TestImageParams {
644 family: TagFamily::AprilTag36h11,
645 id: 0,
646 tag_size,
647 canvas_size,
648 ..Default::default()
649 };
650
651 let (data, corners) = generate_test_image_with_params(¶ms);
652 let img = ImageView::new(&data, canvas_size, canvas_size, canvas_size).unwrap();
653
654 let engine = ThresholdEngine::new();
655 let arena = Bump::new();
656 let stats = engine.compute_tile_stats(&arena, &img);
657 let mut binary = vec![0u8; canvas_size * canvas_size];
658 engine.apply_threshold(&arena, &img, &stats, &mut binary);
659
660 let integrity = measure_border_integrity(&binary, canvas_size, &corners);
661
662 assert!(
664 integrity > 0.5,
665 "Tag size {} failed: border integrity = {:.2}% (expected >50%)",
666 tag_size,
667 integrity * 100.0
668 );
669
670 println!(
671 "Tag size {:>3}px: border integrity = {:.1}%",
672 tag_size,
673 integrity * 100.0
674 );
675 }
676 }
677
678 #[test]
680 fn test_threshold_robustness_brightness_contrast() {
681 let canvas_size = 320;
682 let tag_size = 120;
683 let brightness_offsets = [-50, -25, 0, 25, 50];
684 let contrast_scales = [0.50, 0.75, 1.0, 1.25, 1.50];
685
686 for &brightness in &brightness_offsets {
687 for &contrast in &contrast_scales {
688 let params = TestImageParams {
689 family: TagFamily::AprilTag36h11,
690 id: 0,
691 tag_size,
692 canvas_size,
693 brightness_offset: brightness,
694 contrast_scale: contrast,
695 ..Default::default()
696 };
697
698 let (data, corners) = generate_test_image_with_params(¶ms);
699 let img = ImageView::new(&data, canvas_size, canvas_size, canvas_size).unwrap();
700
701 let engine = ThresholdEngine::new();
702 let arena = Bump::new();
703 let stats = engine.compute_tile_stats(&arena, &img);
704 let mut binary = vec![0u8; canvas_size * canvas_size];
705 engine.apply_threshold(&arena, &img, &stats, &mut binary);
706
707 let integrity = measure_border_integrity(&binary, canvas_size, &corners);
708
709 let is_moderate = brightness.abs() <= 25 && contrast >= 0.75;
711 if is_moderate {
712 assert!(
713 integrity > 0.4,
714 "Brightness {}, Contrast {:.2}: integrity {:.1}% too low",
715 brightness,
716 contrast,
717 integrity * 100.0
718 );
719 }
720
721 println!(
722 "Brightness {:>3}, Contrast {:.2}: integrity = {:.1}%",
723 brightness,
724 contrast,
725 integrity * 100.0
726 );
727 }
728 }
729 }
730
731 #[test]
733 fn test_threshold_robustness_noise() {
734 let canvas_size = 320;
735 let tag_size = 120;
736 let noise_levels = [0.0, 5.0, 10.0, 15.0, 20.0, 30.0];
737
738 for &noise_sigma in &noise_levels {
739 let params = TestImageParams {
740 family: TagFamily::AprilTag36h11,
741 id: 0,
742 tag_size,
743 canvas_size,
744 noise_sigma,
745 ..Default::default()
746 };
747
748 let (data, corners) = generate_test_image_with_params(¶ms);
749 let img = ImageView::new(&data, canvas_size, canvas_size, canvas_size).unwrap();
750
751 let engine = ThresholdEngine::new();
752 let arena = Bump::new();
753 let stats = engine.compute_tile_stats(&arena, &img);
754 let mut binary = vec![0u8; canvas_size * canvas_size];
755 engine.apply_threshold(&arena, &img, &stats, &mut binary);
756
757 let integrity = measure_border_integrity(&binary, canvas_size, &corners);
758
759 if noise_sigma <= 15.0 {
761 assert!(
762 integrity > 0.45,
763 "Noise σ={:.1}: integrity {:.1}% too low",
764 noise_sigma,
765 integrity * 100.0
766 );
767 }
768
769 println!(
770 "Noise σ={:>4.1}: integrity = {:.1}%",
771 noise_sigma,
772 integrity * 100.0
773 );
774 }
775 }
776
777 proptest! {
778 #[test]
781 fn test_threshold_combined_conditions_no_panic(
782 tag_size in 32_usize..200, brightness in -40_i16..40,
784 contrast in 0.6_f32..1.4,
785 noise in 0.0_f32..20.0
786 ) {
787 let canvas_size = 320;
788
789 if tag_size >= canvas_size - 40 {
791 return Ok(());
792 }
793
794 let params = TestImageParams {
795 family: TagFamily::AprilTag36h11,
796 id: 0,
797 tag_size,
798 canvas_size,
799 noise_sigma: noise,
800 brightness_offset: brightness,
801 contrast_scale: contrast,
802 };
803
804 let (data, _corners) = generate_test_image_with_params(¶ms);
805 let img = ImageView::new(&data, canvas_size, canvas_size, canvas_size).unwrap();
806
807 let engine = ThresholdEngine::new();
808 let arena = Bump::new();
809 let stats = engine.compute_tile_stats(&arena, &img);
810 let mut binary = vec![0u8; canvas_size * canvas_size];
811
812 engine.apply_threshold(&arena, &img, &stats, &mut binary);
814
815 let black_count = binary.iter().filter(|&&p| p == 0).count();
817 let white_count = binary.iter().filter(|&&p| p == 255).count();
818
819 prop_assert!(black_count + white_count == binary.len(),
821 "Binary output contains non-binary values");
822
823 prop_assert!(black_count > 0, "No black pixels in output");
825 prop_assert!(white_count > 0, "No white pixels in output");
826 }
827 }
828}
829
830#[multiversion(targets = "simd")]
831fn compute_min_max_simd(data: &[u8]) -> (u8, u8) {
832 let mut min = 255u8;
833 let mut max = 0u8;
834 for &b in data {
835 min = min.min(b);
836 max = max.max(b);
837 }
838 (min, max)
839}
840
841#[allow(clippy::needless_range_loop, clippy::items_after_statements)]
857#[allow(dead_code)]
858pub fn compute_integral_image(img: &ImageView, integral: &mut [u64]) {
859 let w = img.width;
860 let h = img.height;
861 let stride = w + 1;
862
863 for x in 0..stride {
865 integral[x] = 0;
866 }
867
868 integral
873 .par_chunks_exact_mut(stride)
874 .enumerate()
875 .skip(1)
876 .for_each(|(y_idx, row)| {
877 let y = y_idx - 1;
878 let src_row = img.get_row(y);
879 let mut sum = 0u64;
880 for x in 0..w {
882 sum += u64::from(src_row[x]);
883 row[x + 1] = sum;
884 }
885 });
886
887 const BLOCK_SIZE: usize = 128;
890 let num_blocks = stride.div_ceil(BLOCK_SIZE);
891
892 (0..num_blocks).into_par_iter().for_each(|b| {
893 let start_x = b * BLOCK_SIZE;
894 let end_x = (start_x + BLOCK_SIZE).min(stride);
895
896 let mut col_sums = [0u64; BLOCK_SIZE];
900
901 unsafe {
903 let base_ptr = integral.as_ptr().cast_mut();
904 for y in 1..=h {
905 let row_ptr = base_ptr.add(y * stride + start_x);
906 for (i, val) in col_sums.iter_mut().enumerate().take(end_x - start_x) {
907 let old_val = *row_ptr.add(i);
908 let new_sum = old_val + *val;
909 *row_ptr.add(i) = new_sum;
910 *val = new_sum;
911 }
912 }
913 }
914 });
915}
916
917#[multiversion(targets = "simd")]
921#[multiversion(targets(
925 "x86_64+avx2+bmi1+bmi2+popcnt+lzcnt",
926 "x86_64+avx512f+avx512bw+avx512dq+avx512vl",
927 "aarch64+neon"
928))]
929#[tracing::instrument(skip_all, name = "pipeline::threshold_integral")]
930pub fn adaptive_threshold_integral(
931 img: &ImageView,
932 integral: &[u64],
933 output: &mut [u8],
934 radius: usize,
935 c: i16,
936) {
937 let w = img.width;
938 let h = img.height;
939 let stride = w + 1;
940
941 let side = (2 * radius + 1) as u32;
945 let area = side * side;
946 let inv_area_fixed = ((1u64 << 31) / u64::from(area)) as u32;
947
948 (0..h).into_par_iter().for_each(|y| {
949 let y_offset = y * w;
950 let src_row = img.get_row(y);
951
952 let dst_row = unsafe {
954 let ptr = output.as_ptr().cast_mut();
955 std::slice::from_raw_parts_mut(ptr.add(y_offset), w)
956 };
957
958 let y0 = y.saturating_sub(radius);
959 let y1 = (y + radius + 1).min(h);
960
961 let x_start = radius;
963 let x_end = w.saturating_sub(radius + 1);
964
965 for x in 0..x_start.min(w) {
967 let x0 = 0; let x1 = (x + radius + 1).min(w);
969 let actual_area = (x1 - x0) * (y1 - y0);
970
971 let i00 = integral[y0 * stride + x0];
972 let i01 = integral[y0 * stride + x1];
973 let i10 = integral[y1 * stride + x0];
974 let i11 = integral[y1 * stride + x1];
975
976 let sum = (i11 + i00) - (i01 + i10);
977 let mean = (sum / actual_area as u64) as i16;
978 let threshold = (mean - c).max(0) as u8;
979 dst_row[x] = if src_row[x] < threshold { 0 } else { 255 };
980 }
981
982 if x_end > x_start && y >= radius && y + radius < h {
984 let row00 = &integral[y0 * stride + (x_start - radius)..];
985 let row01 = &integral[y0 * stride + (x_start + radius + 1)..];
986 let row10 = &integral[y1 * stride + (x_start - radius)..];
987 let row11 = &integral[y1 * stride + (x_start + radius + 1)..];
988
989 let interior_src = &src_row[x_start..x_end];
990 let interior_dst = &mut dst_row[x_start..x_end];
991
992 for i in 0..(x_end - x_start) {
993 let sum = (row11[i] + row00[i]) - (row01[i] + row10[i]);
994 let mean = ((sum * u64::from(inv_area_fixed)) >> 31) as i16;
996 let threshold = (mean - c).max(0) as u8;
997 interior_dst[i] = if interior_src[i] < threshold { 0 } else { 255 };
998 }
999 } else if x_end > x_start {
1000 for x in x_start..x_end {
1002 let x0 = x - radius;
1003 let x1 = x + radius + 1;
1004 let actual_area = (x1 - x0) * (y1 - y0);
1005
1006 let i00 = integral[y0 * stride + x0];
1007 let i01 = integral[y0 * stride + x1];
1008 let i10 = integral[y1 * stride + x0];
1009 let i11 = integral[y1 * stride + x1];
1010
1011 let sum = (i11 + i00) - (i01 + i10);
1012 let mean = (sum / actual_area as u64) as i16;
1013 let threshold = (mean - c).max(0) as u8;
1014 dst_row[x] = if src_row[x] < threshold { 0 } else { 255 };
1015 }
1016 }
1017
1018 for x in x_end.max(x_start)..w {
1020 let x0 = x.saturating_sub(radius);
1021 let x1 = w; let actual_area = (x1 - x0) * (y1 - y0);
1023
1024 let i00 = integral[y0 * stride + x0];
1025 let i01 = integral[y0 * stride + x1];
1026 let i10 = integral[y1 * stride + x0];
1027 let i11 = integral[y1 * stride + x1];
1028
1029 let sum = (i11 + i00) - (i01 + i10);
1030 let mean = (sum / actual_area as u64) as i16;
1031 let threshold = (mean - c).max(0) as u8;
1032 dst_row[x] = if src_row[x] < threshold { 0 } else { 255 };
1033 }
1034 });
1035}
1036
1037#[allow(dead_code)]
1044pub(crate) fn apply_adaptive_threshold_fast(img: &ImageView, output: &mut [u8]) {
1045 apply_adaptive_threshold_with_params(img, output, 6, 3);
1047}
1048
1049#[allow(dead_code)]
1051pub(crate) fn apply_adaptive_threshold_with_params(
1052 img: &ImageView,
1053 output: &mut [u8],
1054 radius: usize,
1055 c: i16,
1056) {
1057 let mut integral = vec![0u64; (img.width + 1) * (img.height + 1)];
1058 compute_integral_image(img, &mut integral);
1059 adaptive_threshold_integral(img, &integral, output, radius, c);
1060}
1061
1062#[allow(clippy::too_many_arguments)]
1066#[multiversion(targets(
1067 "x86_64+avx2+bmi1+bmi2+popcnt+lzcnt",
1068 "x86_64+avx512f+avx512bw+avx512dq+avx512vl",
1069 "aarch64+neon"
1070))]
1071#[tracing::instrument(skip_all, name = "pipeline::threshold_gradient_window")]
1072pub fn adaptive_threshold_gradient_window(
1073 img: &ImageView,
1074 gradient_map: &[u8],
1075 integral: &[u64],
1076 output: &mut [u8],
1077 min_radius: usize,
1078 max_radius: usize,
1079 gradient_threshold: u8,
1080 c: i16,
1081) {
1082 let w = img.width;
1083 let h = img.height;
1084 let stride = w + 1;
1085
1086 let mut radius_lut = [0usize; 256];
1088 let mut inv_area_lut = [0u32; 256];
1089 let grad_thresh_f32 = f32::from(gradient_threshold);
1090
1091 for g in 0..256 {
1092 let r = if g as u8 >= gradient_threshold {
1093 min_radius
1094 } else {
1095 let t = g as f32 / grad_thresh_f32;
1096 let r = max_radius as f32 * (1.0 - t) + min_radius as f32 * t;
1097 r as usize
1098 };
1099 radius_lut[g] = r;
1100 let side = (2 * r + 1) as u32;
1101 let area = side * side;
1102 inv_area_lut[g] = ((1u64 << 31) / u64::from(area)) as u32;
1103 }
1104
1105 (0..h).into_par_iter().for_each(|y| {
1108 let y_offset = y * w;
1109 let src_row = img.get_row(y);
1110
1111 let dst_row = unsafe {
1113 let ptr = output.as_ptr().cast_mut();
1114 std::slice::from_raw_parts_mut(ptr.add(y_offset), w)
1115 };
1116
1117 for x in 0..w {
1118 let grad = gradient_map[y_offset + x];
1119 let radius = radius_lut[grad as usize];
1120
1121 let y0 = y.saturating_sub(radius);
1122 let y1 = (y + radius + 1).min(h);
1123 let x0 = x.saturating_sub(radius);
1124 let x1 = (x + radius + 1).min(w);
1125
1126 let i00 = integral[y0 * stride + x0];
1127 let i01 = integral[y0 * stride + x1];
1128 let i10 = integral[y1 * stride + x0];
1129 let i11 = integral[y1 * stride + x1];
1130
1131 let sum = (i11 + i00) - (i01 + i10);
1132
1133 let mean = if x >= radius && x + radius < w && y >= radius && y + radius < h {
1135 ((sum * u64::from(inv_area_lut[grad as usize])) >> 31) as i16
1136 } else {
1137 let actual_area = (x1 - x0) * (y1 - y0);
1138 (sum / actual_area as u64) as i16
1139 };
1140
1141 let threshold = (mean - c).max(0) as u8;
1142 dst_row[x] = if src_row[x] < threshold { 0 } else { 255 };
1143 }
1144 });
1145}
1146
1147#[multiversion(targets(
1151 "x86_64+avx2+bmi1+bmi2+popcnt+lzcnt",
1152 "x86_64+avx512f+avx512bw+avx512dq+avx512vl",
1153 "aarch64+neon"
1154))]
1155#[allow(clippy::cast_sign_loss, clippy::needless_range_loop)]
1156pub(crate) fn compute_threshold_map(
1157 img: &ImageView,
1158 integral: &[u64],
1159 output: &mut [u8],
1160 radius: usize,
1161 c: i16,
1162) {
1163 let w = img.width;
1164 let h = img.height;
1165 let stride = w + 1;
1166
1167 let side = (2 * radius + 1) as u32;
1171 let area = side * side;
1172 let inv_area_fixed = ((1u64 << 31) / u64::from(area)) as u32;
1173
1174 (0..h).into_par_iter().for_each(|y| {
1175 let y_offset = y * w;
1176
1177 let dst_row = unsafe {
1179 let ptr = output.as_ptr().cast_mut();
1180 std::slice::from_raw_parts_mut(ptr.add(y_offset), w)
1181 };
1182
1183 let y0 = y.saturating_sub(radius);
1184 let y1 = (y + radius + 1).min(h);
1185
1186 let x_start = radius;
1187 let x_end = w.saturating_sub(radius + 1);
1188
1189 for x in 0..x_start.min(w) {
1191 let x0 = 0;
1192 let x1 = (x + radius + 1).min(w);
1193 let actual_area = (x1 - x0) * (y1 - y0);
1194 let sum = (integral[y1 * stride + x1] + integral[y0 * stride + x0])
1195 - (integral[y0 * stride + x1] + integral[y1 * stride + x0]);
1196 let mean = (sum / actual_area as u64) as i16;
1197 dst_row[x] = (mean - c).clamp(0, 255) as u8;
1198 }
1199
1200 if x_end > x_start && y >= radius && y + radius < h {
1202 let row00 = &integral[y0 * stride + (x_start - radius)..];
1203 let row01 = &integral[y0 * stride + (x_start + radius + 1)..];
1204 let row10 = &integral[y1 * stride + (x_start - radius)..];
1205 let row11 = &integral[y1 * stride + (x_start + radius + 1)..];
1206
1207 let interior_dst = &mut dst_row[x_start..x_end];
1208
1209 for i in 0..(x_end - x_start) {
1210 let sum = (row11[i] + row00[i]) - (row01[i] + row10[i]);
1211 let mean = ((sum * u64::from(inv_area_fixed)) >> 31) as i16;
1212 interior_dst[i] = (mean - c).clamp(0, 255) as u8;
1213 }
1214 }
1215
1216 for x in x_end.max(x_start)..w {
1218 let x0 = x.saturating_sub(radius);
1219 let x1 = w;
1220 let actual_area = (x1 - x0) * (y1 - y0);
1221 let sum = (integral[y1 * stride + x1] + integral[y0 * stride + x0])
1222 - (integral[y0 * stride + x1] + integral[y1 * stride + x0]);
1223 let mean = (sum / actual_area as u64) as i16;
1224 dst_row[x] = (mean - c).clamp(0, 255) as u8;
1225 }
1226 });
1227}