1#![allow(unsafe_code, clippy::cast_sign_loss)]
11use crate::config::DetectorConfig;
12use crate::image::ImageView;
13use bumpalo::Bump;
14use bumpalo::collections::Vec as BumpVec;
15use multiversion::multiversion;
16use rayon::prelude::*;
17
18#[derive(Clone, Copy, Debug, Default)]
20pub struct TileStats {
21 pub min: u8,
23 pub max: u8,
25}
26
27pub struct ThresholdEngine {
29 pub tile_size: usize,
31 pub min_range: u8,
33}
34
35impl Default for ThresholdEngine {
36 fn default() -> Self {
37 Self::new()
38 }
39}
40
41impl ThresholdEngine {
42 #[must_use]
44 pub fn new() -> Self {
45 Self {
46 tile_size: 4, min_range: 5, }
49 }
50
51 #[must_use]
53 pub fn from_config(config: &DetectorConfig) -> Self {
54 Self {
55 tile_size: config.threshold_tile_size,
56 min_range: config.threshold_min_range,
57 }
58 }
59
60 #[must_use]
63 pub fn compute_tile_stats<'a>(
64 &self,
65 arena: &'a Bump,
66 img: &ImageView,
67 ) -> BumpVec<'a, TileStats> {
68 let ts = self.tile_size;
69 let tiles_wide = img.width / ts;
70 let tiles_high = img.height / ts;
71 let mut stats = BumpVec::with_capacity_in(tiles_wide * tiles_high, arena);
72 stats.resize(tiles_wide * tiles_high, TileStats { min: 255, max: 0 });
73
74 stats
75 .par_chunks_mut(tiles_wide)
76 .enumerate()
77 .for_each(|(ty, stats_row)| {
78 for dy in 0..ts {
81 let py = ty * ts + dy;
82 let src_row = img.get_row(py);
83
84 compute_row_tile_stats_simd(src_row, stats_row, ts);
86 }
87 });
88 stats
89 }
90
91 #[allow(clippy::too_many_lines, clippy::needless_range_loop)]
94 pub fn apply_threshold(
95 &self,
96 arena: &Bump,
97 img: &ImageView,
98 stats: &[TileStats],
99 output: &mut [u8],
100 ) {
101 let ts = self.tile_size;
102 let tiles_wide = img.width / ts;
103 let tiles_high = img.height / ts;
104
105 let mut tile_thresholds = BumpVec::with_capacity_in(tiles_wide * tiles_high, arena);
106 tile_thresholds.resize(tiles_wide * tiles_high, 0u8);
107 let mut tile_valid = BumpVec::with_capacity_in(tiles_wide * tiles_high, arena);
108 tile_valid.resize(tiles_wide * tiles_high, 0u8);
109
110 tile_thresholds
111 .par_chunks_mut(tiles_wide)
112 .enumerate()
113 .for_each(|(ty, t_row)| {
114 let y_start = ty.saturating_sub(1);
115 let y_end = (ty + 1).min(tiles_high - 1);
116
117 for tx in 0..tiles_wide {
118 let mut nmin = 255u8;
119 let mut nmax = 0u8;
120
121 let x_start = tx.saturating_sub(1);
122 let x_end = (tx + 1).min(tiles_wide - 1);
123
124 for ny in y_start..=y_end {
125 let row_off = ny * tiles_wide;
126 for nx in x_start..=x_end {
127 let s = stats[row_off + nx];
128 if s.min < nmin {
129 nmin = s.min;
130 }
131 if s.max > nmax {
132 nmax = s.max;
133 }
134 }
135 }
136
137 let t_idx = tx;
138 let res = ((u16::from(nmin) + u16::from(nmax)) >> 1) as u8;
139
140 t_row[t_idx] = res;
143 }
144 });
145
146 for ty in 0..tiles_high {
148 for tx in 0..tiles_wide {
149 let mut nmin = 255;
150 let mut nmax = 0;
151 let y_start = ty.saturating_sub(1);
152 let y_end = (ty + 1).min(tiles_high - 1);
153 let x_start = tx.saturating_sub(1);
154 let x_end = (tx + 1).min(tiles_wide - 1);
155
156 for ny in y_start..=y_end {
157 let row_off = ny * tiles_wide;
158 for nx in x_start..=x_end {
159 let s = stats[row_off + nx];
160 if s.min < nmin {
161 nmin = s.min;
162 }
163 if s.max > nmax {
164 nmax = s.max;
165 }
166 }
167 }
168 let idx = ty * tiles_wide + tx;
169 tile_valid[idx] = if nmax.saturating_sub(nmin) < self.min_range {
170 0
171 } else {
172 255
173 };
174 }
175 }
176
177 for _ in 0..2 {
181 for ty in 0..tiles_high {
183 for tx in 0..tiles_wide {
184 let idx = ty * tiles_wide + tx;
185 if tile_valid[idx] == 0 {
186 let mut sum_thresh = 0u32;
187 let mut count = 0u32;
188
189 let y_start = ty.saturating_sub(1);
190 let y_end = (ty + 1).min(tiles_high - 1);
191 let x_start = tx.saturating_sub(1);
192 let x_end = (tx + 1).min(tiles_wide - 1);
193
194 for ny in y_start..=y_end {
195 let row_off = ny * tiles_wide;
196 for nx in x_start..=x_end {
197 let n_idx = row_off + nx;
198 if tile_valid[n_idx] > 0 {
199 sum_thresh += u32::from(tile_thresholds[n_idx]);
200 count += 1;
201 }
202 }
203 }
204
205 if count > 0 {
206 tile_thresholds[idx] = (sum_thresh / count) as u8;
207 tile_valid[idx] = 128; }
209 }
210 }
211 }
212 }
213
214 let thresholds_slice = tile_thresholds.as_slice();
215 let valid_slice = tile_valid.as_slice();
216
217 output
218 .par_chunks_mut(ts * img.width)
219 .enumerate()
220 .for_each_init(
221 || (vec![0u8; img.width], vec![0u8; img.width]),
222 |(row_thresholds, row_valid), (ty, output_tile_rows)| {
223 if ty >= tiles_high {
224 return;
225 }
226 row_thresholds.fill(0);
248 row_valid.fill(0);
249
250 for tx in 0..tiles_wide {
252 let idx = ty * tiles_wide + tx;
253 let thresh = thresholds_slice[idx];
254 let valid = valid_slice[idx];
255 for i in 0..ts {
256 row_thresholds[tx * ts + i] = thresh;
257 row_valid[tx * ts + i] = valid;
258 }
259 }
260
261 for dy in 0..ts {
262 let py = ty * ts + dy;
263 let src_row = img.get_row(py);
264 let dst_row = &mut output_tile_rows[dy * img.width..(dy + 1) * img.width];
265
266 threshold_row_simd(src_row, dst_row, row_thresholds, row_valid);
267 }
268 },
269 );
270 }
271
272 #[allow(clippy::needless_range_loop)]
277 pub fn apply_threshold_with_map(
278 &self,
279 arena: &Bump,
280 img: &ImageView,
281 stats: &[TileStats],
282 binary_output: &mut [u8],
283 threshold_output: &mut [u8],
284 ) {
285 let ts = self.tile_size;
286 let tiles_wide = img.width / ts;
287 let tiles_high = img.height / ts;
288
289 let mut tile_thresholds = BumpVec::with_capacity_in(tiles_wide * tiles_high, arena);
290 tile_thresholds.resize(tiles_wide * tiles_high, 0u8);
291 let mut tile_valid = BumpVec::with_capacity_in(tiles_wide * tiles_high, arena);
292 tile_valid.resize(tiles_wide * tiles_high, 0u8);
293
294 tile_thresholds
295 .par_chunks_mut(tiles_wide)
296 .enumerate()
297 .for_each(|(ty, t_row)| {
298 let y_start = ty.saturating_sub(1);
299 let y_end = (ty + 1).min(tiles_high - 1);
300
301 for tx in 0..tiles_wide {
302 let mut nmin = 255u8;
303 let mut nmax = 0u8;
304
305 let x_start = tx.saturating_sub(1);
306 let x_end = (tx + 1).min(tiles_wide - 1);
307
308 for ny in y_start..=y_end {
309 let row_off = ny * tiles_wide;
310 for nx in x_start..=x_end {
311 let s = stats[row_off + nx];
312 if s.min < nmin {
313 nmin = s.min;
314 }
315 if s.max > nmax {
316 nmax = s.max;
317 }
318 }
319 }
320
321 t_row[tx] = ((u16::from(nmin) + u16::from(nmax)) >> 1) as u8;
322 }
323 });
324
325 for ty in 0..tiles_high {
327 for tx in 0..tiles_wide {
328 let mut nmin = 255;
329 let mut nmax = 0;
330 let y_start = ty.saturating_sub(1);
331 let y_end = (ty + 1).min(tiles_high - 1);
332 let x_start = tx.saturating_sub(1);
333 let x_end = (tx + 1).min(tiles_wide - 1);
334
335 for ny in y_start..=y_end {
336 let row_off = ny * tiles_wide;
337 for nx in x_start..=x_end {
338 let s = stats[row_off + nx];
339 if s.min < nmin {
340 nmin = s.min;
341 }
342 if s.max > nmax {
343 nmax = s.max;
344 }
345 }
346 }
347 let idx = ty * tiles_wide + tx;
348 tile_valid[idx] = if nmax.saturating_sub(nmin) < self.min_range {
349 0
350 } else {
351 255
352 };
353 }
354 }
355
356 let thresholds_slice = tile_thresholds.as_slice();
394 let valid_slice = tile_valid.as_slice();
395
396 binary_output
397 .par_chunks_mut(ts * img.width)
398 .enumerate()
399 .for_each_init(
400 || (vec![0u8; img.width], vec![0u8; img.width]),
401 |(row_thresholds, row_valid), (ty, bin_tile_rows)| {
402 if ty >= tiles_high {
403 return;
404 }
405 let thresh_tile_rows = unsafe {
407 let ptr = threshold_output.as_ptr().cast_mut();
408 std::slice::from_raw_parts_mut(ptr.add(ty * ts * img.width), ts * img.width)
409 };
410
411 row_thresholds.fill(0);
412 row_valid.fill(0);
413
414 for tx in 0..tiles_wide {
415 let idx = ty * tiles_wide + tx;
416 let thresh = thresholds_slice[idx];
417 let valid = valid_slice[idx];
418 for i in 0..ts {
419 row_thresholds[tx * ts + i] = thresh;
420 row_valid[tx * ts + i] = valid;
421 }
422 }
423
424 for dy in 0..ts {
425 let py = ty * ts + dy;
426 let src_row = img.get_row(py);
427
428 let bin_row = &mut bin_tile_rows[dy * img.width..(dy + 1) * img.width];
430 threshold_row_simd(src_row, bin_row, row_thresholds, row_valid);
431
432 thresh_tile_rows[dy * img.width..(dy + 1) * img.width]
434 .copy_from_slice(row_thresholds);
435 }
436 },
437 );
438 }
439}
440
441#[multiversion(targets(
442 "x86_64+avx2+bmi1+bmi2+popcnt+lzcnt",
443 "x86_64+avx512f+avx512bw+avx512dq+avx512vl",
444 "aarch64+neon"
445))]
446fn compute_row_tile_stats_simd(src_row: &[u8], stats: &mut [TileStats], tile_size: usize) {
447 let chunks = src_row.chunks_exact(tile_size);
448 for (chunk, stat) in chunks.zip(stats.iter_mut()) {
449 let mut rmin = stat.min;
450 let mut rmax = stat.max;
451
452 for &p in chunk {
454 rmin = rmin.min(p);
455 rmax = rmax.max(p);
456 }
457
458 stat.min = rmin;
459 stat.max = rmax;
460 }
461}
462
463#[multiversion(targets(
464 "x86_64+avx2+bmi1+bmi2+popcnt+lzcnt",
465 "x86_64+avx512f+avx512bw+avx512dq+avx512vl",
466 "aarch64+neon"
467))]
468fn threshold_row_simd(src: &[u8], dst: &mut [u8], thresholds: &[u8], valid_mask: &[u8]) {
469 let len = src.len();
470 let src_chunks = src.chunks_exact(16);
472 let dst_chunks = dst.chunks_exact_mut(16);
473 let thresh_chunks = thresholds.chunks_exact(16);
474 let mask_chunks = valid_mask.chunks_exact(16);
475
476 for (((s_c, d_c), t_c), m_c) in src_chunks
477 .zip(dst_chunks)
478 .zip(thresh_chunks)
479 .zip(mask_chunks)
480 {
481 for i in 0..16 {
482 let s = s_c[i];
483 let t = t_c[i];
484 let v = m_c[i];
485 d_c[i] = if v > 0 {
487 if s < t { 0 } else { 255 }
488 } else {
489 255
490 };
491 }
492 }
493
494 let processed = (len / 16) * 16;
496 for i in processed..len {
497 let s = src[i];
498 let t = thresholds[i];
499 let v = valid_mask[i];
500 dst[i] = if v > 0 {
501 if s < t { 0 } else { 255 }
502 } else {
503 255
504 };
505 }
506}
507
508#[cfg(test)]
509#[allow(
510 clippy::unwrap_used,
511 clippy::naive_bytecount,
512 clippy::cast_possible_truncation,
513 clippy::cast_sign_loss
514)]
515mod tests {
516 use super::*;
517 use proptest::prelude::*;
518
519 proptest! {
520 #[test]
521 fn test_threshold_invariants(data in prop::collection::vec(0..=255u8, 16)) {
522 let mut min = 255u8;
523 let mut max = 0u8;
524 for &b in &data {
525 if b < min { min = b; }
526 if b > max { max = b; }
527 }
528 let (rmin, rmax) = compute_min_max_simd(&data);
529 assert_eq!(rmin, min);
530 assert_eq!(rmax, max);
531 }
532
533 #[test]
534 fn test_binarization_invariants(src in prop::collection::vec(0..=255u8, 16), thresh in 0..=255u8) {
535 let mut dst = vec![0u8; 16];
536 let valid = vec![255u8; 16];
537 threshold_row_simd(&src, &mut dst, &[thresh; 16], &valid);
538
539 for (i, &s) in src.iter().enumerate() {
540 if s >= thresh {
541 assert_eq!(dst[i], 255);
542 } else {
543 assert_eq!(dst[i], 0);
544 }
545 }
546
547 let invalid = vec![0u8; 16];
549 threshold_row_simd(&src, &mut dst, &[thresh; 16], &invalid);
550 for d in dst {
551 assert_eq!(d, 255);
552 }
553 }
554 }
555
556 #[test]
557 fn test_threshold_engine_e2e() {
558 let width = 16;
559 let height = 16;
560 let mut data = vec![128u8; width * height];
561 for y in 4..12 {
563 for x in 4..12 {
564 data[y * width + x] = 50;
565 }
566 }
567 for y in 0..height {
568 for x in 0..width {
569 if !(2..=14).contains(&x) || !(2..=14).contains(&y) {
570 data[y * width + x] = 200;
571 }
572 }
573 }
574
575 let img = ImageView::new(&data, width, height, width).unwrap();
576 let engine = ThresholdEngine::new();
577 let arena = Bump::new();
578 let stats = engine.compute_tile_stats(&arena, &img);
579 let mut output = vec![0u8; width * height];
580 engine.apply_threshold(&arena, &img, &stats, &mut output);
581
582 assert_eq!(output[8 * width + 8], 0);
584 assert_eq!(output[width + 1], 255);
586 }
587
588 #[test]
589 fn test_threshold_with_decimation() {
590 let width = 32;
591 let height = 32;
592 let mut data = vec![200u8; width * height];
593 for y in 8..24 {
595 for x in 8..24 {
596 data[y * width + x] = 50;
597 }
598 }
599
600 let img = ImageView::new(&data, width, height, width).unwrap();
601
602 let mut decimated_data = vec![0u8; 16 * 16];
604 let decimated_img = img
605 .decimate_to(2, &mut decimated_data)
606 .expect("decimation failed");
607
608 assert_eq!(decimated_img.width, 16);
609 assert_eq!(decimated_img.height, 16);
610 let arena = Bump::new();
611 let engine = ThresholdEngine::new();
612 let stats = engine.compute_tile_stats(&arena, &decimated_img);
613 let mut output = vec![0u8; 16 * 16];
614 engine.apply_threshold(&arena, &decimated_img, &stats, &mut output);
615
616 assert_eq!(output[4 * 16 + 4], 0);
618 assert_eq!(output[0], 255);
620 }
621
622 use crate::config::TagFamily;
627 use crate::test_utils::{
628 TestImageParams, generate_test_image_with_params, measure_border_integrity,
629 };
630
631 #[test]
634 fn test_threshold_preserves_tag_structure_at_varying_sizes() {
635 let canvas_size = 640;
636 let tag_sizes = [32, 48, 64, 100, 150, 200, 300];
638
639 for tag_size in tag_sizes {
640 let params = TestImageParams {
641 family: TagFamily::AprilTag36h11,
642 id: 0,
643 tag_size,
644 canvas_size,
645 ..Default::default()
646 };
647
648 let (data, corners) = generate_test_image_with_params(¶ms);
649 let img = ImageView::new(&data, canvas_size, canvas_size, canvas_size).unwrap();
650
651 let engine = ThresholdEngine::new();
652 let arena = Bump::new();
653 let stats = engine.compute_tile_stats(&arena, &img);
654 let mut binary = vec![0u8; canvas_size * canvas_size];
655 engine.apply_threshold(&arena, &img, &stats, &mut binary);
656
657 let integrity = measure_border_integrity(&binary, canvas_size, &corners);
658
659 assert!(
661 integrity > 0.5,
662 "Tag size {} failed: border integrity = {:.2}% (expected >50%)",
663 tag_size,
664 integrity * 100.0
665 );
666
667 println!(
668 "Tag size {:>3}px: border integrity = {:.1}%",
669 tag_size,
670 integrity * 100.0
671 );
672 }
673 }
674
675 #[test]
677 fn test_threshold_robustness_brightness_contrast() {
678 let canvas_size = 320;
679 let tag_size = 120;
680 let brightness_offsets = [-50, -25, 0, 25, 50];
681 let contrast_scales = [0.50, 0.75, 1.0, 1.25, 1.50];
682
683 for &brightness in &brightness_offsets {
684 for &contrast in &contrast_scales {
685 let params = TestImageParams {
686 family: TagFamily::AprilTag36h11,
687 id: 0,
688 tag_size,
689 canvas_size,
690 brightness_offset: brightness,
691 contrast_scale: contrast,
692 ..Default::default()
693 };
694
695 let (data, corners) = generate_test_image_with_params(¶ms);
696 let img = ImageView::new(&data, canvas_size, canvas_size, canvas_size).unwrap();
697
698 let engine = ThresholdEngine::new();
699 let arena = Bump::new();
700 let stats = engine.compute_tile_stats(&arena, &img);
701 let mut binary = vec![0u8; canvas_size * canvas_size];
702 engine.apply_threshold(&arena, &img, &stats, &mut binary);
703
704 let integrity = measure_border_integrity(&binary, canvas_size, &corners);
705
706 let is_moderate = brightness.abs() <= 25 && contrast >= 0.75;
708 if is_moderate {
709 assert!(
710 integrity > 0.4,
711 "Brightness {}, Contrast {:.2}: integrity {:.1}% too low",
712 brightness,
713 contrast,
714 integrity * 100.0
715 );
716 }
717
718 println!(
719 "Brightness {:>3}, Contrast {:.2}: integrity = {:.1}%",
720 brightness,
721 contrast,
722 integrity * 100.0
723 );
724 }
725 }
726 }
727
728 #[test]
730 fn test_threshold_robustness_noise() {
731 let canvas_size = 320;
732 let tag_size = 120;
733 let noise_levels = [0.0, 5.0, 10.0, 15.0, 20.0, 30.0];
734
735 for &noise_sigma in &noise_levels {
736 let params = TestImageParams {
737 family: TagFamily::AprilTag36h11,
738 id: 0,
739 tag_size,
740 canvas_size,
741 noise_sigma,
742 ..Default::default()
743 };
744
745 let (data, corners) = generate_test_image_with_params(¶ms);
746 let img = ImageView::new(&data, canvas_size, canvas_size, canvas_size).unwrap();
747
748 let engine = ThresholdEngine::new();
749 let arena = Bump::new();
750 let stats = engine.compute_tile_stats(&arena, &img);
751 let mut binary = vec![0u8; canvas_size * canvas_size];
752 engine.apply_threshold(&arena, &img, &stats, &mut binary);
753
754 let integrity = measure_border_integrity(&binary, canvas_size, &corners);
755
756 if noise_sigma <= 15.0 {
758 assert!(
759 integrity > 0.45,
760 "Noise σ={:.1}: integrity {:.1}% too low",
761 noise_sigma,
762 integrity * 100.0
763 );
764 }
765
766 println!(
767 "Noise σ={:>4.1}: integrity = {:.1}%",
768 noise_sigma,
769 integrity * 100.0
770 );
771 }
772 }
773
774 proptest! {
775 #[test]
778 fn test_threshold_combined_conditions_no_panic(
779 tag_size in 32_usize..200, brightness in -40_i16..40,
781 contrast in 0.6_f32..1.4,
782 noise in 0.0_f32..20.0
783 ) {
784 let canvas_size = 320;
785
786 if tag_size >= canvas_size - 40 {
788 return Ok(());
789 }
790
791 let params = TestImageParams {
792 family: TagFamily::AprilTag36h11,
793 id: 0,
794 tag_size,
795 canvas_size,
796 noise_sigma: noise,
797 brightness_offset: brightness,
798 contrast_scale: contrast,
799 };
800
801 let (data, _corners) = generate_test_image_with_params(¶ms);
802 let img = ImageView::new(&data, canvas_size, canvas_size, canvas_size).unwrap();
803
804 let engine = ThresholdEngine::new();
805 let arena = Bump::new();
806 let stats = engine.compute_tile_stats(&arena, &img);
807 let mut binary = vec![0u8; canvas_size * canvas_size];
808
809 engine.apply_threshold(&arena, &img, &stats, &mut binary);
811
812 let black_count = binary.iter().filter(|&&p| p == 0).count();
814 let white_count = binary.iter().filter(|&&p| p == 255).count();
815
816 prop_assert!(black_count + white_count == binary.len(),
818 "Binary output contains non-binary values");
819
820 prop_assert!(black_count > 0, "No black pixels in output");
822 prop_assert!(white_count > 0, "No white pixels in output");
823 }
824 }
825}
826
827#[multiversion(targets = "simd")]
828fn compute_min_max_simd(data: &[u8]) -> (u8, u8) {
829 let mut min = 255u8;
830 let mut max = 0u8;
831 for &b in data {
832 min = min.min(b);
833 max = max.max(b);
834 }
835 (min, max)
836}
837
838#[allow(clippy::needless_range_loop, clippy::items_after_statements)]
854pub fn compute_integral_image(img: &ImageView, integral: &mut [u64]) {
855 let w = img.width;
856 let h = img.height;
857 let stride = w + 1;
858
859 for x in 0..stride {
861 integral[x] = 0;
862 }
863
864 integral
869 .par_chunks_exact_mut(stride)
870 .enumerate()
871 .skip(1)
872 .for_each(|(y_idx, row)| {
873 let y = y_idx - 1;
874 let src_row = img.get_row(y);
875 let mut sum = 0u64;
876 for x in 0..w {
878 sum += u64::from(src_row[x]);
879 row[x + 1] = sum;
880 }
881 });
882
883 const BLOCK_SIZE: usize = 128;
886 let num_blocks = stride.div_ceil(BLOCK_SIZE);
887
888 (0..num_blocks).into_par_iter().for_each(|b| {
889 let start_x = b * BLOCK_SIZE;
890 let end_x = (start_x + BLOCK_SIZE).min(stride);
891
892 let mut col_sums = [0u64; BLOCK_SIZE];
896
897 unsafe {
899 let base_ptr = integral.as_ptr().cast_mut();
900 for y in 1..=h {
901 let row_ptr = base_ptr.add(y * stride + start_x);
902 for (i, val) in col_sums.iter_mut().enumerate().take(end_x - start_x) {
903 let old_val = *row_ptr.add(i);
904 let new_sum = old_val + *val;
905 *row_ptr.add(i) = new_sum;
906 *val = new_sum;
907 }
908 }
909 }
910 });
911}
912
913#[multiversion(targets = "simd")]
917#[multiversion(targets(
921 "x86_64+avx2+bmi1+bmi2+popcnt+lzcnt",
922 "x86_64+avx512f+avx512bw+avx512dq+avx512vl",
923 "aarch64+neon"
924))]
925pub fn adaptive_threshold_integral(
926 img: &ImageView,
927 integral: &[u64],
928 output: &mut [u8],
929 radius: usize,
930 c: i16,
931) {
932 let w = img.width;
933 let h = img.height;
934 let stride = w + 1;
935
936 let side = (2 * radius + 1) as u32;
940 let area = side * side;
941 let inv_area_fixed = ((1u64 << 31) / u64::from(area)) as u32;
942
943 (0..h).into_par_iter().for_each(|y| {
944 let y_offset = y * w;
945 let src_row = img.get_row(y);
946
947 let dst_row = unsafe {
949 let ptr = output.as_ptr().cast_mut();
950 std::slice::from_raw_parts_mut(ptr.add(y_offset), w)
951 };
952
953 let y0 = y.saturating_sub(radius);
954 let y1 = (y + radius + 1).min(h);
955
956 let x_start = radius;
958 let x_end = w.saturating_sub(radius + 1);
959
960 for x in 0..x_start.min(w) {
962 let x0 = 0; let x1 = (x + radius + 1).min(w);
964 let actual_area = (x1 - x0) * (y1 - y0);
965
966 let i00 = integral[y0 * stride + x0];
967 let i01 = integral[y0 * stride + x1];
968 let i10 = integral[y1 * stride + x0];
969 let i11 = integral[y1 * stride + x1];
970
971 let sum = (i11 + i00) - (i01 + i10);
972 let mean = (sum / actual_area as u64) as i16;
973 let threshold = (mean - c).max(0) as u8;
974 dst_row[x] = if src_row[x] < threshold { 0 } else { 255 };
975 }
976
977 if x_end > x_start && y >= radius && y + radius < h {
979 let row00 = &integral[y0 * stride + (x_start - radius)..];
980 let row01 = &integral[y0 * stride + (x_start + radius + 1)..];
981 let row10 = &integral[y1 * stride + (x_start - radius)..];
982 let row11 = &integral[y1 * stride + (x_start + radius + 1)..];
983
984 let interior_src = &src_row[x_start..x_end];
985 let interior_dst = &mut dst_row[x_start..x_end];
986
987 for i in 0..(x_end - x_start) {
988 let sum = (row11[i] + row00[i]) - (row01[i] + row10[i]);
989 let mean = ((sum * u64::from(inv_area_fixed)) >> 31) as i16;
991 let threshold = (mean - c).max(0) as u8;
992 interior_dst[i] = if interior_src[i] < threshold { 0 } else { 255 };
993 }
994 } else if x_end > x_start {
995 for x in x_start..x_end {
997 let x0 = x - radius;
998 let x1 = x + radius + 1;
999 let actual_area = (x1 - x0) * (y1 - y0);
1000
1001 let i00 = integral[y0 * stride + x0];
1002 let i01 = integral[y0 * stride + x1];
1003 let i10 = integral[y1 * stride + x0];
1004 let i11 = integral[y1 * stride + x1];
1005
1006 let sum = (i11 + i00) - (i01 + i10);
1007 let mean = (sum / actual_area as u64) as i16;
1008 let threshold = (mean - c).max(0) as u8;
1009 dst_row[x] = if src_row[x] < threshold { 0 } else { 255 };
1010 }
1011 }
1012
1013 for x in x_end.max(x_start)..w {
1015 let x0 = x.saturating_sub(radius);
1016 let x1 = w; let actual_area = (x1 - x0) * (y1 - y0);
1018
1019 let i00 = integral[y0 * stride + x0];
1020 let i01 = integral[y0 * stride + x1];
1021 let i10 = integral[y1 * stride + x0];
1022 let i11 = integral[y1 * stride + x1];
1023
1024 let sum = (i11 + i00) - (i01 + i10);
1025 let mean = (sum / actual_area as u64) as i16;
1026 let threshold = (mean - c).max(0) as u8;
1027 dst_row[x] = if src_row[x] < threshold { 0 } else { 255 };
1028 }
1029 });
1030}
1031
1032pub fn apply_adaptive_threshold_fast(img: &ImageView, output: &mut [u8]) {
1039 apply_adaptive_threshold_with_params(img, output, 6, 3);
1041}
1042
1043pub fn apply_adaptive_threshold_with_params(
1045 img: &ImageView,
1046 output: &mut [u8],
1047 radius: usize,
1048 c: i16,
1049) {
1050 let mut integral = vec![0u64; (img.width + 1) * (img.height + 1)];
1051 compute_integral_image(img, &mut integral);
1052 adaptive_threshold_integral(img, &integral, output, radius, c);
1053}
1054
1055#[allow(clippy::too_many_arguments)]
1059#[multiversion(targets(
1060 "x86_64+avx2+bmi1+bmi2+popcnt+lzcnt",
1061 "x86_64+avx512f+avx512bw+avx512dq+avx512vl",
1062 "aarch64+neon"
1063))]
1064pub fn adaptive_threshold_gradient_window(
1065 img: &ImageView,
1066 gradient_map: &[u8],
1067 integral: &[u64],
1068 output: &mut [u8],
1069 min_radius: usize,
1070 max_radius: usize,
1071 gradient_threshold: u8,
1072 c: i16,
1073) {
1074 let w = img.width;
1075 let h = img.height;
1076 let stride = w + 1;
1077
1078 let mut radius_lut = [0usize; 256];
1080 let mut inv_area_lut = [0u32; 256];
1081 let grad_thresh_f32 = f32::from(gradient_threshold);
1082
1083 for g in 0..256 {
1084 let r = if g as u8 >= gradient_threshold {
1085 min_radius
1086 } else {
1087 let t = g as f32 / grad_thresh_f32;
1088 let r = max_radius as f32 * (1.0 - t) + min_radius as f32 * t;
1089 r as usize
1090 };
1091 radius_lut[g] = r;
1092 let side = (2 * r + 1) as u32;
1093 let area = side * side;
1094 inv_area_lut[g] = ((1u64 << 31) / u64::from(area)) as u32;
1095 }
1096
1097 (0..h).into_par_iter().for_each(|y| {
1100 let y_offset = y * w;
1101 let src_row = img.get_row(y);
1102
1103 let dst_row = unsafe {
1105 let ptr = output.as_ptr().cast_mut();
1106 std::slice::from_raw_parts_mut(ptr.add(y_offset), w)
1107 };
1108
1109 for x in 0..w {
1110 let grad = gradient_map[y_offset + x];
1111 let radius = radius_lut[grad as usize];
1112
1113 let y0 = y.saturating_sub(radius);
1114 let y1 = (y + radius + 1).min(h);
1115 let x0 = x.saturating_sub(radius);
1116 let x1 = (x + radius + 1).min(w);
1117
1118 let i00 = integral[y0 * stride + x0];
1119 let i01 = integral[y0 * stride + x1];
1120 let i10 = integral[y1 * stride + x0];
1121 let i11 = integral[y1 * stride + x1];
1122
1123 let sum = (i11 + i00) - (i01 + i10);
1124
1125 let mean = if x >= radius && x + radius < w && y >= radius && y + radius < h {
1127 ((sum * u64::from(inv_area_lut[grad as usize])) >> 31) as i16
1128 } else {
1129 let actual_area = (x1 - x0) * (y1 - y0);
1130 (sum / actual_area as u64) as i16
1131 };
1132
1133 let threshold = (mean - c).max(0) as u8;
1134 dst_row[x] = if src_row[x] < threshold { 0 } else { 255 };
1135 }
1136 });
1137}
1138
1139#[multiversion(targets(
1143 "x86_64+avx2+bmi1+bmi2+popcnt+lzcnt",
1144 "x86_64+avx512f+avx512bw+avx512dq+avx512vl",
1145 "aarch64+neon"
1146))]
1147#[allow(clippy::cast_sign_loss, clippy::needless_range_loop)]
1148pub fn compute_threshold_map(
1149 img: &ImageView,
1150 integral: &[u64],
1151 output: &mut [u8],
1152 radius: usize,
1153 c: i16,
1154) {
1155 let w = img.width;
1156 let h = img.height;
1157 let stride = w + 1;
1158
1159 let side = (2 * radius + 1) as u32;
1163 let area = side * side;
1164 let inv_area_fixed = ((1u64 << 31) / u64::from(area)) as u32;
1165
1166 (0..h).into_par_iter().for_each(|y| {
1167 let y_offset = y * w;
1168
1169 let dst_row = unsafe {
1171 let ptr = output.as_ptr().cast_mut();
1172 std::slice::from_raw_parts_mut(ptr.add(y_offset), w)
1173 };
1174
1175 let y0 = y.saturating_sub(radius);
1176 let y1 = (y + radius + 1).min(h);
1177
1178 let x_start = radius;
1179 let x_end = w.saturating_sub(radius + 1);
1180
1181 for x in 0..x_start.min(w) {
1183 let x0 = 0;
1184 let x1 = (x + radius + 1).min(w);
1185 let actual_area = (x1 - x0) * (y1 - y0);
1186 let sum = (integral[y1 * stride + x1] + integral[y0 * stride + x0])
1187 - (integral[y0 * stride + x1] + integral[y1 * stride + x0]);
1188 let mean = (sum / actual_area as u64) as i16;
1189 dst_row[x] = (mean - c).clamp(0, 255) as u8;
1190 }
1191
1192 if x_end > x_start && y >= radius && y + radius < h {
1194 let row00 = &integral[y0 * stride + (x_start - radius)..];
1195 let row01 = &integral[y0 * stride + (x_start + radius + 1)..];
1196 let row10 = &integral[y1 * stride + (x_start - radius)..];
1197 let row11 = &integral[y1 * stride + (x_start + radius + 1)..];
1198
1199 let interior_dst = &mut dst_row[x_start..x_end];
1200
1201 for i in 0..(x_end - x_start) {
1202 let sum = (row11[i] + row00[i]) - (row01[i] + row10[i]);
1203 let mean = ((sum * u64::from(inv_area_fixed)) >> 31) as i16;
1204 interior_dst[i] = (mean - c).clamp(0, 255) as u8;
1205 }
1206 }
1207
1208 for x in x_end.max(x_start)..w {
1210 let x0 = x.saturating_sub(radius);
1211 let x1 = w;
1212 let actual_area = (x1 - x0) * (y1 - y0);
1213 let sum = (integral[y1 * stride + x1] + integral[y0 * stride + x0])
1214 - (integral[y0 * stride + x1] + integral[y1 * stride + x0]);
1215 let mean = (sum / actual_area as u64) as i16;
1216 dst_row[x] = (mean - c).clamp(0, 255) as u8;
1217 }
1218 });
1219}