/// Scratch state for luma-image processing: histogram/CDF statistics plus
/// reusable buffers for the separable box filter.
#[derive(Debug, Clone)]
pub struct ARImageProcInfo {
    /// Image width in pixels.
    pub image_x: i32,
    /// Image height in pixels.
    pub image_y: i32,
    /// Box-filter output (one byte per pixel); allocated lazily on first use.
    pub image2: Option<Vec<u8>>,
    /// Intermediate buffer of horizontal box sums; allocated lazily on first use.
    pub image_temp_u16: Option<Vec<u16>>,
    /// Per-luma-value pixel counts (256 bins), filled by `luma_hist`.
    pub hist_bins: [u32; 256],
    /// Cumulative distribution derived from `hist_bins` by `luma_hist_and_cdf`.
    pub cdf_bins: [u32; 256],
    /// Lowest luma value present, set by `luma_hist_and_cdf_and_levels`.
    pub min: u8,
    /// Highest luma value present, set by `luma_hist_and_cdf_and_levels`.
    pub max: u8,
}
60
61impl ARImageProcInfo {
62 pub fn new(xsize: i32, ysize: i32) -> Self {
64 Self {
65 image_x: xsize,
66 image_y: ysize,
67 image2: None,
68 image_temp_u16: None,
69 hist_bins: [0; 256],
70 cdf_bins: [0; 256],
71 min: 0,
72 max: 0,
73 }
74 }
75
76 pub fn luma_hist(&mut self, data: &[u8]) -> Result<(), &'static str> {
78 let expected_size = (self.image_x * self.image_y) as usize;
79 if data.len() < expected_size {
80 return Err("Data array is smaller than the specified image dimensions");
81 }
82
83 self.hist_bins.fill(0);
84 for &pixel in data.iter().take(expected_size) {
85 self.hist_bins[pixel as usize] += 1;
86 }
87
88 Ok(())
89 }
90
91 pub fn luma_hist_and_cdf(&mut self, data: &[u8]) -> Result<(), &'static str> {
93 self.luma_hist(data)?;
94
95 let mut cdf_current = 0;
96 for i in 0..=255 {
97 self.cdf_bins[i] = cdf_current + self.hist_bins[i];
98 cdf_current = self.cdf_bins[i];
99 }
100
101 Ok(())
102 }
103
104 pub fn luma_hist_and_cdf_and_percentile(&mut self, data: &[u8], percentile: f32) -> Result<u8, &'static str> {
106 if !(0.0..=1.0).contains(&percentile) {
107 return Err("Percentile must be between 0.0 and 1.0");
108 }
109
110 self.luma_hist_and_cdf(data)?;
111
112 let count = (self.image_x * self.image_y) as f32;
113 let required_cd = (count * percentile) as u32;
114
115 let mut i = 0;
116 while i < 256 && self.cdf_bins[i] < required_cd {
117 i += 1;
118 }
119
120 let mut j = i;
121 while j < 256 && self.cdf_bins[j] == required_cd {
122 j += 1;
123 }
124
125 Ok(((i + j) / 2) as u8)
126 }
127
128 pub fn luma_hist_and_cdf_and_median(&mut self, data: &[u8]) -> Result<u8, &'static str> {
130 self.luma_hist_and_cdf_and_percentile(data, 0.5)
131 }
132
133 pub fn luma_hist_and_otsu(&mut self, data: &[u8]) -> Result<u8, &'static str> {
135 self.luma_hist(data)?;
136
137 let mut sum = 0.0;
138 for i in 1..=255 {
139 sum += (self.hist_bins[i] * i as u32) as f32;
140 }
141
142 let count = (self.image_x * self.image_y) as f32;
143 let mut sum_b = 0.0;
144 let mut w_b = 0.0;
145 let mut var_max = 0.0;
146 let mut threshold = 0;
147
148 for i in 0..=255 {
149 w_b += self.hist_bins[i] as f32;
150 if w_b == 0.0 { continue; }
151
152 let w_f = count - w_b;
153 if w_f == 0.0 { break; }
154
155 sum_b += (i as u32 * self.hist_bins[i]) as f32;
156
157 let m_b = sum_b / w_b;
158 let m_f = (sum - sum_b) / w_f;
159
160 let var_between = w_b * w_f * (m_b - m_f) * (m_b - m_f);
161
162 if var_between > var_max {
163 var_max = var_between;
164 threshold = i as u8;
165 }
166 }
167
168 Ok(threshold)
169 }
170
171 pub fn luma_hist_and_cdf_and_levels(&mut self, data: &[u8]) -> Result<(), &'static str> {
173 self.luma_hist_and_cdf(data)?;
174
175 let mut l = 0;
176 while l < 256 && self.cdf_bins[l] == 0 {
177 l += 1;
178 }
179 self.min = l as u8;
180
181 let max_cd = (self.image_x * self.image_y) as u32;
182 while l < 256 && self.cdf_bins[l] < max_cd {
183 l += 1;
184 }
185 self.max = l as u8;
186
187 Ok(())
188 }
189
190 pub fn luma_hist_and_box_filter_with_bias(&mut self, data: &[u8], box_size: i32, bias: i32) -> Result<(), &'static str> {
192 self.luma_hist(data)?;
193
194 let img_size = (self.image_x * self.image_y) as usize;
195 if self.image2.is_none() || self.image2.as_ref().unwrap().len() != img_size {
196 self.image2 = Some(vec![0; img_size]);
197 }
198 if self.image_temp_u16.is_none() || self.image_temp_u16.as_ref().unwrap().len() != img_size {
199 self.image_temp_u16 = Some(vec![0; img_size]);
200 }
201
202 let kernel_size_half = box_size / 2;
203 let image_temp_u16 = self.image_temp_u16.as_mut().unwrap();
204 let image2 = self.image2.as_mut().unwrap();
205
206 #[cfg(feature = "simd-image")]
208 {
209 #[cfg(all(target_arch = "x86_64", target_feature = "sse4.1"))]
210 {
211 if is_x86_feature_detected!("sse4.1") {
212 unsafe { box_filter_h_simd_x86(data, image_temp_u16, self.image_x, self.image_y, kernel_size_half); }
213 } else {
214 box_filter_h_scalar(data, image_temp_u16, self.image_x, self.image_y, kernel_size_half);
215 }
216 }
217 #[cfg(not(target_arch = "x86_64"))]
218 box_filter_h_scalar(data, image_temp_u16, self.image_x, self.image_y, kernel_size_half);
219 }
220 #[cfg(not(feature = "simd-image"))]
221 box_filter_h_scalar(data, image_temp_u16, self.image_x, self.image_y, kernel_size_half);
222
223 #[cfg(feature = "simd-image")]
225 {
226 #[cfg(all(target_arch = "x86_64", target_feature = "sse4.1"))]
227 {
228 if is_x86_feature_detected!("sse4.1") {
229 unsafe { box_filter_v_simd_x86(image_temp_u16, image2, self.image_x, self.image_y, kernel_size_half, bias); }
230 } else {
231 box_filter_v_scalar(image_temp_u16, image2, self.image_x, self.image_y, kernel_size_half, bias);
232 }
233 }
234 #[cfg(not(target_arch = "x86_64"))]
235 box_filter_v_scalar(image_temp_u16, image2, self.image_x, self.image_y, kernel_size_half, bias);
236 }
237 #[cfg(not(feature = "simd-image"))]
238 box_filter_v_scalar(image_temp_u16, image2, self.image_x, self.image_y, kernel_size_half, bias);
239
240 Ok(())
241 }
242}
243
/// Horizontal box-filter pass: for every pixel, writes the sum of the
/// (2*half + 1)-wide window centred on it into `image_temp_u16`.
/// Samples falling outside the row contribute zero (no edge clamping);
/// the vertical pass divides by the in-bounds pixel count to compensate.
pub fn box_filter_h_scalar(data: &[u8], image_temp_u16: &mut [u16], width: i32, height: i32, half: i32) {
    for row in 0..height {
        let base = (row * width) as usize;
        for col in 0..width {
            // Clamp the window to the row bounds instead of testing every tap.
            let lo = (col - half).max(0);
            let hi = (col + half).min(width - 1);
            let total: u16 = if lo <= hi {
                data[base + lo as usize..=base + hi as usize]
                    .iter()
                    .map(|&p| p as u16)
                    .sum()
            } else {
                // Degenerate (negative) kernel: window is empty.
                0
            };
            image_temp_u16[base + col as usize] = total;
        }
    }
}
259
/// Vertical box-filter pass: averages the horizontal sums produced by
/// `box_filter_h_scalar` over a (2*half + 1)-tall window, normalising by
/// the number of in-bounds pixels of the full 2-D window, then applies
/// `bias` and clamps the result into the u8 range.
pub fn box_filter_v_scalar(image_temp_u16: &[u16], image2: &mut [u8], width: i32, height: i32, half: i32, bias: i32) {
    for col in 0..width {
        for row in 0..height {
            // In-bounds extent of the 2-D window centred on (col, row).
            let top = (row - half).max(0);
            let bottom = (row + half).min(height - 1);
            let left = (col - half).max(0);
            let right = (col + half).min(width - 1);
            let count = ((bottom - top + 1) * (right - left + 1)) as u32;

            // Column sum over the already horizontally-summed rows.
            let total: u32 = (top..=bottom)
                .map(|r| image_temp_u16[(r * width + col) as usize] as u32)
                .sum();

            let value = (total / count) as i32 + bias;
            image2[(row * width + col) as usize] = value.clamp(0, 255) as u8;
        }
    }
}
285
286pub fn rgba_to_gray(rgba: &[u8]) -> Vec<u8> {
288 #[cfg(feature = "simd-image")]
289 {
290 #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
291 {
292 return unsafe { rgba_to_gray_simd_wasm(rgba) };
293 }
294 #[cfg(all(target_arch = "x86_64", target_feature = "sse4.1"))]
295 {
296 if is_x86_feature_detected!("sse4.1") {
297 return unsafe { rgba_to_gray_simd_x86(rgba) };
298 }
299 }
300 }
301
302 #[cfg(not(any(
303 all(target_arch = "wasm32", target_feature = "simd128"),
304 all(target_arch = "x86_64", target_feature = "sse4.1")
305 )))]
306 rgba_to_gray_scalar(rgba)
307}
308
/// Scalar RGBA8 -> 8-bit luma conversion using Rec.601-style weights.
/// The alpha channel is ignored; trailing bytes that do not form a
/// complete 4-byte pixel are dropped.
pub fn rgba_to_gray_scalar(rgba: &[u8]) -> Vec<u8> {
    rgba.chunks_exact(4)
        .map(|px| {
            let luma = 0.2989 * px[0] as f32 + 0.5870 * px[1] as f32 + 0.1140 * px[2] as f32;
            // +0.5 rounds to nearest before the truncating cast.
            (luma + 0.5) as u8
        })
        .collect()
}
319
320pub fn rgb_to_gray(rgb: &[u8]) -> Vec<u8> {
322 #[cfg(feature = "simd-image")]
323 {
324 }
327
328 rgb_to_gray_scalar(rgb)
329}
330
/// Scalar RGB8 -> 8-bit luma conversion using Rec.601-style weights.
/// Trailing bytes that do not form a complete 3-byte pixel are dropped.
fn rgb_to_gray_scalar(rgb: &[u8]) -> Vec<u8> {
    rgb.chunks_exact(3)
        .map(|px| {
            let (r, g, b) = (px[0] as f32, px[1] as f32, px[2] as f32);
            // +0.5 rounds to nearest before the truncating cast.
            (0.2989 * r + 0.5870 * g + 0.1140 * b + 0.5) as u8
        })
        .collect()
}
341
/// WASM SIMD128 RGBA -> luma conversion: four pixels per 16-byte vector
/// using fixed-point weights (`gray = (77r + 151g + 28b + 128) >> 8`),
/// with a scalar tail loop for the remaining pixels.
///
/// # Safety
/// Requires the `simd128` target feature (guaranteed by the `cfg` /
/// `target_feature` attributes on this item).
#[cfg(all(feature = "simd-image", target_arch = "wasm32", target_feature = "simd128"))]
#[target_feature(enable = "simd128")]
unsafe fn rgba_to_gray_simd_wasm(rgba: &[u8]) -> Vec<u8> {
    use std::arch::wasm32::*;

    let mut gray = Vec::with_capacity(rgba.len() / 4);
    let chunks_len = rgba.len() / 16;

    // Channel weights repeated per 16-bit lane: one dot product covers the
    // (r, g) pair, the next the (b, a) pair (alpha weighted 0).
    let coeffs = i16x8(77, 151, 28, 0, 77, 151, 28, 0);

    let mut rgba_ptr = rgba.as_ptr();

    for _ in 0..chunks_len {
        // Loop bound keeps every 16-byte load inside `rgba`.
        let v = v128_load(rgba_ptr as *const v128);

        // Widen the 16 u8 channels into two vectors of eight 16-bit lanes.
        let low = u16x8_extend_low_u8x16(v);
        let high = u16x8_extend_high_u8x16(v);

        // Pairwise dot products: even i32 lanes hold 77r + 151g,
        // odd lanes hold 28b + 0a.
        let dot_low = i32x4_dot_i16x8(low, coeffs);
        let dot_high = i32x4_dot_i16x8(high, coeffs);

        // Add each odd lane into its even neighbour so the even lanes carry
        // the complete 77r + 151g + 28b sum.
        let sum_low = i32x4_add(dot_low, i32x4_shuffle::<1, 1, 3, 3>(dot_low, dot_low));
        let sum_high = i32x4_add(dot_high, i32x4_shuffle::<1, 1, 3, 3>(dot_high, dot_high));

        // Round (+128) then rescale (>> 8), matching the scalar tail below.
        let round_v = i32x4_splat(128);
        let res_low = u32x4_shr(i32x4_add(sum_low, round_v), 8);
        let res_high = u32x4_shr(i32x4_add(sum_high, round_v), 8);

        // Gather the even lanes (0 and 2 of each vector) = the four lumas.
        let res = i32x4_shuffle::<0, 2, 4, 6>(res_low, res_high);

        let mut out = [0u32; 4];
        v128_store(out.as_mut_ptr() as *mut v128, res);
        gray.push(out[0] as u8);
        gray.push(out[1] as u8);
        gray.push(out[2] as u8);
        gray.push(out[3] as u8);

        rgba_ptr = rgba_ptr.add(16);
    }

    // Scalar tail for pixels that do not fill a whole 16-byte vector.
    let rem_start = chunks_len * 4;
    for chunk in rgba.chunks_exact(4).skip(rem_start) {
        let r = chunk[0] as i32;
        let g = chunk[1] as i32;
        let b = chunk[2] as i32;
        gray.push(((r * 77 + g * 151 + b * 28 + 128) >> 8) as u8);
    }

    gray
}
397
/// SSE4.1 RGBA -> luma conversion: four pixels per 16-byte vector using
/// fixed-point weights (`gray = (77r + 151g + 28b + 128) >> 8`), with a
/// scalar tail loop for the remaining pixels. Mirrors the WASM SIMD path.
///
/// # Safety
/// The caller must ensure the CPU supports SSE4.1 at runtime (the dispatch
/// sites in this file check `is_x86_feature_detected!("sse4.1")`).
#[cfg(all(feature = "simd-image", target_arch = "x86_64", target_feature = "sse4.1"))]
#[target_feature(enable = "sse4.1")]
pub unsafe fn rgba_to_gray_simd_x86(rgba: &[u8]) -> Vec<u8> {
    use std::arch::x86_64::*;

    let mut gray = Vec::with_capacity(rgba.len() / 4);
    let chunks_len = rgba.len() / 16;

    // Channel weights repeated per 16-bit lane so one madd covers the
    // (r, g) pair and the next the (b, a) pair (alpha weighted 0).
    let coeffs = _mm_setr_epi16(77, 151, 28, 0, 77, 151, 28, 0);
    // Rounding term applied before >> 8; without it the SIMD lanes disagreed
    // with this function's own scalar tail (and the WASM path) by one.
    let round = _mm_set1_epi32(128);

    let mut rgba_ptr = rgba.as_ptr();

    for _ in 0..chunks_len {
        // Loop bound keeps every 16-byte load inside `rgba`.
        let v = _mm_loadu_si128(rgba_ptr as *const __m128i);

        // Widen the 16 u8 channels into two vectors of eight 16-bit lanes.
        let low = _mm_cvtepu8_epi16(v);
        let high = _mm_cvtepu8_epi16(_mm_srli_si128(v, 8));

        // Pairwise dot products: even i32 lanes hold 77r + 151g,
        // odd lanes hold 28b + 0a.
        let dot_low = _mm_madd_epi16(low, coeffs);
        let dot_high = _mm_madd_epi16(high, coeffs);

        // 0xB1 swaps lane pairs; adding gives every lane of each pair the
        // complete 77r + 151g + 28b sum (even lane i and odd lane i+1 both
        // hold the luma of one pixel).
        let sum_low = _mm_add_epi32(dot_low, _mm_shuffle_epi32(dot_low, 0xB1));
        let sum_high = _mm_add_epi32(dot_high, _mm_shuffle_epi32(dot_high, 0xB1));

        // Round and rescale.
        let luma_low = _mm_srli_epi32(_mm_add_epi32(sum_low, round), 8);
        let luma_high = _mm_srli_epi32(_mm_add_epi32(sum_high, round), 8);

        // The four lumas live in the EVEN lanes (0 and 2) of each vector.
        // The previous code unpacked the low 64 bits of each vector directly,
        // which are lanes 0 and 1 — both copies of the SAME pixel — so the
        // output was [p0, p0, p2, p2] instead of [p0, p1, p2, p3].
        // 0x88 selects lanes [0, 2, 0, 2]; unpacklo then yields [p0, p1, p2, p3].
        let packed = _mm_unpacklo_epi64(
            _mm_shuffle_epi32(luma_low, 0x88),
            _mm_shuffle_epi32(luma_high, 0x88),
        );
        // Compress byte 0 of each 32-bit luma into the first four bytes.
        let res = _mm_shuffle_epi8(packed,
            _mm_setr_epi8(0, 4, 8, 12, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1));

        let mut out = [0u32; 4];
        _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, res);

        let bytes = out[0].to_le_bytes();
        gray.push(bytes[0]);
        gray.push(bytes[1]);
        gray.push(bytes[2]);
        gray.push(bytes[3]);

        rgba_ptr = rgba_ptr.add(16);
    }

    // Scalar tail for pixels that do not fill a whole 16-byte vector.
    let rem_start = chunks_len * 4;
    for chunk in rgba.chunks_exact(4).skip(rem_start) {
        let r = chunk[0] as i32;
        let g = chunk[1] as i32;
        let b = chunk[2] as i32;
        gray.push(((r * 77 + g * 151 + b * 28 + 128) >> 8) as u8);
    }

    gray
}
459
/// SSE4.1 entry point for the horizontal box-filter pass.
/// Currently not vectorised: it delegates to the scalar implementation so
/// the SIMD dispatch sites in this file have a uniform call shape.
///
/// # Safety
/// The caller must ensure the CPU supports SSE4.1 at runtime (the dispatch
/// sites check `is_x86_feature_detected!("sse4.1")` before calling).
#[cfg(all(feature = "simd-image", target_arch = "x86_64", target_feature = "sse4.1"))]
#[target_feature(enable = "sse4.1")]
pub unsafe fn box_filter_h_simd_x86(data: &[u8], image_temp_u16: &mut [u16], width: i32, height: i32, half: i32) {
    box_filter_h_scalar(data, image_temp_u16, width, height, half);
}
466
/// SSE4.1 vertical box-filter pass; produces the same output as
/// `box_filter_v_scalar`.
///
/// Columns are processed eight at a time: each row load brings in eight
/// u16 horizontal sums, which are accumulated into 32-bit lanes over the
/// vertical window and then normalised/biased/clamped per column.
///
/// # Safety
/// The caller must ensure the CPU supports SSE4.1 at runtime.
#[cfg(all(feature = "simd-image", target_arch = "x86_64", target_feature = "sse4.1"))]
#[target_feature(enable = "sse4.1")]
pub unsafe fn box_filter_v_simd_x86(image_temp_u16: &[u16], image2: &mut [u8], width: i32, height: i32, half: i32, bias: i32) {
    use std::arch::x86_64::*;

    for i_base in (0..width as usize).step_by(8) {
        let remaining = (width as usize).saturating_sub(i_base);
        if remaining < 8 {
            // Fewer than eight columns left: handle the tail with scalar
            // code mirroring `box_filter_v_scalar`.
            for i in i_base..width as usize {
                for j in 0..height {
                    let mut val = 0u32;
                    // Clamp the 2-D window to the image bounds.
                    let v_start = if j - half < 0 { 0 } else { j - half };
                    let v_end = if j + half >= height { height - 1 } else { j + half };
                    let h_start = if i as i32 - half < 0 { 0 } else { i as i32 - half };
                    let h_end = if i as i32 + half >= width { width - 1 } else { i as i32 + half };
                    // Number of in-bounds pixels in the full 2-D window.
                    let count = ((v_end - v_start + 1) * (h_end - h_start + 1)) as u32;

                    for jj in v_start..=v_end {
                        val += image_temp_u16[(jj * width + i as i32) as usize] as u32;
                    }
                    let mut pixel = (val / count) as i32;
                    if bias != 0 { pixel += bias; }
                    image2[(j * width + i as i32) as usize] = pixel.clamp(0, 255) as u8;
                }
            }
            continue;
        }

        for j in 0..height {
            // 32-bit accumulators: sum_low_v covers columns i_base..i_base+4,
            // sum_high_v covers i_base+4..i_base+8.
            let mut sum_low_v = _mm_setzero_si128();
            let mut sum_high_v = _mm_setzero_si128();

            let v_start = if j - half < 0 { 0 } else { j - half };
            let v_end = if j + half >= height { height - 1 } else { j + half };

            for jj in v_start..=v_end {
                // SAFETY: remaining >= 8 guarantees the eight-u16 load below
                // stays inside `image_temp_u16`.
                let row_ptr = image_temp_u16.as_ptr().add((jj * width) as usize + i_base);
                let v = _mm_loadu_si128(row_ptr as *const __m128i);

                // Widen u16 -> u32 before summing so tall windows cannot overflow.
                sum_low_v = _mm_add_epi32(sum_low_v, _mm_cvtepu16_epi32(v));
                sum_high_v = _mm_add_epi32(sum_high_v, _mm_cvtepu16_epi32(_mm_srli_si128(v, 8)));
            }

            // Per column: extract its lane, normalise by the in-bounds window
            // size, apply bias and clamp — the same maths as the scalar path.
            for i_off in 0..8 {
                let actual_i = i_base + i_off;
                let h_start = if actual_i as i32 - half < 0 { 0 } else { actual_i as i32 - half };
                let h_end = if actual_i as i32 + half >= width { width - 1 } else { actual_i as i32 + half };
                let count = ((v_end - v_start + 1) * (h_end - h_start + 1)) as u32;

                let mut res_arr = [0i32; 4];
                let val = if i_off < 4 {
                    _mm_storeu_si128(res_arr.as_mut_ptr() as *mut __m128i, sum_low_v);
                    res_arr[i_off] as u32
                } else {
                    _mm_storeu_si128(res_arr.as_mut_ptr() as *mut __m128i, sum_high_v);
                    res_arr[i_off - 4] as u32
                };

                let mut pixel = (val / count) as i32;
                if bias != 0 { pixel += bias; }
                image2[(j * width + actual_i as i32) as usize] = pixel.clamp(0, 255) as u8;
            }
        }
    }
}
532
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_image_proc_hist() {
        let mut ipi = ARImageProcInfo::new(4, 4);

        // 16 pixels with lumas 0, 10, 20, ..., 150.
        let gradient: Vec<u8> = (0..16u8).map(|i| i * 10).collect();

        ipi.luma_hist(&gradient).unwrap();
        assert_eq!(ipi.hist_bins[0], 1);
        assert_eq!(ipi.hist_bins[10], 1);
        assert_eq!(ipi.hist_bins[150], 1);
        assert_eq!(ipi.hist_bins[160], 0);

        ipi.luma_hist_and_cdf(&gradient).unwrap();
        assert_eq!(ipi.cdf_bins[150], 16);
        assert_eq!(ipi.cdf_bins[0], 1);
    }

    #[test]
    fn test_otsu_thresholding() {
        let mut ipi = ARImageProcInfo::new(5, 5);

        // Mostly-bright image with a small dark patch.
        let mut img = vec![200u8; 25];
        for idx in [6usize, 7, 11, 12] {
            img[idx] = 100;
        }

        let thresh = ipi.luma_hist_and_otsu(&img).unwrap();
        assert!(thresh >= 100 && thresh < 200);
    }
}
568}