/// Scratch state for luminance-based image analysis (histogram, CDF,
/// levels, thresholding) and a separable box filter over a single
/// `image_x` x `image_y` 8-bit image.
#[derive(Debug, Clone)]
pub struct ARImageProcInfo {
    // Image width in pixels.
    pub image_x: i32,
    // Image height in pixels.
    pub image_y: i32,
    // Output buffer for the box filter; lazily (re)allocated on use.
    pub image2: Option<Vec<u8>>,
    // Intermediate buffer holding horizontal box-filter sums; lazily allocated.
    pub image_temp_u16: Option<Vec<u16>>,
    // Per-value pixel counts, filled by the luma_hist* methods.
    pub hist_bins: [u32; 256],
    // Running sum of hist_bins, filled by the *_cdf methods.
    pub cdf_bins: [u32; 256],
    // Darkest luminance value present, set by luma_hist_and_cdf_and_levels.
    pub min: u8,
    // Brightest luminance value present, set by luma_hist_and_cdf_and_levels.
    pub max: u8,
}
60
61impl ARImageProcInfo {
62 pub fn new(xsize: i32, ysize: i32) -> Self {
64 Self {
65 image_x: xsize,
66 image_y: ysize,
67 image2: None,
68 image_temp_u16: None,
69 hist_bins: [0; 256],
70 cdf_bins: [0; 256],
71 min: 0,
72 max: 0,
73 }
74 }
75
76 pub fn luma_hist(&mut self, data: &[u8]) -> Result<(), &'static str> {
78 let expected_size = (self.image_x * self.image_y) as usize;
79 if data.len() < expected_size {
80 return Err("Data array is smaller than the specified image dimensions");
81 }
82
83 self.hist_bins.fill(0);
84 for &pixel in data.iter().take(expected_size) {
85 self.hist_bins[pixel as usize] += 1;
86 }
87
88 Ok(())
89 }
90
91 pub fn luma_hist_and_cdf(&mut self, data: &[u8]) -> Result<(), &'static str> {
93 self.luma_hist(data)?;
94
95 let mut cdf_current = 0;
96 for i in 0..=255 {
97 self.cdf_bins[i] = cdf_current + self.hist_bins[i];
98 cdf_current = self.cdf_bins[i];
99 }
100
101 Ok(())
102 }
103
104 pub fn luma_hist_and_cdf_and_percentile(&mut self, data: &[u8], percentile: f32) -> Result<u8, &'static str> {
106 if !(0.0..=1.0).contains(&percentile) {
107 return Err("Percentile must be between 0.0 and 1.0");
108 }
109
110 self.luma_hist_and_cdf(data)?;
111
112 let count = (self.image_x * self.image_y) as f32;
113 let required_cd = (count * percentile) as u32;
114
115 let mut i = 0;
116 while i < 256 && self.cdf_bins[i] < required_cd {
117 i += 1;
118 }
119
120 let mut j = i;
121 while j < 256 && self.cdf_bins[j] == required_cd {
122 j += 1;
123 }
124
125 Ok(((i + j) / 2) as u8)
126 }
127
128 pub fn luma_hist_and_cdf_and_median(&mut self, data: &[u8]) -> Result<u8, &'static str> {
130 self.luma_hist_and_cdf_and_percentile(data, 0.5)
131 }
132
133 pub fn luma_hist_and_otsu(&mut self, data: &[u8]) -> Result<u8, &'static str> {
135 self.luma_hist(data)?;
136
137 let mut sum = 0.0;
138 for i in 1..=255 {
139 sum += (self.hist_bins[i] * i as u32) as f32;
140 }
141
142 let count = (self.image_x * self.image_y) as f32;
143 let mut sum_b = 0.0;
144 let mut w_b = 0.0;
145 let mut var_max = 0.0;
146 let mut threshold = 0;
147
148 for i in 0..=255 {
149 w_b += self.hist_bins[i] as f32;
150 if w_b == 0.0 { continue; }
151
152 let w_f = count - w_b;
153 if w_f == 0.0 { break; }
154
155 sum_b += (i as u32 * self.hist_bins[i]) as f32;
156
157 let m_b = sum_b / w_b;
158 let m_f = (sum - sum_b) / w_f;
159
160 let var_between = w_b * w_f * (m_b - m_f) * (m_b - m_f);
161
162 if var_between > var_max {
163 var_max = var_between;
164 threshold = i as u8;
165 }
166 }
167
168 Ok(threshold)
169 }
170
171 pub fn luma_hist_and_cdf_and_levels(&mut self, data: &[u8]) -> Result<(), &'static str> {
173 self.luma_hist_and_cdf(data)?;
174
175 let mut l = 0;
176 while l < 256 && self.cdf_bins[l] == 0 {
177 l += 1;
178 }
179 self.min = l as u8;
180
181 let max_cd = (self.image_x * self.image_y) as u32;
182 while l < 256 && self.cdf_bins[l] < max_cd {
183 l += 1;
184 }
185 self.max = l as u8;
186
187 Ok(())
188 }
189
190 pub fn luma_hist_and_box_filter_with_bias(&mut self, data: &[u8], box_size: i32, bias: i32) -> Result<(), &'static str> {
192 self.luma_hist(data)?;
193
194 let img_size = (self.image_x * self.image_y) as usize;
195 if self.image2.is_none() || self.image2.as_ref().unwrap().len() != img_size {
196 self.image2 = Some(vec![0; img_size]);
197 }
198 if self.image_temp_u16.is_none() || self.image_temp_u16.as_ref().unwrap().len() != img_size {
199 self.image_temp_u16 = Some(vec![0; img_size]);
200 }
201
202 let kernel_size_half = box_size / 2;
203 let image_temp_u16 = self.image_temp_u16.as_mut().unwrap();
204 let image2 = self.image2.as_mut().unwrap();
205
206 #[cfg(feature = "simd-image")]
208 {
209 #[cfg(all(target_arch = "x86_64", target_feature = "sse4.1"))]
210 {
211 if is_x86_feature_detected!("sse4.1") {
212 unsafe { box_filter_h_simd_x86(data, image_temp_u16, self.image_x, self.image_y, kernel_size_half); }
213 } else {
214 box_filter_h_scalar(data, image_temp_u16, self.image_x, self.image_y, kernel_size_half);
215 }
216 }
217 #[cfg(not(target_arch = "x86_64"))]
218 box_filter_h_scalar(data, image_temp_u16, self.image_x, self.image_y, kernel_size_half);
219 }
220 #[cfg(not(feature = "simd-image"))]
221 box_filter_h_scalar(data, image_temp_u16, self.image_x, self.image_y, kernel_size_half);
222
223 #[cfg(feature = "simd-image")]
225 {
226 #[cfg(all(target_arch = "x86_64", target_feature = "sse4.1"))]
227 {
228 if is_x86_feature_detected!("sse4.1") {
229 unsafe { box_filter_v_simd_x86(image_temp_u16, image2, self.image_x, self.image_y, kernel_size_half, bias); }
230 } else {
231 box_filter_v_scalar(image_temp_u16, image2, self.image_x, self.image_y, kernel_size_half, bias);
232 }
233 }
234 #[cfg(not(target_arch = "x86_64"))]
235 box_filter_v_scalar(image_temp_u16, image2, self.image_x, self.image_y, kernel_size_half, bias);
236 }
237 #[cfg(not(feature = "simd-image"))]
238 box_filter_v_scalar(image_temp_u16, image2, self.image_x, self.image_y, kernel_size_half, bias);
239
240 Ok(())
241 }
242}
243
/// Horizontal box-filter pass: for each pixel, writes the sum of the
/// `2*half + 1` horizontal neighbors (clamped to the row) into
/// `image_temp_u16`. No normalization — the vertical pass divides by the
/// effective window area.
///
/// The window is clamped once per pixel instead of bounds-testing every
/// tap, which removes a branch from the inner loop without changing the
/// result (out-of-range taps contributed 0).
///
/// Note: sums are u16, so `half` must stay below 129 (window <= 257 taps
/// of 255) to avoid overflow — same precondition as the original.
pub fn box_filter_h_scalar(data: &[u8], image_temp_u16: &mut [u16], width: i32, height: i32, half: i32) {
    for j in 0..height {
        let row_offset = (j * width) as usize;
        for i in 0..width {
            // Clamp the kernel window to the image border.
            let lo = (i - half).max(0) as usize;
            let hi = (i + half).min(width - 1) as usize;
            let val: u16 = data[row_offset + lo..=row_offset + hi]
                .iter()
                .map(|&p| p as u16)
                .sum();
            image_temp_u16[row_offset + i as usize] = val;
        }
    }
}
259
/// Vertical box-filter pass: averages the horizontal sums in
/// `image_temp_u16` over the vertical window, divides by the
/// border-clamped window area, adds `bias`, clamps to 0..=255, and writes
/// the result to `image2`.
pub fn box_filter_v_scalar(image_temp_u16: &[u16], image2: &mut [u8], width: i32, height: i32, half: i32, bias: i32) {
    for j in 0..height {
        // The vertical extent depends only on the row, so hoist it.
        let v_lo = (j - half).max(0);
        let v_hi = (j + half).min(height - 1);
        let rows = (v_hi - v_lo + 1) as u32;

        for i in 0..width {
            let h_lo = (i - half).max(0);
            let h_hi = (i + half).min(width - 1);
            // Effective window area; the horizontal pass summed exactly
            // `h_hi - h_lo + 1` taps per entry.
            let area = rows * (h_hi - h_lo + 1) as u32;

            let total: u32 = (v_lo..=v_hi)
                .map(|jj| image_temp_u16[(jj * width + i) as usize] as u32)
                .sum();

            let pixel = (total / area) as i32 + bias;
            image2[(j * width + i) as usize] = pixel.clamp(0, 255) as u8;
        }
    }
}
285
/// Converts interleaved RGBA bytes (4 bytes per pixel) to an 8-bit
/// grayscale buffer.
///
/// With the `simd-image` feature enabled, dispatches to a SIMD kernel:
/// unconditionally on wasm32+simd128 builds, and on x86_64+SSE4.1 builds
/// after a runtime feature check. Otherwise falls back to the scalar
/// conversion. The SIMD kernels use fixed-point weights (77/151/28 over
/// 256) while the scalar path uses float weights, so outputs may differ
/// slightly between paths.
pub fn rgba_to_gray(rgba: &[u8]) -> Vec<u8> {
    #[cfg(feature = "simd-image")]
    {
        #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
        {
            // simd128 is guaranteed at compile time on this cfg branch.
            return unsafe { rgba_to_gray_simd_wasm(rgba) };
        }
        #[cfg(all(target_arch = "x86_64", target_feature = "sse4.1"))]
        {
            // Runtime re-check; falls through to scalar if unavailable.
            if is_x86_feature_detected!("sse4.1") {
                return unsafe { rgba_to_gray_simd_x86(rgba) };
            }
        }
    }

    rgba_to_gray_scalar(rgba)
}
304
/// Scalar RGBA→grayscale conversion using BT.601-style luma weights;
/// the alpha channel is ignored and +0.5 rounds to nearest before the
/// truncating cast. Trailing bytes that do not form a complete 4-byte
/// pixel are dropped.
pub fn rgba_to_gray_scalar(rgba: &[u8]) -> Vec<u8> {
    rgba.chunks_exact(4)
        .map(|px| {
            let luma = 0.2989 * px[0] as f32 + 0.5870 * px[1] as f32 + 0.1140 * px[2] as f32;
            (luma + 0.5) as u8
        })
        .collect()
}
315
/// Converts interleaved RGB bytes (3 bytes per pixel) to grayscale.
///
/// Placeholder dispatcher: no SIMD RGB kernel exists yet, so this always
/// falls through to the scalar implementation.
pub fn rgb_to_gray(rgb: &[u8]) -> Vec<u8> {
    #[cfg(feature = "simd-image")]
    {
        // TODO: add SIMD RGB->gray kernels mirroring the RGBA paths.
    }

    rgb_to_gray_scalar(rgb)
}
326
/// Scalar RGB→grayscale conversion with BT.601-style luma weights; +0.5
/// rounds to nearest before the truncating cast. Trailing bytes that do
/// not form a complete 3-byte pixel are dropped.
fn rgb_to_gray_scalar(rgb: &[u8]) -> Vec<u8> {
    rgb.chunks_exact(3)
        .map(|px| {
            let luma = 0.2989 * px[0] as f32 + 0.5870 * px[1] as f32 + 0.1140 * px[2] as f32;
            (luma + 0.5) as u8
        })
        .collect()
}
337
/// WASM-SIMD RGBA→grayscale using fixed-point weights 77/151/28 (sum 256,
/// i.e. divided by `>> 8`). Processes 4 pixels (16 bytes) per iteration;
/// trailing pixels use an equivalent scalar loop.
///
/// # Safety
/// Caller must ensure the `simd128` target feature is available (it is
/// guaranteed at compile time on this cfg).
#[cfg(all(feature = "simd-image", target_arch = "wasm32", target_feature = "simd128"))]
#[target_feature(enable = "simd128")]
unsafe fn rgba_to_gray_simd_wasm(rgba: &[u8]) -> Vec<u8> {
    use std::arch::wasm32::*;

    let mut gray = Vec::with_capacity(rgba.len() / 4);
    let chunks_len = rgba.len() / 16;

    // Weights laid out for pairwise dot products:
    // each i32 lane of the dot is (r*77 + g*151) or (b*28 + a*0).
    let coeffs = i16x8_make(77, 151, 28, 0, 77, 151, 28, 0);
    // Rounding term so `>> 8` rounds to nearest.
    let rounding = i32x4_splat(128);

    let mut rgba_ptr = rgba.as_ptr();

    for _ in 0..chunks_len {
        let v = v128_load(rgba_ptr as *const v128);

        // Widen the 16 bytes to two u16x8 halves (2 pixels each).
        let low = u16x8_extend_low_u8x16(v);
        let high = u16x8_extend_high_u8x16(v);

        let dot_low = i32x4_dot_i16x8(low, coeffs);
        let dot_high = i32x4_dot_i16x8(high, coeffs);

        // Add the neighboring lane so lanes 0 and 2 hold the full
        // r*77 + g*151 + b*28 sum for each pixel.
        let sum_low = i32x4_add(dot_low, i32x4_shuffle::<1, 1, 3, 3>(dot_low, dot_low));
        let sum_high = i32x4_add(dot_high, i32x4_shuffle::<1, 1, 3, 3>(dot_high, dot_high));

        // BUGFIX: the SIMD body previously shifted without rounding while
        // the scalar tail below adds +128, so the two halves of one output
        // buffer disagreed by 1 LSB. Add the rounding term before shifting.
        // Max sum is 255*256 = 65280, so +128 cannot overflow i32.
        let res_low = u32x4_shr(i32x4_add(sum_low, rounding), 8);
        let res_high = u32x4_shr(i32x4_add(sum_high, rounding), 8);

        // Gather the 4 luma lanes (0 and 2 of each half) into one vector.
        let res = i32x4_shuffle::<0, 2, 4, 6>(res_low, res_high);

        let mut out = [0u32; 4];
        v128_store(out.as_mut_ptr() as *mut v128, res);
        gray.push(out[0] as u8);
        gray.push(out[1] as u8);
        gray.push(out[2] as u8);
        gray.push(out[3] as u8);

        rgba_ptr = rgba_ptr.add(16);
    }

    // Scalar tail for pixels not covered by a full 16-byte chunk.
    let rem_start = chunks_len * 4;
    for chunk in rgba.chunks_exact(4).skip(rem_start) {
        let r = chunk[0] as i32;
        let g = chunk[1] as i32;
        let b = chunk[2] as i32;
        gray.push(((r * 77 + g * 151 + b * 28 + 128) >> 8) as u8);
    }

    gray
}
391
/// SSE4.1 RGBA→grayscale using fixed-point weights 77/151/28 (sum 256,
/// i.e. divided by `>> 8`). Processes 4 pixels (16 bytes) per iteration;
/// trailing pixels use an equivalent scalar loop.
///
/// # Safety
/// Caller must ensure SSE4.1 is available at runtime.
#[cfg(all(feature = "simd-image", target_arch = "x86_64", target_feature = "sse4.1"))]
#[target_feature(enable = "sse4.1")]
pub unsafe fn rgba_to_gray_simd_x86(rgba: &[u8]) -> Vec<u8> {
    use std::arch::x86_64::*;

    let mut gray = Vec::with_capacity(rgba.len() / 4);
    let chunks_len = rgba.len() / 16;

    // Weights laid out for _mm_madd_epi16's pairwise multiply-add:
    // each i32 lane becomes (r*77 + g*151) or (b*28 + a*0).
    let coeffs = _mm_setr_epi16(77, 151, 28, 0, 77, 151, 28, 0);
    // Rounding term so `>> 8` rounds to nearest.
    let rounding = _mm_set1_epi32(128);

    let mut rgba_ptr = rgba.as_ptr();

    for _ in 0..chunks_len {
        let v = _mm_loadu_si128(rgba_ptr as *const __m128i);

        // Widen the 16 bytes to two epi16 halves (2 pixels each).
        let low = _mm_cvtepu8_epi16(v);
        let high = _mm_cvtepu8_epi16(_mm_srli_si128(v, 8));

        let dot_low = _mm_madd_epi16(low, coeffs);
        let dot_high = _mm_madd_epi16(high, coeffs);

        // Add the neighboring lane (shuffle 0xB1 swaps lanes pairwise) so
        // lanes 0 and 2 hold the full r*77 + g*151 + b*28 sum per pixel.
        let sum_low = _mm_add_epi32(dot_low, _mm_shuffle_epi32(dot_low, 0xB1));
        let sum_high = _mm_add_epi32(dot_high, _mm_shuffle_epi32(dot_high, 0xB1));

        // BUGFIX: the SIMD body previously shifted without rounding while
        // the scalar tail below adds +128, so the two halves of one output
        // buffer disagreed by 1 LSB. Add the rounding term before shifting.
        // Max sum is 255*256 = 65280, so +128 cannot overflow i32.
        let luma_low = _mm_srli_epi32(_mm_add_epi32(sum_low, rounding), 8);
        let luma_high = _mm_srli_epi32(_mm_add_epi32(sum_high, rounding), 8);

        // Pack byte 0 of each 32-bit luma lane into the low 4 bytes.
        let res = _mm_shuffle_epi8(_mm_unpacklo_epi64(luma_low, luma_high),
            _mm_setr_epi8(0, 4, 8, 12, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1));

        let mut out = [0u32; 4];
        _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, res);

        let bytes = out[0].to_le_bytes();
        gray.push(bytes[0]);
        gray.push(bytes[1]);
        gray.push(bytes[2]);
        gray.push(bytes[3]);

        rgba_ptr = rgba_ptr.add(16);
    }

    // Scalar tail for pixels not covered by a full 16-byte chunk.
    let rem_start = chunks_len * 4;
    for chunk in rgba.chunks_exact(4).skip(rem_start) {
        let r = chunk[0] as i32;
        let g = chunk[1] as i32;
        let b = chunk[2] as i32;
        gray.push(((r * 77 + g * 151 + b * 28 + 128) >> 8) as u8);
    }

    gray
}
453
/// x86 horizontal box-filter pass. Currently just delegates to the scalar
/// implementation; kept as a separate entry point so a vectorized version
/// can be dropped in without changing the dispatch sites.
///
/// # Safety
/// Declared to require SSE4.1 like the other SIMD entry points, though the
/// current body is plain safe scalar code.
#[cfg(all(feature = "simd-image", target_arch = "x86_64", target_feature = "sse4.1"))]
#[target_feature(enable = "sse4.1")]
pub unsafe fn box_filter_h_simd_x86(data: &[u8], image_temp_u16: &mut [u16], width: i32, height: i32, half: i32) {
    box_filter_h_scalar(data, image_temp_u16, width, height, half);
}
460
/// SSE4.1 vertical box-filter pass: averages the horizontal sums in
/// `image_temp_u16` over the vertical window, adds `bias`, clamps to
/// 0..=255, and writes to `image2`. Columns are processed 8 at a time;
/// a trailing group narrower than 8 columns uses a per-pixel scalar loop.
///
/// # Safety
/// Caller must ensure SSE4.1 is available and that both slices hold
/// `width * height` elements.
#[cfg(all(feature = "simd-image", target_arch = "x86_64", target_feature = "sse4.1"))]
#[target_feature(enable = "sse4.1")]
pub unsafe fn box_filter_v_simd_x86(image_temp_u16: &[u16], image2: &mut [u8], width: i32, height: i32, half: i32, bias: i32) {
    use std::arch::x86_64::*;

    for i_base in (0..width as usize).step_by(8) {
        let remaining = (width as usize).saturating_sub(i_base);
        if remaining < 8 {
            // Scalar fallback for the final partial group of columns.
            for i in i_base..width as usize {
                for j in 0..height {
                    let mut val = 0u32;
                    // Clamp the kernel window to the image borders.
                    let v_start = if j - half < 0 { 0 } else { j - half };
                    let v_end = if j + half >= height { height - 1 } else { j + half };
                    let h_start = if i as i32 - half < 0 { 0 } else { i as i32 - half };
                    let h_end = if i as i32 + half >= width { width - 1 } else { i as i32 + half };
                    // Effective window area (the horizontal pass already
                    // summed only the clamped horizontal taps).
                    let count = ((v_end - v_start + 1) * (h_end - h_start + 1)) as u32;

                    for jj in v_start..=v_end {
                        val += image_temp_u16[(jj * width + i as i32) as usize] as u32;
                    }
                    let mut pixel = (val / count) as i32;
                    if bias != 0 { pixel += bias; }
                    image2[(j * width + i as i32) as usize] = pixel.clamp(0, 255) as u8;
                }
            }
            continue;
        }

        for j in 0..height {
            // Vertical running sums for 8 columns, widened into two i32x4
            // accumulators (columns 0-3 and 4-7 of this group).
            let mut sum_low_v = _mm_setzero_si128();
            let mut sum_high_v = _mm_setzero_si128();

            let v_start = if j - half < 0 { 0 } else { j - half };
            let v_end = if j + half >= height { height - 1 } else { j + half };

            for jj in v_start..=v_end {
                // In bounds: i_base + 8 <= width (checked above) and jj < height.
                let row_ptr = image_temp_u16.as_ptr().add((jj * width) as usize + i_base);
                let v = _mm_loadu_si128(row_ptr as *const __m128i);

                sum_low_v = _mm_add_epi32(sum_low_v, _mm_cvtepu16_epi32(v));
                sum_high_v = _mm_add_epi32(sum_high_v, _mm_cvtepu16_epi32(_mm_srli_si128(v, 8)));
            }

            // Divide, bias and clamp each of the 8 lanes in scalar code —
            // the divisor differs per column near the left/right borders.
            for i_off in 0..8 {
                let actual_i = i_base + i_off;
                let h_start = if actual_i as i32 - half < 0 { 0 } else { actual_i as i32 - half };
                let h_end = if actual_i as i32 + half >= width { width - 1 } else { actual_i as i32 + half };
                let count = ((v_end - v_start + 1) * (h_end - h_start + 1)) as u32;

                // Spill the relevant accumulator and pick this column's lane.
                let mut res_arr = [0i32; 4];
                let val = if i_off < 4 {
                    _mm_storeu_si128(res_arr.as_mut_ptr() as *mut __m128i, sum_low_v);
                    res_arr[i_off] as u32
                } else {
                    _mm_storeu_si128(res_arr.as_mut_ptr() as *mut __m128i, sum_high_v);
                    res_arr[i_off - 4] as u32
                };

                let mut pixel = (val / count) as i32;
                if bias != 0 { pixel += bias; }
                image2[(j * width + actual_i as i32) as usize] = pixel.clamp(0, 255) as u8;
            }
        }
    }
}
526
#[cfg(test)]
mod tests {
    use super::*;

    /// Histogram and CDF over a 16-pixel gradient 0, 10, ..., 150.
    #[test]
    fn test_image_proc_hist() {
        let mut ipi = ARImageProcInfo::new(4, 4);
        let gradient: Vec<u8> = (0u8..16).map(|i| i * 10).collect();

        ipi.luma_hist(&gradient).unwrap();
        for &(bin, expected) in &[(0usize, 1u32), (10, 1), (150, 1), (160, 0)] {
            assert_eq!(ipi.hist_bins[bin], expected);
        }

        ipi.luma_hist_and_cdf(&gradient).unwrap();
        assert_eq!(ipi.cdf_bins[0], 1);
        assert_eq!(ipi.cdf_bins[150], 16);
    }

    /// Otsu on a mostly-bright image with a small dark patch should land
    /// between the two modes.
    #[test]
    fn test_otsu_thresholding() {
        let mut ipi = ARImageProcInfo::new(5, 5);
        let mut img = vec![200u8; 25];
        for &idx in &[6usize, 7, 11, 12] {
            img[idx] = 100;
        }

        let thresh = ipi.luma_hist_and_otsu(&img).unwrap();
        assert!(thresh >= 100 && thresh < 200);
    }
}