webarkitlib_rs/
image_proc.rs

1/*
2 *  image_proc.rs
3 *  WebARKitLib-rs
4 *
5 *  This file is part of WebARKitLib-rs - WebARKit.
6 *
7 *  WebARKitLib-rs is free software: you can redistribute it and/or modify
8 *  it under the terms of the GNU Lesser General Public License as published by
9 *  the Free Software Foundation, either version 3 of the License, or
10 *  (at your option) any later version.
11 *
12 *  WebARKitLib-rs is distributed in the hope that it will be useful,
13 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 *  GNU Lesser General Public License for more details.
16 *
17 *  You should have received a copy of the GNU Lesser General Public License
18 *  along with WebARKitLib-rs.  If not, see <http://www.gnu.org/licenses/>.
19 *
20 *  As a special exception, the copyright holders of this library give you
21 *  permission to link this library with independent modules to produce an
22 *  executable, regardless of the license terms of these independent modules, and to
23 *  copy and distribute the resulting executable under terms of your choice,
24 *  provided that you also meet, for each linked independent module, the terms and
25 *  conditions of the license of that module. An independent module is a module
26 *  which is neither derived from nor based on this library. If you modify this
27 *  library, you may extend this exception to your version of the library, but you
28 *  are not obligated to do so. If you do not wish to do so, delete this exception
29 *  statement from your version.
30 *
31 *  Copyright 2026 WebARKit.
32 *
33 *  Author(s): Walter Perdan @kalwalt https://github.com/kalwalt
34 *
35 */
36
37//! Image Processing and Thresholding Utilities for WebARKitLib
38//! Translated from ARToolKit C headers (arImageProc.h, arImageProc.c)
39
40/// Structure holding settings for an instance of the image-processing pipeline
41#[derive(Debug, Clone)]
42pub struct ARImageProcInfo {
43    /// Width of image buffer
44    pub image_x: i32,
45    /// Height of image buffer
46    pub image_y: i32,
47    /// Extra buffer, allocated as required for filtering
48    pub image2: Option<Vec<u8>>,
49    /// Intermediate buffer for separable filters
50    pub image_temp_u16: Option<Vec<u16>>,
51    /// Luminance histogram
52    pub hist_bins: [u32; 256],
53    /// Luminance cumulative density function
54    pub cdf_bins: [u32; 256],
55    /// Minimum luminance
56    pub min: u8,
57    /// Maximum luminance
58    pub max: u8,
59}
60
61impl ARImageProcInfo {
62    /// Initialise image processing
63    pub fn new(xsize: i32, ysize: i32) -> Self {
64        Self {
65            image_x: xsize,
66            image_y: ysize,
67            image2: None,
68            image_temp_u16: None,
69            hist_bins: [0; 256],
70            cdf_bins: [0; 256],
71            min: 0,
72            max: 0,
73        }
74    }
75
76    /// Calculate luminance histogram
77    pub fn luma_hist(&mut self, data: &[u8]) -> Result<(), &'static str> {
78        let expected_size = (self.image_x * self.image_y) as usize;
79        if data.len() < expected_size {
80            return Err("Data array is smaller than the specified image dimensions");
81        }
82
83        self.hist_bins.fill(0);
84        for &pixel in data.iter().take(expected_size) {
85            self.hist_bins[pixel as usize] += 1;
86        }
87
88        Ok(())
89    }
90
91    /// Calculate image histogram and cumulative density function
92    pub fn luma_hist_and_cdf(&mut self, data: &[u8]) -> Result<(), &'static str> {
93        self.luma_hist(data)?;
94
95        let mut cdf_current = 0;
96        for i in 0..=255 {
97            self.cdf_bins[i] = cdf_current + self.hist_bins[i];
98            cdf_current = self.cdf_bins[i];
99        }
100
101        Ok(())
102    }
103
104    /// Calculate image histogram, cumulative density function, and luminance value at a given histogram percentile
105    pub fn luma_hist_and_cdf_and_percentile(&mut self, data: &[u8], percentile: f32) -> Result<u8, &'static str> {
106        if !(0.0..=1.0).contains(&percentile) {
107            return Err("Percentile must be between 0.0 and 1.0");
108        }
109
110        self.luma_hist_and_cdf(data)?;
111
112        let count = (self.image_x * self.image_y) as f32;
113        let required_cd = (count * percentile) as u32;
114
115        let mut i = 0;
116        while i < 256 && self.cdf_bins[i] < required_cd {
117            i += 1;
118        }
119
120        let mut j = i;
121        while j < 256 && self.cdf_bins[j] == required_cd {
122            j += 1;
123        }
124
125        Ok(((i + j) / 2) as u8)
126    }
127
128    /// Calculate image histogram, cumulative density function, and median luminance value
129    pub fn luma_hist_and_cdf_and_median(&mut self, data: &[u8]) -> Result<u8, &'static str> {
130        self.luma_hist_and_cdf_and_percentile(data, 0.5)
131    }
132
133    /// Calculate image histogram, and binarize image using Otsu's method
134    pub fn luma_hist_and_otsu(&mut self, data: &[u8]) -> Result<u8, &'static str> {
135        self.luma_hist(data)?;
136
137        let mut sum = 0.0;
138        for i in 1..=255 {
139            sum += (self.hist_bins[i] * i as u32) as f32;
140        }
141
142        let count = (self.image_x * self.image_y) as f32;
143        let mut sum_b = 0.0;
144        let mut w_b = 0.0;
145        let mut var_max = 0.0;
146        let mut threshold = 0;
147
148        for i in 0..=255 {
149            w_b += self.hist_bins[i] as f32;
150            if w_b == 0.0 { continue; }
151
152            let w_f = count - w_b;
153            if w_f == 0.0 { break; }
154
155            sum_b += (i as u32 * self.hist_bins[i]) as f32;
156
157            let m_b = sum_b / w_b;
158            let m_f = (sum - sum_b) / w_f;
159
160            let var_between = w_b * w_f * (m_b - m_f) * (m_b - m_f);
161
162            if var_between > var_max {
163                var_max = var_between;
164                threshold = i as u8;
165            }
166        }
167
168        Ok(threshold)
169    }
170
171    /// Calculate image histogram, cumulative density function, and minimum and maximum luminance values
172    pub fn luma_hist_and_cdf_and_levels(&mut self, data: &[u8]) -> Result<(), &'static str> {
173        self.luma_hist_and_cdf(data)?;
174
175        let mut l = 0;
176        while l < 256 && self.cdf_bins[l] == 0 {
177            l += 1;
178        }
179        self.min = l as u8;
180
181        let max_cd = (self.image_x * self.image_y) as u32;
182        while l < 256 && self.cdf_bins[l] < max_cd {
183            l += 1;
184        }
185        self.max = l as u8;
186
187        Ok(())
188    }
189
190    /// Calculate image histogram, and box filter image
191    pub fn luma_hist_and_box_filter_with_bias(&mut self, data: &[u8], box_size: i32, bias: i32) -> Result<(), &'static str> {
192        self.luma_hist(data)?;
193
194        let img_size = (self.image_x * self.image_y) as usize;
195        if self.image2.is_none() || self.image2.as_ref().unwrap().len() != img_size {
196            self.image2 = Some(vec![0; img_size]);
197        }
198        if self.image_temp_u16.is_none() || self.image_temp_u16.as_ref().unwrap().len() != img_size {
199            self.image_temp_u16 = Some(vec![0; img_size]);
200        }
201
202        let kernel_size_half = box_size / 2;
203        let image_temp_u16 = self.image_temp_u16.as_mut().unwrap();
204        let image2 = self.image2.as_mut().unwrap();
205
206        // Pass 1: Horizontal Sum
207        #[cfg(feature = "simd-image")]
208        {
209            #[cfg(all(target_arch = "x86_64", target_feature = "sse4.1"))]
210            {
211                if is_x86_feature_detected!("sse4.1") {
212                    unsafe { box_filter_h_simd_x86(data, image_temp_u16, self.image_x, self.image_y, kernel_size_half); }
213                } else {
214                    box_filter_h_scalar(data, image_temp_u16, self.image_x, self.image_y, kernel_size_half);
215                }
216            }
217            #[cfg(not(target_arch = "x86_64"))]
218            box_filter_h_scalar(data, image_temp_u16, self.image_x, self.image_y, kernel_size_half);
219        }
220        #[cfg(not(feature = "simd-image"))]
221        box_filter_h_scalar(data, image_temp_u16, self.image_x, self.image_y, kernel_size_half);
222
223        // Pass 2: Vertical Sum and Normalize
224        #[cfg(feature = "simd-image")]
225        {
226            #[cfg(all(target_arch = "x86_64", target_feature = "sse4.1"))]
227            {
228                if is_x86_feature_detected!("sse4.1") {
229                    unsafe { box_filter_v_simd_x86(image_temp_u16, image2, self.image_x, self.image_y, kernel_size_half, bias); }
230                } else {
231                    box_filter_v_scalar(image_temp_u16, image2, self.image_x, self.image_y, kernel_size_half, bias);
232                }
233            }
234            #[cfg(not(target_arch = "x86_64"))]
235            box_filter_v_scalar(image_temp_u16, image2, self.image_x, self.image_y, kernel_size_half, bias);
236        }
237        #[cfg(not(feature = "simd-image"))]
238        box_filter_v_scalar(image_temp_u16, image2, self.image_x, self.image_y, kernel_size_half, bias);
239
240        Ok(())
241    }
242}
243
244pub fn box_filter_h_scalar(data: &[u8], image_temp_u16: &mut [u16], width: i32, height: i32, half: i32) {
245    for j in 0..height {
246        let row_offset = (j * width) as usize;
247        for i in 0..width {
248            let mut val = 0u16;
249            for kernel_i in -half..=half {
250                let ii = i + kernel_i;
251                if ii >= 0 && ii < width {
252                    val += data[row_offset + ii as usize] as u16;
253                }
254            }
255            image_temp_u16[row_offset + i as usize] = val;
256        }
257    }
258}
259
260pub fn box_filter_v_scalar(image_temp_u16: &[u16], image2: &mut [u8], width: i32, height: i32, half: i32, bias: i32) {
261    for i in 0..width {
262        for j in 0..height {
263            let mut val = 0u32;
264            
265            let v_start = if j - half < 0 { 0 } else { j - half };
266            let v_end = if j + half >= height { height - 1 } else { j + half };
267            let h_start = if i - half < 0 { 0 } else { i - half };
268            let h_end = if i + half >= width { width - 1 } else { i + half };
269            
270            let count = ((v_end - v_start + 1) * (h_end - h_start + 1)) as u32;
271
272            for jj in v_start..=v_end {
273                val += image_temp_u16[(jj * width + i) as usize] as u32;
274            }
275
276            let mut pixel = (val / count) as i32;
277            if bias != 0 {
278                pixel += bias;
279            }
280            
281            image2[(j * width + i) as usize] = pixel.clamp(0, 255) as u8;
282        }
283    }
284}
285
286/// Convert an RGBA buffer to a grayscale (luma) buffer using BT.601 coefficients.
287pub fn rgba_to_gray(rgba: &[u8]) -> Vec<u8> {
288    #[cfg(feature = "simd-image")]
289    {
290        #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
291        {
292            return unsafe { rgba_to_gray_simd_wasm(rgba) };
293        }
294        #[cfg(all(target_arch = "x86_64", target_feature = "sse4.1"))]
295        {
296            if is_x86_feature_detected!("sse4.1") {
297                return unsafe { rgba_to_gray_simd_x86(rgba) };
298            }
299        }
300    }
301    
302    #[cfg(not(any(
303        all(target_arch = "wasm32", target_feature = "simd128"),
304        all(target_arch = "x86_64", target_feature = "sse4.1")
305    )))]
306    rgba_to_gray_scalar(rgba)
307}
308
309pub fn rgba_to_gray_scalar(rgba: &[u8]) -> Vec<u8> {
310    let mut gray = Vec::with_capacity(rgba.len() / 4);
311    for chunk in rgba.chunks_exact(4) {
312        let r = chunk[0] as f32;
313        let g = chunk[1] as f32;
314        let b = chunk[2] as f32;
315        gray.push((0.2989 * r + 0.5870 * g + 0.1140 * b + 0.5) as u8);
316    }
317    gray
318}
319
320/// Convert an RGB buffer to a grayscale (luma) buffer using BT.601 coefficients.
321pub fn rgb_to_gray(rgb: &[u8]) -> Vec<u8> {
322    #[cfg(feature = "simd-image")]
323    {
324        // SIMD for RGB (3-byte) is more complex due to alignment.
325        // For now, we only provide scalar or focus on RGBA which is common in WASM.
326    }
327    
328    rgb_to_gray_scalar(rgb)
329}
330
331fn rgb_to_gray_scalar(rgb: &[u8]) -> Vec<u8> {
332    let mut gray = Vec::with_capacity(rgb.len() / 3);
333    for chunk in rgb.chunks_exact(3) {
334        let r = chunk[0] as f32;
335        let g = chunk[1] as f32;
336        let b = chunk[2] as f32;
337        gray.push((0.2989 * r + 0.5870 * g + 0.1140 * b + 0.5) as u8);
338    }
339    gray
340}
341
342#[cfg(all(feature = "simd-image", target_arch = "wasm32", target_feature = "simd128"))]
343#[target_feature(enable = "simd128")]
344unsafe fn rgba_to_gray_simd_wasm(rgba: &[u8]) -> Vec<u8> {
345    use std::arch::wasm32::*;
346    
347    let mut gray = Vec::with_capacity(rgba.len() / 4);
348    let chunks_len = rgba.len() / 16;
349    
350    // Fixed point coefficients for (77R + 151G + 28B) >> 8
351    let coeffs = i16x8(77, 151, 28, 0, 77, 151, 28, 0);
352    
353    let mut rgba_ptr = rgba.as_ptr();
354    
355    for _ in 0..chunks_len {
356        let v = v128_load(rgba_ptr as *const v128);
357        
358        let low = u16x8_extend_low_u8x16(v);
359        let high = u16x8_extend_high_u8x16(v);
360        
361        let dot_low = i32x4_dot_i16x8(low, coeffs);
362        let dot_high = i32x4_dot_i16x8(high, coeffs);
363        
364        // sum = [R0*77+G0*151, B0*28, R1*77+G1*151, B1*28]
365        // We need to add (0,1) and (2,3)
366        let sum_low = i32x4_add(dot_low, i32x4_shuffle::<1, 1, 3, 3>(dot_low, dot_low));
367        let sum_high = i32x4_add(dot_high, i32x4_shuffle::<1, 1, 3, 3>(dot_high, dot_high));
368        
369        // Add 128 for rounding: (sum + 128) >> 8
370        let round_v = i32x4_splat(128);
371        let res_low = u32x4_shr(i32x4_add(sum_low, round_v), 8);
372        let res_high = u32x4_shr(i32x4_add(sum_high, round_v), 8);
373        
374        // Pack [L0, L1, L2, L3]
375        let res = i32x4_shuffle::<0, 2, 4, 6>(res_low, res_high);
376        
377        let mut out = [0u32; 4];
378        v128_store(out.as_mut_ptr() as *mut v128, res);
379        gray.push(out[0] as u8);
380        gray.push(out[1] as u8);
381        gray.push(out[2] as u8);
382        gray.push(out[3] as u8);
383
384        rgba_ptr = rgba_ptr.add(16);
385    }
386    
387    let rem_start = chunks_len * 4;
388    for chunk in rgba.chunks_exact(4).skip(rem_start) {
389        let r = chunk[0] as i32;
390        let g = chunk[1] as i32;
391        let b = chunk[2] as i32;
392        gray.push(((r * 77 + g * 151 + b * 28 + 128) >> 8) as u8);
393    }
394    
395    gray
396}
397
398#[cfg(all(feature = "simd-image", target_arch = "x86_64", target_feature = "sse4.1"))]
399#[target_feature(enable = "sse4.1")]
400pub unsafe fn rgba_to_gray_simd_x86(rgba: &[u8]) -> Vec<u8> {
401    use std::arch::x86_64::*;
402    
403    let mut gray = Vec::with_capacity(rgba.len() / 4);
404    let chunks_len = rgba.len() / 16;
405    
406    // Fixed point coefficients: R=77, G=151, B=28
407    let coeffs = _mm_setr_epi16(77, 151, 28, 0, 77, 151, 28, 0);
408    
409    let mut rgba_ptr = rgba.as_ptr();
410    
411    for _ in 0..chunks_len {
412        let v = _mm_loadu_si128(rgba_ptr as *const __m128i);
413        
414        // Expand u8 to i16
415        let low = _mm_cvtepu8_epi16(v);
416        let high = _mm_cvtepu8_epi16(_mm_srli_si128(v, 8));
417        
418        // madd: [R0*77 + G0*151, B0*28 + 0, R1*77 + G1*151, B1*28 + 0]
419        let dot_low = _mm_madd_epi16(low, coeffs);
420        let dot_high = _mm_madd_epi16(high, coeffs);
421        
422        // sum pairs: [sum0, sum1, sum2, sum3]
423        // Actually, madd gives [p0, p1, p2, p3] where we want p0+p1 and p2+p3.
424        // Shuffle [p1, p0, p3, p2]: _MM_SHUFFLE(2, 3, 0, 1) = 10 11 00 01 = 0xB1.
425        let sum_low = _mm_add_epi32(dot_low, _mm_shuffle_epi32(dot_low, 0xB1));
426        let sum_high = _mm_add_epi32(dot_high, _mm_shuffle_epi32(dot_high, 0xB1));
427        
428        // Values are at indices (0, 2) in sum_low and sum_high.
429        // Shr 8 and pack.
430        let luma_low = _mm_srli_epi32(sum_low, 8);
431        let luma_high = _mm_srli_epi32(sum_high, 8);
432        
433        // Pack into u8.
434        let res = _mm_shuffle_epi8(_mm_unpacklo_epi64(luma_low, luma_high), 
435                                  _mm_setr_epi8(0, 4, 8, 12, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1));
436        
437        let mut out = [0u32; 4];
438        _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, res);
439        
440        let bytes = out[0].to_le_bytes();
441        gray.push(bytes[0]);
442        gray.push(bytes[1]);
443        gray.push(bytes[2]);
444        gray.push(bytes[3]);
445
446        rgba_ptr = rgba_ptr.add(16);
447    }
448    
449    let rem_start = chunks_len * 4;
450    for chunk in rgba.chunks_exact(4).skip(rem_start) {
451        let r = chunk[0] as i32;
452        let g = chunk[1] as i32;
453        let b = chunk[2] as i32;
454        gray.push(((r * 77 + g * 151 + b * 28 + 128) >> 8) as u8);
455    }
456    
457    gray
458}
459
460#[cfg(all(feature = "simd-image", target_arch = "x86_64", target_feature = "sse4.1"))]
461#[target_feature(enable = "sse4.1")]
462pub unsafe fn box_filter_h_simd_x86(data: &[u8], image_temp_u16: &mut [u16], width: i32, height: i32, half: i32) {
463    // Horizontal scalar for now (sliding window is faster but scalar is baseline)
464    box_filter_h_scalar(data, image_temp_u16, width, height, half);
465}
466
467#[cfg(all(feature = "simd-image", target_arch = "x86_64", target_feature = "sse4.1"))]
468#[target_feature(enable = "sse4.1")]
469pub unsafe fn box_filter_v_simd_x86(image_temp_u16: &[u16], image2: &mut [u8], width: i32, height: i32, half: i32, bias: i32) {
470    use std::arch::x86_64::*;
471    
472    for i_base in (0..width as usize).step_by(8) {
473        let remaining = (width as usize).saturating_sub(i_base);
474        if remaining < 8 { 
475            for i in i_base..width as usize {
476                for j in 0..height {
477                    let mut val = 0u32;
478                    let v_start = if j - half < 0 { 0 } else { j - half };
479                    let v_end = if j + half >= height { height - 1 } else { j + half };
480                    let h_start = if i as i32 - half < 0 { 0 } else { i as i32 - half };
481                    let h_end = if i as i32 + half >= width { width - 1 } else { i as i32 + half };
482                    let count = ((v_end - v_start + 1) * (h_end - h_start + 1)) as u32;
483
484                    for jj in v_start..=v_end {
485                        val += image_temp_u16[(jj * width + i as i32) as usize] as u32;
486                    }
487                    let mut pixel = (val / count) as i32;
488                    if bias != 0 { pixel += bias; }
489                    image2[(j * width + i as i32) as usize] = pixel.clamp(0, 255) as u8;
490                }
491            }
492            continue;
493        }
494
495        for j in 0..height {
496            let mut sum_low_v = _mm_setzero_si128();
497            let mut sum_high_v = _mm_setzero_si128();
498            
499            let v_start = if j - half < 0 { 0 } else { j - half };
500            let v_end = if j + half >= height { height - 1 } else { j + half };
501            
502            for jj in v_start..=v_end {
503                let row_ptr = image_temp_u16.as_ptr().add((jj * width) as usize + i_base);
504                let v = _mm_loadu_si128(row_ptr as *const __m128i); 
505                
506                sum_low_v = _mm_add_epi32(sum_low_v, _mm_cvtepu16_epi32(v));
507                sum_high_v = _mm_add_epi32(sum_high_v, _mm_cvtepu16_epi32(_mm_srli_si128(v, 8)));
508            }
509            
510            for i_off in 0..8 {
511                let actual_i = i_base + i_off;
512                let h_start = if actual_i as i32 - half < 0 { 0 } else { actual_i as i32 - half };
513                let h_end = if actual_i as i32 + half >= width { width - 1 } else { actual_i as i32 + half };
514                let count = ((v_end - v_start + 1) * (h_end - h_start + 1)) as u32;
515
516                let mut res_arr = [0i32; 4];
517                let val = if i_off < 4 {
518                    _mm_storeu_si128(res_arr.as_mut_ptr() as *mut __m128i, sum_low_v);
519                    res_arr[i_off] as u32
520                } else {
521                    _mm_storeu_si128(res_arr.as_mut_ptr() as *mut __m128i, sum_high_v);
522                    res_arr[i_off - 4] as u32
523                };
524
525                let mut pixel = (val / count) as i32;
526                if bias != 0 { pixel += bias; }
527                image2[(j * width + actual_i as i32) as usize] = pixel.clamp(0, 255) as u8;
528            }
529        }
530    }
531}
532
533#[cfg(test)]
534mod tests {
535    use super::*;
536
537    #[test]
538    fn test_image_proc_hist() {
539        let mut ipi = ARImageProcInfo::new(4, 4);
540        
541        let mut gradient = vec![];
542        for i in 0..16 {
543            gradient.push((i * 10) as u8);
544        }
545
546        ipi.luma_hist(&gradient).unwrap();
547        assert_eq!(ipi.hist_bins[0], 1);
548        assert_eq!(ipi.hist_bins[10], 1);
549        assert_eq!(ipi.hist_bins[150], 1);
550        assert_eq!(ipi.hist_bins[160], 0);
551
552        ipi.luma_hist_and_cdf(&gradient).unwrap();
553        assert_eq!(ipi.cdf_bins[150], 16);
554        assert_eq!(ipi.cdf_bins[0], 1);
555    }
556
557    #[test]
558    fn test_otsu_thresholding() {
559        let mut ipi = ARImageProcInfo::new(5, 5);
560        let mut img = vec![200; 25]; // Peak at 200
561        img[6] = 100; img[7] = 100; // Peak at 100
562        img[11] = 100; img[12] = 100;
563        
564        let thresh = ipi.luma_hist_and_otsu(&img).unwrap();
565        // Since peaks are 100 and 200, threshold should be around 100.
566        assert!(thresh >= 100 && thresh < 200);
567    }
568}
webarkitlib_rs/image_proc.rs

webarkitlib_rs/
image_proc.rs