// webarkitlib_rs — image_proc.rs

1/*
2 *  image_proc.rs
3 *  WebARKitLib-rs
4 *
5 *  This file is part of WebARKitLib-rs - WebARKit.
6 *
7 *  WebARKitLib-rs is free software: you can redistribute it and/or modify
8 *  it under the terms of the GNU Lesser General Public License as published by
9 *  the Free Software Foundation, either version 3 of the License, or
10 *  (at your option) any later version.
11 *
12 *  WebARKitLib-rs is distributed in the hope that it will be useful,
13 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 *  GNU Lesser General Public License for more details.
16 *
17 *  You should have received a copy of the GNU Lesser General Public License
18 *  along with WebARKitLib-rs.  If not, see <http://www.gnu.org/licenses/>.
19 *
20 *  As a special exception, the copyright holders of this library give you
21 *  permission to link this library with independent modules to produce an
22 *  executable, regardless of the license terms of these independent modules, and to
23 *  copy and distribute the resulting executable under terms of your choice,
24 *  provided that you also meet, for each linked independent module, the terms and
25 *  conditions of the license of that module. An independent module is a module
26 *  which is neither derived from nor based on this library. If you modify this
27 *  library, you may extend this exception to your version of the library, but you
28 *  are not obligated to do so. If you do not wish to do so, delete this exception
29 *  statement from your version.
30 *
31 *  Copyright 2026 WebARKit.
32 *
33 *  Author(s): Walter Perdan @kalwalt https://github.com/kalwalt
34 *
35 */
36
37//! Image Processing and Thresholding Utilities for WebARKitLib
38//! Translated from ARToolKit C headers (arImageProc.h, arImageProc.c)
39
/// Structure holding settings for an instance of the image-processing pipeline:
/// histogram/CDF state and scratch buffers for box filtering of one image size.
#[derive(Debug, Clone)]
pub struct ARImageProcInfo {
    /// Width of image buffer, in pixels
    pub image_x: i32,
    /// Height of image buffer, in pixels
    pub image_y: i32,
    /// Extra buffer, allocated lazily as required for filtering (holds the filtered output)
    pub image2: Option<Vec<u8>>,
    /// Intermediate buffer for separable filters (horizontal-pass sums), allocated lazily
    pub image_temp_u16: Option<Vec<u16>>,
    /// Luminance histogram: `hist_bins[v]` = number of pixels with luma value `v`
    pub hist_bins: [u32; 256],
    /// Luminance cumulative density function: `cdf_bins[v]` = number of pixels with luma <= `v`
    pub cdf_bins: [u32; 256],
    /// Minimum luminance, populated by `luma_hist_and_cdf_and_levels`
    pub min: u8,
    /// Maximum luminance, populated by `luma_hist_and_cdf_and_levels`
    pub max: u8,
}
60
61impl ARImageProcInfo {
62    /// Initialise image processing
63    pub fn new(xsize: i32, ysize: i32) -> Self {
64        Self {
65            image_x: xsize,
66            image_y: ysize,
67            image2: None,
68            image_temp_u16: None,
69            hist_bins: [0; 256],
70            cdf_bins: [0; 256],
71            min: 0,
72            max: 0,
73        }
74    }
75
76    /// Calculate luminance histogram
77    pub fn luma_hist(&mut self, data: &[u8]) -> Result<(), &'static str> {
78        let expected_size = (self.image_x * self.image_y) as usize;
79        if data.len() < expected_size {
80            return Err("Data array is smaller than the specified image dimensions");
81        }
82
83        self.hist_bins.fill(0);
84        for &pixel in data.iter().take(expected_size) {
85            self.hist_bins[pixel as usize] += 1;
86        }
87
88        Ok(())
89    }
90
91    /// Calculate image histogram and cumulative density function
92    pub fn luma_hist_and_cdf(&mut self, data: &[u8]) -> Result<(), &'static str> {
93        self.luma_hist(data)?;
94
95        let mut cdf_current = 0;
96        for i in 0..=255 {
97            self.cdf_bins[i] = cdf_current + self.hist_bins[i];
98            cdf_current = self.cdf_bins[i];
99        }
100
101        Ok(())
102    }
103
104    /// Calculate image histogram, cumulative density function, and luminance value at a given histogram percentile
105    pub fn luma_hist_and_cdf_and_percentile(&mut self, data: &[u8], percentile: f32) -> Result<u8, &'static str> {
106        if !(0.0..=1.0).contains(&percentile) {
107            return Err("Percentile must be between 0.0 and 1.0");
108        }
109
110        self.luma_hist_and_cdf(data)?;
111
112        let count = (self.image_x * self.image_y) as f32;
113        let required_cd = (count * percentile) as u32;
114
115        let mut i = 0;
116        while i < 256 && self.cdf_bins[i] < required_cd {
117            i += 1;
118        }
119
120        let mut j = i;
121        while j < 256 && self.cdf_bins[j] == required_cd {
122            j += 1;
123        }
124
125        Ok(((i + j) / 2) as u8)
126    }
127
128    /// Calculate image histogram, cumulative density function, and median luminance value
129    pub fn luma_hist_and_cdf_and_median(&mut self, data: &[u8]) -> Result<u8, &'static str> {
130        self.luma_hist_and_cdf_and_percentile(data, 0.5)
131    }
132
133    /// Calculate image histogram, and binarize image using Otsu's method
134    pub fn luma_hist_and_otsu(&mut self, data: &[u8]) -> Result<u8, &'static str> {
135        self.luma_hist(data)?;
136
137        let mut sum = 0.0;
138        for i in 1..=255 {
139            sum += (self.hist_bins[i] * i as u32) as f32;
140        }
141
142        let count = (self.image_x * self.image_y) as f32;
143        let mut sum_b = 0.0;
144        let mut w_b = 0.0;
145        let mut var_max = 0.0;
146        let mut threshold = 0;
147
148        for i in 0..=255 {
149            w_b += self.hist_bins[i] as f32;
150            if w_b == 0.0 { continue; }
151
152            let w_f = count - w_b;
153            if w_f == 0.0 { break; }
154
155            sum_b += (i as u32 * self.hist_bins[i]) as f32;
156
157            let m_b = sum_b / w_b;
158            let m_f = (sum - sum_b) / w_f;
159
160            let var_between = w_b * w_f * (m_b - m_f) * (m_b - m_f);
161
162            if var_between > var_max {
163                var_max = var_between;
164                threshold = i as u8;
165            }
166        }
167
168        Ok(threshold)
169    }
170
171    /// Calculate image histogram, cumulative density function, and minimum and maximum luminance values
172    pub fn luma_hist_and_cdf_and_levels(&mut self, data: &[u8]) -> Result<(), &'static str> {
173        self.luma_hist_and_cdf(data)?;
174
175        let mut l = 0;
176        while l < 256 && self.cdf_bins[l] == 0 {
177            l += 1;
178        }
179        self.min = l as u8;
180
181        let max_cd = (self.image_x * self.image_y) as u32;
182        while l < 256 && self.cdf_bins[l] < max_cd {
183            l += 1;
184        }
185        self.max = l as u8;
186
187        Ok(())
188    }
189
190    /// Calculate image histogram, and box filter image
191    pub fn luma_hist_and_box_filter_with_bias(&mut self, data: &[u8], box_size: i32, bias: i32) -> Result<(), &'static str> {
192        self.luma_hist(data)?;
193
194        let img_size = (self.image_x * self.image_y) as usize;
195        if self.image2.is_none() || self.image2.as_ref().unwrap().len() != img_size {
196            self.image2 = Some(vec![0; img_size]);
197        }
198        if self.image_temp_u16.is_none() || self.image_temp_u16.as_ref().unwrap().len() != img_size {
199            self.image_temp_u16 = Some(vec![0; img_size]);
200        }
201
202        let kernel_size_half = box_size / 2;
203        let image_temp_u16 = self.image_temp_u16.as_mut().unwrap();
204        let image2 = self.image2.as_mut().unwrap();
205
206        // Pass 1: Horizontal Sum
207        #[cfg(feature = "simd-image")]
208        {
209            #[cfg(all(target_arch = "x86_64", target_feature = "sse4.1"))]
210            {
211                if is_x86_feature_detected!("sse4.1") {
212                    unsafe { box_filter_h_simd_x86(data, image_temp_u16, self.image_x, self.image_y, kernel_size_half); }
213                } else {
214                    box_filter_h_scalar(data, image_temp_u16, self.image_x, self.image_y, kernel_size_half);
215                }
216            }
217            #[cfg(not(target_arch = "x86_64"))]
218            box_filter_h_scalar(data, image_temp_u16, self.image_x, self.image_y, kernel_size_half);
219        }
220        #[cfg(not(feature = "simd-image"))]
221        box_filter_h_scalar(data, image_temp_u16, self.image_x, self.image_y, kernel_size_half);
222
223        // Pass 2: Vertical Sum and Normalize
224        #[cfg(feature = "simd-image")]
225        {
226            #[cfg(all(target_arch = "x86_64", target_feature = "sse4.1"))]
227            {
228                if is_x86_feature_detected!("sse4.1") {
229                    unsafe { box_filter_v_simd_x86(image_temp_u16, image2, self.image_x, self.image_y, kernel_size_half, bias); }
230                } else {
231                    box_filter_v_scalar(image_temp_u16, image2, self.image_x, self.image_y, kernel_size_half, bias);
232                }
233            }
234            #[cfg(not(target_arch = "x86_64"))]
235            box_filter_v_scalar(image_temp_u16, image2, self.image_x, self.image_y, kernel_size_half, bias);
236        }
237        #[cfg(not(feature = "simd-image"))]
238        box_filter_v_scalar(image_temp_u16, image2, self.image_x, self.image_y, kernel_size_half, bias);
239
240        Ok(())
241    }
242}
243
/// Scalar horizontal pass of the separable box filter.
///
/// For every pixel, sums the `2 * half + 1` horizontal neighbours
/// (neighbours outside the image are skipped, so edge pixels accumulate
/// fewer terms) and stores the raw sum in `image_temp_u16`.
///
/// The accumulation is done in `u32` and saturated to `u16::MAX` on store:
/// with the previous `u16` accumulator, a kernel wider than 257 samples of
/// bright pixels could overflow (panicking in debug builds and silently
/// wrapping in release builds).
pub fn box_filter_h_scalar(data: &[u8], image_temp_u16: &mut [u16], width: i32, height: i32, half: i32) {
    for j in 0..height {
        let row_offset = (j * width) as usize;
        for i in 0..width {
            // Accumulate in u32 so wide kernels cannot overflow.
            let mut val: u32 = 0;
            for kernel_i in -half..=half {
                let ii = i + kernel_i;
                if ii >= 0 && ii < width {
                    val += data[row_offset + ii as usize] as u32;
                }
            }
            // Saturate rather than wrap if the sum exceeds the u16 range.
            image_temp_u16[row_offset + i as usize] = val.min(u16::MAX as u32) as u16;
        }
    }
}
259
/// Scalar vertical pass of the separable box filter.
///
/// Reads the horizontal row sums from `image_temp_u16`, sums them over the
/// vertical kernel extent, divides by the number of samples the
/// (edge-clipped) 2-D window actually covered, applies the optional
/// additive `bias`, and writes the result to `image2` clamped to `0..=255`.
pub fn box_filter_v_scalar(image_temp_u16: &[u16], image2: &mut [u8], width: i32, height: i32, half: i32, bias: i32) {
    for col in 0..width {
        // Horizontal extent of the kernel for this column, clipped to the
        // image; constant down the column, so computed once here.
        let left = (col - half).max(0);
        let right = (col + half).min(width - 1);
        let h_count = right - left + 1;

        for row in 0..height {
            // Vertical extent, clipped to the image.
            let top = (row - half).max(0);
            let bottom = (row + half).min(height - 1);
            let n_samples = ((bottom - top + 1) * h_count) as u32;

            // Sum the precomputed horizontal sums over the vertical window.
            let total: u32 = (top..=bottom)
                .map(|r| image_temp_u16[(r * width + col) as usize] as u32)
                .sum();

            let shifted = (total / n_samples) as i32 + bias;
            image2[(row * width + col) as usize] = shifted.clamp(0, 255) as u8;
        }
    }
}
285
/// Convert an RGBA buffer to a grayscale (luma) buffer using BT.601 coefficients.
///
/// Dispatch order: with the `simd-image` feature enabled, a wasm32+simd128
/// build always takes the WASM SIMD path; an x86_64 build compiled with
/// SSE4.1 takes the SSE path after a runtime feature check. Every other
/// configuration falls through to the scalar implementation.
pub fn rgba_to_gray(rgba: &[u8]) -> Vec<u8> {
    #[cfg(feature = "simd-image")]
    {
        #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
        {
            // simd128 is guaranteed at compile time on this target.
            return unsafe { rgba_to_gray_simd_wasm(rgba) };
        }
        #[cfg(all(target_arch = "x86_64", target_feature = "sse4.1"))]
        {
            // Runtime re-check before entering the `unsafe` SSE4.1 kernel.
            if is_x86_feature_detected!("sse4.1") {
                return unsafe { rgba_to_gray_simd_x86(rgba) };
            }
        }
    }

    rgba_to_gray_scalar(rgba)
}
304
/// Scalar RGBA -> grayscale conversion using fixed-point BT.601 weights.
///
/// Uses the same integer arithmetic as the SIMD fast paths in this file —
/// `(77*R + 151*G + 28*B + 128) >> 8`, where 77 + 151 + 28 = 256 — so the
/// output is bit-identical regardless of which code path handled a pixel.
/// (The previous floating-point coefficients could differ by ±1 per pixel
/// from the SIMD results.) The alpha channel is ignored; trailing bytes
/// that do not form a complete 4-byte pixel are dropped.
pub fn rgba_to_gray_scalar(rgba: &[u8]) -> Vec<u8> {
    let mut gray = Vec::with_capacity(rgba.len() / 4);
    for chunk in rgba.chunks_exact(4) {
        let r = chunk[0] as u32;
        let g = chunk[1] as u32;
        let b = chunk[2] as u32;
        gray.push(((r * 77 + g * 151 + b * 28 + 128) >> 8) as u8);
    }
    gray
}
315
316/// Convert an RGB buffer to a grayscale (luma) buffer using BT.601 coefficients.
317pub fn rgb_to_gray(rgb: &[u8]) -> Vec<u8> {
318    #[cfg(feature = "simd-image")]
319    {
320        // SIMD for RGB (3-byte) is more complex due to alignment.
321        // For now, we only provide scalar or focus on RGBA which is common in WASM.
322    }
323    
324    rgb_to_gray_scalar(rgb)
325}
326
/// Scalar RGB -> grayscale conversion using fixed-point BT.601 weights.
///
/// Uses the same integer arithmetic as the RGBA SIMD paths in this file —
/// `(77*R + 151*G + 28*B + 128) >> 8`, where 77 + 151 + 28 = 256 — so RGB
/// and RGBA inputs of the same color produce the same luma. (The previous
/// floating-point coefficients could differ by ±1 per pixel from the
/// fixed-point paths.) Trailing bytes that do not form a complete 3-byte
/// pixel are dropped.
fn rgb_to_gray_scalar(rgb: &[u8]) -> Vec<u8> {
    let mut gray = Vec::with_capacity(rgb.len() / 3);
    for chunk in rgb.chunks_exact(3) {
        let r = chunk[0] as u32;
        let g = chunk[1] as u32;
        let b = chunk[2] as u32;
        gray.push(((r * 77 + g * 151 + b * 28 + 128) >> 8) as u8);
    }
    gray
}
337
/// WASM SIMD128 implementation of RGBA -> grayscale conversion.
///
/// Processes 16 bytes (4 pixels) per iteration using fixed-point BT.601
/// weights: luma = (77*R + 151*G + 28*B + 128) >> 8 (77 + 151 + 28 = 256).
/// Remaining 0-3 pixels are handled by a scalar tail using the same
/// arithmetic, so every pixel gets an identical result.
///
/// # Safety
/// Performs 16-byte loads through a raw pointer; the loop bound is derived
/// from `rgba.len()`, so all reads stay inside the slice. The `simd128`
/// target feature is guaranteed by the `cfg` gate.
#[cfg(all(feature = "simd-image", target_arch = "wasm32", target_feature = "simd128"))]
#[target_feature(enable = "simd128")]
unsafe fn rgba_to_gray_simd_wasm(rgba: &[u8]) -> Vec<u8> {
    use std::arch::wasm32::*;
    
    let mut gray = Vec::with_capacity(rgba.len() / 4);
    let chunks_len = rgba.len() / 16;
    
    // Fixed point coefficients for (77R + 151G + 28B) >> 8
    // (the alpha lane weight is 0, so the A byte is ignored).
    let coeffs = i16x8(77, 151, 28, 0, 77, 151, 28, 0);
    
    let mut rgba_ptr = rgba.as_ptr();
    
    for _ in 0..chunks_len {
        let v = v128_load(rgba_ptr as *const v128);
        
        // Widen u8 -> u16; values <= 255 are also valid as i16 for the
        // signed dot product below.
        let low = u16x8_extend_low_u8x16(v);
        let high = u16x8_extend_high_u8x16(v);
        
        let dot_low = i32x4_dot_i16x8(low, coeffs);
        let dot_high = i32x4_dot_i16x8(high, coeffs);
        
        // sum = [R0*77+G0*151, B0*28, R1*77+G1*151, B1*28]
        // We need to add (0,1) and (2,3)
        let sum_low = i32x4_add(dot_low, i32x4_shuffle::<1, 1, 3, 3>(dot_low, dot_low));
        let sum_high = i32x4_add(dot_high, i32x4_shuffle::<1, 1, 3, 3>(dot_high, dot_high));
        
        // Add 128 for rounding: (sum + 128) >> 8
        let round_v = i32x4_splat(128);
        let res_low = u32x4_shr(i32x4_add(sum_low, round_v), 8);
        let res_high = u32x4_shr(i32x4_add(sum_high, round_v), 8);
        
        // Pack [L0, L1, L2, L3]: the per-pixel totals live in lanes 0 and 2
        // of each half after the pair-add above.
        let res = i32x4_shuffle::<0, 2, 4, 6>(res_low, res_high);
        
        let mut out = [0u32; 4];
        v128_store(out.as_mut_ptr() as *mut v128, res);
        gray.push(out[0] as u8);
        gray.push(out[1] as u8);
        gray.push(out[2] as u8);
        gray.push(out[3] as u8);

        rgba_ptr = rgba_ptr.add(16);
    }
    
    // Scalar tail: skip the pixels the SIMD loop already consumed
    // (chunks_len 16-byte chunks == chunks_len * 4 pixels).
    let rem_start = chunks_len * 4;
    for chunk in rgba.chunks_exact(4).skip(rem_start) {
        let r = chunk[0] as i32;
        let g = chunk[1] as i32;
        let b = chunk[2] as i32;
        gray.push(((r * 77 + g * 151 + b * 28 + 128) >> 8) as u8);
    }
    
    gray
}
393
/// Converts an RGBA image buffer to a grayscale (Luminance) buffer using SSE4.1 intrinsics.
///
/// Processes 16 bytes (4 pixels) per iteration with fixed-point BT.601
/// weights: luma = (77*R + 151*G + 28*B + 128) >> 8 (77 + 151 + 28 = 256).
///
/// Fixes over the previous revision:
/// * pixels 1 and 3 of every group of four received the luma of pixels 0
///   and 2 — `_mm_unpacklo_epi64` packed lanes {0,1} of each half, but the
///   per-pixel sums lived in lanes {0,2};
/// * the `+128` rounding term was missing from the vector path even though
///   the scalar remainder loop (and the WASM SIMD path) apply it.
///
/// # Safety
/// This function is unsafe because it uses SIMD intrinsics and raw pointer arithmetic.
/// The caller must ensure that the target CPU supports SSE4.1.
#[cfg(all(feature = "simd-image", target_arch = "x86_64", target_feature = "sse4.1"))]
#[target_feature(enable = "sse4.1")]
pub unsafe fn rgba_to_gray_simd_x86(rgba: &[u8]) -> Vec<u8> {
    use std::arch::x86_64::*;

    let mut gray = Vec::with_capacity(rgba.len() / 4);
    let chunks_len = rgba.len() / 16;

    // Fixed point coefficients: R=77, G=151, B=28, A=0.
    let coeffs = _mm_setr_epi16(77, 151, 28, 0, 77, 151, 28, 0);
    let round = _mm_set1_epi32(128);

    let mut rgba_ptr = rgba.as_ptr();

    for _ in 0..chunks_len {
        let v = _mm_loadu_si128(rgba_ptr as *const __m128i);

        // Widen u8 -> i16: `low` holds pixels 0-1, `high` holds pixels 2-3.
        let low = _mm_cvtepu8_epi16(v);
        let high = _mm_cvtepu8_epi16(_mm_srli_si128(v, 8));

        // madd: [R0*77 + G0*151, B0*28 + 0, R1*77 + G1*151, B1*28 + 0],
        // likewise for the high pixels.
        let dot_low = _mm_madd_epi16(low, coeffs);
        let dot_high = _mm_madd_epi16(high, coeffs);

        // Horizontal pair-add collapses each pixel to one lane:
        // [L0', L1', L2', L3'] (hadd is SSSE3, implied by SSE4.1).
        let sums = _mm_hadd_epi32(dot_low, dot_high);

        // Round and scale back to 8 bits: (sum + 128) >> 8 — consistent
        // with the scalar tail below and the WASM path.
        let luma = _mm_srli_epi32(_mm_add_epi32(sums, round), 8);

        // Gather byte 0 of each 32-bit lane into the low 4 output bytes.
        let res = _mm_shuffle_epi8(
            luma,
            _mm_setr_epi8(0, 4, 8, 12, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1),
        );

        let mut out = [0u32; 4];
        _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, res);
        gray.extend_from_slice(&out[0].to_le_bytes());

        rgba_ptr = rgba_ptr.add(16);
    }

    // Scalar tail for the last 0-3 pixels, same fixed-point arithmetic.
    let rem_start = chunks_len * 4;
    for chunk in rgba.chunks_exact(4).skip(rem_start) {
        let r = chunk[0] as i32;
        let g = chunk[1] as i32;
        let b = chunk[2] as i32;
        gray.push(((r * 77 + g * 151 + b * 28 + 128) >> 8) as u8);
    }

    gray
}
462
/// SSE4.1 entry point for the horizontal box-filter pass.
///
/// The horizontal pass is currently delegated to the scalar kernel; a true
/// vectorized (sliding-window) version is reserved as a future optimization.
///
/// # Safety
/// Declared `unsafe` for parity with the other SIMD entry points; the
/// caller must ensure the executing CPU supports SSE4.1.
#[cfg(all(feature = "simd-image", target_arch = "x86_64", target_feature = "sse4.1"))]
#[target_feature(enable = "sse4.1")]
pub unsafe fn box_filter_h_simd_x86(src: &[u8], dst_sums: &mut [u16], width: i32, height: i32, half: i32) {
    box_filter_h_scalar(src, dst_sums, width, height, half);
}
475
/// Vertical box filter implementation using SSE4.1 intrinsics.
///
/// Processes 8 columns at a time using 128-bit registers: for each band of
/// 8 columns, the vertical sums of the horizontal-pass output are kept in
/// two 4-lane u32 accumulators, then each lane is normalized by the number
/// of samples its (edge-clipped) 2-D window actually covered. A final band
/// narrower than 8 columns falls back to a scalar per-pixel path with the
/// same arithmetic as `box_filter_v_scalar`.
///
/// # Safety
/// This function is unsafe because it uses SIMD intrinsics and raw pointer arithmetic.
/// The caller must ensure that the target CPU supports SSE4.1.
#[cfg(all(feature = "simd-image", target_arch = "x86_64", target_feature = "sse4.1"))]
#[target_feature(enable = "sse4.1")]
pub unsafe fn box_filter_v_simd_x86(image_temp_u16: &[u16], image2: &mut [u8], width: i32, height: i32, half: i32, bias: i32) {
    use std::arch::x86_64::*;
    
    for i_base in (0..width as usize).step_by(8) {
        let remaining = (width as usize).saturating_sub(i_base);
        if remaining < 8 { 
            // Scalar fallback: fewer than 8 columns left in this band.
            for i in i_base..width as usize {
                for j in 0..height {
                    let mut val = 0u32;
                    // Clip the kernel window to the image bounds.
                    let v_start = if j - half < 0 { 0 } else { j - half };
                    let v_end = if j + half >= height { height - 1 } else { j + half };
                    let h_start = if i as i32 - half < 0 { 0 } else { i as i32 - half };
                    let h_end = if i as i32 + half >= width { width - 1 } else { i as i32 + half };
                    // Number of in-bounds samples the 2-D kernel covered.
                    let count = ((v_end - v_start + 1) * (h_end - h_start + 1)) as u32;

                    for jj in v_start..=v_end {
                        val += image_temp_u16[(jj * width + i as i32) as usize] as u32;
                    }
                    let mut pixel = (val / count) as i32;
                    if bias != 0 { pixel += bias; }
                    image2[(j * width + i as i32) as usize] = pixel.clamp(0, 255) as u8;
                }
            }
            continue;
        }

        for j in 0..height {
            // sum_low_v lanes accumulate columns i_base..i_base+4,
            // sum_high_v the following four columns.
            let mut sum_low_v = _mm_setzero_si128();
            let mut sum_high_v = _mm_setzero_si128();
            
            let v_start = if j - half < 0 { 0 } else { j - half };
            let v_end = if j + half >= height { height - 1 } else { j + half };
            
            for jj in v_start..=v_end {
                let row_ptr = image_temp_u16.as_ptr().add((jj * width) as usize + i_base);
                let v = _mm_loadu_si128(row_ptr as *const __m128i); 
                
                // Widen u16 -> u32 before accumulating so tall kernels
                // cannot overflow the lanes.
                sum_low_v = _mm_add_epi32(sum_low_v, _mm_cvtepu16_epi32(v));
                sum_high_v = _mm_add_epi32(sum_high_v, _mm_cvtepu16_epi32(_mm_srli_si128(v, 8)));
            }
            
            // Normalize each of the 8 columns by its own sample count
            // (edge columns have a narrower horizontal window).
            for i_off in 0..8 {
                let actual_i = i_base + i_off;
                let h_start = if actual_i as i32 - half < 0 { 0 } else { actual_i as i32 - half };
                let h_end = if actual_i as i32 + half >= width { width - 1 } else { actual_i as i32 + half };
                let count = ((v_end - v_start + 1) * (h_end - h_start + 1)) as u32;

                let mut res_arr = [0i32; 4];
                let val = if i_off < 4 {
                    _mm_storeu_si128(res_arr.as_mut_ptr() as *mut __m128i, sum_low_v);
                    res_arr[i_off] as u32
                } else {
                    _mm_storeu_si128(res_arr.as_mut_ptr() as *mut __m128i, sum_high_v);
                    res_arr[i_off - 4] as u32
                };

                let mut pixel = (val / count) as i32;
                if bias != 0 { pixel += bias; }
                image2[(j * width + actual_i as i32) as usize] = pixel.clamp(0, 255) as u8;
            }
        }
    }
}
548
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_image_proc_hist() {
        let mut ipi = ARImageProcInfo::new(4, 4);

        // 16-pixel ramp: 0, 10, 20, ..., 150.
        let gradient: Vec<u8> = (0..16).map(|v| (v * 10) as u8).collect();

        ipi.luma_hist(&gradient).unwrap();
        assert_eq!(ipi.hist_bins[0], 1);
        assert_eq!(ipi.hist_bins[10], 1);
        assert_eq!(ipi.hist_bins[150], 1);
        assert_eq!(ipi.hist_bins[160], 0);

        // All 16 samples are <= 150; exactly one sample is 0.
        ipi.luma_hist_and_cdf(&gradient).unwrap();
        assert_eq!(ipi.cdf_bins[150], 16);
        assert_eq!(ipi.cdf_bins[0], 1);
    }

    #[test]
    fn test_otsu_thresholding() {
        let mut ipi = ARImageProcInfo::new(5, 5);

        // Bimodal image: 21 pixels at 200 and 4 pixels at 100.
        let mut img = vec![200u8; 25];
        for idx in [6, 7, 11, 12] {
            img[idx] = 100;
        }

        // Otsu should place the threshold between the two modes.
        let thresh = ipi.luma_hist_and_otsu(&img).unwrap();
        assert!((100..200).contains(&thresh));
    }
}