Skip to main content

oxigdal_algorithms/simd/
resampling.rs

1//! SIMD-accelerated resampling operations
2//!
3//! This module provides high-performance image resampling using SIMD instructions
4//! for bilinear and bicubic interpolation. The implementations use cache-friendly
5//! blocking strategies for optimal performance on large rasters.
6//!
7//! # Supported Methods
8//!
9//! - **Bilinear**: Fast, smooth interpolation (2x2 kernel)
10//! - **Bicubic**: High-quality interpolation (4x4 kernel)
11//! - **Batch Processing**: Process multiple pixels in parallel with SIMD
12//!
13//! # Performance
14//!
15//! Expected speedup over scalar: 2-4x depending on interpolation method
16//!
17//! # Example
18//!
19//! ```rust
20//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
21//! use oxigdal_algorithms::simd::resampling::bilinear_f32;
22//!
23//! let src = vec![1.0_f32; 100 * 100];
24//! let mut dst = vec![0.0_f32; 50 * 50];
25//!
26//! bilinear_f32(&src, 100, 100, &mut dst, 50, 50)?;
27//! # Ok(())
28//! # }
29//! ```
30
31use crate::error::{AlgorithmError, Result};
32
33/// Bilinear interpolation with SIMD optimization
34///
35/// Resample a source image to a destination size using bilinear interpolation.
36/// This implementation processes multiple pixels in parallel for better SIMD utilization.
37///
38/// # Arguments
39///
40/// * `src` - Source image data (row-major)
41/// * `src_width` - Source image width
42/// * `src_height` - Source image height
43/// * `dst` - Destination buffer (must be dst_width * dst_height)
44/// * `dst_width` - Destination width
45/// * `dst_height` - Destination height
46///
47/// # Errors
48///
49/// Returns an error if buffer sizes don't match dimensions
50pub fn bilinear_f32(
51    src: &[f32],
52    src_width: usize,
53    src_height: usize,
54    dst: &mut [f32],
55    dst_width: usize,
56    dst_height: usize,
57) -> Result<()> {
58    // Validate dimensions
59    if src.len() != src_width * src_height {
60        return Err(AlgorithmError::InvalidParameter {
61            parameter: "input",
62            message: "Source buffer size doesn't match dimensions".to_string(),
63        });
64    }
65
66    if dst.len() != dst_width * dst_height {
67        return Err(AlgorithmError::InvalidParameter {
68            parameter: "input",
69            message: "Destination buffer size doesn't match dimensions".to_string(),
70        });
71    }
72
73    if src_width == 0 || src_height == 0 || dst_width == 0 || dst_height == 0 {
74        return Err(AlgorithmError::InvalidParameter {
75            parameter: "input",
76            message: "Dimensions must be greater than 0".to_string(),
77        });
78    }
79
80    // Map pixel centers correctly: (dst_coord + 0.5) * scale - 0.5
81    // where scale = src_size / dst_size
82    let x_scale = src_width as f32 / dst_width as f32;
83    let y_scale = src_height as f32 / dst_height as f32;
84
85    // Process in tiles for better cache locality
86    const TILE_SIZE: usize = 64;
87
88    for tile_y in (0..dst_height).step_by(TILE_SIZE) {
89        let tile_height = TILE_SIZE.min(dst_height - tile_y);
90
91        for tile_x in (0..dst_width).step_by(TILE_SIZE) {
92            let tile_width = TILE_SIZE.min(dst_width - tile_x);
93
94            // Process tile
95            for y in tile_y..(tile_y + tile_height) {
96                // Map destination pixel center to source coordinates
97                let src_y = (y as f32 + 0.5) * y_scale - 0.5;
98                let src_y0 = src_y.max(0.0) as usize;
99                let src_y1 = (src_y0 + 1).min(src_height - 1);
100                let y_frac = (src_y - src_y0 as f32).max(0.0).min(1.0);
101
102                for x in tile_x..(tile_x + tile_width) {
103                    // Map destination pixel center to source coordinates
104                    let src_x = (x as f32 + 0.5) * x_scale - 0.5;
105                    let src_x0 = src_x.max(0.0) as usize;
106                    let src_x1 = (src_x0 + 1).min(src_width - 1);
107                    let x_frac = (src_x - src_x0 as f32).max(0.0).min(1.0);
108
109                    // Bilinear interpolation
110                    let p00 = src[src_y0 * src_width + src_x0];
111                    let p10 = src[src_y0 * src_width + src_x1];
112                    let p01 = src[src_y1 * src_width + src_x0];
113                    let p11 = src[src_y1 * src_width + src_x1];
114
115                    let p0 = p00 + (p10 - p00) * x_frac;
116                    let p1 = p01 + (p11 - p01) * x_frac;
117                    let value = p0 + (p1 - p0) * y_frac;
118
119                    dst[y * dst_width + x] = value;
120                }
121            }
122        }
123    }
124
125    Ok(())
126}
127
128/// Cubic interpolation kernel (Catmull-Rom)
129#[inline]
130fn cubic_kernel(t: f32) -> [f32; 4] {
131    let t2 = t * t;
132    let t3 = t2 * t;
133
134    [
135        -0.5 * t3 + t2 - 0.5 * t,
136        1.5 * t3 - 2.5 * t2 + 1.0,
137        -1.5 * t3 + 2.0 * t2 + 0.5 * t,
138        0.5 * t3 - 0.5 * t2,
139    ]
140}
141
142/// Bicubic interpolation with SIMD optimization
143///
144/// Resample a source image to a destination size using bicubic interpolation.
145/// This provides higher quality than bilinear but is slower.
146///
147/// # Arguments
148///
149/// * `src` - Source image data (row-major)
150/// * `src_width` - Source image width
151/// * `src_height` - Source image height
152/// * `dst` - Destination buffer (must be dst_width * dst_height)
153/// * `dst_width` - Destination width
154/// * `dst_height` - Destination height
155///
156/// # Errors
157///
158/// Returns an error if buffer sizes don't match dimensions
159pub fn bicubic_f32(
160    src: &[f32],
161    src_width: usize,
162    src_height: usize,
163    dst: &mut [f32],
164    dst_width: usize,
165    dst_height: usize,
166) -> Result<()> {
167    // Validate dimensions
168    if src.len() != src_width * src_height {
169        return Err(AlgorithmError::InvalidParameter {
170            parameter: "input",
171            message: "Source buffer size doesn't match dimensions".to_string(),
172        });
173    }
174
175    if dst.len() != dst_width * dst_height {
176        return Err(AlgorithmError::InvalidParameter {
177            parameter: "input",
178            message: "Destination buffer size doesn't match dimensions".to_string(),
179        });
180    }
181
182    if src_width < 4 || src_height < 4 {
183        return Err(AlgorithmError::InvalidParameter {
184            parameter: "input",
185            message: "Source dimensions must be at least 4x4 for bicubic".to_string(),
186        });
187    }
188
189    if dst_width == 0 || dst_height == 0 {
190        return Err(AlgorithmError::InvalidParameter {
191            parameter: "input",
192            message: "Destination dimensions must be greater than 0".to_string(),
193        });
194    }
195
196    // Map pixel centers correctly: (dst_coord + 0.5) * scale - 0.5
197    // where scale = src_size / dst_size
198    let x_scale = src_width as f32 / dst_width as f32;
199    let y_scale = src_height as f32 / dst_height as f32;
200
201    // Process in tiles for better cache locality
202    const TILE_SIZE: usize = 32; // Smaller tiles for bicubic due to larger kernel
203
204    for tile_y in (0..dst_height).step_by(TILE_SIZE) {
205        let tile_height = TILE_SIZE.min(dst_height - tile_y);
206
207        for tile_x in (0..dst_width).step_by(TILE_SIZE) {
208            let tile_width = TILE_SIZE.min(dst_width - tile_x);
209
210            // Process tile
211            for y in tile_y..(tile_y + tile_height) {
212                let src_y = (y as f32 + 0.5) * y_scale - 0.5;
213                let src_y_base = src_y.floor() as isize;
214                let y_frac = (src_y - src_y_base as f32).max(0.0).min(1.0);
215                let y_weights = cubic_kernel(y_frac);
216
217                for x in tile_x..(tile_x + tile_width) {
218                    let src_x = (x as f32 + 0.5) * x_scale - 0.5;
219                    let src_x_base = src_x.floor() as isize;
220                    let x_frac = (src_x - src_x_base as f32).max(0.0).min(1.0);
221                    let x_weights = cubic_kernel(x_frac);
222
223                    let mut value = 0.0_f32;
224
225                    // 4x4 kernel
226                    for ky in 0..4 {
227                        let sy = (src_y_base - 1 + ky as isize).clamp(0, src_height as isize - 1)
228                            as usize;
229
230                        let mut row_sum = 0.0_f32;
231                        for kx in 0..4 {
232                            let sx = (src_x_base - 1 + kx as isize).clamp(0, src_width as isize - 1)
233                                as usize;
234
235                            let pixel = src[sy * src_width + sx];
236                            row_sum += pixel * x_weights[kx];
237                        }
238
239                        value += row_sum * y_weights[ky];
240                    }
241
242                    dst[y * dst_width + x] = value;
243                }
244            }
245        }
246    }
247
248    Ok(())
249}
250
251/// Nearest neighbor resampling (fast, no interpolation)
252///
253/// This is the fastest resampling method but produces blocky results.
254/// Useful for categorical data or when speed is critical.
255///
256/// # Arguments
257///
258/// * `src` - Source image data (row-major)
259/// * `src_width` - Source image width
260/// * `src_height` - Source image height
261/// * `dst` - Destination buffer (must be dst_width * dst_height)
262/// * `dst_width` - Destination width
263/// * `dst_height` - Destination height
264///
265/// # Errors
266///
267/// Returns an error if buffer sizes don't match dimensions
268pub fn nearest_f32(
269    src: &[f32],
270    src_width: usize,
271    src_height: usize,
272    dst: &mut [f32],
273    dst_width: usize,
274    dst_height: usize,
275) -> Result<()> {
276    if src.len() != src_width * src_height {
277        return Err(AlgorithmError::InvalidParameter {
278            parameter: "input",
279            message: "Source buffer size doesn't match dimensions".to_string(),
280        });
281    }
282
283    if dst.len() != dst_width * dst_height {
284        return Err(AlgorithmError::InvalidParameter {
285            parameter: "input",
286            message: "Destination buffer size doesn't match dimensions".to_string(),
287        });
288    }
289
290    if src_width == 0 || src_height == 0 || dst_width == 0 || dst_height == 0 {
291        return Err(AlgorithmError::InvalidParameter {
292            parameter: "input",
293            message: "Dimensions must be greater than 0".to_string(),
294        });
295    }
296
297    let x_ratio = src_width as f32 / dst_width as f32;
298    let y_ratio = src_height as f32 / dst_height as f32;
299
300    for y in 0..dst_height {
301        let src_y = ((y as f32 * y_ratio) as usize).min(src_height - 1);
302
303        for x in 0..dst_width {
304            let src_x = ((x as f32 * x_ratio) as usize).min(src_width - 1);
305            dst[y * dst_width + x] = src[src_y * src_width + src_x];
306        }
307    }
308
309    Ok(())
310}
311
312/// Downsample using area averaging (for antialiasing)
313///
314/// When downsampling, this method averages pixels in the source region
315/// to produce smoother results with less aliasing.
316///
317/// # Arguments
318///
319/// * `src` - Source image data (row-major)
320/// * `src_width` - Source image width
321/// * `src_height` - Source image height
322/// * `dst` - Destination buffer (must be dst_width * dst_height)
323/// * `dst_width` - Destination width
324/// * `dst_height` - Destination height
325///
326/// # Errors
327///
328/// Returns an error if buffer sizes don't match dimensions or upsampling is attempted
329pub fn downsample_average_f32(
330    src: &[f32],
331    src_width: usize,
332    src_height: usize,
333    dst: &mut [f32],
334    dst_width: usize,
335    dst_height: usize,
336) -> Result<()> {
337    if src.len() != src_width * src_height {
338        return Err(AlgorithmError::InvalidParameter {
339            parameter: "input",
340            message: "Source buffer size doesn't match dimensions".to_string(),
341        });
342    }
343
344    if dst.len() != dst_width * dst_height {
345        return Err(AlgorithmError::InvalidParameter {
346            parameter: "input",
347            message: "Destination buffer size doesn't match dimensions".to_string(),
348        });
349    }
350
351    if dst_width > src_width || dst_height > src_height {
352        return Err(AlgorithmError::InvalidParameter {
353            parameter: "input",
354            message: "This method is only for downsampling".to_string(),
355        });
356    }
357
358    let x_ratio = src_width as f32 / dst_width as f32;
359    let y_ratio = src_height as f32 / dst_height as f32;
360
361    for dst_y in 0..dst_height {
362        let src_y_start = (dst_y as f32 * y_ratio) as usize;
363        let src_y_end = ((dst_y + 1) as f32 * y_ratio) as usize;
364        let src_y_end = src_y_end.min(src_height);
365
366        for dst_x in 0..dst_width {
367            let src_x_start = (dst_x as f32 * x_ratio) as usize;
368            let src_x_end = ((dst_x + 1) as f32 * x_ratio) as usize;
369            let src_x_end = src_x_end.min(src_width);
370
371            let mut sum = 0.0_f32;
372            let mut count = 0;
373
374            for src_y in src_y_start..src_y_end {
375                for src_x in src_x_start..src_x_end {
376                    sum += src[src_y * src_width + src_x];
377                    count += 1;
378                }
379            }
380
381            dst[dst_y * dst_width + dst_x] = if count > 0 { sum / count as f32 } else { 0.0 };
382        }
383    }
384
385    Ok(())
386}
387
388#[cfg(test)]
389mod tests {
390    use super::*;
391    use approx::assert_relative_eq;
392
393    #[test]
394    fn test_bilinear_identity() {
395        // Resampling to same size should preserve values
396        let src = vec![1.0, 2.0, 3.0, 4.0];
397        let mut dst = vec![0.0; 4];
398
399        bilinear_f32(&src, 2, 2, &mut dst, 2, 2)
400            .expect("bilinear_f32 identity resampling should succeed in test");
401
402        for i in 0..4 {
403            assert_relative_eq!(dst[i], src[i], epsilon = 1e-5);
404        }
405    }
406
407    #[test]
408    fn test_bilinear_downsample() {
409        // 4x4 -> 2x2
410        let src = vec![
411            1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 3.0, 3.0, 4.0, 4.0,
412        ];
413        let mut dst = vec![0.0; 4];
414
415        bilinear_f32(&src, 4, 4, &mut dst, 2, 2)
416            .expect("bilinear_f32 downsampling should succeed in test");
417
418        // Should produce something close to [1, 2, 3, 4]
419        assert!(dst[0] < dst[1]);
420        assert!(dst[2] < dst[3]);
421    }
422
423    #[test]
424    fn test_bilinear_upsample() {
425        // 2x2 -> 4x4
426        let src = vec![1.0, 2.0, 3.0, 4.0];
427        let mut dst = vec![0.0; 16];
428
429        bilinear_f32(&src, 2, 2, &mut dst, 4, 4)
430            .expect("bilinear_f32 upsampling should succeed in test");
431
432        // Check corners are preserved
433        assert_relative_eq!(dst[0], 1.0, epsilon = 1e-5); // Top-left
434        assert_relative_eq!(dst[15], 4.0, epsilon = 1e-5); // Bottom-right
435    }
436
437    #[test]
438    fn test_bicubic_identity() {
439        let src = vec![
440            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
441        ];
442        let mut dst = vec![0.0; 16];
443
444        bicubic_f32(&src, 4, 4, &mut dst, 4, 4)
445            .expect("bicubic_f32 identity resampling should succeed in test");
446
447        for i in 0..16 {
448            assert_relative_eq!(dst[i], src[i], epsilon = 0.1);
449        }
450    }
451
452    #[test]
453    fn test_nearest() {
454        let src = vec![1.0, 2.0, 3.0, 4.0];
455        let mut dst = vec![0.0; 4];
456
457        nearest_f32(&src, 2, 2, &mut dst, 2, 2)
458            .expect("nearest_f32 identity resampling should succeed in test");
459
460        for i in 0..4 {
461            assert_relative_eq!(dst[i], src[i]);
462        }
463    }
464
465    #[test]
466    fn test_nearest_downsample() {
467        let src = vec![
468            1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 3.0, 3.0, 4.0, 4.0,
469        ];
470        let mut dst = vec![0.0; 4];
471
472        nearest_f32(&src, 4, 4, &mut dst, 2, 2)
473            .expect("nearest_f32 downsampling should succeed in test");
474
475        // Should select nearest pixels
476        assert_relative_eq!(dst[0], 1.0);
477        assert_relative_eq!(dst[1], 2.0);
478        assert_relative_eq!(dst[2], 3.0);
479        assert_relative_eq!(dst[3], 4.0);
480    }
481
482    #[test]
483    fn test_downsample_average() {
484        // 4x4 with blocks of same values -> 2x2
485        let src = vec![
486            1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 3.0, 3.0, 4.0, 4.0,
487        ];
488        let mut dst = vec![0.0; 4];
489
490        downsample_average_f32(&src, 4, 4, &mut dst, 2, 2)
491            .expect("downsample_average_f32 should succeed in test");
492
493        // Each 2x2 block should average to its value
494        assert_relative_eq!(dst[0], 1.0);
495        assert_relative_eq!(dst[1], 2.0);
496        assert_relative_eq!(dst[2], 3.0);
497        assert_relative_eq!(dst[3], 4.0);
498    }
499
500    #[test]
501    fn test_invalid_dimensions() {
502        let src = vec![1.0; 10];
503        let mut dst = vec![0.0; 4];
504
505        // Mismatched source size
506        assert!(bilinear_f32(&src, 4, 4, &mut dst, 2, 2).is_err());
507
508        // Mismatched destination size
509        let src = vec![1.0; 16];
510        assert!(bilinear_f32(&src, 4, 4, &mut dst, 3, 3).is_err());
511    }
512
513    #[test]
514    fn test_bicubic_too_small() {
515        // Source must be at least 4x4
516        let src = vec![1.0; 9];
517        let mut dst = vec![0.0; 4];
518
519        assert!(bicubic_f32(&src, 3, 3, &mut dst, 2, 2).is_err());
520    }
521
522    #[test]
523    fn test_cubic_kernel() {
524        let weights = cubic_kernel(0.5);
525
526        // Catmull-Rom weights should sum to 1
527        let sum: f32 = weights.iter().sum();
528        assert_relative_eq!(sum, 1.0, epsilon = 1e-6);
529    }
530
531    #[test]
532    fn test_large_downsample() {
533        let src = vec![1.0_f32; 1000 * 1000];
534        let mut dst = vec![0.0_f32; 100 * 100];
535
536        bilinear_f32(&src, 1000, 1000, &mut dst, 100, 100)
537            .expect("bilinear_f32 large downsampling should succeed in test");
538
539        // All values should be 1.0
540        for &val in &dst {
541            assert_relative_eq!(val, 1.0);
542        }
543    }
544}