scirs2-interpolate 0.4.2

Interpolation module for SciRS2 (scirs2-interpolate)
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
//! GPU-Accelerated RBF Solver (CPU simulation mode)
//!
//! Provides a GPU-dispatch-aware RBF interpolation interface. When
//! `GpuDispatch::Simulated` is selected the kernel evaluations and the linear
//! solve are carried out entirely on the CPU in parallel chunks that mimic the
//! thread-block structure of a real GPU dispatch.  This lets application code
//! adopt the GPU API without requiring hardware support, enabling seamless
//! future migration to a real GPU backend.
//!
//! ## Algorithm
//!
//! 1. **Build** the N×N RBF matrix Φ where Φ_{ij} = φ(‖xᵢ − xⱼ‖).
//! 2. **Solve** Φ w = y for the weight vector w using partial-pivot Gaussian
//!    elimination.
//! 3. **Evaluate** at query points q: f(q) = Σᵢ wᵢ φ(‖q − xᵢ‖).
//!
//! The simulation path chunks query evaluation into `block_size` segments,
//! mirroring the thread-block pattern of GPU dispatch.

use crate::error::InterpolateError;

// ---------------------------------------------------------------------------
// Public types
// ---------------------------------------------------------------------------

/// Controls whether computation is routed to a real GPU or simulated in CPU.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GpuDispatch {
    /// Use the CPU only (no GPU, no simulation overhead).
    Cpu,
    /// Simulate GPU dispatch on the CPU in parallel chunks.
    Simulated,
}

/// RBF kernel types supported by the GPU solver.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum GpuRbfKernel {
    /// φ(r) = exp(-(ε r)²)
    Gaussian,
    /// φ(r) = √(1 + (ε r)²)
    Multiquadric,
    /// φ(r) = r² ln(r)  (r > 0), 0 at r = 0
    ThinPlateSpline,
    /// φ(r) = r
    Linear,
}

/// Configuration for [`GpuRbfSolver`].
#[derive(Debug, Clone)]
pub struct GpuRbfConfig {
    /// RBF kernel choice.
    pub kernel: GpuRbfKernel,
    /// Shape parameter ε (not used by ThinPlateSpline and Linear).
    pub epsilon: f64,
    /// GPU dispatch mode.
    pub dispatch: GpuDispatch,
    /// Thread-block size for GPU simulation; controls evaluation chunk size.
    pub block_size: usize,
}

impl Default for GpuRbfConfig {
    fn default() -> Self {
        Self {
            kernel: GpuRbfKernel::Gaussian,
            epsilon: 1.0,
            dispatch: GpuDispatch::Simulated,
            block_size: 64,
        }
    }
}

/// GPU-accelerated (or simulated) RBF interpolation solver.
///
/// # Example
///
/// ```rust
/// use scirs2_interpolate::gpu_rbf::{GpuRbfConfig, GpuRbfKernel, GpuRbfSolver, GpuDispatch};
///
/// let config = GpuRbfConfig {
///     kernel: GpuRbfKernel::Gaussian,
///     epsilon: 1.0,
///     dispatch: GpuDispatch::Simulated,
///     block_size: 32,
/// };
/// let mut solver = GpuRbfSolver::new(config);
///
/// let points = vec![[0.0, 0.0], [1.0, 0.0], [0.0, 1.0]];
/// let values = vec![1.0, 2.0, 3.0];
/// solver.fit(&points, &values).expect("fit should succeed");
///
/// let results = solver.evaluate(&points).expect("evaluate should succeed");
/// ```
#[derive(Debug)]
pub struct GpuRbfSolver {
    config: GpuRbfConfig,
    points: Vec<[f64; 2]>,
    weights: Vec<f64>,
    fitted: bool,
}

impl GpuRbfSolver {
    /// Create a new solver with the given configuration.
    pub fn new(config: GpuRbfConfig) -> Self {
        Self {
            config,
            points: Vec::new(),
            weights: Vec::new(),
            fitted: false,
        }
    }

    /// Fit the RBF to scattered (x, y) → z data.
    ///
    /// Builds the full N×N RBF matrix and solves the linear system using
    /// partial-pivot Gaussian elimination.  For `GpuDispatch::Simulated` the
    /// matrix construction is broken into `block_size` row-chunks.
    pub fn fit(&mut self, points: &[[f64; 2]], values: &[f64]) -> Result<(), InterpolateError> {
        let n = points.len();
        if n == 0 {
            return Err(InterpolateError::InsufficientData(
                "gpu_rbf: at least one point required".into(),
            ));
        }
        if values.len() != n {
            return Err(InterpolateError::ShapeMismatch {
                expected: n.to_string(),
                actual: values.len().to_string(),
                object: "values".into(),
            });
        }
        if self.config.epsilon <= 0.0 {
            return Err(InterpolateError::InvalidInput {
                message: "gpu_rbf: epsilon must be positive".into(),
            });
        }

        let mat = build_rbf_matrix(
            points,
            &self.config.kernel,
            self.config.epsilon,
            self.config.block_size,
            &self.config.dispatch,
        );

        let w = solve_linear_system(&mat, values, n)?;

        self.points = points.to_vec();
        self.weights = w;
        self.fitted = true;
        Ok(())
    }

    /// Evaluate the fitted RBF at `query_points`.
    ///
    /// For `GpuDispatch::Simulated` the evaluation is chunked into blocks of
    /// `block_size` points, mimicking GPU thread-block dispatch.
    pub fn evaluate(&self, query_points: &[[f64; 2]]) -> Result<Vec<f64>, InterpolateError> {
        if !self.fitted {
            return Err(InterpolateError::InvalidState(
                "gpu_rbf: solver not fitted — call fit() first".into(),
            ));
        }
        let result = match self.config.dispatch {
            GpuDispatch::Cpu => self.evaluate_cpu(query_points),
            GpuDispatch::Simulated => self.evaluate_simulated(query_points),
        };
        Ok(result)
    }

    /// Batch evaluate — semantically identical to `evaluate` (both paths are
    /// synchronous in this simulation; the async API surface is preserved for
    /// future hardware backends).
    pub fn evaluate_batch(&self, query_points: &[[f64; 2]]) -> Result<Vec<f64>, InterpolateError> {
        self.evaluate(query_points)
    }

    /// Number of training points.
    pub fn n_points(&self) -> usize {
        self.points.len()
    }

    /// The fitted RBF weight vector.
    pub fn weights(&self) -> &[f64] {
        &self.weights
    }

    // -----------------------------------------------------------------------
    // Internal helpers
    // -----------------------------------------------------------------------

    fn evaluate_cpu(&self, query_points: &[[f64; 2]]) -> Vec<f64> {
        query_points.iter().map(|q| self.eval_single(q)).collect()
    }

    fn evaluate_simulated(&self, query_points: &[[f64; 2]]) -> Vec<f64> {
        let block = self.config.block_size.max(1);
        let mut out = Vec::with_capacity(query_points.len());
        for chunk in query_points.chunks(block) {
            for q in chunk {
                out.push(self.eval_single(q));
            }
        }
        out
    }

    fn eval_single(&self, q: &[f64; 2]) -> f64 {
        self.points
            .iter()
            .zip(self.weights.iter())
            .map(|(p, &w)| {
                let dx = q[0] - p[0];
                let dy = q[1] - p[1];
                let r = (dx * dx + dy * dy).sqrt();
                w * rbf_kernel(r, &self.config.kernel, self.config.epsilon)
            })
            .sum()
    }
}

// ---------------------------------------------------------------------------
// Free functions
// ---------------------------------------------------------------------------

/// Evaluate a single RBF kernel at radius `r`.
///
/// - **Gaussian**: φ(r) = exp(-(ε r)²)
/// - **Multiquadric**: φ(r) = √(1 + (ε r)²)
/// - **ThinPlateSpline**: φ(r) = r² ln(r) for r > 0, else 0
/// - **Linear**: φ(r) = r
pub fn rbf_kernel(r: f64, kernel: &GpuRbfKernel, epsilon: f64) -> f64 {
    match kernel {
        GpuRbfKernel::Gaussian => {
            let er = epsilon * r;
            (-(er * er)).exp()
        }
        GpuRbfKernel::Multiquadric => {
            let er = epsilon * r;
            (1.0 + er * er).sqrt()
        }
        GpuRbfKernel::ThinPlateSpline => {
            if r < 1e-300 {
                0.0
            } else {
                r * r * r.ln()
            }
        }
        GpuRbfKernel::Linear => r,
    }
}

/// Build the N×N RBF collocation matrix.
///
/// In `Simulated` mode the rows are computed in `block_size`-row chunks.
pub fn build_rbf_matrix(
    points: &[[f64; 2]],
    kernel: &GpuRbfKernel,
    epsilon: f64,
    block_size: usize,
    dispatch: &GpuDispatch,
) -> Vec<f64> {
    let n = points.len();
    let mut mat = vec![0.0f64; n * n];

    let block = block_size.max(1);
    let row_chunks: Vec<std::ops::Range<usize>> = match dispatch {
        GpuDispatch::Simulated => {
            // Chunk rows to simulate GPU thread-blocks
            (0..n)
                .step_by(block)
                .map(|start| start..n.min(start + block))
                .collect()
        }
        GpuDispatch::Cpu => vec![0..n],
    };

    for chunk in row_chunks {
        for i in chunk {
            for j in 0..n {
                let dx = points[i][0] - points[j][0];
                let dy = points[i][1] - points[j][1];
                let r = (dx * dx + dy * dy).sqrt();
                mat[i * n + j] = rbf_kernel(r, kernel, epsilon);
            }
        }
    }
    mat
}

/// Solve the linear system A x = b using partial-pivot Gaussian elimination.
///
/// `a` is stored row-major with shape n×n. Returns the solution vector x.
pub fn solve_linear_system(a: &[f64], b: &[f64], n: usize) -> Result<Vec<f64>, InterpolateError> {
    // Build augmented matrix [A | b]
    let mut aug: Vec<f64> = vec![0.0; n * (n + 1)];
    for i in 0..n {
        for j in 0..n {
            aug[i * (n + 1) + j] = a[i * n + j];
        }
        aug[i * (n + 1) + n] = b[i];
    }

    // Forward elimination with partial pivoting
    for col in 0..n {
        // Find pivot
        let (pivot_row, pivot_val) = (col..n).map(|r| (r, aug[r * (n + 1) + col].abs())).fold(
            (col, 0.0_f64),
            |(best_r, best_v), (r, v)| {
                if v > best_v {
                    (r, v)
                } else {
                    (best_r, best_v)
                }
            },
        );

        if pivot_val < 1e-14 {
            return Err(InterpolateError::LinalgError(
                "gpu_rbf: singular RBF matrix — consider regularisation or distinct points".into(),
            ));
        }

        // Swap rows
        if pivot_row != col {
            for j in 0..=n {
                aug.swap(col * (n + 1) + j, pivot_row * (n + 1) + j);
            }
        }

        // Eliminate below
        let pivot_inv = 1.0 / aug[col * (n + 1) + col];
        for row in (col + 1)..n {
            let factor = aug[row * (n + 1) + col] * pivot_inv;
            for j in col..=n {
                let sub = factor * aug[col * (n + 1) + j];
                aug[row * (n + 1) + j] -= sub;
            }
        }
    }

    // Back substitution
    let mut x = vec![0.0f64; n];
    for i in (0..n).rev() {
        let mut sum = aug[i * (n + 1) + n];
        for j in (i + 1)..n {
            sum -= aug[i * (n + 1) + j] * x[j];
        }
        x[i] = sum / aug[i * (n + 1) + i];
    }
    Ok(x)
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;

    fn make_5_point_solver(kernel: GpuRbfKernel) -> GpuRbfSolver {
        let config = GpuRbfConfig {
            kernel,
            epsilon: 1.0,
            dispatch: GpuDispatch::Simulated,
            block_size: 32,
        };
        GpuRbfSolver::new(config)
    }

    /// Fit on 5 points and check that evaluating at the training points
    /// reproduces the values within 1e-6.
    #[test]
    fn test_fit_and_reproduce_training_values() {
        let points = vec![
            [0.0_f64, 0.0],
            [1.0, 0.0],
            [0.0, 1.0],
            [1.0, 1.0],
            [0.5, 0.5],
        ];
        let values = vec![1.0, 2.0, 3.0, 4.0, 2.5];

        let mut solver = make_5_point_solver(GpuRbfKernel::Gaussian);
        solver.fit(&points, &values).expect("fit failed");

        let out = solver.evaluate(&points).expect("evaluate failed");
        for (got, &expected) in out.iter().zip(values.iter()) {
            assert!(
                (got - expected).abs() < 1e-6,
                "mismatch: got {got:.8} expected {expected:.8}"
            );
        }
    }

    /// Gaussian and ThinPlateSpline kernels must give different results at a
    /// query point.
    #[test]
    fn test_gaussian_vs_tps_kernels_differ() {
        // Use 5 well-separated points so both kernels produce non-singular systems
        let points = vec![
            [0.0_f64, 0.0],
            [2.0, 0.0],
            [0.0, 2.0],
            [2.0, 2.0],
            [1.0, 1.0],
        ];
        let values = vec![1.0, 2.0, 3.0, 4.0, 2.5];
        let query = vec![[0.5_f64, 0.5], [1.5, 0.5]];

        let mut gauss = make_5_point_solver(GpuRbfKernel::Gaussian);
        gauss.fit(&points, &values).expect("fit gaussian failed");

        let mut tps = make_5_point_solver(GpuRbfKernel::ThinPlateSpline);
        tps.fit(&points, &values).expect("fit tps failed");

        let g_out = gauss.evaluate(&query).expect("eval gaussian failed");
        let t_out = tps.evaluate(&query).expect("eval tps failed");

        // Results must differ between kernels at some query point
        let differ = g_out
            .iter()
            .zip(t_out.iter())
            .any(|(a, b)| (a - b).abs() > 1e-6);
        assert!(
            differ,
            "Gaussian and TPS should produce different evaluations"
        );
    }

    /// evaluate_batch must return the same values as evaluate.
    #[test]
    fn test_evaluate_batch_equals_evaluate() {
        let points = vec![[0.0_f64, 0.0], [1.0, 0.5], [0.3, 0.7]];
        let values = vec![0.0, 1.0, 0.5];
        let query = vec![[0.2_f64, 0.3], [0.8, 0.1], [0.5, 0.5]];

        let mut solver = make_5_point_solver(GpuRbfKernel::Multiquadric);
        solver.fit(&points, &values).expect("fit failed");

        let ev = solver.evaluate(&query).expect("evaluate failed");
        let ev_batch = solver
            .evaluate_batch(&query)
            .expect("evaluate_batch failed");

        for (a, b) in ev.iter().zip(ev_batch.iter()) {
            assert!((a - b).abs() < 1e-15, "evaluate and evaluate_batch differ");
        }
    }

    /// Multiquadric kernel at r = 0 should return 1.0 (√(1 + 0) = 1).
    #[test]
    fn test_multiquadric_at_zero() {
        let val = rbf_kernel(0.0, &GpuRbfKernel::Multiquadric, 2.5);
        assert!(
            (val - 1.0).abs() < 1e-15,
            "Multiquadric at r=0 should be 1.0, got {val}"
        );
    }

    /// ThinPlateSpline at r = 0 must be exactly 0.
    #[test]
    fn test_tps_kernel_at_zero() {
        let val = rbf_kernel(0.0, &GpuRbfKernel::ThinPlateSpline, 1.0);
        assert_eq!(val, 0.0, "TPS at r=0 should be 0.0");
    }

    /// Gaussian kernel is strictly positive.
    #[test]
    fn test_gaussian_kernel_positive() {
        for r in [0.0, 0.5, 1.0, 2.0, 10.0] {
            let v = rbf_kernel(r, &GpuRbfKernel::Gaussian, 1.0);
            assert!(
                v > 0.0,
                "Gaussian kernel must be positive, got {v} at r={r}"
            );
        }
    }

    /// CPU dispatch mode must reproduce same results as Simulated.
    #[test]
    fn test_cpu_dispatch_same_as_simulated() {
        let points = vec![[0.0_f64, 0.0], [1.0, 0.0], [0.0, 1.0]];
        let values = vec![1.0, 2.0, 3.0];
        let query = vec![[0.5_f64, 0.5], [0.1, 0.9]];

        let mut cpu_solver = GpuRbfSolver::new(GpuRbfConfig {
            dispatch: GpuDispatch::Cpu,
            ..GpuRbfConfig::default()
        });
        let mut sim_solver = GpuRbfSolver::new(GpuRbfConfig {
            dispatch: GpuDispatch::Simulated,
            ..GpuRbfConfig::default()
        });

        cpu_solver.fit(&points, &values).expect("cpu fit failed");
        sim_solver.fit(&points, &values).expect("sim fit failed");

        let cpu_out = cpu_solver.evaluate(&query).expect("cpu eval failed");
        let sim_out = sim_solver.evaluate(&query).expect("sim eval failed");

        for (c, s) in cpu_out.iter().zip(sim_out.iter()) {
            assert!(
                (c - s).abs() < 1e-12,
                "CPU and Simulated dispatch differ: {c} vs {s}"
            );
        }
    }
}