Skip to main content

scirs2_interpolate/
auto_select.rs

1//! Automatic interpolation method selection based on problem characteristics.
2//!
3//! This module provides a *problem-specification-driven* API for selecting the
4//! most appropriate interpolation strategy.  Unlike the data-driven selector in
5//! [`crate::physics_informed::auto_select`], which analyses actual arrays, this
6//! module works from high-level problem descriptors such as dimensionality,
7//! dataset size, memory budget, and time constraints — useful when the data
8//! hasn't been generated yet or when building adaptive pipelines.
9//!
10//! # Decision Rules (in priority order)
11//!
12//! | Condition | Method |
13//! |-----------|--------|
14//! | `n_points > 50_000 && dim > 2` | `RandomFeaturesRbf` (scalable) |
15//! | `n_points > 10_000 && dim ≤ 3` | `Kriging` |
16//! | `dim == 1` | `CubicSpline` |
17//! | `dim == 2 && n_points < 1_000` | `ThinPlateSpline` |
18//! | `dim ≤ 4 && smooth` | `Rbf(Gaussian)` |
19//! | `dim > 4` | `Kriging` or `RandomFeaturesRbf` |
20//! | `require_derivatives` | avoid `NearestNeighbor` |
21//! | low memory | `RandomFeaturesRbf` |
22//!
23//! # Example
24//!
25//! ```rust
26//! use scirs2_interpolate::auto_select::{auto_select, InterpolationProblem};
27//!
28//! let problem = InterpolationProblem {
29//!     n_points: 200,
30//!     dim: 2,
31//!     smoothness_estimate: Some(0.8),
32//!     available_memory_mb: Some(512),
33//!     require_derivatives: false,
34//!     time_budget_ms: None,
35//! };
36//! let rec = auto_select(&problem);
37//! println!("Recommended: {:?}  — {}", rec.method, rec.reason);
38//! ```
39
40use crate::error::InterpolateResult;
41
42// ─────────────────────────────────────────────────────────────────────────────
43// Problem descriptor
44// ─────────────────────────────────────────────────────────────────────────────
45
46/// High-level description of an interpolation problem used for method selection.
47#[derive(Debug, Clone)]
48pub struct InterpolationProblem {
49    /// Number of training data points.
50    pub n_points: usize,
51    /// Input dimension (number of coordinates per point).
52    pub dim: usize,
53    /// Optional smoothness estimate in `[0.0, 1.0]`.
54    /// `0.0` = rough/discontinuous, `1.0` = very smooth.
55    pub smoothness_estimate: Option<f64>,
56    /// Available RAM in megabytes, if constrained.
57    pub available_memory_mb: Option<usize>,
58    /// Whether the chosen method must provide derivative evaluation.
59    pub require_derivatives: bool,
60    /// Maximum allowed wall-clock time for prediction (milliseconds).
61    pub time_budget_ms: Option<u64>,
62}
63
64impl Default for InterpolationProblem {
65    fn default() -> Self {
66        Self {
67            n_points: 100,
68            dim: 1,
69            smoothness_estimate: None,
70            available_memory_mb: None,
71            require_derivatives: false,
72            time_budget_ms: None,
73        }
74    }
75}
76
77// ─────────────────────────────────────────────────────────────────────────────
78// Method enum
79// ─────────────────────────────────────────────────────────────────────────────
80
81/// Interpolation methods that can be recommended.
82#[non_exhaustive]
83#[derive(Debug, Clone, PartialEq)]
84pub enum InterpolationMethod {
85    /// Nearest-neighbour lookup — O(1) evaluation, no smoothness.
86    NearestNeighbor,
87    /// Piecewise linear B-spline — fast, C⁰.
88    LinearBspline,
89    /// Piecewise cubic spline — smooth (C²), ideal for 1-D data.
90    CubicSpline,
91    /// Radial basis function with specified kernel.
92    Rbf(RbfKernelHint),
93    /// Natural-neighbour (Sibson) interpolation.
94    NaturalNeighbor,
95    /// Kriging (Gaussian process regression) — exact, uncertainty available.
96    Kriging,
97    /// Random Fourier Feature RBF (Rahimi-Recht) — scalable to large datasets.
98    RandomFeaturesRbf,
99    /// Thin-plate spline (polyharmonic, dim ≤ 3, small datasets).
100    ThinPlateSpline,
101}
102
103/// Suggested RBF kernel hint for `InterpolationMethod::Rbf`.
104#[non_exhaustive]
105#[derive(Debug, Clone, PartialEq)]
106pub enum RbfKernelHint {
107    /// Gaussian (squared-exponential) — very smooth.
108    Gaussian,
109    /// Multiquadric — robust general-purpose choice.
110    Multiquadric,
111    /// Thin-plate spline — natural for 2-D.
112    ThinPlate,
113}
114
115// ─────────────────────────────────────────────────────────────────────────────
116// Recommendation
117// ─────────────────────────────────────────────────────────────────────────────
118
119/// A method recommendation with supporting rationale and resource estimates.
120#[derive(Debug, Clone)]
121pub struct MethodRecommendation {
122    /// The recommended interpolation strategy.
123    pub method: InterpolationMethod,
124    /// Human-readable explanation of why this method was selected.
125    pub reason: String,
126    /// Estimated peak memory usage in megabytes (rough model).
127    pub estimated_memory_mb: f64,
128    /// Estimated wall-clock time for a single prediction in milliseconds.
129    /// `None` if no reliable estimate is available.
130    pub estimated_time_ms: Option<f64>,
131}
132
133// ─────────────────────────────────────────────────────────────────────────────
134// Core selection logic
135// ─────────────────────────────────────────────────────────────────────────────
136
137/// Estimate peak memory for a dense RBF / kriging solve in megabytes.
138///
139/// Storing the n×n kernel matrix: `n² × 8 bytes / 1024² MB`.
140fn dense_memory_mb(n: usize) -> f64 {
141    (n as f64 * n as f64 * 8.0) / (1024.0 * 1024.0)
142}
143
144/// Estimate memory for random-feature approximation.
145///
146/// Stores D×d weights + n predictions: `(D * d + n) * 8 / 1024²`.
147fn rf_memory_mb(n: usize, dim: usize, n_features: usize) -> f64 {
148    ((n_features * dim + n) as f64 * 8.0) / (1024.0 * 1024.0)
149}
150
151/// Automatically recommend an interpolation method for the given problem.
152///
153/// Returns a `MethodRecommendation` explaining the choice together with rough
154/// resource estimates.  The function always succeeds (it never returns `Err`).
155///
156/// When `require_derivatives` is `true` the selector ensures that the chosen
157/// method supports derivative evaluation (first-order at minimum).  In practice
158/// this rules out `NearestNeighbor` (which has no gradient) and biases toward
159/// differentiable kernels such as `Rbf(Gaussian)` or `CubicSpline`.
160pub fn auto_select(problem: &InterpolationProblem) -> MethodRecommendation {
161    let n = problem.n_points;
162    let d = problem.dim;
163    let smooth = problem.smoothness_estimate.unwrap_or(0.5);
164    let mem_limit = problem.available_memory_mb;
165    // Effective smoothness: if derivatives are required treat data as at least
166    // moderately smooth so we steer away from non-differentiable methods.
167    let effective_smooth = if problem.require_derivatives {
168        smooth.max(0.6)
169    } else {
170        smooth
171    };
172
173    // ── Memory pressure: prefer O(n) random features if kernel matrix won't fit.
174    let kernel_mem_mb = dense_memory_mb(n);
175    let rf_mem_mb = rf_memory_mb(n, d, 500);
176    let memory_constrained = mem_limit.is_some_and(|mb| kernel_mem_mb > mb as f64);
177
178    // ── Rule 1: very large + high-dimensional → Random Features.
179    if n > 50_000 && d > 2 {
180        return MethodRecommendation {
181            method: InterpolationMethod::RandomFeaturesRbf,
182            reason: format!(
183                "{n} points in {d}-D: kernel matrix would be {kernel_mem_mb:.0} MB; \
184                 Random Fourier Features (D=500) scale to O(nD) ≈ {rf_mem_mb:.1} MB \
185                 and O(nD) fit time."
186            ),
187            estimated_memory_mb: rf_mem_mb,
188            estimated_time_ms: Some(n as f64 * 500.0 * 1e-5),
189        };
190    }
191
192    // ── Rule 2: memory constrained.
193    if memory_constrained {
194        return MethodRecommendation {
195            method: InterpolationMethod::RandomFeaturesRbf,
196            reason: format!(
197                "Memory budget {mb} MB is below estimated kernel matrix \
198                 {kernel_mem_mb:.0} MB; switching to Random Fourier Features.",
199                mb = mem_limit.unwrap_or(0)
200            ),
201            estimated_memory_mb: rf_mem_mb,
202            estimated_time_ms: None,
203        };
204    }
205
206    // ── Rule 3: 1-D data → CubicSpline (fastest, C² smooth).
207    if d == 1 {
208        // CubicSpline doesn't support derivatives unless explicitly augmented,
209        // but it does provide exact first-derivatives via the spline coefficients.
210        let mem_mb = (n as f64 * 4.0 * 8.0) / (1024.0 * 1024.0); // 4 coeffs per interval
211        return MethodRecommendation {
212            method: InterpolationMethod::CubicSpline,
213            reason: format!(
214                "1-D problem with {n} points: CubicSpline provides C² smoothness, \
215                 O(n) memory ({mem_mb:.3} MB), and O(log n) evaluation."
216            ),
217            estimated_memory_mb: mem_mb,
218            estimated_time_ms: Some(0.001), // very fast
219        };
220    }
221
222    // ── Rule 4: 2-D small dataset → ThinPlateSpline (natural for 2-D scattered).
223    // ThinPlateSpline is C¹ and supports gradient evaluation, so it is
224    // compatible with require_derivatives=true.
225    if d == 2 && n < 1_000 {
226        let mem_mb = dense_memory_mb(n);
227        return MethodRecommendation {
228            method: InterpolationMethod::ThinPlateSpline,
229            reason: format!(
230                "2-D scattered data with {n} points: ThinPlateSpline minimises bending \
231                 energy, giving the smoothest C¹ interpolant; matrix {mem_mb:.2} MB."
232            ),
233            estimated_memory_mb: mem_mb,
234            estimated_time_ms: Some(n as f64 * 1e-3),
235        };
236    }
237
238    // ── Rule 5: large 1-D / 2-D / 3-D → Kriging (uncertainty quantification).
239    if n > 10_000 && d <= 3 {
240        return MethodRecommendation {
241            method: InterpolationMethod::Kriging,
242            reason: format!(
243                "{n} points in {d}-D: Kriging with Nyström approximation offers \
244                 probabilistic predictions and scales to ~10⁴ points per batch."
245            ),
246            estimated_memory_mb: dense_memory_mb(n.min(2000)), // Nyström subset
247            estimated_time_ms: Some(n as f64 * 5e-4),
248        };
249    }
250
251    // ── Rule 6: low-to-medium D, smooth data → Gaussian RBF.
252    // `effective_smooth` is raised to ≥ 0.6 when require_derivatives is true,
253    // so this branch is also reached when the user needs gradient access.
254    if d <= 4 && effective_smooth > 0.5 {
255        let mem_mb = dense_memory_mb(n);
256        return MethodRecommendation {
257            method: InterpolationMethod::Rbf(RbfKernelHint::Gaussian),
258            reason: format!(
259                "{d}-D smooth data ({n} points, smoothness={effective_smooth:.2}): \
260                 Gaussian RBF reproduces analytic-like smoothness and supports \
261                 gradient evaluation; {mem_mb:.2} MB."
262            ),
263            estimated_memory_mb: mem_mb,
264            estimated_time_ms: Some(n as f64 * 1e-3),
265        };
266    }
267
268    // ── Rule 7: moderate D, rough data → Multiquadric RBF.
269    if d <= 4 {
270        let mem_mb = dense_memory_mb(n);
271        return MethodRecommendation {
272            method: InterpolationMethod::Rbf(RbfKernelHint::Multiquadric),
273            reason: format!(
274                "{d}-D data ({n} points): Multiquadric RBF is robust for rough \
275                 or irregularly sampled data; {mem_mb:.2} MB."
276            ),
277            estimated_memory_mb: mem_mb,
278            estimated_time_ms: Some(n as f64 * 1e-3),
279        };
280    }
281
282    // ── Rule 8: high-D (> 4) → Kriging or RandomFeatures based on size.
283    if d > 4 {
284        if n > 5_000 {
285            return MethodRecommendation {
286                method: InterpolationMethod::RandomFeaturesRbf,
287                reason: format!(
288                    "{d}-D data ({n} points): high dimensionality with large n; \
289                     Random Fourier Features mitigate curse of dimensionality."
290                ),
291                estimated_memory_mb: rf_mem_mb,
292                estimated_time_ms: Some(n as f64 * 500.0 * 1e-5),
293            };
294        } else {
295            return MethodRecommendation {
296                method: InterpolationMethod::Kriging,
297                reason: format!(
298                    "{d}-D data ({n} points): Kriging adapts kernel length-scales \
299                     per dimension, handling anisotropic high-D spaces well."
300                ),
301                estimated_memory_mb: dense_memory_mb(n),
302                estimated_time_ms: Some(n as f64 * 2e-3),
303            };
304        }
305    }
306
307    // ── Default fallback.
308    MethodRecommendation {
309        method: InterpolationMethod::Rbf(RbfKernelHint::Multiquadric),
310        reason: format!(
311            "Default choice for {d}-D data ({n} points): Multiquadric RBF \
312             works well for general scattered data."
313        ),
314        estimated_memory_mb: dense_memory_mb(n),
315        estimated_time_ms: None,
316    }
317}
318
319/// Validate and auto-select, returning an error for clearly invalid problems.
320///
321/// Invalid: `n_points == 0`, `dim == 0`.
322pub fn auto_select_validated(
323    problem: &InterpolationProblem,
324) -> InterpolateResult<MethodRecommendation> {
325    use crate::error::InterpolateError;
326    if problem.n_points == 0 {
327        return Err(InterpolateError::InvalidInput {
328            message: "n_points must be > 0".to_string(),
329        });
330    }
331    if problem.dim == 0 {
332        return Err(InterpolateError::InvalidInput {
333            message: "dim must be > 0".to_string(),
334        });
335    }
336    Ok(auto_select(problem))
337}
338
339// ─────────────────────────────────────────────────────────────────────────────
340// Tests
341// ─────────────────────────────────────────────────────────────────────────────
342
343#[cfg(test)]
344mod tests {
345    use super::*;
346
347    #[test]
348    fn test_auto_select_1d_cubic() {
349        let problem = InterpolationProblem {
350            n_points: 50,
351            dim: 1,
352            ..Default::default()
353        };
354        let rec = auto_select(&problem);
355        assert_eq!(
356            rec.method,
357            InterpolationMethod::CubicSpline,
358            "1-D should recommend CubicSpline"
359        );
360        assert!(!rec.reason.is_empty());
361    }
362
363    #[test]
364    fn test_auto_select_2d_small_thin_plate() {
365        let problem = InterpolationProblem {
366            n_points: 200,
367            dim: 2,
368            smoothness_estimate: Some(0.7),
369            ..Default::default()
370        };
371        let rec = auto_select(&problem);
372        assert_eq!(
373            rec.method,
374            InterpolationMethod::ThinPlateSpline,
375            "2-D with < 1000 points should recommend ThinPlateSpline"
376        );
377    }
378
379    #[test]
380    fn test_auto_select_high_dim_large_rf() {
381        let problem = InterpolationProblem {
382            n_points: 100_000,
383            dim: 10,
384            ..Default::default()
385        };
386        let rec = auto_select(&problem);
387        assert_eq!(
388            rec.method,
389            InterpolationMethod::RandomFeaturesRbf,
390            "Large high-D should recommend RandomFeaturesRbf"
391        );
392        assert!(rec.estimated_memory_mb > 0.0);
393    }
394
395    #[test]
396    fn test_auto_select_memory_constrained() {
397        let problem = InterpolationProblem {
398            n_points: 10_000,
399            dim: 3,
400            available_memory_mb: Some(10), // very tight for n×n matrix
401            ..Default::default()
402        };
403        let rec = auto_select(&problem);
404        // A 10 000×10 000 matrix is ~800 MB, so RandomFeatures should be chosen.
405        assert_eq!(rec.method, InterpolationMethod::RandomFeaturesRbf);
406    }
407
408    #[test]
409    fn test_auto_select_no_panic_any_input() {
410        let cases = vec![
411            InterpolationProblem {
412                n_points: 1,
413                dim: 1,
414                ..Default::default()
415            },
416            InterpolationProblem {
417                n_points: 1_000_000,
418                dim: 20,
419                ..Default::default()
420            },
421            InterpolationProblem {
422                n_points: 500,
423                dim: 3,
424                smoothness_estimate: Some(0.1),
425                require_derivatives: true,
426                available_memory_mb: Some(2048),
427                time_budget_ms: Some(100),
428            },
429            InterpolationProblem {
430                n_points: 50,
431                dim: 4,
432                smoothness_estimate: Some(0.9),
433                ..Default::default()
434            },
435        ];
436        for (i, p) in cases.iter().enumerate() {
437            let rec = auto_select(p);
438            assert!(
439                !rec.reason.is_empty(),
440                "case {i}: reason should not be empty"
441            );
442            assert!(
443                rec.estimated_memory_mb >= 0.0,
444                "case {i}: memory estimate should be non-negative"
445            );
446        }
447    }
448
449    #[test]
450    fn test_auto_select_validated_zero_points() {
451        let problem = InterpolationProblem {
452            n_points: 0,
453            dim: 1,
454            ..Default::default()
455        };
456        let result = auto_select_validated(&problem);
457        assert!(result.is_err(), "0 points should return error");
458    }
459
460    #[test]
461    fn test_auto_select_validated_zero_dim() {
462        let problem = InterpolationProblem {
463            n_points: 10,
464            dim: 0,
465            ..Default::default()
466        };
467        let result = auto_select_validated(&problem);
468        assert!(result.is_err(), "dim=0 should return error");
469    }
470
471    #[test]
472    fn test_auto_select_large_moderate_d_kriging() {
473        let problem = InterpolationProblem {
474            n_points: 15_000,
475            dim: 3,
476            ..Default::default()
477        };
478        let rec = auto_select(&problem);
479        assert_eq!(rec.method, InterpolationMethod::Kriging);
480    }
481
482    #[test]
483    fn test_auto_select_smooth_4d_gaussian_rbf() {
484        let problem = InterpolationProblem {
485            n_points: 300,
486            dim: 4,
487            smoothness_estimate: Some(0.9),
488            ..Default::default()
489        };
490        let rec = auto_select(&problem);
491        assert_eq!(
492            rec.method,
493            InterpolationMethod::Rbf(RbfKernelHint::Gaussian),
494        );
495    }
496
497    #[test]
498    fn test_auto_select_high_dim_small_kriging() {
499        let problem = InterpolationProblem {
500            n_points: 200,
501            dim: 8,
502            ..Default::default()
503        };
504        let rec = auto_select(&problem);
505        assert_eq!(rec.method, InterpolationMethod::Kriging);
506    }
507
508    /// When `require_derivatives=true` on a rough (smoothness=0.1) 3-D problem
509    /// that would otherwise stay at `Multiquadric`, `effective_smooth` is lifted
510    /// to 0.6 → above the 0.5 threshold → `Rbf(Gaussian)` is returned instead.
511    #[test]
512    fn test_require_derivatives_biases_toward_differentiable() {
513        // Without require_derivatives and low smoothness, Multiquadric is chosen.
514        let without = InterpolationProblem {
515            n_points: 300,
516            dim: 3,
517            smoothness_estimate: Some(0.1),
518            require_derivatives: false,
519            ..Default::default()
520        };
521        let rec_without = auto_select(&without);
522        assert_eq!(
523            rec_without.method,
524            InterpolationMethod::Rbf(RbfKernelHint::Multiquadric),
525            "Without require_derivatives, rough 3-D data should use Multiquadric"
526        );
527
528        // With require_derivatives=true the effective smoothness is raised,
529        // pushing the selector to Gaussian RBF (differentiable kernel).
530        let with_deriv = InterpolationProblem {
531            n_points: 300,
532            dim: 3,
533            smoothness_estimate: Some(0.1),
534            require_derivatives: true,
535            ..Default::default()
536        };
537        let rec_with = auto_select(&with_deriv);
538        assert_eq!(
539            rec_with.method,
540            InterpolationMethod::Rbf(RbfKernelHint::Gaussian),
541            "With require_derivatives, should bias toward differentiable Gaussian RBF"
542        );
543        // The reason string should mention gradient / smoothness / derivative.
544        assert!(
545            rec_with.reason.contains("smooth") || rec_with.reason.contains("gradient"),
546            "Reason should mention smoothness/gradient: {}",
547            rec_with.reason
548        );
549    }
550
551    #[test]
552    fn test_estimated_memory_positive() {
553        let problem = InterpolationProblem {
554            n_points: 500,
555            dim: 2,
556            ..Default::default()
557        };
558        let rec = auto_select(&problem);
559        assert!(
560            rec.estimated_memory_mb > 0.0,
561            "Memory estimate must be positive, got {}",
562            rec.estimated_memory_mb
563        );
564    }
565}