sidereon_core/sp3/
interp.rs

1//! SP3 arbitrary-epoch position/clock interpolation.
2//!
3//! Two channels with different recipes, each validated against its correct
4//! external reference (the two-bars doctrine: capability vs the deployed
5//! reference, not a bit-exact port of a convenient primitive):
6//!
7//! # Position channel: sliding-window Lagrange/Neville (RTKLIB recipe)
8//!
9//! The satellite position is interpolated with a sliding-window high-degree
10//! Lagrange (Neville) polynomial matching RTKLIB `preceph.c` pephpos/interppol:
11//! the contiguous run of nodes bracketing the query, the RTKLIB window of up to
12//! 11 nodes (degree 10) centred on the query, an `OMEGA_E_DOT` per-node
13//! earth-rotation correction into the query-epoch frame, then Neville evaluation
14//! per axis. This is the IGS-standard orbit interpolation. It replaced a global
15//! not-a-knot cubic spline, which is only degree 3 over the whole day and erred
16//! ~200 m at the day boundary and across coverage gaps (a query deep inside a
17//! coverage gap is now rejected, never interpolated across). Validated against
18//! the RTKLIB reference (`interp_tests`) and end-to-end against ZIM2 PPP truth.
19//!
20//! # Clock channel: not-a-knot cubic spline (gnssanalysis recipe)
21//!
22//! The clock is locally smooth, so it keeps the not-a-knot cubic spline matching
23//! `scipy.interpolate.CubicSpline(x, y)` with gnssanalysis defaults
24//! (`bc_type="not-a-knot"`, `extrapolate=true`), evaluated at the query, with
25//! clock-event (`E`) arc splitting. BLAS-free (the not-a-knot solve dispatches
26//! to LAPACK `dgtsv`), a legitimate 0-ULP target against scipy.
27//!
28//! # Node substrate (load-bearing for 0-ULP)
29//!
30//! Nodes are **integer seconds since J2000** (2000-01-01 12:00:00 in the file's
31//! own time scale), exactly as gnssanalysis builds them in `datetime2j2000`
32//! (`gn_datetime.py:286-288`): epochs floored to whole seconds, differenced
33//! against the J2000 origin, kept as `i64`, then promoted to `f64` on entry to
34//! the spline. This module reconstructs the same `i64`-seconds axis from the
35//! parser's [`Instant`] epochs (NOT fractional JD, NOT nanoseconds), so the
36//! spline coefficients are bit-identical.
37//!
38//! J2000 = JD 2451545.0. Seconds-since-J2000 for a split JD `(whole, frac)` is
39//! computed in a cancellation-safe way and floored to whole seconds.
40//!
41//! # Units
42//!
43//! The spline is fit in the SP3-native units the reference carries -
44//! **kilometers** for position, **microseconds** for clock - and the evaluated
45//! result is converted to the public API boundary (**meters**, **seconds**) by a
46//! **single final multiply** (`* 1000.0`, `* 1e-6`). The conversion happens
47//! AFTER evaluation, never before the fit; this operation order is pinned.
48//!
49//! # Clock interpolation near gaps / discontinuities
50//!
51//! gnssanalysis defines none, so the policy is authored in the canonical recipe
52//! and matched here:
53//!
54//! - Clock uses the **same** `CubicSpline` construction over the nodes that have
55//!   a clock estimate (the bad-clock sentinel yields no clock node).
56//! - Position and clock node sets are independent.
57//! - Position is never split (orbits are continuous through clock resets).
58//! - Clock interpolation does **not** cross a clock-event (`E`) epoch: the arc
59//!   is split at each `E`-flagged epoch and the clock spline is fit on the
60//!   contiguous sub-arc containing the query epoch.
61
62use crate::astro::time::model::{Instant, InstantRepr};
63
64use crate::astro::time::civil::j2000_seconds_from_split;
65use crate::constants::{KM_TO_M, OMEGA_E_DOT_RAD_S, US_TO_S};
66use crate::frame::ItrfPositionM;
67use crate::id::GnssSatelliteId;
68use crate::sp3::{Sp3, Sp3State};
69use crate::validate;
70use crate::{Error, Result};
71
72/// Per-satellite precise node series in native fit units.
73#[derive(Debug, Clone, PartialEq)]
74pub(super) struct PreciseSatSeries {
75    /// Floored J2000-second node axis.
76    pub(super) x: Vec<f64>,
77    /// X position nodes in SP3-native kilometers.
78    pub(super) kx: Vec<f64>,
79    /// Y position nodes in SP3-native kilometers.
80    pub(super) ky: Vec<f64>,
81    /// Z position nodes in SP3-native kilometers.
82    pub(super) kz: Vec<f64>,
83    /// Clock nodes as `(x_seconds, clock_us, clock_event)`.
84    pub(super) clk: Vec<(f64, f64, bool)>,
85}
86
87impl PreciseSatSeries {
88    pub(super) fn new() -> Self {
89        Self {
90            x: Vec::new(),
91            kx: Vec::new(),
92            ky: Vec::new(),
93            kz: Vec::new(),
94            clk: Vec::new(),
95        }
96    }
97}
98
99impl Sp3 {
100    /// The product's parsed epochs as seconds since J2000, in the file's own time
101    /// scale, ascending.
102    ///
103    /// This is the exact query axis [`Sp3::position_at_j2000_seconds`] interpolates
104    /// against (each epoch converted by the same [`instant_to_j2000_seconds`] used
105    /// for the spline nodes, NOT floored), so a caller can read the grid here, form
106    /// query times on it, and feed them straight back without a Julian-date
107    /// round-trip. An epoch whose representation cannot be mapped to J2000 seconds
108    /// is skipped (SP3 epochs are always Julian-date, so on real data this returns
109    /// one value per epoch).
110    pub fn epochs_j2000_seconds(&self) -> Vec<f64> {
111        self.epochs
112            .iter()
113            .filter_map(instant_to_j2000_seconds)
114            .collect()
115    }
116
117    /// Interpolate the state of `sat` at an arbitrary `epoch`.
118    ///
119    /// Reproduces the pinned `scipy.interpolate.CubicSpline` recipe (see module
120    /// docs) bit-for-bit: a per-axis not-a-knot cubic spline over the
121    /// J2000-integer-second node axis, evaluated at `epoch`, with the unit
122    /// conversion as a single final multiply.
123    ///
124    /// - `position` is always returned (interpolated from all position nodes of
125    ///   `sat`), in meters, ITRF/IGS ECEF.
126    /// - `clock_s` is `Some` when `sat` has at least two clock nodes in the
127    ///   clock sub-arc containing `epoch` (after clock-event splitting); `None`
128    ///   otherwise.
129    /// - `velocity` / `clock_rate_s_s` are `None` (this API interpolates the
130    ///   position/clock product; velocity products are a separate concern).
131    /// - `flags` are defaulted (an interpolated state is synthetic, not a record).
132    ///
133    /// Errors:
134    /// - [`Error::UnknownSatellite`] if `sat` has no position nodes.
135    /// - [`Error::EpochOutOfRange`] if fewer than two position nodes exist (a
136    ///   spline needs at least two points) or the epoch is not representable.
137    /// - [`Error::InvalidInput`] if `epoch` is tagged with a different time
138    ///   scale than the SP3 product.
139    pub fn position(&self, sat: GnssSatelliteId, epoch: Instant) -> Result<Sp3State> {
140        if epoch.scale != self.header.time_scale {
141            return Err(Error::InvalidInput(format!(
142                "SP3 query time scale {} does not match product time scale {}",
143                epoch.scale.abbrev(),
144                self.header.time_scale.abbrev()
145            )));
146        }
147        let query = instant_to_j2000_seconds(&epoch).ok_or(Error::EpochOutOfRange)?;
148        self.position_at_j2000_seconds(sat, query)
149    }
150
151    /// Interpolate the state of `sat` at an arbitrary J2000-second epoch
152    /// supplied directly as an `f64`.
153    ///
154    /// Identical to [`Sp3::position`] except the query is the seconds-since-J2000
155    /// value as already computed by the caller, rather than derived from an
156    /// [`Instant`]. The transmit-time iteration of the SPP residual carries the
157    /// epoch as a J2000-second `f64` (`t_tx = t_rx - rho/c`) and must feed that
158    /// exact value to the spline, with no Julian-date round-trip in the loop, so
159    /// the interpolated position/clock match the reference recipe bit-for-bit.
160    ///
161    /// Errors:
162    /// - [`Error::InvalidInput`] if `query` is NaN or infinite.
163    pub fn position_at_j2000_seconds(&self, sat: GnssSatelliteId, query: f64) -> Result<Sp3State> {
164        let series = gather_sp3_precise_series(self, sat);
165        interpolate_precise_state(
166            sat,
167            &series.x,
168            &series.kx,
169            &series.ky,
170            &series.kz,
171            &series.clk,
172            query,
173        )
174    }
175}
176
177/// Gather one satellite's SP3-native node series in ascending epoch order.
178pub(super) fn gather_sp3_precise_series(source: &Sp3, sat: GnssSatelliteId) -> PreciseSatSeries {
179    let mut series = PreciseSatSeries::new();
180
181    for (idx, ep) in source.epochs.iter().enumerate() {
182        // Node axis: floored to whole seconds to match gnssanalysis
183        // datetime2j2000. The query is never floored.
184        let xi = match instant_to_j2000_seconds(ep) {
185            Some(v) => v.floor(),
186            None => continue,
187        };
188        // Use the parser's native km/us node values. Reconstructing km from the
189        // public meters can drift by 1 ULP; unit conversion stays after eval.
190        let Some(raw) = source.interp_raw[idx].get(&sat) else {
191            continue;
192        };
193        series.x.push(xi);
194        series.kx.push(raw.km[0]);
195        series.ky.push(raw.km[1]);
196        series.kz.push(raw.km[2]);
197
198        if let Some(clk_us) = raw.clock_us {
199            series.clk.push((xi, clk_us, raw.clock_event));
200        }
201    }
202
203    series
204}
205
206/// Interpolate a satellite state from already-gathered native-unit nodes.
207///
208/// This is the shared interpolation substrate. Both the SP3-parsed source
209/// ([`Sp3::position_at_j2000_seconds`]) and the sample-backed source
210/// ([`crate::sp3::PreciseEphemerisSamples`]) gather the same node vectors
211/// (ascending floored J2000 seconds `x`; native km `kx/ky/kz`; native
212/// `(x, clock_us, clock_event)` clock nodes) and drive this one function, so a
213/// source built from samples produces byte-identical states to the SP3 source
214/// those samples serialize to.
215///
216/// Inputs are the file-native units the reference recipes consume (km for
217/// position, microseconds for clock); the single final unit multiply to meters /
218/// seconds happens inside the position/clock evaluators, exactly as the SP3 path
219/// requires for 0-ULP parity.
220pub(super) fn interpolate_precise_state(
221    sat: GnssSatelliteId,
222    pos_x: &[f64],
223    pos_kx: &[f64],
224    pos_ky: &[f64],
225    pos_kz: &[f64],
226    clk_nodes: &[(f64, f64, bool)],
227    query: f64,
228) -> Result<Sp3State> {
229    let query = validate::finite(query, "query_j2000_s").map_err(map_query_input)?;
230
231    if pos_x.is_empty() {
232        return Err(Error::UnknownSatellite(sat));
233    }
234    if pos_x.len() < 2 {
235        // A cubic spline needs >= 2 points; a single node cannot define one.
236        return Err(Error::EpochOutOfRange);
237    }
238    validate_strictly_increasing_nodes(pos_x)?;
239
240    // Refuse grossly out-of-coverage queries instead of silently returning a
241    // diverging extrapolation. The underlying cubic spline mirrors scipy
242    // CubicSpline(extrapolate=True): a query well past the node span runs off
243    // to nonsense (megametres and worse). We allow up to one node spacing of
244    // edge extrapolation (the end cubic is still physically reasonable that
245    // close to the data) and reject anything beyond. In-coverage interpolation
246    // is bit-for-bit unchanged, so 0-ULP parity is preserved. Nodes are in
247    // ascending epoch order.
248    // Reject a query that lands deep inside an interior coverage gap rather
249    // than interpolating across it. Nominal spacing is the smallest
250    // consecutive node gap; a bracketing interval far larger than that is a
251    // gap. One nominal spacing of interpolation past either edge node is
252    // allowed (the near-gap edge stays usable); beyond that the query is in
253    // the gap and is refused.
254    let nominal = nominal_positive_spacing(pos_x).ok_or(Error::EpochOutOfRange)?;
255    let first = pos_x[0];
256    let last = pos_x[pos_x.len() - 1];
257    if query < first - nominal || query > last + nominal {
258        return Err(Error::EpochOutOfRange);
259    }
260
261    let gap_thresh = 1.5 * nominal;
262    let mut bi = 0usize;
263    while bi + 1 < pos_x.len() && pos_x[bi + 1] <= query {
264        bi += 1;
265    }
266    if bi + 1 < pos_x.len() {
267        let (lo, hi) = (pos_x[bi], pos_x[bi + 1]);
268        if hi - lo > gap_thresh && query > lo + nominal && query < hi - nominal {
269            return Err(Error::EpochOutOfRange);
270        }
271    }
272
273    let (x_m, y_m, z_m) = interpolate_position_neville(pos_x, pos_kx, pos_ky, pos_kz, query);
274
275    let clock_s = interpolate_clock(clk_nodes, query);
276
277    Ok(Sp3State {
278        position: ItrfPositionM::new(x_m, y_m, z_m).expect("valid ITRF position"),
279        clock_s,
280        velocity: None,
281        clock_rate_s_s: None,
282        flags: crate::sp3::Sp3Flags::default(),
283    })
284}
285
286fn map_query_input(error: validate::FieldError) -> Error {
287    Error::InvalidInput(format!("{} {}", error.field(), error.reason()))
288}
289
290fn nominal_positive_spacing(x: &[f64]) -> Option<f64> {
291    let nominal = x
292        .windows(2)
293        .map(|w| w[1] - w[0])
294        .filter(|&d| d > 0.0)
295        .fold(f64::INFINITY, f64::min);
296    if nominal.is_finite() {
297        Some(nominal)
298    } else {
299        None
300    }
301}
302
303fn validate_strictly_increasing_nodes(x: &[f64]) -> Result<()> {
304    for window in x.windows(2) {
305        if window[1] <= window[0] {
306            return Err(Error::InvalidInput(
307                "SP3 interpolation epochs must be strictly increasing".to_string(),
308            ));
309        }
310    }
311    Ok(())
312}
313
314/// Interpolate the clock channel with the clock-event-split policy.
315///
316/// Splits the clock node arc at each clock-event (`E`) epoch and fits the
317/// not-a-knot spline on the contiguous sub-arc containing `query`. Returns
318/// `None` if that sub-arc has fewer than two nodes.
319fn interpolate_clock(clk_nodes: &[(f64, f64, bool)], query: f64) -> Option<f64> {
320    if clk_nodes.len() < 2 {
321        return None;
322    }
323
324    // Partition into contiguous sub-arcs split at clock-event epochs. A
325    // clock-event epoch marks a discontinuity *at* that epoch, so it ends the
326    // sub-arc before it and starts a new one (the flagged node belongs to the
327    // new sub-arc, since the reset takes effect there).
328    let mut sub_start = 0usize;
329    let mut chosen: Option<(usize, usize)> = None; // [start, end) into clk_nodes
330    for i in 0..clk_nodes.len() {
331        let is_break = clk_nodes[i].2 && i > sub_start;
332        if is_break {
333            // Sub-arc [sub_start, i) ends here.
334            if range_contains_query(clk_nodes, sub_start, i, query) {
335                chosen = Some((sub_start, i));
336            }
337            sub_start = i;
338        }
339    }
340    // Trailing sub-arc [sub_start, len).
341    if chosen.is_none() && range_contains_query(clk_nodes, sub_start, clk_nodes.len(), query) {
342        chosen = Some((sub_start, clk_nodes.len()));
343    }
344    // If the query is outside every sub-arc span (extrapolation), use the
345    // sub-arc nearest the query so the default extrapolate=True behavior holds
346    // within the contiguous piece on that side.
347    let (start, end) = match chosen {
348        Some(r) => r,
349        None => nearest_subarc(clk_nodes, query)?,
350    };
351
352    if end - start < 2 {
353        return None;
354    }
355    let x: Vec<f64> = clk_nodes[start..end].iter().map(|n| n.0).collect();
356    let y: Vec<f64> = clk_nodes[start..end].iter().map(|n| n.1).collect();
357    Some(eval_cubic_spline(&x, &y, query) * US_TO_S)
358}
359
360/// Whether `query` lies within the closed node-span of sub-arc `[start, end)`.
361fn range_contains_query(nodes: &[(f64, f64, bool)], start: usize, end: usize, query: f64) -> bool {
362    if end <= start {
363        return false;
364    }
365    let lo = nodes[start].0;
366    let hi = nodes[end - 1].0;
367    query >= lo && query <= hi
368}
369
370/// Find the sub-arc (split at clock-event epochs) whose node-span is nearest to
371/// `query` for extrapolation. Returns `[start, end)` or `None` if empty.
372#[allow(clippy::needless_range_loop)]
373fn nearest_subarc(nodes: &[(f64, f64, bool)], query: f64) -> Option<(usize, usize)> {
374    if nodes.is_empty() {
375        return None;
376    }
377    // Rebuild sub-arc boundaries (same rule as interpolate_clock).
378    let mut bounds: Vec<(usize, usize)> = Vec::new();
379    let mut sub_start = 0usize;
380    for i in 0..nodes.len() {
381        if nodes[i].2 && i > sub_start {
382            bounds.push((sub_start, i));
383            sub_start = i;
384        }
385    }
386    bounds.push((sub_start, nodes.len()));
387
388    // Pick the sub-arc minimizing distance from query to its [lo, hi] span.
389    bounds
390        .into_iter()
391        .filter(|&(s, e)| e - s >= 2)
392        .min_by(|&(s1, e1), &(s2, e2)| {
393            let d1 = span_distance(nodes, s1, e1, query);
394            let d2 = span_distance(nodes, s2, e2, query);
395            d1.partial_cmp(&d2).unwrap_or(core::cmp::Ordering::Equal)
396        })
397}
398
399fn span_distance(nodes: &[(f64, f64, bool)], start: usize, end: usize, query: f64) -> f64 {
400    let lo = nodes[start].0;
401    let hi = nodes[end - 1].0;
402    if query < lo {
403        lo - query
404    } else if query > hi {
405        query - hi
406    } else {
407        0.0
408    }
409}
410
411/// Convert a parser [`Instant`] to seconds since J2000, as `f64`, **exact**
412/// (not floored).
413///
414/// The split-JD difference is taken whole-part first to avoid cancellation.
415/// This returns the precise instant; flooring belongs to the *node axis* only:
416///
417/// - **Node epochs** are floored to whole seconds at the call site to mirror
418///   gnssanalysis `datetime2j2000` (`datetime64[s]` truncation), so the spline's
419///   x-axis is bit-identical to the reference. SP3 epochs are integer-second in
420///   practice, so this floor is a no-op on real data but kept for faithfulness.
421/// - The **query** is evaluated at this exact value, never floored: flooring a
422///   sub-second query epoch would discard up to ~1 s, a kilometre-scale position
423///   error at orbital speed (this was a real bug - the node and query
424///   conversions must NOT share the flooring).
425pub(super) fn instant_to_j2000_seconds(instant: &Instant) -> Option<f64> {
426    match instant.repr {
427        InstantRepr::JulianDate(split) => {
428            // (jd - J2000_JD) days -> seconds, whole/fraction kept separate to
429            // avoid cancellation (canonical split-to-J2000-seconds reduction).
430            Some(j2000_seconds_from_split(split.jd_whole, split.fraction))
431        }
432        InstantRepr::Nanos(ns) => {
433            // Integer ns since the scale epoch - but the parser stores SP3
434            // epochs as JulianDate, so this path is not exercised by SP3.
435            // J2000 is JD 2451545.0; without a fixed ns-origin convention here
436            // we cannot map ns->J2000-seconds unambiguously, so decline.
437            let _ = ns;
438            None
439        }
440    }
441}
442
443/// Number of nodes in the sliding interpolation window (RTKLIB `NMAX`=10 ->
444/// degree-10 polynomial, 11 nodes).
445const NEVILLE_POINTS: usize = 11;
446
447/// Sliding-window Lagrange (Neville) satellite-POSITION interpolation, matching
448/// RTKLIB `preceph.c` pephpos/interppol. Replaces the global not-a-knot cubic
449/// spline, which is degree-3 over the whole day and errs ~200 m at the day
450/// boundary and across coverage gaps; SP3 15-minute orbit nodes need local
451/// ~degree-10 interpolation for sub-cm accuracy. Validated against the external
452/// RTKLIB reference and the ZIM2 PPP truth (two-bars doctrine: this channel is a
453/// capability gated on the deployed reference, not a bit-exact port of a scipy
454/// primitive). The CLOCK channel keeps its cubic spline (locally smooth, matched
455/// to the 30 s clock product at the cm level).
456///
457/// Recipe: restrict to the contiguous run of nodes bracketing `query` (never
458/// interpolate across a coverage gap), take the RTKLIB window of up to
459/// `NEVILLE_POINTS` nodes centred on the query (shifted inward at run edges),
460/// rotate each node's ECEF position about +z by `OMEGA_E_DOT * (t_node - query)`
461/// into the query-epoch earth-fixed frame, then Neville-interpolate each axis at
462/// the query. Inputs are ascending J2000 seconds (`x`) and km (`kx/ky/kz`).
463fn interpolate_position_neville(
464    x: &[f64],
465    kx: &[f64],
466    ky: &[f64],
467    kz: &[f64],
468    query: f64,
469) -> (f64, f64, f64) {
470    let n = x.len();
471
472    // Nominal node spacing = smallest positive consecutive gap (robust to one
473    // large coverage gap); the gap threshold marks a non-contiguous jump.
474    let nominal = nominal_positive_spacing(x).unwrap_or(1.0);
475    let gap_thresh = 1.5 * nominal;
476
477    // Last node at or before the query (clamped into range).
478    let mut pivot = 0usize;
479    while pivot + 1 < n && x[pivot + 1] <= query {
480        pivot += 1;
481    }
482    // The gap policy admits one nominal spacing of extrapolation from either
483    // arc. Near the next arc, anchor the window there instead of extrapolating
484    // the previous arc across the whole gap.
485    if pivot + 1 < n && (x[pivot + 1] - x[pivot]) > gap_thresh && query >= x[pivot + 1] - nominal {
486        pivot += 1;
487    }
488
489    // Contiguous run [run_lo, run_hi) around the pivot: extend while the
490    // neighbour gap stays within the threshold (do not cross a coverage gap).
491    let mut run_lo = pivot;
492    while run_lo > 0 && (x[run_lo] - x[run_lo - 1]) <= gap_thresh {
493        run_lo -= 1;
494    }
495    let mut run_hi = pivot + 1;
496    while run_hi < n && (x[run_hi] - x[run_hi - 1]) <= gap_thresh {
497        run_hi += 1;
498    }
499    let run_len = run_hi - run_lo;
500
501    // RTKLIB window: centre on the pivot, width = min(NEVILLE_POINTS, run_len),
502    // clamped to the run.
503    let win = NEVILLE_POINTS.min(run_len);
504    let half = (NEVILLE_POINTS / 2) as isize;
505    let mut start = pivot as isize - half;
506    if start < run_lo as isize {
507        start = run_lo as isize;
508    }
509    if start + win as isize > run_hi as isize {
510        start = run_hi as isize - win as isize;
511    }
512    let start = start as usize;
513
514    // Windowed nodes on the (t = node - query) abscissa, earth-rotation-corrected
515    // into the query-epoch frame; query is t = 0.
516    let mut t = [0.0f64; NEVILLE_POINTS];
517    let mut px = [0.0f64; NEVILLE_POINTS];
518    let mut py = [0.0f64; NEVILLE_POINTS];
519    let mut pz = [0.0f64; NEVILLE_POINTS];
520    for j in 0..win {
521        let k = start + j;
522        let tj = x[k] - query;
523        let (s, c) = (OMEGA_E_DOT_RAD_S * tj).sin_cos();
524        t[j] = tj;
525        px[j] = c * kx[k] - s * ky[k];
526        py[j] = s * kx[k] + c * ky[k];
527        pz[j] = kz[k];
528    }
529
530    let x_km = neville(&t[..win], &px[..win]);
531    let y_km = neville(&t[..win], &py[..win]);
532    let z_km = neville(&t[..win], &pz[..win]);
533    (x_km * KM_TO_M, y_km * KM_TO_M, z_km * KM_TO_M)
534}
535
536/// Neville's algorithm evaluated at 0, reproducing RTKLIB `rtkcmn.c` interppol
537/// (the abscissa `x` carries node-minus-query offsets, so the query is 0).
538fn neville(x: &[f64], y: &[f64]) -> f64 {
539    let n = y.len();
540    let mut c: [f64; NEVILLE_POINTS] = [0.0; NEVILLE_POINTS];
541    c[..n].copy_from_slice(&y[..n]);
542    for j in 1..n {
543        for i in 0..(n - j) {
544            c[i] = (x[i + j] * c[i] - x[i] * c[i + 1]) / (x[i + j] - x[i]);
545        }
546    }
547    c[0]
548}
549
550/// Evaluate a not-a-knot cubic spline at `query`, reproducing
551/// `scipy.interpolate.CubicSpline(x, y)(query)` bit-for-bit.
552///
553/// `x` must be strictly increasing with `x.len() == y.len() >= 2`.
554fn eval_cubic_spline(x: &[f64], y: &[f64], query: f64) -> f64 {
555    let n = x.len();
556    debug_assert_eq!(n, y.len());
557    debug_assert!(n >= 2);
558
559    let dydx = solve_not_a_knot_slopes(x, y);
560    let (c0, c1, c2, c3) = hermite_segment_coeffs(x, y, &dydx);
561    evaluate_ppoly(x, &c0, &c1, &c2, &c3, query)
562}
563
564/// Solve the not-a-knot tridiagonal system for the derivative values `s[i]` at
565/// each node, exactly as `scipy.interpolate.CubicSpline.__init__` assembles it
566/// (`_cubic.py`, scipy 1.17.1) and `scipy.linalg.solve_banded((1,1), ...)`
567/// solves it via LAPACK `dgtsv`.
568///
569/// Banded layout mirrors scipy's `A` of shape `(3, n)`:
570/// - `A[1, :]` diagonal `d`
571/// - `A[0, 1:]` upper diagonal `du` (i.e. `du[j]` couples row `j` to `j+1`)
572/// - `A[2, :-1]` lower diagonal `dl` (i.e. `dl[j]` couples row `j+1` to `j`)
573fn solve_not_a_knot_slopes(x: &[f64], y: &[f64]) -> Vec<f64> {
574    let n = x.len();
575
576    // dx[i] = x[i+1]-x[i]; slope[i] = (y[i+1]-y[i])/dx[i]. (scipy: np.diff / dxr)
577    let mut dx = vec![0.0; n - 1];
578    let mut slope = vec![0.0; n - 1];
579    for i in 0..n - 1 {
580        dx[i] = x[i + 1] - x[i];
581        slope[i] = (y[i + 1] - y[i]) / dx[i];
582    }
583
584    // Special case n == 2: not-a-knot is replaced by clamped to the secant
585    // slope on both ends (scipy `_cubic.py`: bc -> (1, slope[0])), giving the
586    // straight-line Hermite - both derivatives equal slope[0].
587    if n == 2 {
588        return vec![slope[0], slope[0]];
589    }
590
591    // Special case n == 3 with not-a-knot on both ends: scipy builds a 3x3 dense
592    // system (a parabola through the points) and solves with LAPACK `gesv`.
593    if n == 3 {
594        return solve_n3_parabola(&dx, &slope, y);
595    }
596
597    // General n >= 4: tridiagonal banded system.
598    // Diagonal/off-diagonals as scipy fills them.
599    // Interior rows i=1..n-2:
600    //   d[i]   = 2*(dx[i-1]+dx[i])
601    //   du[i]  (A[0, i+1]) = dx[i-1]
602    //   dl[i-1](A[2, i-1]) = dx[i]
603    //   b[i]   = 3*(dx[i]*slope[i-1] + dx[i-1]*slope[i])
604    let mut d = vec![0.0; n];
605    // upper diagonal du[j] for j in 0..n-1 couples row j -> j+1 (A[0, j+1]).
606    let mut du = vec![0.0; n - 1];
607    // lower diagonal dl[j] for j in 0..n-1 couples row j+1 -> j (A[2, j]).
608    let mut dl = vec![0.0; n - 1];
609    let mut b = vec![0.0; n];
610
611    for i in 1..n - 1 {
612        d[i] = 2.0 * (dx[i - 1] + dx[i]); // A[1, i]
613        du[i] = dx[i - 1]; // A[0, i+1] -> our du index i (couples i->i+1)
614        dl[i - 1] = dx[i]; // A[2, i-1] -> our dl index i-1 (couples i->i-1)
615        b[i] = 3.0 * (dx[i] * slope[i - 1] + dx[i - 1] * slope[i]);
616    }
617
618    // not-a-knot start (scipy):
619    //   A[1,0]=dx[1]; A[0,1]=x[2]-x[0]; d=x[2]-x[0];
620    //   b[0]=((dx[0]+2*d)*dx[1]*slope[0] + dx[0]^2*slope[1]) / d
621    {
622        let dd = x[2] - x[0];
623        d[0] = dx[1]; // A[1,0]
624        du[0] = dd; // A[0,1] couples row 0->1
625        b[0] = ((dx[0] + 2.0 * dd) * dx[1] * slope[0] + dx[0] * dx[0] * slope[1]) / dd;
626    }
627    // not-a-knot end (scipy):
628    //   A[1,-1]=dx[-2]; A[-1,-2]=x[-1]-x[-3]; d=x[-1]-x[-3];
629    //   b[-1]=(dx[-1]^2*slope[-2] + (2*d+dx[-1])*dx[-2]*slope[-1]) / d
630    {
631        let dd = x[n - 1] - x[n - 3];
632        d[n - 1] = dx[n - 2]; // A[1,-1]
633        dl[n - 2] = dd; // A[-1,-2] couples row n-1 -> n-2
634        b[n - 1] = (dx[n - 2] * dx[n - 2] * slope[n - 3]
635            + (2.0 * dd + dx[n - 2]) * dx[n - 3] * slope[n - 2])
636            / dd;
637    }
638
639    dgtsv(dl, d, du, b)
640}
641
642/// n == 3 not-a-knot special case: scipy solves a dense 3x3 `A s = b` via
643/// LAPACK `gesv` (partial-pivot LU). Reproduced with the same partial-pivoting
644/// Gaussian elimination operation order.
645fn solve_n3_parabola(dx: &[f64], slope: &[f64], _y: &[f64]) -> Vec<f64> {
646    // A (scipy `_cubic.py` n==3 branch):
647    //   A[0,0]=1 A[0,1]=1
648    //   A[1,0]=dx[1] A[1,1]=2*(dx[0]+dx[1]) A[1,2]=dx[0]
649    //   A[2,1]=1 A[2,2]=1
650    // b:
651    //   b[0]=2*slope[0]
652    //   b[1]=3*(dx[0]*slope[1] + dx[1]*slope[0])
653    //   b[2]=2*slope[1]
654    let mut a = [
655        [1.0, 1.0, 0.0],
656        [dx[1], 2.0 * (dx[0] + dx[1]), dx[0]],
657        [0.0, 1.0, 1.0],
658    ];
659    let mut b = [
660        2.0 * slope[0],
661        3.0 * (dx[0] * slope[1] + dx[1] * slope[0]),
662        2.0 * slope[1],
663    ];
664    gesv3(&mut a, &mut b);
665    b.to_vec()
666}
667
668/// LAPACK `dgtsv`-equivalent tridiagonal solve (scipy `solve_banded((1,1),...)`
669/// dispatch). Partial pivoting, scalar arithmetic, NRHS=1.
670///
671/// `dl[i]` = sub-diagonal coupling row `i+1`->`i`; `d[i]` = diagonal; `du[i]` =
672/// super-diagonal coupling row `i`->`i+1`. Reproduces the Reference-LAPACK
673/// `dgtsv.f` operation order, **with one pinned-environment subtlety**: the
674/// certified parity target's LAPACK is **Apple Accelerate** (macOS arm64; scipy
675/// 1.17.1, `detection method: extraframeworks`), whose `dgtsv` contracts each
676/// `acc - fact*x` update into a **fused multiply-add**. So every `y - a*x`
677/// elimination/back-substitution update here uses [`f64::mul_add`]
678/// (`(-a).mul_add(x, y)`), NOT a separate multiply then subtract - the
679/// per-function FMA-contraction discipline the parity contract requires.
680/// Verified 0-ULP against `scipy.linalg.lapack.dgtsv` on this target; on a
681/// non-FMA LAPACK build the last bits differ (the portable-mode reality, where
682/// 0 ULP is not promised across platforms).
683fn dgtsv(mut dl: Vec<f64>, mut d: Vec<f64>, mut du: Vec<f64>, mut b: Vec<f64>) -> Vec<f64> {
684    let n = d.len();
685
686    if n == 1 {
687        b[0] /= d[0];
688        return b;
689    }
690
691    // Forward elimination, rows i = 0 .. n-3 (Fortran 1..N-2). On a pivot, the
692    // fill-in second super-diagonal is stored back into `dl[i]` (NOT a separate
693    // du2 array) - exactly as Reference-LAPACK dgtsv.f does; the back
694    // substitution reads it as the B(I+2) coefficient.
695    for i in 0..n.saturating_sub(2) {
696        if d[i].abs() >= dl[i].abs() {
697            // No pivot.
698            let fact = dl[i] / d[i];
699            d[i + 1] = (-fact).mul_add(du[i], d[i + 1]);
700            b[i + 1] = (-fact).mul_add(b[i], b[i + 1]);
701            dl[i] = 0.0;
702        } else {
703            // Pivot (swap rows i and i+1). Note `dl[i] = du[i+1]` happens
704            // BEFORE `du[i+1] = -fact*dl[i]`, so the latter uses the new dl[i]
705            // (= old du[i+1]).
706            let fact = d[i] / dl[i];
707            d[i] = dl[i];
708            let temp = d[i + 1];
709            d[i + 1] = (-fact).mul_add(temp, du[i]);
710            dl[i] = du[i + 1];
711            du[i + 1] = -fact * dl[i];
712            du[i] = temp;
713            let tb = b[i];
714            b[i] = b[i + 1];
715            b[i + 1] = (-fact).mul_add(b[i + 1], tb);
716        }
717    }
718
719    // Row i = n-2 (Fortran I = N-1) - no du2 fill-in.
720    if n > 1 {
721        let i = n - 2;
722        if d[i].abs() >= dl[i].abs() {
723            let fact = dl[i] / d[i];
724            d[i + 1] = (-fact).mul_add(du[i], d[i + 1]);
725            b[i + 1] = (-fact).mul_add(b[i], b[i + 1]);
726        } else {
727            let fact = d[i] / dl[i];
728            d[i] = dl[i];
729            let temp = d[i + 1];
730            d[i + 1] = (-fact).mul_add(temp, du[i]);
731            du[i] = temp;
732            let tb = b[i];
733            b[i] = b[i + 1];
734            b[i + 1] = (-fact).mul_add(b[i + 1], tb);
735        }
736    }
737
738    // Back substitution (dgtsv), FMA-contracted as above.
739    b[n - 1] /= d[n - 1];
740    if n > 1 {
741        b[n - 2] = (-du[n - 2]).mul_add(b[n - 1], b[n - 2]) / d[n - 2];
742    }
743    for i in (0..n.saturating_sub(2)).rev() {
744        // (b[i] - du[i]*b[i+1] - dl[i]*b[i+2]) / d[i], each subtraction fused.
745        let t = (-du[i]).mul_add(b[i + 1], b[i]);
746        b[i] = (-dl[i]).mul_add(b[i + 2], t) / d[i];
747    }
748
749    b
750}
751
752/// 3x3 dense solve with partial-pivot LU, matching LAPACK `gesv` (`dgesv`) for
753/// the n==3 not-a-knot parabola case. As with [`dgtsv`], the certified parity
754/// target is Apple Accelerate, whose `dgesv` contracts the `acc - factor*x`
755/// elimination and substitution updates into fused multiply-adds; this routine
756/// uses [`f64::mul_add`] to match it bit-for-bit.
757#[allow(clippy::needless_range_loop)]
758fn gesv3(a: &mut [[f64; 3]; 3], b: &mut [f64; 3]) {
759    let mut perm = [0usize, 1, 2];
760    // LU with partial pivoting (column-major in LAPACK; we keep row-major but
761    // pivot by largest |a[col]| in the column, matching the same pivot choice).
762    for k in 0..3 {
763        // Find pivot row in column k at or below k.
764        let mut piv = k;
765        let mut best = a[k][k].abs();
766        for r in (k + 1)..3 {
767            let v = a[r][k].abs();
768            if v > best {
769                best = v;
770                piv = r;
771            }
772        }
773        if piv != k {
774            a.swap(k, piv);
775            perm.swap(k, piv);
776        }
777        for r in (k + 1)..3 {
778            let factor = a[r][k] / a[k][k];
779            a[r][k] = factor;
780            for c in (k + 1)..3 {
781                a[r][c] = (-factor).mul_add(a[k][c], a[r][c]);
782            }
783        }
784    }
785    // Apply row permutation to b.
786    let pb = [b[perm[0]], b[perm[1]], b[perm[2]]];
787    // Forward solve Ly = Pb (unit lower).
788    let mut yv = [0.0; 3];
789    for r in 0..3 {
790        let mut s = pb[r];
791        for c in 0..r {
792            s = (-a[r][c]).mul_add(yv[c], s);
793        }
794        yv[r] = s;
795    }
796    // Back solve Ux = y.
797    for r in (0..3).rev() {
798        let mut s = yv[r];
799        for c in (r + 1)..3 {
800            s = (-a[r][c]).mul_add(b[c], s);
801        }
802        b[r] = s / a[r][r];
803    }
804}
805
806/// Build the per-segment PPoly coefficients exactly as
807/// `scipy.interpolate.CubicHermiteSpline.__init__` (scipy 1.17.1):
808///
809/// ```text
810/// dxr   = x[i+1]-x[i]
811/// slope = (y[i+1]-y[i])/dxr
812/// t     = (dydx[i] + dydx[i+1] - 2*slope)/dxr
813/// c0 = t/dxr
814/// c1 = (slope - dydx[i])/dxr - t
815/// c2 = dydx[i]
816/// c3 = y[i]
817/// ```
818///
819/// for segment `i` between `x[i]` and `x[i+1]`, with local variable
820/// `s = xval - x[i]`.
821fn hermite_segment_coeffs(
822    x: &[f64],
823    y: &[f64],
824    dydx: &[f64],
825) -> (Vec<f64>, Vec<f64>, Vec<f64>, Vec<f64>) {
826    let n = x.len();
827    let mut c0 = vec![0.0; n - 1];
828    let mut c1 = vec![0.0; n - 1];
829    let mut c2 = vec![0.0; n - 1];
830    let mut c3 = vec![0.0; n - 1];
831    for i in 0..n - 1 {
832        let dxr = x[i + 1] - x[i];
833        let slope = (y[i + 1] - y[i]) / dxr;
834        let t = (dydx[i] + dydx[i + 1] - 2.0 * slope) / dxr;
835        c0[i] = t / dxr;
836        c1[i] = (slope - dydx[i]) / dxr - t;
837        c2[i] = dydx[i];
838        c3[i] = y[i];
839    }
840    (c0, c1, c2, c3)
841}
842
843/// Evaluate the PPoly at `query`, reproducing scipy `_ppoly.evaluate` /
844/// `find_interval_ascending` (extrapolate=True) and `evaluate_poly1` (dx=0).
845///
846/// Interval selection: the largest `i` with `x[i] <= query`, clamped to
847/// `[0, n-2]`; `query == x[n-1]` maps to interval `n-2` (right-closed); out of
848/// bounds extrapolates from interval 0 (below) or `n-2` (above).
849///
850/// Evaluation order (`evaluate_poly1`, dx=0): with `s = query - x[i]` and
851/// `z` accumulating powers via repeated `z *= s`,
852/// `res = c3 + c2*s + c1*s^2 + c0*s^3` summed low-power-first.
853fn evaluate_ppoly(x: &[f64], c0: &[f64], c1: &[f64], c2: &[f64], c3: &[f64], query: f64) -> f64 {
854    let n = x.len();
855    let last = n - 2; // last interval index
856
857    // find_interval_ascending with extrapolate=True.
858    let interval = if query.is_nan() {
859        // scipy returns -1 -> NaN out; propagate NaN.
860        return f64::NAN;
861    } else if query < x[0] {
862        0
863    } else if query > x[n - 1] {
864        last
865    } else {
866        // x[0] <= query <= x[n-1]: binary search for i with x[i] <= query < x[i+1];
867        // query == x[n-1] -> n-2.
868        if query == x[n - 1] {
869            last
870        } else {
871            let mut lo = 0usize;
872            let mut hi = n - 1;
873            while hi - lo > 1 {
874                let mid = (lo + hi) / 2;
875                if x[mid] <= query {
876                    lo = mid;
877                } else {
878                    hi = mid;
879                }
880            }
881            lo
882        }
883    };
884
885    // evaluate_poly1 (dx=0): res = sum_{kp} c[K-kp-1] * z, z = s^kp built by *=.
886    let s = query - x[interval];
887    let mut res = 0.0;
888    let mut z = 1.0;
889    // kp = 0 -> coefficient c3 (lowest power), kp=1 -> c2, kp=2 -> c1, kp=3 -> c0.
890    res += c3[interval] * z;
891    z *= s;
892    res += c2[interval] * z;
893    z *= s;
894    res += c1[interval] * z;
895    z *= s;
896    res += c0[interval] * z;
897    res
898}
899
900/// Test-only re-export of the core spline evaluator so the parity test can
901/// drive it directly against the scipy golden fixture.
902#[cfg(all(test, sidereon_repo_tests))]
903pub(super) fn eval_cubic_spline_for_test(x: &[f64], y: &[f64], query: f64) -> f64 {
904    eval_cubic_spline(x, y, query)
905}
906
907#[cfg(all(test, sidereon_repo_tests))]
908mod interp_tests;
sidereon_core/sp3/interp.rs

sidereon_core/sp3/
interp.rs