rubato 3.0.0

Asynchronous resampling library intended for audio data
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
use crate::asynchro::InnerResampler;
use crate::interpolation::*;
#[cfg(target_arch = "x86_64")]
use crate::sinc_interpolator::sinc_interpolator_avx::AvxInterpolator;
#[cfg(target_arch = "aarch64")]
use crate::sinc_interpolator::sinc_interpolator_neon::NeonInterpolator;
#[cfg(target_arch = "x86_64")]
use crate::sinc_interpolator::sinc_interpolator_sse::SseInterpolator;
use crate::sinc_interpolator::{
    AlignedBuf, AnyInterpolator, AvxSample, NeonSample, ScalarInterpolator, SincInterpolator,
    SseSample,
};
use crate::windows::WindowFunction;
use crate::Sample;
use audioadapter::AdapterMut;

macro_rules! t {
    // Shorthand for `T::coerce(value)`, where `T` is whatever type named `T`
    // is in scope at the call site (normally the generic sample type).
    ($value:expr) => {
        T::coerce($value)
    };
}

/// Parameters for sinc interpolation.
///
/// Groups the settings that describe the windowed sinc filter bank
/// (length, cutoff, oversampling and window) together with the method used to
/// interpolate between the precomputed sinc points.
#[derive(Debug, Clone, Copy)]
pub struct SincInterpolationParameters {
    /// Length of the windowed sinc interpolation filter.
    /// Higher values can allow a higher cut-off frequency, leading to less high-frequency
    /// roll-off at the expense of higher CPU usage. 256 is a good starting point.
    /// The value will be rounded up to the nearest multiple of 8.
    pub sinc_len: usize,
    /// Relative cutoff frequency of the sinc interpolation filter
    /// (relative to the lower of fs_in/2 and fs_out/2). Start at 0.95, and increase if needed.
    pub f_cutoff: f32,
    /// The number of intermediate points to use for interpolation.
    /// Higher values use more memory for storing the sinc filters.
    /// Only the points actually needed are calculated during processing,
    /// so a larger number does not directly lead to higher CPU usage.
    /// A lower value helps in keeping the sincs in the CPU cache. Start at 128.
    pub oversampling_factor: usize,
    /// Interpolation type, see [`SincInterpolationType`].
    pub interpolation: SincInterpolationType,
    /// Window function to use, see [`WindowFunction`].
    pub window: WindowFunction,
}

/// Interpolation methods that can be selected.
///
/// For asynchronous interpolation, where the ratio between input and output sample rates
/// can be any number, it is not possible to pre-calculate all the needed interpolation filters.
/// Computing them as needed is impractical, since the sincs are very expensive to generate
/// in terms of CPU time.
/// It is more efficient to combine the sinc filters with some other interpolation technique:
/// sinc filters provide a fixed number of interpolated points between input samples,
/// and the new value is then calculated by interpolating between those points.
///
/// # Performance scaling with channel count
///
/// The figures below are approximations; actual performance varies with CPU architecture,
/// SIMD width, cache behaviour, and compiler optimisation.
///
/// Each interpolation mode evaluates N sinc filters per output sample (one per nearest point).
/// When processing multiple channels that share the same playback position, those N filters
/// can be blended once into a single combined filter, after which every channel performs only
/// one dot product. The build cost of the combined filter is roughly equivalent to N dot
/// products, so the break-even point depends on the mode:
///
/// | Mode      | Nearest points (N) | Combined sinc used when |
/// |-----------|--------------------|-------------------------|
/// | Cubic     | 4                  | 2 or more channels      |
/// | Quadratic | 3                  | 3 or more channels      |
/// | Linear    | 2                  | 3 or more channels      |
/// | Nearest   | 1                  | never (no benefit)      |
///
/// The table below shows the total cost in dot-product equivalents per output sample.
/// Building the combined filter requires one scaled-add pass over the sinc buffer for each
/// nearest point (comparable in cost to one dot product), then each channel runs one dot product:
///
/// | Mode      | 1 ch | 2 ch | 3 ch | 4 ch | M ch (above threshold) |
/// |-----------|------|------|------|------|------------------------|
/// | Cubic     | 4    | 6    | 7    | 8    | 4 + M (M ≥ 2)          |
/// | Quadratic | 3    | 6    | 6    | 7    | 3 + M (M ≥ 3)          |
/// | Linear    | 2    | 4    | 5    | 6    | 2 + M (M ≥ 3)          |
/// | Nearest   | 1    | 2    | 3    | 4    | M (always)             |
///
/// Once above the combined-sinc threshold, every mode costs exactly one additional
/// dot product per extra channel. Practical consequences:
///
/// - **Cubic at M channels ≈ Linear at M+2 channels** (both cost 4+M dp for M ≥ 3).
///   For example, 4-channel Cubic and 6-channel Linear both cost 8 dp per output sample.
/// - **Cubic at M channels ≈ Quadratic at M+1 channels** (both cost 4+M dp for M ≥ 3).
/// - **At 2 channels**, Cubic and Quadratic are equal (both 6 dp), so there is no reason
///   to choose Quadratic over Cubic for stereo content.
/// - **Upgrading from Linear to Cubic** above the threshold costs the same as adding
///   two more channels at the current mode — a fixed overhead, not a multiplier.
#[derive(Debug, Clone, Copy)]
pub enum SincInterpolationType {
    /// Cubic interpolation using the four nearest intermediate sinc points.
    /// A cubic polynomial is fitted to these points to compute each output sample.
    ///
    /// This gives the best quality-to-oversampling-factor trade-off: fewer intermediate
    /// points are needed compared to linear interpolation for the same artefact level.
    /// The cost relative to linear is roughly 2× at 1 channel. At 2 or more channels the
    /// combined-sinc optimisation applies, so each additional channel costs only a single
    /// dot product.
    Cubic,
    /// Quadratic interpolation using the three nearest intermediate sinc points.
    /// A quadratic polynomial is fitted to these points to compute each output sample.
    ///
    /// Quality and CPU cost lie between `Linear` and `Cubic`.
    /// The combined-sinc optimisation applies at 3 or more channels.
    Quadratic,
    /// Linear interpolation between the two nearest intermediate sinc points.
    ///
    /// This is the fastest of the interpolating modes for 1–2 channels (only `Nearest`,
    /// which does not interpolate, is cheaper) but requires a larger
    /// `oversampling_factor` than cubic to achieve the same artefact floor.
    /// The combined-sinc optimisation applies at 3 or more channels.
    Linear,
    /// No interpolation: the nearest intermediate sinc point is used directly.
    ///
    /// This is useful when the ratio between input and output sample rates can be
    /// expressed exactly by a fraction with a small denominator, so that one of the
    /// pre-computed sinc points always falls exactly on the desired position.
    /// For example, upsampling 48 kHz to 96 kHz with `oversampling_factor = 2` is
    /// equivalent to synchronous resampling with no added artefacts.
    /// For 44.1 kHz to 48 kHz, `oversampling_factor = 160` achieves the same
    /// (since 48000 = 160/147 × 44100).
    ///
    /// Each output sample requires exactly one sinc dot product per channel regardless
    /// of channel count; there is no combined-sinc optimisation for this mode.
    Nearest,
}

/// Build a sinc interpolator, preferring a SIMD implementation when one can be constructed.
///
/// * `sinc_len` is rounded up to the nearest multiple of 8 before use.
/// * `resample_ratio` only affects the cutoff: for ratios below 1.0 (downsampling)
///   `f_cutoff` is multiplied by the ratio, otherwise it is used unchanged.
/// * `oversampling_factor` and `window` are passed through to the chosen implementation.
///
/// On `x86_64` the AVX implementation is tried first and then SSE; on `aarch64` the NEON
/// implementation is tried. If every SIMD constructor that was compiled in returns an error
/// (or none was compiled in), the scalar implementation is returned.
pub fn make_interpolator<T>(
    sinc_len: usize,
    resample_ratio: f64,
    f_cutoff: f32,
    oversampling_factor: usize,
    window: WindowFunction,
) -> AnyInterpolator<T>
where
    T: AvxSample + SseSample + NeonSample + Sample,
{
    // Round up to a multiple of 8 with integer arithmetic. Going through `f32` would
    // silently lose precision for lengths above 2^24 and could round the length *down*.
    let sinc_len = sinc_len.saturating_add(7) & !7;
    // When downsampling, the cutoff must follow the (lower) output Nyquist frequency.
    let f_cutoff = if resample_ratio >= 1.0 {
        f_cutoff
    } else {
        f_cutoff * resample_ratio as f32
    };

    #[cfg(target_arch = "x86_64")]
    if let Ok(interpolator) =
        AvxInterpolator::<T>::new(sinc_len, oversampling_factor, f_cutoff, window)
    {
        return AnyInterpolator::Avx(interpolator);
    }

    #[cfg(target_arch = "x86_64")]
    if let Ok(interpolator) =
        SseInterpolator::<T>::new(sinc_len, oversampling_factor, f_cutoff, window)
    {
        return AnyInterpolator::Sse(interpolator);
    }

    #[cfg(target_arch = "aarch64")]
    if let Ok(interpolator) =
        NeonInterpolator::<T>::new(sinc_len, oversampling_factor, f_cutoff, window)
    {
        return AnyInterpolator::Neon(interpolator);
    }

    // Portable fallback that is always available.
    AnyInterpolator::Scalar(ScalarInterpolator::<T>::new(
        sinc_len,
        oversampling_factor,
        f_cutoff,
        window,
    ))
}

/// Evaluate, at position `x`, the cubic polynomial passing through four points.
///
/// The entries of `yvals` are taken to be the values at x = -1, 0, 1 and 2, in that order.
#[inline]
pub fn interp_cubic<T>(x: T, yvals: &[T; 4]) -> T
where
    T: Sample,
{
    let [y_m1, y_0, y_1, y_2] = *yvals;
    let half = t!(0.5);
    let third = t!(1.0 / 3.0);
    let sixth = t!(1.0 / 6.0);

    // Polynomial coefficients for c0 + c1*x + c2*x^2 + c3*x^3.
    let c0 = y_0;
    let c1 = -third * y_m1 - half * y_0 + y_1 - sixth * y_2;
    let c2 = half * (y_m1 + y_1) - y_0;
    let c3 = half * (y_0 - y_1) + sixth * (y_2 - y_m1);

    let x_sq = x * x;
    let x_cu = x_sq * x;
    c0 + c1 * x + c2 * x_sq + c3 * x_cu
}

/// Return the four per-point weights of cubic interpolation at fractional position `x`.
///
/// The weights satisfy `interp_cubic(x, pts) == dot(weights, pts)` (up to rounding),
/// with the points assumed to sit at x = -1, 0, 1 and 2.
#[inline]
pub fn interp_cubic_weights<T>(x: T) -> [T; 4]
where
    T: Sample,
{
    let x_sq = x * x;
    let x_cu = x_sq * x;
    let half = t!(0.5);
    let sixth = t!(1.0 / 6.0);

    let w_m1 = t!(-1.0 / 3.0) * x + half * x_sq - sixth * x_cu;
    let w_0 = t!(1.0) - half * x - x_sq + half * x_cu;
    let w_1 = x + half * x_sq - half * x_cu;
    let w_2 = -sixth * x + sixth * x_cu;
    [w_m1, w_0, w_1, w_2]
}

/// Evaluate, at position `x`, the quadratic polynomial passing through three points.
///
/// The entries of `yvals` are taken to be the values at x = 0, 1 and 2, in that order.
#[inline]
pub fn interp_quad<T>(x: T, yvals: &[T; 3]) -> T
where
    T: Sample,
{
    let [y_0, y_1, y_2] = *yvals;

    // Coefficients of twice the polynomial; the common factor 0.5 is applied at the end.
    let c0 = t!(2.0) * y_0;
    let c1 = -t!(3.0) * y_0 + t!(4.0) * y_1 - y_2;
    let c2 = y_0 - t!(2.0) * y_1 + y_2;

    let x_sq = x * x;
    t!(0.5) * (c0 + c1 * x + c2 * x_sq)
}

/// Return the three per-point weights of quadratic interpolation at fractional position `x`.
///
/// The weights satisfy `interp_quad(x, pts) == dot(weights, pts)` (up to rounding),
/// with the points assumed to sit at x = 0, 1 and 2.
#[inline]
pub fn interp_quad_weights<T>(x: T) -> [T; 3]
where
    T: Sample,
{
    let x_sq = x * x;
    let half = t!(0.5);

    let w_0 = half * (t!(2.0) - t!(3.0) * x + x_sq);
    let w_1 = half * (t!(4.0) * x - t!(2.0) * x_sq);
    let w_2 = half * (x_sq - x);
    [w_0, w_1, w_2]
}

/// Linearly interpolate at position `x` between `yvals[0]` (at x = 0) and `yvals[1]` (at x = 1).
#[inline]
pub fn interp_lin<T>(x: T, yvals: &[T; 2]) -> T
where
    T: Sample,
{
    let [left, right] = *yvals;
    let rise = right - left;
    left + x * rise
}

/// Return the two per-point weights of linear interpolation at fractional position `x`.
///
/// The weights satisfy `interp_lin(x, pts) == dot(weights, pts)` (up to rounding).
#[inline]
pub fn interp_lin_weights<T>(x: T) -> [T; 2]
where
    T: Sample,
{
    let w_left = t!(1.0) - x;
    let w_right = x;
    [w_left, w_right]
}

/// Sinc-based inner resampler state: the sinc interpolator, the selected method for
/// interpolating between sinc points, and a scratch buffer for the combined-sinc path.
pub(crate) struct InnerSinc<T>
where
    T: AvxSample + SseSample + NeonSample + Sample,
{
    /// The sinc interpolator (scalar or SIMD variant) used for all dot products.
    pub interpolator: AnyInterpolator<T>,
    /// How values between the precomputed sinc points are interpolated.
    pub interpolation: SincInterpolationType,
    // Pre-allocated buffer for the combined sinc, used by the combined-sinc path
    // (2+ active channels for Cubic, 3+ for Quadratic and Linear).
    // Length is interpolator.nbr_points() + 1.
    // NOTE(review): the original comment states this is 32-byte aligned so that 256-bit AVX
    // loads on it never cross a cache line boundary; confirm against `AlignedBuf`.
    combined: AlignedBuf<T>,
}

impl<T> InnerSinc<T>
where
    T: AvxSample + SseSample + NeonSample + Sample,
{
    /// Create the inner resampler from an interpolator and an interpolation method.
    ///
    /// The scratch buffer for the combined sinc is allocated here, zero-filled, with
    /// `interpolator.nbr_points() + 1` elements.
    pub(crate) fn new(
        interpolator: AnyInterpolator<T>,
        interpolation: SincInterpolationType,
    ) -> Self {
        // Sized before `interpolator` is moved into the struct.
        let combined = AlignedBuf::zeroed(interpolator.nbr_points() + 1);
        Self {
            interpolator,
            interpolation,
            combined,
        }
    }

    /// Combined-sinc path for one output frame.
    ///
    /// The sincs referenced by `nearest` are blended with `weights` into `self.combined`,
    /// after which every active channel needs a single dot product plus one extra
    /// multiply for the trailing element of the combined buffer. The result is written
    /// to `wave_out` at frame `frame + output_offset`. Chosen when the channel count makes
    /// building the combined sinc worthwhile (see the `use_combined` thresholds in `process`).
    #[inline(always)]
    #[allow(clippy::too_many_arguments)]
    fn process_combined_frame(
        &mut self,
        nearest: &[(isize, isize)],
        weights: &[T],
        interpolator_len: usize,
        channel_mask: &[bool],
        wave_in: &[Vec<T>],
        wave_out: &mut dyn AdapterMut<'_, T>,
        frame: usize,
        output_offset: usize,
    ) {
        // Ask the interpolator to prefetch every sinc that is about to be blended.
        for &(_, subindex) in nearest {
            self.interpolator.prefetch_sinc(subindex as usize);
        }
        let min_idx = self
            .interpolator
            .make_combined_sinc(nearest, weights, &mut self.combined);
        // Offset by twice the filter length to get the start position in the input buffer.
        let start = (min_idx + 2 * interpolator_len as isize) as usize;
        for (chan, &active) in channel_mask.iter().enumerate() {
            if !active {
                continue;
            }
            let samples = &wave_in[chan];
            let body = self.interpolator.get_sinc_dot_product(
                samples,
                start,
                &self.combined[..interpolator_len],
            );
            // The combined buffer holds one element more than the filter length.
            let tail = self.combined[interpolator_len] * samples[start + interpolator_len];
            wave_out.write_sample(chan, frame + output_offset, &(body + tail));
        }
    }

    /// Direct path for one output frame.
    ///
    /// For every active channel one sinc dot product is computed per entry of `nearest`,
    /// and the resulting points are reduced to a single sample by `interp`, which is written
    /// to `wave_out` at frame `frame + output_offset`. Chosen for low channel counts, where
    /// building a combined sinc would cost more than the separate dot products.
    /// `nearest` may hold at most four entries.
    #[inline(always)]
    #[allow(clippy::too_many_arguments)]
    fn process_direct_frame(
        &self,
        nearest: &[(isize, isize)],
        interpolator_len: usize,
        channel_mask: &[bool],
        wave_in: &[Vec<T>],
        wave_out: &mut dyn AdapterMut<'_, T>,
        frame: usize,
        output_offset: usize,
        interp: impl Fn(&[T]) -> T,
    ) {
        let count = nearest.len();
        // Scratch space for up to four interpolation points, reused for every channel.
        let mut points = [T::zero(); 4];
        for (chan, &active) in channel_mask.iter().enumerate() {
            if !active {
                continue;
            }
            let samples = &wave_in[chan];
            for (slot, &(index, subindex)) in points[..count].iter_mut().zip(nearest) {
                *slot = self.interpolator.get_sinc_interpolated(
                    samples,
                    (index + 2 * interpolator_len as isize) as usize,
                    subindex as usize,
                );
            }
            wave_out.write_sample(chan, frame + output_offset, &interp(&points[..count]));
        }
    }
}

impl<T> InnerResampler<T> for InnerSinc<T>
where
    T: AvxSample + SseSample + NeonSample + Sample,
{
    /// Produce `nbr_frames` output frames for every active channel.
    ///
    /// For each frame, `t_ratio` is first increased by `t_ratio_increment` and then added to
    /// the running position `idx`; the sample at that position is computed from `wave_in`
    /// using the configured interpolation type and written to `wave_out` at frame
    /// `frame + output_offset`. Channels whose entry in `channel_mask` is `false` are skipped.
    ///
    /// Returns the position `idx` reached after the last frame.
    fn process(
        &mut self,
        idx: f64,
        nbr_frames: usize,
        channel_mask: &[bool],
        t_ratio: f64,
        t_ratio_increment: f64,
        wave_in: &[Vec<T>],
        wave_out: &mut dyn AdapterMut<'_, T>,
        output_offset: usize,
    ) -> f64 {
        let mut t_ratio = t_ratio;
        let mut idx = idx;
        let interpolator_len = self.interpolator.nbr_points();
        let oversampling_factor = self.interpolator.nbr_sincs();
        // Loop-invariant conversions of the oversampling factor.
        let factor_int = oversampling_factor as isize;
        let factor_float = oversampling_factor as f64;
        // Fractional distance past the sinc point at or below `position`,
        // in units of one oversampled step.
        let frac_offset_at = |position: f64| -> T {
            let scaled = position * factor_float;
            t!(scaled - scaled.floor())
        };
        let active_count = channel_mask.iter().filter(|&&a| a).count();
        match self.interpolation {
            SincInterpolationType::Cubic => {
                // 4 nearest points: combined sinc pays off at 2+ channels.
                let use_combined = active_count >= 2;
                let mut nearest = [(0isize, 0isize); 4];
                for frame in 0..nbr_frames {
                    t_ratio += t_ratio_increment;
                    idx += t_ratio;
                    get_nearest_times_4(idx, factor_int, &mut nearest);
                    let frac_offset = frac_offset_at(idx);
                    if use_combined {
                        let weights = interp_cubic_weights(frac_offset);
                        self.process_combined_frame(
                            &nearest,
                            &weights,
                            interpolator_len,
                            channel_mask,
                            wave_in,
                            wave_out,
                            frame,
                            output_offset,
                        );
                    } else {
                        self.process_direct_frame(
                            &nearest,
                            interpolator_len,
                            channel_mask,
                            wave_in,
                            wave_out,
                            frame,
                            output_offset,
                            |pts| interp_cubic(frac_offset, &[pts[0], pts[1], pts[2], pts[3]]),
                        );
                    }
                }
            }
            SincInterpolationType::Quadratic => {
                // 3 nearest points: combined sinc pays off at 3+ channels.
                let use_combined = active_count > 2;
                let mut nearest = [(0isize, 0isize); 3];
                for frame in 0..nbr_frames {
                    t_ratio += t_ratio_increment;
                    idx += t_ratio;
                    get_nearest_times_3(idx, factor_int, &mut nearest);
                    let frac_offset = frac_offset_at(idx);
                    if use_combined {
                        let weights = interp_quad_weights(frac_offset);
                        self.process_combined_frame(
                            &nearest,
                            &weights,
                            interpolator_len,
                            channel_mask,
                            wave_in,
                            wave_out,
                            frame,
                            output_offset,
                        );
                    } else {
                        self.process_direct_frame(
                            &nearest,
                            interpolator_len,
                            channel_mask,
                            wave_in,
                            wave_out,
                            frame,
                            output_offset,
                            |pts| interp_quad(frac_offset, &[pts[0], pts[1], pts[2]]),
                        );
                    }
                }
            }
            SincInterpolationType::Linear => {
                // 2 nearest points: combined sinc pays off at 3+ channels.
                let use_combined = active_count > 2;
                let mut nearest = [(0isize, 0isize); 2];
                for frame in 0..nbr_frames {
                    t_ratio += t_ratio_increment;
                    idx += t_ratio;
                    get_nearest_times_2(idx, factor_int, &mut nearest);
                    let frac_offset = frac_offset_at(idx);
                    if use_combined {
                        let weights = interp_lin_weights(frac_offset);
                        self.process_combined_frame(
                            &nearest,
                            &weights,
                            interpolator_len,
                            channel_mask,
                            wave_in,
                            wave_out,
                            frame,
                            output_offset,
                        );
                    } else {
                        self.process_direct_frame(
                            &nearest,
                            interpolator_len,
                            channel_mask,
                            wave_in,
                            wave_out,
                            frame,
                            output_offset,
                            |pts| interp_lin(frac_offset, &[pts[0], pts[1]]),
                        );
                    }
                }
            }
            SincInterpolationType::Nearest => {
                // A single sinc point per frame: one dot product per active channel,
                // no blending between points.
                for frame in 0..nbr_frames {
                    t_ratio += t_ratio_increment;
                    idx += t_ratio;
                    let nearest = get_nearest_time(idx, factor_int);
                    for (chan, active) in channel_mask.iter().enumerate() {
                        if *active {
                            let buf = &wave_in[chan];
                            let point = self.interpolator.get_sinc_interpolated(
                                buf,
                                (nearest.0 + 2 * interpolator_len as isize) as usize,
                                nearest.1 as usize,
                            );
                            wave_out.write_sample(chan, frame + output_offset, &point);
                        }
                    }
                }
            }
        }
        idx
    }

    /// Number of points reported by the underlying interpolator.
    fn nbr_points(&self) -> usize {
        self.interpolator.nbr_points()
    }

    /// Initial value for the position index: `-(nbr_points - 1)` as a float.
    fn init_last_index(&self) -> f64 {
        -(self.interpolator.nbr_points() as f64 - 1.0)
    }
}

#[cfg(test)]
mod tests {
    use super::{
        interp_cubic, interp_cubic_weights, interp_lin, interp_lin_weights, interp_quad,
        interp_quad_weights,
    };

    /// Fractional positions 0.0, 0.1, ..., 1.0 used to sweep the weight functions.
    fn unit_steps() -> impl Iterator<Item = f64> {
        (0..=10).map(|step| f64::from(step) / 10.0)
    }

    /// Weighted sum of `points` using `weights`, accumulated left to right.
    fn blend(weights: &[f64], points: &[f64]) -> f64 {
        weights.iter().zip(points).map(|(w, y)| w * y).sum()
    }

    #[test]
    fn int_cubic() {
        // Points on a straight line: the cubic must reproduce the line exactly.
        let line = [0.0f64, 2.0f64, 4.0f64, 6.0f64];
        assert_eq!(interp_cubic(0.5f64, &line), 3.0f64);
    }

    #[test]
    fn int_lin_32() {
        let endpoints = [1.0f32, 5.0f32];
        assert_eq!(interp_lin(0.25f32, &endpoints), 2.0f32);
    }

    #[test]
    fn int_cubic_32() {
        // Same straight-line check as `int_cubic`, in single precision.
        let line = [0.0f32, 2.0f32, 4.0f32, 6.0f32];
        assert_eq!(interp_cubic(0.5f32, &line), 3.0f32);
    }

    /// Verify that interp_cubic_weights produces the same result as interp_cubic.
    #[test]
    fn cubic_weights_match_cubic() {
        let yvals = [1.3f64, -0.7f64, 2.1f64, 0.4f64];
        for x in unit_steps() {
            let direct = interp_cubic(x, &yvals);
            let blended = blend(&interp_cubic_weights(x), &yvals);
            assert!((direct - blended).abs() < 1.0e-12, "mismatch at x={x}");
        }
    }

    /// Verify that interp_quad_weights produces the same result as interp_quad.
    #[test]
    fn quad_weights_match_quad() {
        let yvals = [1.3f64, -0.7f64, 2.1f64];
        for x in unit_steps() {
            let direct = interp_quad(x, &yvals);
            let blended = blend(&interp_quad_weights(x), &yvals);
            assert!((direct - blended).abs() < 1.0e-12, "mismatch at x={x}");
        }
    }

    /// Verify that interp_lin_weights produces the same result as interp_lin.
    #[test]
    fn lin_weights_match_lin() {
        let yvals = [1.3f64, -0.7f64];
        for x in unit_steps() {
            let direct = interp_lin(x, &yvals);
            let blended = blend(&interp_lin_weights(x), &yvals);
            assert!((direct - blended).abs() < 1.0e-12, "mismatch at x={x}");
        }
    }
}