use nalgebra::{DMatrix, DVector};
use ndarray::s;
use ndarray::{Array1, Array2};
use rayon::prelude::*;
use std::time::Instant;
use thiserror::Error;
#[derive(Clone, Debug)]
pub struct BSpline {
pub knots: Array1<f64>,
pub coeffs: Array1<f64>,
pub degree: usize,
}
impl BSpline {
pub fn evaluate(&self, x: f64) -> f64 {
deboor(x, &self.knots, &self.coeffs, self.degree)
}
pub fn evaluate_batch(&self, x: &[f64]) -> Vec<f64> {
deboor_batch(x, &self.knots, &self.coeffs, self.degree)
}
pub fn evaluate_batch_array(&self, x: &Array1<f64>) -> Array1<f64> {
deboor_batch_array1(x, &self.knots, &self.coeffs, self.degree)
}
}
pub fn find_span(n: usize, k: usize, x: f64, t: &Array1<f64>) -> usize {
if x <= t[k] {
return k;
}
if x >= t[n + 1] {
return n;
}
let mut low = k;
let mut high = n + 1;
let mut mid = (low + high) / 2;
while x < t[mid] || x >= t[mid + 1] {
if x < t[mid] {
high = mid;
} else {
low = mid;
}
mid = (low + high) / 2;
}
mid
}
pub fn basis_functions(span: usize, x: f64, k: usize, t: &Array1<f64>) -> Vec<f64> {
let mut left = vec![0.0; k + 1];
let mut right = vec![0.0; k + 1];
let mut N = vec![0.0; k + 1];
N[0] = 1.0;
for j in 1..=k {
left[j] = x - t[span + 1 - j];
right[j] = t[span + j] - x;
let mut saved = 0.0;
for r in 0..j {
let temp = N[r] / (right[r + 1] + left[j - r]);
N[r] = saved + right[r + 1] * temp;
saved = left[j - r] * temp;
}
N[j] = saved;
}
N
}
pub fn deboor(x: f64, t: &Array1<f64>, c: &Array1<f64>, k: usize) -> f64 {
let n = c.len() - 1;
let span = find_span(n, k, x, t);
let mut d = vec![0.0; k + 1];
for j in 0..=k {
d[j] = c[span - k + j];
}
for r in 1..=k {
for j in (r..=k).rev() {
let i = span - k + j;
let denom = t[i + k + 1 - r] - t[i];
if denom.abs() < 1e-14 {
continue;
}
let alpha = (x - t[i]) / denom;
d[j] = (1.0 - alpha) * d[j - 1] + alpha * d[j];
}
}
d[k]
}
#[inline]
fn deboor_with_span(x: f64, t: &Array1<f64>, c: &Array1<f64>, k: usize, span: usize) -> f64 {
let mut d = vec![0.0; k + 1];
for j in 0..=k {
d[j] = c[span - k + j];
}
for r in 1..=k {
for j in (r..=k).rev() {
let i = span - k + j;
let denom = t[i + k + 1 - r] - t[i];
if denom.abs() < 1e-14 {
continue;
}
let alpha = (x - t[i]) / denom;
d[j] = (1.0 - alpha) * d[j - 1] + alpha * d[j];
}
}
d[k]
}
pub fn deboor_batch(x: &[f64], t: &Array1<f64>, c: &Array1<f64>, k: usize) -> Vec<f64> {
let n = c.len() - 1;
x.par_iter()
.map(|&xi| {
let span = find_span(n, k, xi, t);
deboor_with_span(xi, t, c, k, span)
})
.collect()
}
pub fn deboor_batch_array1(
x: &Array1<f64>,
t: &Array1<f64>,
c: &Array1<f64>,
k: usize,
) -> Array1<f64> {
Array1::from(deboor_batch(x.as_slice().unwrap(), t, c, k))
}
pub fn design_matrix_dense(x: &Array1<f64>, knots: &Array1<f64>, degree: usize) -> Array2<f64> {
let m = x.len();
let n = knots.len() - degree - 1;
let mut A = Array2::<f64>::zeros((m, n));
for (i, &xi) in x.iter().enumerate() {
let span = find_span(n - 1, degree, xi, knots);
let basis = basis_functions(span, xi, degree, knots);
for j in 0..=degree {
A[[i, span - degree + j]] = basis[j];
}
}
A
}
pub fn design_matrix_dense_parallel(
x: &Array1<f64>,
knots: &Array1<f64>,
degree: usize,
) -> Array2<f64> {
let m = x.len();
let n = knots.len() - degree - 1;
let row_entries: Vec<Vec<(usize, f64)>> = (0..m)
.into_par_iter()
.map(|i| {
let xi = x[i];
let span = find_span(n - 1, degree, xi, knots);
let basis = basis_functions(span, xi, degree, knots);
(0..=degree)
.map(|j| (span - degree + j, basis[j]))
.collect::<Vec<_>>()
})
.collect();
let mut A = Array2::<f64>::zeros((m, n));
for (i, entries) in row_entries.into_iter().enumerate() {
for (col, val) in entries {
A[[i, col]] = val;
}
}
A
}
#[derive(Debug, Error)]
pub enum LssError {
#[error("Linear algebra failure")]
Linalg,
#[error("Invalid spline input")]
InvalidInput,
}
#[derive(Clone, Copy)]
pub enum SolverKind {
DenseQR,
Banded, }
#[inline]
fn timing_enabled() -> bool {
std::env::var("LSQ_SPLINES_TIMING")
.map(|v| {
let normalized = v.trim().to_ascii_lowercase();
!(normalized.is_empty()
|| normalized == "0"
|| normalized == "false"
|| normalized == "off")
})
.unwrap_or(false)
}
#[inline]
fn log_timing(stage: &str, started: Instant) {
if timing_enabled() {
eprintln!(
"[LSQSplines][timing] {:<28} {:>10.3} ms",
stage,
started.elapsed().as_secs_f64() * 1_000.0
);
}
}
pub fn solve_lsq(
x: &Array1<f64>,
y: &Array1<f64>,
w: &Array1<f64>,
knots: &Array1<f64>,
degree: usize,
solver: SolverKind,
) -> Result<Array1<f64>, LssError> {
let total_started = Instant::now();
match solver {
SolverKind::DenseQR => {
let design_started = Instant::now();
let A = design_matrix_dense(x, knots, degree);
log_timing("dense.design_matrix", design_started);
let solve_started = Instant::now();
let result = dense_qr_nalgebra(A, y, w);
log_timing("dense.solve_qr", solve_started);
log_timing("dense.total", total_started);
result
}
SolverKind::Banded => {
let result = banded_solver(x, y, w, knots, degree);
log_timing("banded.total", total_started);
result
}
}
}
pub fn solve_lsq_parallel(
x: &Array1<f64>,
y: &Array1<f64>,
w: &Array1<f64>,
knots: &Array1<f64>,
degree: usize,
solver: SolverKind,
) -> Result<Array1<f64>, LssError> {
let total_started = Instant::now();
match solver {
SolverKind::DenseQR => {
let design_started = Instant::now();
let A = design_matrix_dense_parallel(x, knots, degree);
log_timing("dense.parallel.design_matrix", design_started);
let solve_started = Instant::now();
let result = dense_qr_nalgebra_parallel(A, y, w);
log_timing("dense.parallel.solve_qr", solve_started);
log_timing("dense.parallel.total", total_started);
result
}
SolverKind::Banded => {
let result = banded_solver_parallel(x, y, w, knots, degree);
log_timing("banded.parallel.total", total_started);
result
}
}
}
fn dense_qr_nalgebra(
A: Array2<f64>,
y: &Array1<f64>,
w: &Array1<f64>,
) -> Result<Array1<f64>, LssError> {
let (m, n) = A.dim();
if m < n {
return Err(LssError::InvalidInput);
}
let weighted_started = Instant::now();
let mut mat = DMatrix::<f64>::zeros(m, n);
let mut vec = DVector::<f64>::zeros(m);
for i in 0..m {
let wi = w[i].sqrt();
vec[i] = y[i] * wi;
for j in 0..n {
mat[(i, j)] = A[[i, j]] * wi;
}
}
log_timing("dense.weighted_assembly", weighted_started);
let qr_started = Instant::now();
let qr = mat.qr();
let q = qr.q();
let r = qr.r();
log_timing("dense.qr_factorization", qr_started);
let solve_started = Instant::now();
let qt_b = q.transpose() * vec;
let r_square = r.view((0, 0), (n, n));
let qt_b_trunc = qt_b.rows(0, n);
let solution = r_square
.solve_upper_triangular(&qt_b_trunc)
.ok_or(LssError::Linalg)?;
log_timing("dense.back_substitution", solve_started);
Ok(Array1::from(solution.data.as_vec().clone()))
}
fn dense_qr_nalgebra_parallel(
A: Array2<f64>,
y: &Array1<f64>,
w: &Array1<f64>,
) -> Result<Array1<f64>, LssError> {
let (m, n) = A.dim();
if m < n {
return Err(LssError::InvalidInput);
}
let weighted_started = Instant::now();
let weighted_rows: Vec<Vec<f64>> = (0..m)
.into_par_iter()
.map(|i| {
let wi = w[i].sqrt();
(0..n).map(|j| A[[i, j]] * wi).collect::<Vec<_>>()
})
.collect();
let flat_weighted: Vec<f64> = weighted_rows.into_iter().flatten().collect();
let mat = DMatrix::<f64>::from_row_slice(m, n, &flat_weighted);
let vec_data: Vec<f64> = (0..m).into_par_iter().map(|i| y[i] * w[i].sqrt()).collect();
let vec = DVector::<f64>::from_vec(vec_data);
log_timing("dense.parallel.weighted_assembly", weighted_started);
let qr_started = Instant::now();
let qr = mat.qr();
let q = qr.q();
let r = qr.r();
log_timing("dense.parallel.qr_factorization", qr_started);
let solve_started = Instant::now();
let qt_b = q.transpose() * vec;
let r_square = r.view((0, 0), (n, n));
let qt_b_trunc = qt_b.rows(0, n);
let solution = r_square
.solve_upper_triangular(&qt_b_trunc)
.ok_or(LssError::Linalg)?;
log_timing("dense.parallel.back_substitution", solve_started);
Ok(Array1::from(solution.data.as_vec().clone()))
}
pub struct BandedMatrix {
pub data: Vec<Vec<f64>>, pub n: usize,
pub bandwidth: usize,
}
impl BandedMatrix {
pub fn new(n: usize, bandwidth: usize) -> Self {
Self {
data: vec![vec![0.0; n]; bandwidth],
n,
bandwidth,
}
}
pub fn add(&mut self, i: usize, j: usize, val: f64) {
let band = j - i;
if band < self.bandwidth {
self.data[band][i] += val;
}
}
pub fn get(&self, i: usize, j: usize) -> f64 {
let band = j - i;
if band < self.bandwidth {
self.data[band][i]
} else {
0.0
}
}
}
fn cholesky_banded(G: &mut BandedMatrix) -> Result<(), LssError> {
let n = G.n;
let bw = G.bandwidth;
for i in 0..n {
let mut sum = 0.0;
for k in 1..bw {
if i >= k {
let val = G.get(i - k, i);
sum += val * val;
}
}
let diag = G.get(i, i) - sum;
if diag <= 0.0 {
return Err(LssError::Linalg);
}
G.data[0][i] = diag.sqrt();
for j_offset in 1..bw {
let j = i + j_offset;
if j >= n {
break;
}
let mut sum = 0.0;
for k in 1..bw {
if i >= k && j >= k {
sum += G.get(i - k, i) * G.get(j - k, i);
}
}
let val = (G.get(i, j) - sum) / G.data[0][i];
G.data[j_offset][i] = val;
}
}
Ok(())
}
#[derive(Debug, Clone)]
pub struct SymmetricBanded {
n: usize,
k: usize, data: Vec<f64>, }
impl SymmetricBanded {
pub fn new(n: usize, k: usize) -> Self {
Self {
n,
k,
data: vec![0.0; (k + 1) * n],
}
}
#[inline(always)]
fn index(&self, band: usize, col: usize) -> usize {
band * self.n + col
}
#[inline(always)]
fn in_band(&self, i: usize, j: usize) -> Option<(usize, usize)> {
if i > j {
return self.in_band(j, i);
}
let band = j - i;
if band <= self.k {
Some((band, i))
} else {
None
}
}
pub fn get(&self, i: usize, j: usize) -> f64 {
if let Some((band, col)) = self.in_band(i, j) {
self.data[self.index(band, col)]
} else {
0.0
}
}
pub fn add(&mut self, i: usize, j: usize, value: f64) {
if let Some((band, col)) = self.in_band(i, j) {
let idx = self.index(band, col);
self.data[idx] += value;
}
}
pub fn set(&mut self, i: usize, j: usize, value: f64) {
if let Some((band, col)) = self.in_band(i, j) {
let idx = self.index(band, col);
self.data[idx] = value;
}
}
#[inline(always)]
pub fn diag(&self, i: usize) -> f64 {
self.data[self.index(0, i)]
}
#[inline(always)]
pub fn diag_mut(&mut self, i: usize) -> &mut f64 {
let idx = self.index(0, i);
&mut self.data[idx]
}
pub fn size(&self) -> usize {
self.n
}
pub fn bandwidth(&self) -> usize {
self.k
}
pub fn to_dense(&self) -> nalgebra::DMatrix<f64> {
let mut mat = nalgebra::DMatrix::zeros(self.n, self.n);
for i in 0..self.n {
for j in i..=usize::min(self.n - 1, i + self.k) {
let val = self.get(i, j);
mat[(i, j)] = val;
mat[(j, i)] = val;
}
}
mat
}
pub fn cholesky_in_place(&mut self) -> Result<(), &'static str> {
let n = self.n;
let k = self.k;
for i in 0..n {
let mut sum = 0.0;
for p in 1..=k {
if i < p {
break;
}
let val = self.get(i - p, i);
sum += val * val;
}
let diag_idx = self.index(0, i);
let updated = self.data[diag_idx] - sum;
if updated <= 0.0 {
return Err("Matrix not positive definite");
}
let rii = updated.sqrt();
self.data[diag_idx] = rii;
let max_j = usize::min(n - 1, i + k);
for j in (i + 1)..=max_j {
let mut sum = 0.0;
for p in 1..=k {
if i < p {
break;
}
let s = i - p;
if j - s > k {
continue;
}
let rik = self.get(s, i);
let rsj = self.get(s, j);
sum += rik * rsj;
}
if let Some((band, col)) = self.in_band(i, j) {
let idx = self.index(band, col);
let val = (self.data[idx] - sum) / rii;
self.data[idx] = val;
}
}
}
Ok(())
}
pub fn solve_spd_in_place(&self, rhs: &mut [f64]) -> Result<(), &'static str> {
let n = self.n;
let k = self.k;
if rhs.len() != n {
return Err("Dimension mismatch");
}
for i in 0..n {
let mut sum = rhs[i];
for p in 1..=k {
if i < p {
break;
}
let val = self.get(i - p, i); sum -= val * rhs[i - p];
}
let rii = self.get(i, i);
rhs[i] = sum / rii;
}
for i in (0..n).rev() {
let mut sum = rhs[i];
let max_j = usize::min(n - 1, i + k);
for j in (i + 1)..=max_j {
let val = self.get(i, j);
sum -= val * rhs[j];
}
let rii = self.get(i, i);
rhs[i] = sum / rii;
}
Ok(())
}
}
fn build_normal_equations_banded(
x: &Array1<f64>,
y: &Array1<f64>,
w: &Array1<f64>,
knots: &Array1<f64>,
degree: usize,
) -> (SymmetricBanded, Vec<f64>) {
let n = knots.len() - degree - 1;
let half_bandwidth = 2 * degree;
let mut G = SymmetricBanded::new(n, half_bandwidth);
let mut rhs = vec![0.0; n];
for i in 0..x.len() {
let xi = x[i];
let wi = w[i];
let yi = y[i];
let span = find_span(n - 1, degree, xi, knots);
let basis = basis_functions(span, xi, degree, knots);
for a in 0..=degree {
let row = span - degree + a;
let va = basis[a];
rhs[row] += wi * va * yi;
for b in a..=degree {
let col = span - degree + b;
let vb = basis[b];
G.add(row, col, wi * va * vb);
}
}
}
(G, rhs)
}
fn build_normal_equations_banded_parallel(
x: &Array1<f64>,
y: &Array1<f64>,
w: &Array1<f64>,
knots: &Array1<f64>,
degree: usize,
) -> (SymmetricBanded, Vec<f64>) {
let n = knots.len() - degree - 1;
let half_bandwidth = 2 * degree;
(0..x.len())
.into_par_iter()
.fold(
|| (SymmetricBanded::new(n, half_bandwidth), vec![0.0; n]),
|(mut g_local, mut rhs_local), i| {
let xi = x[i];
let wi = w[i];
let yi = y[i];
let span = find_span(n - 1, degree, xi, knots);
let basis = basis_functions(span, xi, degree, knots);
for a in 0..=degree {
let row = span - degree + a;
let va = basis[a];
rhs_local[row] += wi * va * yi;
for b in a..=degree {
let col = span - degree + b;
let vb = basis[b];
g_local.add(row, col, wi * va * vb);
}
}
(g_local, rhs_local)
},
)
.reduce(
|| (SymmetricBanded::new(n, half_bandwidth), vec![0.0; n]),
|(mut g_acc, mut rhs_acc), (g_part, rhs_part)| {
for (acc, part) in g_acc.data.iter_mut().zip(g_part.data.iter()) {
*acc += *part;
}
for (acc, part) in rhs_acc.iter_mut().zip(rhs_part.iter()) {
*acc += *part;
}
(g_acc, rhs_acc)
},
)
}
fn banded_solver(
x: &Array1<f64>,
y: &Array1<f64>,
w: &Array1<f64>,
knots: &Array1<f64>,
degree: usize,
) -> Result<Array1<f64>, LssError> {
let build_started = Instant::now();
let (mut G, mut rhs) = build_normal_equations_banded(x, y, w, knots, degree);
log_timing("banded.build_normal_eq", build_started);
let cholesky_started = Instant::now();
G.cholesky_in_place().map_err(|_| LssError::Linalg)?;
log_timing("banded.cholesky", cholesky_started);
let solve_started = Instant::now();
G.solve_spd_in_place(&mut rhs)
.map_err(|_| LssError::Linalg)?;
log_timing("banded.triangular_solve", solve_started);
Ok(Array1::from(rhs))
}
fn banded_solver_parallel(
x: &Array1<f64>,
y: &Array1<f64>,
w: &Array1<f64>,
knots: &Array1<f64>,
degree: usize,
) -> Result<Array1<f64>, LssError> {
let build_started = Instant::now();
let (mut G, mut rhs) = build_normal_equations_banded_parallel(x, y, w, knots, degree);
log_timing("banded.parallel.build_normal_eq", build_started);
let cholesky_started = Instant::now();
G.cholesky_in_place().map_err(|_| LssError::Linalg)?;
log_timing("banded.parallel.cholesky", cholesky_started);
let solve_started = Instant::now();
G.solve_spd_in_place(&mut rhs)
.map_err(|_| LssError::Linalg)?;
log_timing("banded.parallel.triangular_solve", solve_started);
Ok(Array1::from(rhs))
}
pub fn check_schoenberg_whitney(
x: &Array1<f64>,
knots: &Array1<f64>,
degree: usize,
) -> Result<(), &'static str> {
let n = knots.len() - degree - 1;
for j in 0..n {
let left = knots[j];
let right = knots[j + degree + 1];
let exists = x.iter().any(|&xi| xi > left && xi < right);
if !exists {
return Err("Schoenberg–Whitney condition violated");
}
}
Ok(())
}
pub fn check_schoenberg_whitney_parallel(
x: &Array1<f64>,
knots: &Array1<f64>,
degree: usize,
) -> Result<(), &'static str> {
let n = knots.len() - degree - 1;
(0..n).into_par_iter().try_for_each(|j| {
let left = knots[j];
let right = knots[j + degree + 1];
let exists = x.iter().any(|&xi| xi > left && xi < right);
if exists {
Ok(())
} else {
Err("Schoenberg–Whitney condition violated")
}
})
}
pub fn make_lsq_spline(
x: &Array1<f64>,
y: &Array1<f64>,
knots: &Array1<f64>,
degree: usize,
weights: Option<&Array1<f64>>,
solver: SolverKind,
) -> Result<BSpline, LssError> {
let n_coeffs = knots.len() - degree - 1;
if x.len() < n_coeffs {
return Err(LssError::InvalidInput);
}
let w = weights.cloned().unwrap_or_else(|| Array1::ones(x.len()));
let coeffs = solve_lsq(x, y, &w, knots, degree, solver)?;
Ok(BSpline {
knots: knots.clone(),
coeffs,
degree,
})
}
pub fn make_lsq_spline_parallel(
x: &Array1<f64>,
y: &Array1<f64>,
knots: &Array1<f64>,
degree: usize,
weights: Option<&Array1<f64>>,
solver: SolverKind,
) -> Result<BSpline, LssError> {
let n_coeffs = knots.len() - degree - 1;
if x.len() < n_coeffs {
return Err(LssError::InvalidInput);
}
let w = weights.cloned().unwrap_or_else(|| Array1::ones(x.len()));
let coeffs = solve_lsq_parallel(x, y, &w, knots, degree, solver)?;
Ok(BSpline {
knots: knots.clone(),
coeffs,
degree,
})
}
pub fn make_lsq_univariate_spline_nonparallel(
x: &Array1<f64>,
y: &Array1<f64>,
internal_knots: &Array1<f64>,
degree: usize,
solver: SolverKind,
) -> Result<BSpline, LssError> {
let xmin = x[0];
let xmax = x[x.len() - 1];
let mut knots = Vec::new();
for _ in 0..=degree {
knots.push(xmin);
}
for &k in internal_knots {
knots.push(k);
}
for _ in 0..=degree {
knots.push(xmax);
}
let knots = Array1::from(knots);
check_schoenberg_whitney(x, &knots, degree).map_err(|_| LssError::InvalidInput)?;
make_lsq_spline(x, y, &knots, degree, None, solver)
}
pub fn make_lsq_univariate_spline(
x: &Array1<f64>,
y: &Array1<f64>,
internal_knots: &Array1<f64>,
degree: usize,
solver: SolverKind,
) -> Result<BSpline, LssError> {
let xmin = x[0];
let xmax = x[x.len() - 1];
let mut knots = Vec::with_capacity(internal_knots.len() + 2 * (degree + 1));
knots.extend(std::iter::repeat_n(xmin, degree + 1));
knots.extend(internal_knots.iter().copied());
knots.extend(std::iter::repeat_n(xmax, degree + 1));
let knots = Array1::from(knots);
check_schoenberg_whitney_parallel(x, &knots, degree).map_err(|_| LssError::InvalidInput)?;
make_lsq_spline_parallel(x, y, &knots, degree, None, solver)
}
pub fn choose_knots_by_noise(
x: &Array1<f64>,
y: &Array1<f64>,
sigma: f64,
degree: usize,
min_knots: usize,
max_knots: usize,
) -> usize {
let xmin = x[0];
let xmax = x[x.len() - 1];
for n in min_knots..=max_knots {
let internal = Array1::linspace(xmin, xmax, n);
let spline = make_lsq_univariate_spline(
x,
y,
&internal.slice(s![1..internal.len() - 1]).to_owned(),
degree,
SolverKind::Banded,
);
if spline.is_err() {
continue;
}
let spline = spline.unwrap();
let y_fit = x.mapv(|xi| spline.evaluate(xi));
let residual = rms(&y_fit, y);
if residual <= 1.2 * sigma {
return n;
}
}
max_knots
}
pub fn rms(a: &Array1<f64>, b: &Array1<f64>) -> f64 {
let diff = a - b;
let mse = diff.mapv(|v| v * v).mean().unwrap();
mse.sqrt()
}
pub fn rms_zero_mean(a: &Array1<f64>) -> f64 {
let mean = a.mean().unwrap();
let centered = a.mapv(|v| v - mean);
centered.mapv(|v| v * v).mean().unwrap().sqrt()
}