Skip to main content

neco_eigensolve/
lobpcg.rs

1use neco_sparse::CsrMat;
2
3use crate::dense::symmetric_dense_kernel;
4use crate::DenseMatrix;
5
6pub trait Preconditioner {
7    fn apply(&self, residuals: &DenseMatrix) -> DenseMatrix;
8}
9
10pub struct JacobiPreconditioner {
11    diag_inv: Vec<f64>,
12}
13
14impl JacobiPreconditioner {
15    pub fn new(k_mat: &CsrMat<f64>) -> Self {
16        let n = k_mat.nrows();
17        let mut diag_inv = vec![1.0; n];
18        for (i, diag_inv_i) in diag_inv.iter_mut().enumerate() {
19            let row = k_mat.row(i);
20            for (&col, &val) in row.col_indices().iter().zip(row.values().iter()) {
21                if col == i && val.abs() > 1e-20 {
22                    *diag_inv_i = 1.0 / val;
23                    break;
24                }
25            }
26        }
27        Self { diag_inv }
28    }
29
30    fn apply_block(&self, residuals: &DenseMatrix) -> DenseMatrix {
31        let mut out = residuals.clone();
32        for j in 0..out.ncols() {
33            for (value, &diag_inv) in out.column_mut(j).iter_mut().zip(&self.diag_inv) {
34                *value *= diag_inv;
35            }
36        }
37        out
38    }
39}
40
41impl Preconditioner for JacobiPreconditioner {
42    fn apply(&self, residuals: &DenseMatrix) -> DenseMatrix {
43        self.apply_block(residuals)
44    }
45}
46
47pub struct LobpcgResult {
48    pub eigenvalues: Vec<f64>,
49    /// Rows: modes, columns: DOFs
50    pub eigenvectors: DenseMatrix,
51    pub iterations: usize,
52}
53
54use crate::spmv;
55
56fn dot(x: &[f64], y: &[f64]) -> f64 {
57    x.iter().zip(y).map(|(a, b)| a * b).sum()
58}
59
60fn norm(x: &[f64]) -> f64 {
61    dot(x, x).sqrt()
62}
63
64fn lobpcg_spmv(a: &CsrMat<f64>, x: &[f64]) -> Vec<f64> {
65    let mut y = vec![0.0; a.nrows()];
66    spmv::spmv_dispatch(&mut y, a, x);
67    y
68}
69
70fn lobpcg_block_spmv_into(y: &mut DenseMatrix, a: &CsrMat<f64>, x: &DenseMatrix) {
71    spmv::block_spmv_dispatch(y.as_mut_slice(), a, x.as_slice(), a.nrows(), x.ncols());
72}
73
74fn lobpcg_block_spmv_into_prefix(
75    y: &mut DenseMatrix,
76    a: &CsrMat<f64>,
77    x: &DenseMatrix,
78    ncols: usize,
79) {
80    let nrows = a.nrows();
81    spmv::block_spmv_dispatch(
82        &mut y.as_mut_slice()[..nrows * ncols],
83        a,
84        &x.as_slice()[..nrows * ncols],
85        nrows,
86        ncols,
87    );
88}
89
90fn lobpcg_block_spmv_into_prefix_block(
91    y: &mut DenseMatrix,
92    a: &CsrMat<f64>,
93    x: &DenseMatrix,
94    ncols: usize,
95) {
96    if ncols == 0 {
97        return;
98    }
99    let nrows = a.nrows();
100    let mut tmp = vec![0.0; nrows * ncols];
101    spmv::block_spmv_dispatch(&mut tmp, a, &x.as_slice()[..nrows * ncols], nrows, ncols);
102    for col in 0..ncols {
103        y.column_mut(col)
104            .copy_from_slice(&tmp[col * nrows..(col + 1) * nrows]);
105    }
106}
107
108fn orthonormalize_b(
109    vecs: &mut DenseMatrix,
110    m_mat: &CsrMat<f64>,
111    m_vecs_buf: &mut DenseMatrix,
112) -> bool {
113    let cols = vecs.ncols();
114    lobpcg_block_spmv_into_prefix_block(m_vecs_buf, m_mat, vecs, cols);
115
116    let mut gram = vec![0.0; cols * cols];
117    for i in 0..cols {
118        for j in i..cols {
119            let mut dot = 0.0;
120            for k in 0..vecs.nrows() {
121                dot += vecs[(k, i)] * m_vecs_buf.column(j)[k];
122            }
123            gram[i * cols + j] = dot;
124            gram[j * cols + i] = dot;
125        }
126    }
127
128    match symmetric_dense_kernel().cholesky_upper(&gram, cols) {
129        Some(r) => {
130            let nrows = vecs.nrows();
131            let mut q_buf = vec![0.0; nrows * cols];
132            for j in 0..cols {
133                let mut coeffs = vec![0.0; cols];
134                coeffs[j] = 1.0;
135                for i in (0..cols).rev() {
136                    let mut sum = coeffs[i];
137                    for p in (i + 1)..cols {
138                        sum -= r[i * cols + p] * coeffs[p];
139                    }
140                    coeffs[i] = sum / r[i * cols + i];
141                }
142                for k in 0..nrows {
143                    let mut value = 0.0;
144                    for p in 0..cols {
145                        value += vecs[(k, p)] * coeffs[p];
146                    }
147                    q_buf[j * nrows + k] = value;
148                }
149            }
150            *vecs = DenseMatrix::from_column_slice(nrows, cols, &q_buf);
151            true
152        }
153        None => false,
154    }
155}
156
157fn deflate_dc(x: &mut DenseMatrix, m_mat: &CsrMat<f64>) {
158    let n = x.nrows();
159    let ones = vec![1.0; n];
160    let m_ones = lobpcg_spmv(m_mat, &ones);
161    let norm = dot(&ones, &m_ones).sqrt();
162    let dc: Vec<f64> = ones.iter().map(|value| value / norm).collect();
163    let m_dc: Vec<f64> = m_ones.iter().map(|value| value / norm).collect();
164
165    for j in 0..x.ncols() {
166        let dot = dot(x.column(j), &m_dc);
167        for i in 0..n {
168            x[(i, j)] -= dot * dc[i];
169        }
170    }
171}
172
173fn select_columns(mat: &DenseMatrix, indices: &[usize]) -> DenseMatrix {
174    mat.select_columns(indices)
175}
176
177struct SearchSpaceInputs<'a> {
178    x: &'a DenseMatrix,
179    w: &'a DenseMatrix,
180    p: &'a DenseMatrix,
181    kx_buf: &'a DenseMatrix,
182    kw_buf: &'a DenseMatrix,
183    kp: &'a DenseMatrix,
184    mx_buf: &'a DenseMatrix,
185    mw_buf: &'a DenseMatrix,
186    mp_buf: &'a DenseMatrix,
187}
188
189fn assemble_search_space(
190    inputs: SearchSpaceInputs<'_>,
191    p_cols: usize,
192    ma: usize,
193) -> (DenseMatrix, DenseMatrix, DenseMatrix) {
194    let SearchSpaceInputs {
195        x,
196        w,
197        p,
198        kx_buf,
199        kw_buf,
200        kp,
201        mx_buf,
202        mw_buf,
203        mp_buf,
204    } = inputs;
205    if p_cols > 0 {
206        let total = x.ncols() + ma + p_cols;
207        let mut s = DenseMatrix::zeros(x.nrows(), total);
208        let mut ks = DenseMatrix::zeros(x.nrows(), total);
209        let mut ms = DenseMatrix::zeros(x.nrows(), total);
210
211        s.copy_columns_from(0, x, 0, x.ncols());
212        s.copy_columns_from(x.ncols(), w, 0, ma);
213        s.copy_columns_from(x.ncols() + ma, p, 0, p_cols);
214
215        ks.copy_columns_from(0, kx_buf, 0, x.ncols());
216        ks.copy_columns_from(x.ncols(), kw_buf, 0, ma);
217        ks.copy_columns_from(x.ncols() + ma, kp, 0, p_cols);
218
219        ms.copy_columns_from(0, mx_buf, 0, x.ncols());
220        ms.copy_columns_from(x.ncols(), mw_buf, 0, ma);
221        ms.copy_columns_from(x.ncols() + ma, mp_buf, 0, p_cols);
222
223        (s, ks, ms)
224    } else {
225        let total = x.ncols() + ma;
226        let mut s = DenseMatrix::zeros(x.nrows(), total);
227        let mut ks = DenseMatrix::zeros(x.nrows(), total);
228        let mut ms = DenseMatrix::zeros(x.nrows(), total);
229
230        s.copy_columns_from(0, x, 0, x.ncols());
231        s.copy_columns_from(x.ncols(), w, 0, ma);
232
233        ks.copy_columns_from(0, kx_buf, 0, x.ncols());
234        ks.copy_columns_from(x.ncols(), kw_buf, 0, ma);
235
236        ms.copy_columns_from(0, mx_buf, 0, x.ncols());
237        ms.copy_columns_from(x.ncols(), mw_buf, 0, ma);
238
239        (s, ks, ms)
240    }
241}
242
243/// Solver configuration.
244#[derive(Debug, Clone)]
245pub struct LobpcgConfig {
246    pub n_modes: usize,
247    pub tol: f64,
248    pub max_iter: usize,
249    /// Deflate the constant (DC) mode from the search space.
250    /// Appropriate for FEM (skip rigid-body modes), but should be disabled
251    /// for problems where near-zero eigenvalues are informative (e.g., graph Laplacians).
252    pub deflate_dc: bool,
253}
254
255impl LobpcgConfig {
256    pub fn new(n_modes: usize, tol: f64, max_iter: usize) -> Self {
257        Self {
258            n_modes,
259            tol,
260            max_iter,
261            deflate_dc: true,
262        }
263    }
264}
265
266/// Solve with DC deflation enabled (default, backwards-compatible).
267pub fn lobpcg(
268    k_mat: &CsrMat<f64>,
269    m_mat: &CsrMat<f64>,
270    n_modes: usize,
271    tol: f64,
272    max_iter: usize,
273    precond: &dyn Preconditioner,
274) -> LobpcgResult {
275    lobpcg_with_progress(k_mat, m_mat, n_modes, tol, max_iter, precond, |_, _| {})
276}
277
278/// Solve with explicit configuration.
279pub fn lobpcg_configured(
280    k_mat: &CsrMat<f64>,
281    m_mat: &CsrMat<f64>,
282    config: &LobpcgConfig,
283    precond: &dyn Preconditioner,
284    on_progress: impl FnMut(usize, usize),
285) -> LobpcgResult {
286    lobpcg_core(
287        k_mat,
288        m_mat,
289        precond,
290        on_progress,
291        LobpcgCoreConfig {
292            n_modes: config.n_modes,
293            tol: config.tol,
294            max_iter: config.max_iter,
295            do_deflate_dc: config.deflate_dc,
296        },
297    )
298}
299
300pub fn lobpcg_with_progress(
301    k_mat: &CsrMat<f64>,
302    m_mat: &CsrMat<f64>,
303    n_modes: usize,
304    tol: f64,
305    max_iter: usize,
306    precond: &dyn Preconditioner,
307    on_progress: impl FnMut(usize, usize),
308) -> LobpcgResult {
309    lobpcg_core(
310        k_mat,
311        m_mat,
312        precond,
313        on_progress,
314        LobpcgCoreConfig {
315            n_modes,
316            tol,
317            max_iter,
318            do_deflate_dc: true,
319        },
320    )
321}
322
323struct LobpcgCoreConfig {
324    n_modes: usize,
325    tol: f64,
326    max_iter: usize,
327    do_deflate_dc: bool,
328}
329
330fn symmetrize_in_place(mat: &mut DenseMatrix) {
331    for i in 0..mat.nrows() {
332        for j in (i + 1)..mat.ncols() {
333            let value = 0.5 * (mat[(i, j)] + mat[(j, i)]);
334            mat[(i, j)] = value;
335            mat[(j, i)] = value;
336        }
337    }
338}
339
340fn lobpcg_core(
341    k_mat: &CsrMat<f64>,
342    m_mat: &CsrMat<f64>,
343    precond: &dyn Preconditioner,
344    mut on_progress: impl FnMut(usize, usize),
345    config: LobpcgCoreConfig,
346) -> LobpcgResult {
347    let n = k_mat.nrows();
348    let max_subspace = n.saturating_sub(1).max(1);
349    let m = (config.n_modes + 5).min(max_subspace);
350    let _ = &mut on_progress;
351
352    let mut x = DenseMatrix::zeros(n, m);
353    for j in 0..m {
354        for i in 0..n {
355            x[(i, j)] = (((i + j * 997) as f64 * 0.618033988749895) % 1.0) - 0.5;
356        }
357    }
358
359    if config.do_deflate_dc {
360        deflate_dc(&mut x, m_mat);
361    }
362
363    let mut orth_buf = DenseMatrix::zeros(n, m);
364    assert!(
365        orthonormalize_b(&mut x, m_mat, &mut orth_buf),
366        "M-orthogonalization of initial vectors failed"
367    );
368
369    let mut kx_buf = DenseMatrix::zeros(n, m);
370    lobpcg_block_spmv_into(&mut kx_buf, k_mat, &x);
371    let mut lambda = vec![0.0; m];
372    for (j, lambda_j) in lambda.iter_mut().enumerate().take(m) {
373        *lambda_j = dot(x.column(j), kx_buf.column(j));
374    }
375
376    let mut p = DenseMatrix::zeros(n, m);
377    let mut kp = DenseMatrix::zeros(n, m);
378    let mut p_cols = 0usize;
379    let mut converged = vec![false; m];
380    let mut iter = 0;
381
382    let mut mx_buf = DenseMatrix::zeros(n, m);
383    let mut kw_buf = DenseMatrix::zeros(n, m);
384    let mut mw_buf = DenseMatrix::zeros(n, m);
385    let mut mp_buf = DenseMatrix::zeros(n, m);
386    for iteration in 0..config.max_iter {
387        iter = iteration + 1;
388
389        lobpcg_block_spmv_into(&mut kx_buf, k_mat, &x);
390        lobpcg_block_spmv_into(&mut mx_buf, m_mat, &x);
391        let mut r = DenseMatrix::zeros(n, m);
392        for j in 0..m {
393            for i in 0..n {
394                r[(i, j)] = kx_buf[(i, j)] - lambda[j] * mx_buf[(i, j)];
395            }
396        }
397
398        let mut all_converged = true;
399        let mut n_active = 0;
400        for j in 0..m {
401            let rnorm = norm(r.column(j));
402            let lnorm = lambda[j].abs().max(1e-15);
403            converged[j] = rnorm / lnorm < config.tol;
404            if !converged[j] {
405                all_converged = false;
406                n_active += 1;
407            }
408        }
409
410        if all_converged || n_active == 0 {
411            break;
412        }
413
414        let active_idx: Vec<usize> = (0..m).filter(|&j| !converged[j]).collect();
415        let ma = active_idx.len();
416
417        let r_active = select_columns(&r, &active_idx);
418
419        let mut w = precond.apply(&r_active);
420        if config.do_deflate_dc {
421            deflate_dc(&mut w, m_mat);
422        }
423
424        lobpcg_block_spmv_into_prefix(&mut mw_buf, m_mat, &w, w.ncols());
425        let mw_active = mw_buf.select_columns(&(0..w.ncols()).collect::<Vec<_>>());
426        let overlap = x.transpose_mul(&mw_active);
427        let x_overlap = x.mul(&overlap);
428        for col in 0..w.ncols() {
429            for row in 0..n {
430                w[(row, col)] -= x_overlap[(row, col)];
431            }
432        }
433
434        if !orthonormalize_b(&mut w, m_mat, &mut orth_buf) {
435            for j in 0..w.ncols() {
436                let mwj = lobpcg_spmv(m_mat, w.column(j));
437                let column_norm = dot(w.column(j), &mwj).sqrt();
438                if column_norm > 1e-15 {
439                    for value in w.column_mut(j) {
440                        *value /= column_norm;
441                    }
442                }
443            }
444        }
445
446        lobpcg_block_spmv_into_prefix(&mut kw_buf, k_mat, &w, w.ncols());
447        lobpcg_block_spmv_into_prefix(&mut mw_buf, m_mat, &w, w.ncols());
448        let active_p_cols = active_idx.len().min(p_cols);
449        if p_cols > 0 {
450            lobpcg_block_spmv_into_prefix(&mut mp_buf, m_mat, &p, active_p_cols);
451        }
452        let (s, ks, ms) = assemble_search_space(
453            SearchSpaceInputs {
454                x: &x,
455                w: &w,
456                p: &p,
457                kx_buf: &kx_buf,
458                kw_buf: &kw_buf,
459                kp: &kp,
460                mx_buf: &mx_buf,
461                mw_buf: &mw_buf,
462                mp_buf: &mp_buf,
463            },
464            active_p_cols,
465            ma,
466        );
467
468        let mut gram_a = s.transpose_mul(&ks);
469        let mut gram_b = s.transpose_mul(&ms);
470        symmetrize_in_place(&mut gram_a);
471        symmetrize_in_place(&mut gram_b);
472
473        if let Some(eigen) = symmetric_dense_kernel().generalized_symmetric_eigen(
474            &gram_a.to_row_major(),
475            &gram_b.to_row_major(),
476            gram_a.nrows(),
477        ) {
478            let eigenvectors =
479                DenseMatrix::from_row_major(gram_a.nrows(), gram_a.ncols(), &eigen.eigenvectors);
480
481            let mut eig_pairs: Vec<(f64, usize)> = eigen
482                .eigenvalues
483                .iter()
484                .enumerate()
485                .map(|(idx, &value)| (value, idx))
486                .collect();
487            eig_pairs.sort_by(|a, b| a.0.total_cmp(&b.0));
488
489            let take = m.min(eig_pairs.len());
490            for j in 0..take {
491                lambda[j] = eig_pairs[j].0;
492            }
493
494            let mut c_mat = DenseMatrix::zeros(s.ncols(), take);
495            for (j, pair) in eig_pairs.iter().take(take).enumerate() {
496                c_mat.set_column(j, eigenvectors.column(pair.1));
497            }
498
499            let x_old = x.clone();
500            let x_new = s.mul(&c_mat);
501            // p = x_new - x_old
502            for col in 0..take {
503                for row in 0..n {
504                    p[(row, col)] = x_new[(row, col)] - x_old[(row, col)];
505                }
506            }
507            x = x_new;
508            lobpcg_block_spmv_into_prefix(&mut kp, k_mat, &p, take);
509            p_cols = take;
510        } else {
511            // Fallback: gram_b is not positive-definite, use eigendecomposition
512            let n_sub = gram_b.nrows();
513            let eig_b = symmetric_dense_kernel().symmetric_eigen(&gram_b.to_row_major(), n_sub);
514            let eig_b_vecs = DenseMatrix::from_row_major(n_sub, n_sub, &eig_b.eigenvectors);
515
516            let lambda_max = eig_b.eigenvalues.iter().copied().fold(0.0f64, f64::max);
517            let tol_eig = n_sub as f64 * f64::EPSILON * lambda_max;
518
519            let keep: Vec<usize> = eig_b
520                .eigenvalues
521                .iter()
522                .enumerate()
523                .filter(|(_, &v)| v > tol_eig)
524                .map(|(i, _)| i)
525                .collect();
526            let rank = keep.len();
527
528            if rank == 0 {
529                p = DenseMatrix::zeros(n, 0);
530                kp = DenseMatrix::zeros(n, 0);
531                continue;
532            }
533
534            // Q_r: columns of eigenvectors corresponding to kept eigenvalues
535            let q_r = DenseMatrix::from_fn(n_sub, rank, |r, c| eig_b_vecs[(r, keep[c])]);
536
537            // T = diag(1/sqrt(lambda_keep)) * Q_r^T  (rank x n_sub)
538            let mut t_mat = DenseMatrix::zeros(rank, n_sub);
539            for (i, &keep_idx) in keep.iter().enumerate().take(rank) {
540                let scale = 1.0 / eig_b.eigenvalues[keep_idx].sqrt();
541                for col in 0..n_sub {
542                    t_mat[(i, col)] = q_r[(col, i)] * scale;
543                }
544            }
545
546            let mut reduced_a = t_mat.mul(&gram_a).mul(&t_mat.transpose());
547            symmetrize_in_place(&mut reduced_a);
548            let eig_reduced =
549                symmetric_dense_kernel().symmetric_eigen(&reduced_a.to_row_major(), rank);
550            let eig_reduced_vecs =
551                DenseMatrix::from_row_major(rank, rank, &eig_reduced.eigenvectors);
552
553            let mut eig_pairs: Vec<(f64, usize)> = eig_reduced
554                .eigenvalues
555                .iter()
556                .enumerate()
557                .map(|(idx, &value)| (value, idx))
558                .collect();
559            eig_pairs.sort_by(|a, b| a.0.total_cmp(&b.0));
560
561            let take = m.min(rank);
562
563            // Back-transform: c_j = T^T * z_j
564            let mut c_mat = DenseMatrix::zeros(n_sub, take);
565            for (j, pair) in eig_pairs.iter().take(take).enumerate() {
566                let c = t_mat
567                    .transpose()
568                    .mul_vector(eig_reduced_vecs.column(pair.1));
569                c_mat.set_column(j, &c);
570            }
571
572            let x_new = s.mul(&c_mat);
573
574            if take == m {
575                for j in 0..take {
576                    lambda[j] = eig_pairs[j].0;
577                }
578                for col in 0..take {
579                    for row in 0..n {
580                        p[(row, col)] = x_new[(row, col)] - x[(row, col)];
581                    }
582                }
583                lobpcg_block_spmv_into_prefix(&mut kp, k_mat, &p, take);
584                x = x_new;
585                p_cols = take;
586            } else {
587                // Rank-deficient: partial update with P reset
588                for j in 0..take {
589                    lambda[j] = eig_pairs[j].0;
590                }
591                x.copy_columns_from(0, &x_new, 0, take);
592                if !orthonormalize_b(&mut x, m_mat, &mut orth_buf) {
593                    p_cols = 0;
594                    continue;
595                }
596                lobpcg_block_spmv_into(&mut kx_buf, k_mat, &x);
597                for (j, lambda_j) in lambda.iter_mut().enumerate().take(m) {
598                    *lambda_j = dot(x.column(j), kx_buf.column(j));
599                }
600                p_cols = 0;
601            }
602        }
603
604        if iteration % 10 == 9 {
605            if config.do_deflate_dc {
606                deflate_dc(&mut x, m_mat);
607            }
608            if !orthonormalize_b(&mut x, m_mat, &mut orth_buf) {
609                p_cols = 0;
610            }
611        }
612    }
613
614    let _ = orthonormalize_b(&mut x, m_mat, &mut orth_buf);
615
616    lobpcg_block_spmv_into(&mut kx_buf, k_mat, &x);
617    for (j, lambda_j) in lambda.iter_mut().enumerate().take(m) {
618        *lambda_j = dot(x.column(j), kx_buf.column(j));
619    }
620
621    let mut pairs: Vec<(f64, usize)> = lambda.iter().enumerate().map(|(i, &v)| (v, i)).collect();
622    pairs.sort_by(|a, b| a.0.total_cmp(&b.0));
623
624    let take = config.n_modes.min(pairs.len());
625    let eigenvalues: Vec<f64> = pairs[..take].iter().map(|pair| pair.0).collect();
626    let mut eigenvectors = DenseMatrix::zeros(take, n);
627    for (mode_idx, &(_, col_idx)) in pairs[..take].iter().enumerate() {
628        for i in 0..n {
629            eigenvectors[(mode_idx, i)] = x[(i, col_idx)];
630        }
631    }
632
633    LobpcgResult {
634        eigenvalues,
635        eigenvectors,
636        iterations: iter,
637    }
638}
639
640#[cfg(test)]
641mod tests {
642    use super::*;
643    use neco_sparse::CooMat;
644
645    fn diag(values: &[f64]) -> CsrMat<f64> {
646        let mut coo = CooMat::new(values.len(), values.len());
647        for (i, value) in values.iter().enumerate() {
648            coo.push(i, i, *value);
649        }
650        CsrMat::from(&coo)
651    }
652
653    fn permuted_diag(values: &[f64], perm: &[usize]) -> CsrMat<f64> {
654        let mut coo = CooMat::new(values.len(), values.len());
655        for (i, &src) in perm.iter().enumerate() {
656            coo.push(i, i, values[src]);
657        }
658        CsrMat::from(&coo)
659    }
660
661    fn row_as_vec(mat: &DenseMatrix, row: usize) -> Vec<f64> {
662        let n = mat.ncols();
663        let mut v = vec![0.0; n];
664        for i in 0..n {
665            v[i] = mat[(row, i)];
666        }
667        v
668    }
669
670    fn csr_matvec(a: &CsrMat<f64>, x: &[f64]) -> Vec<f64> {
671        let mut y = vec![0.0; a.nrows()];
672        for (i, y_i) in y.iter_mut().enumerate().take(a.nrows()) {
673            let row = a.row(i);
674            let mut sum = 0.0;
675            for (&col, &val) in row.col_indices().iter().zip(row.values().iter()) {
676                sum += val * x[col];
677            }
678            *y_i = sum;
679        }
680        y
681    }
682
683    fn residual_norm(k: &CsrMat<f64>, m: &CsrMat<f64>, lambda: f64, x: &[f64]) -> f64 {
684        let kx = csr_matvec(k, x);
685        let mx = csr_matvec(m, x);
686        let mut num_sq = 0.0;
687        let mut den_sq = 0.0;
688        for i in 0..x.len() {
689            let diff = kx[i] - lambda * mx[i];
690            num_sq += diff * diff;
691            den_sq += kx[i] * kx[i];
692        }
693        num_sq.sqrt() / den_sq.sqrt().max(1e-15)
694    }
695
696    fn max_m_orth_error(eigenvectors: &DenseMatrix, m: &CsrMat<f64>) -> f64 {
697        let n_modes = eigenvectors.nrows();
698        let mut max_err: f64 = 0.0;
699        for i in 0..n_modes {
700            let xi = row_as_vec(eigenvectors, i);
701            let m_xi = csr_matvec(m, &xi);
702            for j in 0..n_modes {
703                let xj = row_as_vec(eigenvectors, j);
704                let val = dot(&xj, &m_xi);
705                let target = if i == j { 1.0 } else { 0.0 };
706                max_err = max_err.max((val - target).abs());
707            }
708        }
709        max_err
710    }
711
712    fn sort_values(mut values: Vec<f64>) -> Vec<f64> {
713        values.sort_by(f64::total_cmp);
714        values
715    }
716
717    #[test]
718    fn jacobi_preconditioner_uses_inverse_diagonal() {
719        let k = diag(&[2.0, 4.0]);
720        let precond = JacobiPreconditioner::new(&k);
721        let r = DenseMatrix::from_row_slice(2, 1, &[2.0, 8.0]);
722        let w = precond.apply(&r);
723        assert!((w[(0, 0)] - 1.0).abs() < 1e-12);
724        assert!((w[(1, 0)] - 2.0).abs() < 1e-12);
725    }
726
727    #[test]
728    fn lobpcg_smoke_test_returns_requested_mode_count() {
729        let vals: Vec<f64> = (1..=30).map(|i| i as f64).collect();
730        let k = diag(&vals);
731        let m = CsrMat::identity(30);
732        let precond = JacobiPreconditioner::new(&k);
733        let result = lobpcg(&k, &m, 2, 1e-8, 200, &precond);
734        assert_eq!(result.eigenvalues.len(), 2);
735        assert_eq!(result.eigenvectors.nrows(), 2);
736        assert_eq!(result.eigenvectors.ncols(), 30);
737        assert!(result.eigenvalues.iter().all(|v| v.is_finite()));
738    }
739
740    #[test]
741    fn lobpcg_solves_diagonal_generalized_problem() {
742        // n=30 to ensure 3m < n (LOBPCG requires subspace [X,W,P] fits in R^n).
743        // n=12 with m=7 gives 3*7=21 > 12 → rank-deficient Gram matrix.
744        let vals: Vec<f64> = (1..=30).map(|i| i as f64).collect();
745        let k = diag(&vals);
746        let m = CsrMat::identity(30);
747        let precond = JacobiPreconditioner::new(&k);
748        let config = LobpcgConfig {
749            n_modes: 2,
750            tol: 1e-8,
751            max_iter: 200,
752            deflate_dc: false,
753        };
754        let result = lobpcg_configured(&k, &m, &config, &precond, |_, _| {});
755        assert!(
756            (result.eigenvalues[0] - 1.0).abs() < 1e-6,
757            "got {}",
758            result.eigenvalues[0]
759        );
760        assert!(
761            (result.eigenvalues[1] - 2.0).abs() < 1e-6,
762            "got {}",
763            result.eigenvalues[1]
764        );
765    }
766
767    #[test]
768    fn lobpcg_rank_deficient_fallback_converges() {
769        // n=12, n_modes=2 → m=min(7,11)=7
770        // Subspace S=[X(7),W,P] can have up to 21 columns > 12
771        // → gram_b becomes rank deficient → fallback path is triggered
772        let vals: Vec<f64> = (1..=12).map(|i| i as f64).collect();
773        let k = diag(&vals);
774        let m = CsrMat::identity(12);
775        let precond = JacobiPreconditioner::new(&k);
776        let config = LobpcgConfig {
777            n_modes: 2,
778            tol: 1e-8,
779            max_iter: 200,
780            deflate_dc: false,
781        };
782        let result = lobpcg_configured(&k, &m, &config, &precond, |_, _| {});
783        assert!(
784            (result.eigenvalues[0] - 1.0).abs() < 1e-6,
785            "got {}",
786            result.eigenvalues[0]
787        );
788        assert!(
789            (result.eigenvalues[1] - 2.0).abs() < 1e-6,
790            "got {}",
791            result.eigenvalues[1]
792        );
793        assert!(
794            result.iterations < 200,
795            "did not converge: {} iterations",
796            result.iterations
797        );
798    }
799
800    #[test]
801    fn lobpcg_residual_and_m_orthogonality_are_small() {
802        let vals: Vec<f64> = (1..=40).map(|i| i as f64).collect();
803        let k = diag(&vals);
804        let m = CsrMat::identity(vals.len());
805        let precond = JacobiPreconditioner::new(&k);
806        let config = LobpcgConfig {
807            n_modes: 3,
808            tol: 1e-9,
809            max_iter: 200,
810            deflate_dc: false,
811        };
812        let result = lobpcg_configured(&k, &m, &config, &precond, |_, _| {});
813
814        let expected = [1.0, 2.0, 3.0];
815        for (i, &e) in expected.iter().enumerate() {
816            assert!(
817                (result.eigenvalues[i] - e).abs() < 1e-6,
818                "eig[{i}]={}, expected={e}",
819                result.eigenvalues[i]
820            );
821        }
822
823        let orth_err = max_m_orth_error(&result.eigenvectors, &m);
824        assert!(orth_err < 1e-6, "orth_err={orth_err}");
825
826        for mode in 0..result.eigenvalues.len() {
827            let x = row_as_vec(&result.eigenvectors, mode);
828            let res = residual_norm(&k, &m, result.eigenvalues[mode], &x);
829            assert!(res < 1e-8, "mode={mode}, residual={res}");
830        }
831    }
832
833    #[test]
834    fn lobpcg_permutation_similarity_keeps_eigenvalues() {
835        let vals: Vec<f64> = (1..=50).map(|i| i as f64).collect();
836        let perm: Vec<usize> = (0..vals.len()).rev().collect();
837        let k = diag(&vals);
838        let kp = permuted_diag(&vals, &perm);
839        let m = CsrMat::identity(vals.len());
840        let precond_k = JacobiPreconditioner::new(&k);
841        let precond_kp = JacobiPreconditioner::new(&kp);
842        let config = LobpcgConfig {
843            n_modes: 4,
844            tol: 1e-9,
845            max_iter: 200,
846            deflate_dc: false,
847        };
848
849        let base = lobpcg_configured(&k, &m, &config, &precond_k, |_, _| {});
850        let permuted = lobpcg_configured(&kp, &m, &config, &precond_kp, |_, _| {});
851        let a = sort_values(base.eigenvalues);
852        let b = sort_values(permuted.eigenvalues);
853
854        for i in 0..a.len() {
855            assert!((a[i] - b[i]).abs() < 1e-6, "i={i}, {} vs {}", a[i], b[i]);
856        }
857    }
858
859    #[test]
860    fn lobpcg_deflate_dc_false_keeps_near_zero_mode() {
861        let n = 32usize;
862        let mut coo = CooMat::new(n, n);
863        for i in 0..n {
864            let mut diag = 0.0;
865            if i > 0 {
866                coo.push(i, i - 1, -1.0);
867                diag += 1.0;
868            }
869            if i + 1 < n {
870                coo.push(i, i + 1, -1.0);
871                diag += 1.0;
872            }
873            coo.push(i, i, diag);
874        }
875        let k = CsrMat::from(&coo);
876        let m = CsrMat::identity(n);
877        let precond = JacobiPreconditioner::new(&k);
878        let config = LobpcgConfig {
879            n_modes: 2,
880            tol: 1e-8,
881            max_iter: 300,
882            deflate_dc: false,
883        };
884        let result = lobpcg_configured(&k, &m, &config, &precond, |_, _| {});
885        let values = sort_values(result.eigenvalues);
886        assert!(
887            values[0].abs() < 1e-6,
888            "first eigenvalue should be near zero"
889        );
890    }
891}