// oxicuda_solver/helpers/condition.rs

use oxicuda_blas::GpuFloat;
use oxicuda_memory::DeviceBuffer;

use crate::dense::lu;
use crate::error::{SolverError, SolverResult};
use crate::handle::SolverHandle;
/// Selects which matrix norm is used for condition-number estimation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NormType {
    /// 1-norm: maximum absolute column sum.
    One,
    /// Infinity-norm: maximum absolute row sum.
    Infinity,
}
23
24#[allow(dead_code)]
50pub fn condition_number_estimate<T: GpuFloat>(
51 handle: &mut SolverHandle,
52 a: &DeviceBuffer<T>,
53 n: u32,
54 lda: u32,
55 norm_type: NormType,
56) -> SolverResult<f64> {
57 if n == 0 {
58 return Err(SolverError::DimensionMismatch(
59 "condition_number_estimate: n must be > 0".into(),
60 ));
61 }
62
63 let required = n as usize * lda as usize;
64 if a.len() < required {
65 return Err(SolverError::DimensionMismatch(format!(
66 "condition_number_estimate: buffer too small ({} < {})",
67 a.len(),
68 required
69 )));
70 }
71
72 let a_norm = compute_matrix_norm::<T>(handle, a, n, lda, norm_type)?;
74
75 let ainv_norm_estimate = estimate_inverse_norm_hager::<T>(handle, a, n, lda, norm_type)?;
78
79 Ok(a_norm * ainv_norm_estimate)
80}
81
82fn t_to_f64<T: GpuFloat>(val: T) -> f64 {
87 if T::SIZE == 8 {
88 f64::from_bits(val.to_bits_u64())
89 } else {
90 f64::from(f32::from_bits(val.to_bits_u64() as u32))
91 }
92}
93
94fn compute_matrix_norm<T: GpuFloat>(
102 _handle: &mut SolverHandle,
103 a: &DeviceBuffer<T>,
104 n: u32,
105 lda: u32,
106 norm_type: NormType,
107) -> SolverResult<f64> {
108 let n_usize = n as usize;
109 let lda_usize = lda as usize;
110 let total = lda_usize * n_usize;
111 let mut host = vec![T::gpu_zero(); total];
112 a.copy_to_host(&mut host).map_err(|e| {
113 SolverError::InternalError(format!("compute_matrix_norm copy_to_host failed: {e}"))
114 })?;
115
116 let norm = match norm_type {
117 NormType::One => {
118 (0..n_usize)
120 .map(|j| {
121 (0..n_usize)
122 .map(|i| t_to_f64(host[j * lda_usize + i]).abs())
123 .sum::<f64>()
124 })
125 .fold(0.0_f64, f64::max)
126 }
127 NormType::Infinity => {
128 (0..n_usize)
130 .map(|i| {
131 (0..n_usize)
132 .map(|j| t_to_f64(host[j * lda_usize + i]).abs())
133 .sum::<f64>()
134 })
135 .fold(0.0_f64, f64::max)
136 }
137 };
138 Ok(norm)
139}
140
141fn estimate_inverse_norm_hager<T: GpuFloat>(
156 handle: &mut SolverHandle,
157 a: &DeviceBuffer<T>,
158 n: u32,
159 lda: u32,
160 _norm_type: NormType,
161) -> SolverResult<f64> {
162 let n_usize = n as usize;
163 let lda_usize = lda as usize;
164 const MAX_ITER: usize = 5;
165 const CONV_TOL: f64 = 0.95;
166
167 let mut lu_host = vec![T::gpu_zero(); lda_usize * n_usize];
169 a.copy_to_host(&mut lu_host).map_err(|e| {
170 SolverError::InternalError(format!(
171 "estimate_inverse_norm_hager: copy_from_device failed: {e}"
172 ))
173 })?;
174
175 let mut lu_device = DeviceBuffer::<T>::alloc(n_usize * lda_usize).map_err(|e| {
177 SolverError::InternalError(format!("estimate_inverse_norm_hager: alloc LU buffer: {e}"))
178 })?;
179 lu_device.copy_from_host(&lu_host).map_err(|e| {
180 SolverError::InternalError(format!(
181 "estimate_inverse_norm_hager: copy to device failed: {e}"
182 ))
183 })?;
184
185 let mut pivots = DeviceBuffer::<i32>::alloc(n_usize).map_err(|e| {
186 SolverError::InternalError(format!("estimate_inverse_norm_hager: alloc pivots: {e}"))
187 })?;
188
189 let lu_result = lu::lu_factorize(handle, &mut lu_device, n, lda, &mut pivots)?;
190 if lu_result.info != 0 {
191 return Err(SolverError::InternalError(format!(
192 "estimate_inverse_norm_hager: LU factorization failed (info={})",
193 lu_result.info
194 )));
195 }
196
197 let init_val = 1.0 / (n_usize as f64);
199 let mut x = vec![init_val; n_usize];
200 let mut best_estimate = 0.0_f64;
201
202 for _iter in 0..MAX_ITER {
203 let mut w_host = x
205 .iter()
206 .map(|&v| {
207 if T::SIZE == 8 {
209 T::from_bits_u64(v.to_bits())
210 } else {
211 T::from_bits_u64(u64::from((v as f32).to_bits()))
212 }
213 })
214 .collect::<Vec<_>>();
215 let mut w_device = DeviceBuffer::<T>::alloc(n_usize).map_err(|e| {
216 SolverError::InternalError(format!("estimate_inverse_norm_hager: alloc w: {e}"))
217 })?;
218 w_device.copy_from_host(&w_host).map_err(|e| {
219 SolverError::InternalError(format!(
220 "estimate_inverse_norm_hager: copy w to device: {e}"
221 ))
222 })?;
223
224 lu::lu_solve(handle, &lu_device, &pivots, &mut w_device, n, 1)?;
225 w_device.copy_to_host(&mut w_host).map_err(|e| {
226 SolverError::InternalError(format!(
227 "estimate_inverse_norm_hager: copy w from device: {e}"
228 ))
229 })?;
230
231 let w_norm_1 = w_host.iter().map(|&v| t_to_f64(v).abs()).sum::<f64>();
233
234 if w_norm_1 <= CONV_TOL * best_estimate {
236 best_estimate = w_norm_1;
237 break;
238 }
239 best_estimate = w_norm_1;
240
241 let zeta = w_host
243 .iter()
244 .map(|&v| {
245 let fv = t_to_f64(v);
246 if fv > 0.0 {
247 if T::SIZE == 8 {
249 T::from_bits_u64(1.0_f64.to_bits())
250 } else {
251 T::from_bits_u64(u64::from((1.0_f32).to_bits()))
252 }
253 } else if fv < 0.0 {
254 if T::SIZE == 8 {
255 T::from_bits_u64((-1.0_f64).to_bits())
256 } else {
257 T::from_bits_u64(u64::from((-1.0_f32).to_bits()))
258 }
259 } else {
260 T::gpu_zero()
261 }
262 })
263 .collect::<Vec<_>>();
264
265 let mut z = zeta.clone();
267 let mut z_device = DeviceBuffer::<T>::alloc(n_usize).map_err(|e| {
268 SolverError::InternalError(format!("estimate_inverse_norm_hager: alloc z: {e}"))
269 })?;
270 z_device.copy_from_host(&z).map_err(|e| {
271 SolverError::InternalError(format!(
272 "estimate_inverse_norm_hager: copy z to device: {e}"
273 ))
274 })?;
275
276 lu::lu_solve(handle, &lu_device, &pivots, &mut z_device, n, 1)?;
280
281 z_device.copy_to_host(&mut z).map_err(|e| {
282 SolverError::InternalError(format!(
283 "estimate_inverse_norm_hager: copy z from device: {e}"
284 ))
285 })?;
286
287 let (j_max, z_inf_norm) = z
289 .iter()
290 .enumerate()
291 .map(|(i, &v)| (i, t_to_f64(v).abs()))
292 .fold((0, 0.0_f64), |(i_max, max_so_far), (i, norm)| {
293 if norm > max_so_far {
294 (i, norm)
295 } else {
296 (i_max, max_so_far)
297 }
298 });
299
300 let z_dot_x = z
302 .iter()
303 .zip(x.iter())
304 .map(|(&zi, &xi)| t_to_f64(zi) * xi)
305 .sum::<f64>();
306
307 if z_inf_norm <= z_dot_x {
308 break;
309 }
310
311 x.iter_mut().for_each(|xi| *xi = 0.0);
313 x[j_max] = 1.0;
314 }
315
316 Ok(best_estimate)
317}
318
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn norm_type_equality() {
        let (a, b) = (NormType::One, NormType::Infinity);
        assert_eq!(a, NormType::One);
        assert_ne!(a, b);
    }

    #[test]
    fn norm_type_debug() {
        assert!(format!("{:?}", NormType::Infinity).contains("Infinity"));
    }

    #[test]
    fn t_to_f64_for_f64_identity() {
        // 8-byte path must round-trip the bit pattern exactly.
        let val = std::f64::consts::PI;
        let converted = t_to_f64(val);
        let diff = (converted - val).abs();
        assert!(
            diff < 1e-15,
            "t_to_f64 for f64 must be identity, got {converted} expected {val}"
        );
    }

    #[test]
    fn t_to_f64_for_f32_widening() {
        // 4-byte path widens through f32 bits.
        let val = std::f32::consts::E;
        let converted = t_to_f64(val);
        let expected = f64::from(val);
        let diff = (converted - expected).abs();
        assert!(
            diff < 1e-6,
            "t_to_f64 for f32 must widen correctly, got {converted} expected {expected}"
        );
    }

    #[test]
    fn t_to_f64_zero() {
        for z in [t_to_f64(0.0_f64), t_to_f64(0.0_f32)] {
            assert_eq!(z, 0.0_f64);
        }
    }

    #[test]
    fn t_to_f64_negative() {
        assert!((t_to_f64(-42.0_f64) + 42.0_f64).abs() < 1e-15);

        let result = t_to_f64(-1.5_f32);
        assert!(
            (result + 1.5_f64).abs() < 1e-6,
            "t_to_f64(-1.5f32) = {result}, expected -1.5"
        );
    }

    #[test]
    fn norm_type_variants_distinct() {
        assert_ne!(
            NormType::One,
            NormType::Infinity,
            "NormType variants must be distinct"
        );
    }

    #[test]
    fn norm_type_clone() {
        // Copy semantics: the original stays usable after the copy.
        let original = NormType::Infinity;
        let cloned = original;
        assert_eq!(original, cloned);
    }

    #[test]
    fn norm_type_one_debug() {
        let s = format!("{:?}", NormType::One);
        assert!(
            s.contains("One"),
            "NormType::One debug must contain 'One', got '{s}'"
        );
    }
}