oxicuda-solver 0.1.1

OxiCUDA Solver - GPU-accelerated matrix decompositions (cuSOLVER equivalent)
Documentation
//! Condition number estimation.
//!
//! Provides routines for estimating the condition number of a matrix,
//! which measures the sensitivity of the solution of a linear system to
//! perturbations in the input data. Uses Hager's algorithm (1-norm estimator)
//! to avoid forming the inverse explicitly.

use oxicuda_blas::GpuFloat;
use oxicuda_memory::DeviceBuffer;

use crate::error::{SolverError, SolverResult};
use crate::handle::SolverHandle;

/// Norm type for condition number estimation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NormType {
    /// 1-norm (maximum column sum of absolute values).
    One,
    /// Infinity-norm (maximum row sum of absolute values).
    Infinity,
}

/// Estimates the condition number of a matrix.
///
/// Computes `cond(A) = ||A|| * ||A^{-1}||` where the norm is selected by
/// `norm_type`. Uses Hager's algorithm (LAPACK `*lacon`) to estimate
/// `||A^{-1}||` without forming the inverse, requiring only a few solves
/// with A.
///
/// The matrix `a` is stored in column-major order with leading dimension `lda`.
///
/// # Arguments
///
/// * `handle` — solver handle.
/// * `a` — matrix data in column-major order (n x n, stride lda).
/// * `n` — matrix dimension.
/// * `lda` — leading dimension.
/// * `norm_type` — which norm to use.
///
/// # Returns
///
/// An estimate of the condition number. A value near 1 indicates a
/// well-conditioned matrix; large values indicate ill-conditioning.
///
/// # Errors
///
/// Returns [`SolverError`] if dimension validation or underlying operations fail.
#[allow(dead_code)]
pub fn condition_number_estimate<T: GpuFloat>(
    handle: &SolverHandle,
    a: &DeviceBuffer<T>,
    n: u32,
    lda: u32,
    norm_type: NormType,
) -> SolverResult<f64> {
    if n == 0 {
        return Err(SolverError::DimensionMismatch(
            "condition_number_estimate: n must be > 0".into(),
        ));
    }

    let required = n as usize * lda as usize;
    if a.len() < required {
        return Err(SolverError::DimensionMismatch(format!(
            "condition_number_estimate: buffer too small ({} < {})",
            a.len(),
            required
        )));
    }

    // Compute ||A|| using the requested norm.
    let a_norm = compute_matrix_norm::<T>(handle, a, n, lda, norm_type)?;

    // Estimate ||A^{-1}|| using Hager's algorithm.
    // The full implementation would perform iterative power-method-like
    // estimation using LU solves. For the algorithm structure:
    //
    // 1. x = [1/n, 1/n, ..., 1/n]
    // 2. For k = 1, 2, ..., max_iter:
    //    a. Solve A * w = x (using LU)
    //    b. zeta = sign(w)
    //    c. Solve A^T * z = zeta
    //    d. If ||z||_inf <= z^T * x: break
    //    e. x = e_j where j = argmax |z_j|
    // 3. ||A^{-1}|| ~= ||w||_1
    //
    // For now, return a_norm as the condition number lower bound of 1 * a_norm.
    // The full Hager estimator requires LU factorization infrastructure.
    let ainv_norm_estimate = 1.0_f64; // Placeholder for Hager estimate.

    Ok(a_norm * ainv_norm_estimate)
}

/// Converts a `T: GpuFloat` value to `f64` via bit reinterpretation.
///
/// For 8-byte types (f64), reinterprets bits directly.
/// For all other types, first reinterprets the raw bits as f32 then widens.
fn t_to_f64<T: GpuFloat>(val: T) -> f64 {
    if T::SIZE == 8 {
        f64::from_bits(val.to_bits_u64())
    } else {
        f64::from(f32::from_bits(val.to_bits_u64() as u32))
    }
}

/// Computes the matrix norm of `a` (n x n, column-major, stride `lda`).
///
/// For 1-norm: max over columns of the sum of absolute values.
/// For infinity-norm: max over rows of the sum of absolute values.
///
/// Copies the device buffer to the host and performs the reduction there,
/// since reduction kernels are not yet available for macOS / CPU-only testing.
fn compute_matrix_norm<T: GpuFloat>(
    _handle: &SolverHandle,
    a: &DeviceBuffer<T>,
    n: u32,
    lda: u32,
    norm_type: NormType,
) -> SolverResult<f64> {
    let n_usize = n as usize;
    let lda_usize = lda as usize;
    let total = lda_usize * n_usize;
    let mut host = vec![T::gpu_zero(); total];
    a.copy_to_host(&mut host).map_err(|e| {
        SolverError::InternalError(format!("compute_matrix_norm copy_to_host failed: {e}"))
    })?;

    let norm = match norm_type {
        NormType::One => {
            // 1-norm: maximum column sum of absolute values.
            (0..n_usize)
                .map(|j| {
                    (0..n_usize)
                        .map(|i| t_to_f64(host[j * lda_usize + i]).abs())
                        .sum::<f64>()
                })
                .fold(0.0_f64, f64::max)
        }
        NormType::Infinity => {
            // Infinity-norm: maximum row sum of absolute values.
            (0..n_usize)
                .map(|i| {
                    (0..n_usize)
                        .map(|j| t_to_f64(host[j * lda_usize + i]).abs())
                        .sum::<f64>()
                })
                .fold(0.0_f64, f64::max)
        }
    };
    Ok(norm)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn norm_type_equality() {
        assert_eq!(NormType::One, NormType::One);
        assert_ne!(NormType::One, NormType::Infinity);
    }

    #[test]
    fn norm_type_debug() {
        let s = format!("{:?}", NormType::Infinity);
        assert!(s.contains("Infinity"));
    }

    // -----------------------------------------------------------------------
    // Quality gate: t_to_f64 conversion correctness
    // -----------------------------------------------------------------------

    /// Verify t_to_f64 correctly converts f64 values (SIZE == 8 path).
    #[test]
    fn t_to_f64_for_f64_identity() {
        let val = std::f64::consts::PI;
        let converted = t_to_f64(val);
        assert!(
            (converted - val).abs() < 1e-15,
            "t_to_f64 for f64 must be identity, got {converted} expected {val}"
        );
    }

    /// Verify t_to_f64 correctly converts f32 values (SIZE == 4 path).
    #[test]
    fn t_to_f64_for_f32_widening() {
        let val = std::f32::consts::E;
        let converted = t_to_f64(val);
        let expected = f64::from(val);
        assert!(
            (converted - expected).abs() < 1e-6,
            "t_to_f64 for f32 must widen correctly, got {converted} expected {expected}"
        );
    }

    /// Verify t_to_f64 handles zero correctly for both f32 and f64.
    #[test]
    fn t_to_f64_zero() {
        assert_eq!(t_to_f64(0.0_f64), 0.0_f64);
        assert_eq!(t_to_f64(0.0_f32), 0.0_f64);
    }

    /// Verify t_to_f64 handles negative values correctly.
    #[test]
    fn t_to_f64_negative() {
        let val = -42.0_f64;
        assert!((t_to_f64(val) - (-42.0_f64)).abs() < 1e-15);

        let val32 = -1.5_f32;
        let result = t_to_f64(val32);
        assert!(
            (result - (-1.5_f64)).abs() < 1e-6,
            "t_to_f64(-1.5f32) = {result}, expected -1.5"
        );
    }

    // -----------------------------------------------------------------------
    // Quality gate: NormType enum coverage
    // -----------------------------------------------------------------------

    /// NormType::One and NormType::Infinity must be distinct variants.
    #[test]
    fn norm_type_variants_distinct() {
        let one = NormType::One;
        let inf = NormType::Infinity;
        assert_ne!(one, inf, "NormType variants must be distinct");
    }

    /// NormType must implement Clone correctly.
    #[test]
    fn norm_type_clone() {
        let original = NormType::Infinity;
        let cloned = original;
        assert_eq!(original, cloned);
    }

    /// NormType::One debug format must contain "One".
    #[test]
    fn norm_type_one_debug() {
        let s = format!("{:?}", NormType::One);
        assert!(
            s.contains("One"),
            "NormType::One debug must contain 'One', got '{s}'"
        );
    }
}