// oxicuda_solver/helpers/condition.rs

use oxicuda_blas::GpuFloat;
use oxicuda_memory::DeviceBuffer;

use crate::dense::lu;
use crate::error::{SolverError, SolverResult};
use crate::handle::SolverHandle;
/// Selects which matrix norm is used for condition-number estimation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NormType {
    /// 1-norm: maximum absolute column sum.
    One,
    /// Infinity-norm: maximum absolute row sum.
    Infinity,
}
23
24#[allow(dead_code)]
50pub fn condition_number_estimate<T: GpuFloat>(
51 handle: &mut SolverHandle,
52 a: &DeviceBuffer<T>,
53 n: u32,
54 lda: u32,
55 norm_type: NormType,
56) -> SolverResult<f64> {
57 if n == 0 {
58 return Err(SolverError::DimensionMismatch(
59 "condition_number_estimate: n must be > 0".into(),
60 ));
61 }
62
63 let required = n as usize * lda as usize;
64 if a.len() < required {
65 return Err(SolverError::DimensionMismatch(format!(
66 "condition_number_estimate: buffer too small ({} < {})",
67 a.len(),
68 required
69 )));
70 }
71
72 let a_norm = compute_matrix_norm::<T>(handle, a, n, lda, norm_type)?;
74
75 let ainv_norm_estimate = estimate_inverse_norm_hager::<T>(handle, a, n, lda, norm_type)?;
78
79 Ok(a_norm * ainv_norm_estimate)
80}
81
82fn t_to_f64<T: GpuFloat>(val: T) -> f64 {
87 if T::SIZE == 8 {
88 f64::from_bits(val.to_bits_u64())
89 } else {
90 f64::from(f32::from_bits(val.to_bits_u64() as u32))
91 }
92}
93
94fn compute_matrix_norm<T: GpuFloat>(
102 _handle: &mut SolverHandle,
103 a: &DeviceBuffer<T>,
104 n: u32,
105 lda: u32,
106 norm_type: NormType,
107) -> SolverResult<f64> {
108 let n_usize = n as usize;
109 let lda_usize = lda as usize;
110 let total = lda_usize * n_usize;
111 let mut host = vec![T::gpu_zero(); total];
112 a.copy_to_host(&mut host).map_err(|e| {
113 SolverError::InternalError(format!("compute_matrix_norm copy_to_host failed: {e}"))
114 })?;
115
116 let norm = match norm_type {
117 NormType::One => {
118 (0..n_usize)
120 .map(|j| {
121 (0..n_usize)
122 .map(|i| t_to_f64(host[j * lda_usize + i]).abs())
123 .sum::<f64>()
124 })
125 .fold(0.0_f64, f64::max)
126 }
127 NormType::Infinity => {
128 (0..n_usize)
130 .map(|i| {
131 (0..n_usize)
132 .map(|j| t_to_f64(host[j * lda_usize + i]).abs())
133 .sum::<f64>()
134 })
135 .fold(0.0_f64, f64::max)
136 }
137 };
138 Ok(norm)
139}
140
141fn estimate_inverse_norm_hager<T: GpuFloat>(
156 handle: &mut SolverHandle,
157 a: &DeviceBuffer<T>,
158 n: u32,
159 lda: u32,
160 _norm_type: NormType,
161) -> SolverResult<f64> {
162 let n_usize = n as usize;
163 let lda_usize = lda as usize;
164 const MAX_ITER: usize = 5;
165 const CONV_TOL: f64 = 0.95;
166
167 let mut lu_host = vec![T::gpu_zero(); lda_usize * n_usize];
169 a.copy_to_host(&mut lu_host).map_err(|e| {
170 SolverError::InternalError(format!(
171 "estimate_inverse_norm_hager: copy_from_device failed: {e}"
172 ))
173 })?;
174
175 let mut lu_device = DeviceBuffer::<T>::alloc(n_usize * lda_usize).map_err(|e| {
177 SolverError::InternalError(format!("estimate_inverse_norm_hager: alloc LU buffer: {e}"))
178 })?;
179 lu_device.copy_from_host(&lu_host).map_err(|e| {
180 SolverError::InternalError(format!(
181 "estimate_inverse_norm_hager: copy to device failed: {e}"
182 ))
183 })?;
184
185 let mut pivots = DeviceBuffer::<i32>::alloc(n_usize).map_err(|e| {
186 SolverError::InternalError(format!("estimate_inverse_norm_hager: alloc pivots: {e}"))
187 })?;
188
189 let lu_result = lu::lu_factorize(handle, &mut lu_device, n, lda, &mut pivots)?;
190 if lu_result.info != 0 {
191 return Err(SolverError::InternalError(format!(
192 "estimate_inverse_norm_hager: LU factorization failed (info={})",
193 lu_result.info
194 )));
195 }
196
197 let init_val = 1.0 / (n_usize as f64);
199 let mut x = vec![init_val; n_usize];
200 let mut best_estimate = 0.0_f64;
201
202 for _iter in 0..MAX_ITER {
203 let mut w_host = x
205 .iter()
206 .map(|&v| {
207 if T::SIZE == 8 {
209 T::from_bits_u64(v.to_bits())
210 } else {
211 T::from_bits_u64(u64::from((v as f32).to_bits()))
212 }
213 })
214 .collect::<Vec<_>>();
215 let mut w_device = DeviceBuffer::<T>::alloc(n_usize).map_err(|e| {
216 SolverError::InternalError(format!("estimate_inverse_norm_hager: alloc w: {e}"))
217 })?;
218 w_device.copy_from_host(&w_host).map_err(|e| {
219 SolverError::InternalError(format!(
220 "estimate_inverse_norm_hager: copy w to device: {e}"
221 ))
222 })?;
223
224 lu::lu_solve(handle, &lu_device, &pivots, &mut w_device, n, 1)?;
225 w_device.copy_to_host(&mut w_host).map_err(|e| {
226 SolverError::InternalError(format!(
227 "estimate_inverse_norm_hager: copy w from device: {e}"
228 ))
229 })?;
230
231 let w_norm_1 = w_host.iter().map(|&v| t_to_f64(v).abs()).sum::<f64>();
233
234 if w_norm_1 <= CONV_TOL * best_estimate {
236 best_estimate = w_norm_1;
237 break;
238 }
239 best_estimate = w_norm_1;
240
241 let zeta = w_host
243 .iter()
244 .map(|&v| {
245 let fv = t_to_f64(v);
246 if fv > 0.0 {
247 if T::SIZE == 8 {
249 T::from_bits_u64(1.0_f64.to_bits())
250 } else {
251 T::from_bits_u64(u64::from((1.0_f32).to_bits()))
252 }
253 } else if fv < 0.0 {
254 if T::SIZE == 8 {
255 T::from_bits_u64((-1.0_f64).to_bits())
256 } else {
257 T::from_bits_u64(u64::from((-1.0_f32).to_bits()))
258 }
259 } else {
260 T::gpu_zero()
261 }
262 })
263 .collect::<Vec<_>>();
264
265 let mut z = zeta.clone();
267 let mut z_device = DeviceBuffer::<T>::alloc(n_usize).map_err(|e| {
268 SolverError::InternalError(format!("estimate_inverse_norm_hager: alloc z: {e}"))
269 })?;
270 z_device.copy_from_host(&z).map_err(|e| {
271 SolverError::InternalError(format!(
272 "estimate_inverse_norm_hager: copy z to device: {e}"
273 ))
274 })?;
275
276 lu::lu_solve(handle, &lu_device, &pivots, &mut z_device, n, 1)?;
280
281 z_device.copy_to_host(&mut z).map_err(|e| {
282 SolverError::InternalError(format!(
283 "estimate_inverse_norm_hager: copy z from device: {e}"
284 ))
285 })?;
286
287 let (j_max, z_inf_norm) = z
289 .iter()
290 .enumerate()
291 .map(|(i, &v)| (i, t_to_f64(v).abs()))
292 .fold((0, 0.0_f64), |(i_max, max_so_far), (i, norm)| {
293 if norm > max_so_far {
294 (i, norm)
295 } else {
296 (i_max, max_so_far)
297 }
298 });
299
300 let z_dot_x = z
302 .iter()
303 .zip(x.iter())
304 .map(|(&zi, &xi)| t_to_f64(zi) * xi)
305 .sum::<f64>();
306
307 if z_inf_norm <= z_dot_x {
308 break;
309 }
310
311 x.iter_mut().for_each(|xi| *xi = 0.0);
313 x[j_max] = 1.0;
314 }
315
316 Ok(best_estimate)
317}
318
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn norm_type_equality() {
        let (a, b) = (NormType::One, NormType::Infinity);
        assert_eq!(a, NormType::One);
        assert_ne!(a, b);
    }

    #[test]
    fn norm_type_debug() {
        assert!(format!("{:?}", NormType::Infinity).contains("Infinity"));
    }

    #[test]
    fn t_to_f64_for_f64_identity() {
        // 8-byte path must round-trip the bit pattern exactly.
        let val = std::f64::consts::PI;
        let converted = t_to_f64(val);
        let diff = (converted - val).abs();
        assert!(
            diff < 1e-15,
            "t_to_f64 for f64 must be identity, got {converted} expected {val}"
        );
    }

    #[test]
    fn t_to_f64_for_f32_widening() {
        // 4-byte path widens through f32 bits.
        let val = std::f32::consts::E;
        let converted = t_to_f64(val);
        let expected = f64::from(val);
        let diff = (converted - expected).abs();
        assert!(
            diff < 1e-6,
            "t_to_f64 for f32 must widen correctly, got {converted} expected {expected}"
        );
    }

    #[test]
    fn t_to_f64_zero() {
        for z in [t_to_f64(0.0_f64), t_to_f64(0.0_f32)] {
            assert_eq!(z, 0.0_f64);
        }
    }

    #[test]
    fn t_to_f64_negative() {
        assert!((t_to_f64(-42.0_f64) + 42.0_f64).abs() < 1e-15);

        let result = t_to_f64(-1.5_f32);
        assert!(
            (result + 1.5_f64).abs() < 1e-6,
            "t_to_f64(-1.5f32) = {result}, expected -1.5"
        );
    }

    #[test]
    fn norm_type_variants_distinct() {
        assert_ne!(
            NormType::One,
            NormType::Infinity,
            "NormType variants must be distinct"
        );
    }

    #[test]
    fn norm_type_clone() {
        // Copy semantics: the original stays usable after the copy.
        let original = NormType::Infinity;
        let cloned = original;
        assert_eq!(original, cloned);
    }

    #[test]
    fn norm_type_one_debug() {
        let s = format!("{:?}", NormType::One);
        assert!(
            s.contains("One"),
            "NormType::One debug must contain 'One', got '{s}'"
        );
    }
}