#![cfg(feature = "backend-faer")]
#[cfg(feature = "complex")]
use crate::algebra::bridge::{BridgeScratch, copy_real_into_scalar, copy_scalar_to_real_in};
use crate::algebra::scalar::KrystScalar;
#[cfg(feature = "complex")]
use crate::algebra::scalar::S as GlobalScalar;
use crate::error::KError;
use crate::matrix::sparse::CsrMatrix;
#[cfg(feature = "complex")]
use crate::ops::kpc::KPreconditioner;
use crate::preconditioner::LocalPreconditioner;
use crate::preconditioner::stats::{ParIluHistory, ParIluIterSample};
use crate::preconditioner::{PcSide, legacy::Preconditioner, pivot::*, tri_solve::TriangularSolve};
use crate::utils::conditioning::{apply_dense_transforms, ConditioningOptions};
use crate::utils::metrics::{Counters, SolveTimer};
use crate::utils::monitor::{Event, Monitor};
use faer::Mat;
#[cfg(feature = "rayon")]
use std::sync::Arc;
use std::sync::Mutex;
#[cfg(feature = "rayon")]
use rayon::prelude::*;
#[cfg(feature = "logging")]
use log::{debug, info, trace, warn};
type S = f64;
type Real = f64;
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum IluType {
ILU0 = 0,
ILUK = 1,
ILUT = 2,
MILU0 = 3,
BlockJacobi = 10,
GmresIluk = 20,
GmresIlut = 21,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ReorderingType {
None = 0,
RCM = 1,
AMD = 2,
Natural = 3,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TriSolveType {
Exact = 0,
Jacobi = 1,
GaussSeidel = 2,
}
#[derive(Clone, Debug)]
pub struct IluConfig {
pub ilu_type: IluType,
pub level_of_fill: usize,
pub max_fill_per_row: usize,
pub drop_tolerance: Real,
pub offdiag_drop_tolerance: Real,
pub schur_drop_tolerance: Real,
pub reordering_type: ReorderingType,
pub triangular_solve: TriSolveType,
pub lower_jacobi_iters: usize,
pub upper_jacobi_iters: usize,
pub tolerance: Real,
pub max_iterations: usize,
pub logging_level: usize,
pub print_level: usize,
pub ieee_checks: bool,
pub optimize_workspace: bool,
pub pivot_policy: PivotPolicy,
pub enable_parallel_factorization: bool,
pub enable_parallel_triangular_solve: bool,
pub parallel_chunk_size: usize,
pub enable_distributed: bool,
pub parilu_enabled: bool,
pub parilu_max_iters: usize,
pub parilu_min_iters: usize,
pub parilu_tol: Real,
pub parilu_omega: Real,
pub conditioning: ConditioningOptions,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum ParFactorizationMode {
Serial,
Block,
ParIlu,
}
impl IluConfig {
fn par_factor_mode(&self) -> ParFactorizationMode {
if !self.enable_parallel_factorization {
ParFactorizationMode::Serial
} else if self.parilu_enabled {
ParFactorizationMode::ParIlu
} else {
ParFactorizationMode::Block
}
}
}
impl Default for IluConfig {
fn default() -> Self {
Self {
ilu_type: IluType::ILU0,
level_of_fill: 0, max_fill_per_row: 0, drop_tolerance: 1e-4, offdiag_drop_tolerance: 1e-4, schur_drop_tolerance: 1e-4, reordering_type: ReorderingType::None, triangular_solve: TriSolveType::Exact, lower_jacobi_iters: 1, upper_jacobi_iters: 1, tolerance: 1e-6, max_iterations: 1, logging_level: 0, print_level: 0, ieee_checks: true, optimize_workspace: true, pivot_policy: PivotPolicy::default(),
enable_parallel_factorization: false, enable_parallel_triangular_solve: false, parallel_chunk_size: 64, enable_distributed: false, parilu_enabled: false,
parilu_max_iters: 0,
parilu_min_iters: 0,
parilu_tol: 1e-2,
parilu_omega: 1.0,
conditioning: ConditioningOptions::default(),
}
}
}
#[cfg(feature = "logging")]
fn print_ilu_banner(cfg: &IluConfig) {
if cfg.logging_level == 0 {
return;
}
info!("ILU Setup:");
info!(" kind : {:?}", cfg.ilu_type);
info!(" reordering : {:?}", cfg.reordering_type);
let tri = match cfg.triangular_solve {
TriSolveType::Exact => "Exact".to_string(),
TriSolveType::Jacobi => format!(
"Jacobi (L:{} U:{})",
cfg.lower_jacobi_iters, cfg.upper_jacobi_iters
),
TriSolveType::GaussSeidel => "GaussSeidel".to_string(),
};
info!(" triangular solve : {tri}");
info!(
" iterative setup : tol={:.2e}, max_iter={}",
cfg.tolerance, cfg.max_iterations
);
info!(
" exec : distributed={}, par_factorization={}, par_trisolve={}",
cfg.enable_distributed,
cfg.enable_parallel_factorization,
cfg.enable_parallel_triangular_solve
);
info!(" pivot : {:?}", cfg.pivot_policy);
info!(
" parilu : enabled={}, max_iter={}, min_iter={}, tol={:.2e}, omega={:.2}",
cfg.parilu_enabled,
cfg.parilu_max_iters,
cfg.parilu_min_iters,
cfg.parilu_tol,
cfg.parilu_omega
);
}
pub struct IluBuilder {
config: IluConfig,
}
impl IluBuilder {
pub fn new() -> Self {
Self {
config: IluConfig::default(),
}
}
pub fn ilu_type(mut self, ilu_type: IluType) -> Self {
self.config.ilu_type = ilu_type;
self
}
pub fn level_of_fill(mut self, level: usize) -> Self {
self.config.level_of_fill = level;
self
}
pub fn max_fill_per_row(mut self, max_fill: usize) -> Self {
self.config.max_fill_per_row = max_fill;
self
}
pub fn drop_tolerance(mut self, tol: Real) -> Self {
self.config.drop_tolerance = tol;
self
}
pub fn enable_reordering(mut self, reordering: ReorderingType) -> Self {
self.config.reordering_type = reordering;
self
}
pub fn triangular_solve(mut self, solve_type: TriSolveType) -> Self {
self.config.triangular_solve = solve_type;
self
}
pub fn jacobi_iterations(mut self, lower: usize, upper: usize) -> Self {
self.config.lower_jacobi_iters = lower;
self.config.upper_jacobi_iters = upper;
self
}
pub fn enable_logging(mut self) -> Self {
self.config.logging_level = 1;
self
}
pub fn logging_level(mut self, level: usize) -> Self {
self.config.logging_level = level;
self
}
pub fn enable_printing(mut self) -> Self {
self.config.print_level = 1;
self
}
pub fn print_level(mut self, level: usize) -> Self {
self.config.print_level = level;
self
}
pub fn pivot_policy(mut self, policy: PivotPolicy) -> Self {
self.config.pivot_policy = policy;
self
}
pub fn enable_parallel_factorization(mut self) -> Self {
self.config.enable_parallel_factorization = true;
self
}
pub fn enable_parallel_triangular_solve(mut self) -> Self {
self.config.enable_parallel_triangular_solve = true;
self
}
pub fn parallel_chunk_size(mut self, chunk_size: usize) -> Self {
self.config.parallel_chunk_size = chunk_size;
self
}
pub fn enable_parallel(mut self) -> Self {
self.config.enable_parallel_factorization = true;
self.config.enable_parallel_triangular_solve = true;
self
}
pub fn enable_parilu(mut self, max_iters: usize, tol: Real, omega: Real) -> Self {
self.config.parilu_enabled = true;
self.config.parilu_max_iters = max_iters;
self.config.parilu_tol = tol;
self.config.parilu_omega = omega;
self
}
pub fn parilu_min_iters(mut self, min_iters: usize) -> Self {
self.config.parilu_min_iters = min_iters;
self
}
pub fn enable_distributed(mut self) -> Self {
self.config.enable_distributed = true;
self
}
pub fn build(self) -> Result<Ilu, KError> {
Ilu::new_with_config(self.config)
}
}
impl Default for IluBuilder {
fn default() -> Self {
Self::new()
}
}
pub struct Ilu {
config: IluConfig,
l: CsrMatrix<f64>,
u: CsrMatrix<f64>,
inv_diag_u: Vec<f64>,
#[allow(dead_code)]
row_perm: Vec<usize>,
#[allow(dead_code)]
col_perm: Vec<usize>,
workspace: IluWorkspace,
#[cfg(feature = "rayon")]
levels_l: Levels,
#[cfg(feature = "rayon")]
levels_u: Levels,
setup_complexity: f64,
nnz_l: usize,
nnz_u: usize,
num_zero_pivots: usize,
pivot_stats: PivotStats,
max_diag_a: f64,
row_inf_a: Vec<f64>,
row_gersh_a: Vec<f64>,
running_max_u: f64,
setup_time: f64,
solve_ctrs: Counters,
history: Option<ParIluHistory>,
monitor: Option<Box<dyn Monitor>>,
}
#[derive(Debug)]
pub struct IluWorkspace {
solve_buf: Mutex<Vec<f64>>,
temp2: Mutex<Vec<f64>>,
levels: Mutex<Vec<usize>>,
pattern_work: Mutex<Vec<bool>>,
size: usize,
}
impl IluWorkspace {
pub fn new(size: usize) -> Self {
Self {
solve_buf: Mutex::new(vec![0.0; size]),
temp2: Mutex::new(vec![0.0; size]),
levels: Mutex::new(vec![0; size]),
pattern_work: Mutex::new(vec![false; size]),
size,
}
}
pub fn ensure_size(&mut self, new_size: usize) {
if new_size > self.size {
self.solve_buf.lock().unwrap().resize(new_size, 0.0);
self.temp2.lock().unwrap().resize(new_size, 0.0);
self.levels.lock().unwrap().resize(new_size, 0);
self.pattern_work.lock().unwrap().resize(new_size, false);
self.size = new_size;
}
}
pub fn clear(&self) {
for x in self.solve_buf.lock().unwrap().iter_mut() {
*x = 0.0;
}
for x in self.temp2.lock().unwrap().iter_mut() {
*x = 0.0;
}
for x in self.levels.lock().unwrap().iter_mut() {
*x = 0;
}
for x in self.pattern_work.lock().unwrap().iter_mut() {
*x = false;
}
}
#[inline]
pub fn borrow_solve_buf(&self, n: usize) -> std::sync::MutexGuard<'_, Vec<f64>> {
debug_assert!(
self.size >= n,
"workspace not sized; call ensure_size in setup()"
);
self.solve_buf.lock().unwrap()
}
}
#[cfg(feature = "rayon")]
struct BlockFactorResult {
l: CsrMatrix<f64>,
u: CsrMatrix<f64>,
pivot_stats: PivotStats,
running_max: f64,
zero_pivots: usize,
}
#[cfg(feature = "rayon")]
#[derive(Clone, Debug, Default)]
struct Levels {
buckets: Vec<Vec<usize>>,
max_level: u32,
}
#[cfg(feature = "rayon")]
fn build_levels_lower(l: &CsrMatrix<f64>) -> Levels {
let n = l.nrows();
let mut lev = vec![0u32; n];
let mut maxl = 0u32;
for i in 0..n {
let (cols, _vals) = l.row(i);
let mut li = 0u32;
for &j in cols {
if j >= i {
continue;
}
li = li.max(lev[j] + 1);
}
lev[i] = li;
maxl = maxl.max(li);
}
let mut buckets = vec![Vec::new(); (maxl as usize) + 1];
for (i, &l) in lev.iter().enumerate() {
buckets[l as usize].push(i);
}
Levels {
buckets,
max_level: maxl,
}
}
#[cfg(feature = "rayon")]
fn build_levels_upper(u: &CsrMatrix<f64>) -> Levels {
let n = u.nrows();
let mut lev = vec![0u32; n];
let mut maxl = 0u32;
for i in (0..n).rev() {
let (cols, _vals) = u.row(i);
let mut li = 0u32;
for &j in cols {
if j <= i {
continue;
}
li = li.max(lev[j] + 1);
}
lev[i] = li;
maxl = maxl.max(li);
}
let mut buckets = vec![Vec::new(); (maxl as usize) + 1];
for (i, &l) in lev.iter().enumerate() {
buckets[l as usize].push(i);
}
Levels {
buckets,
max_level: maxl,
}
}
impl Ilu {
pub fn new() -> Self {
Self::new_with_config(IluConfig::default()).unwrap()
}
pub fn new_with_config(config: IluConfig) -> Result<Self, KError> {
Self::validate_config(&config)?;
#[cfg(feature = "logging")]
if config.logging_level > 0 {
info!(
"ILU Setup: Creating {:?} factorization with HYPRE-inspired configuration",
config.ilu_type
);
debug!(
"ILU Config: fill_level={}, drop_tol={:.2e}, reordering={:?}",
config.level_of_fill, config.drop_tolerance, config.reordering_type
);
}
Ok(Self {
config,
l: CsrMatrix::from_csr(0, 0, vec![0], Vec::new(), Vec::new()),
u: CsrMatrix::from_csr(0, 0, vec![0], Vec::new(), Vec::new()),
inv_diag_u: Vec::new(),
row_perm: Vec::new(),
col_perm: Vec::new(),
workspace: IluWorkspace::new(0),
#[cfg(feature = "rayon")]
levels_l: Levels::default(),
#[cfg(feature = "rayon")]
levels_u: Levels::default(),
setup_complexity: 0.0,
nnz_l: 0,
nnz_u: 0,
num_zero_pivots: 0,
pivot_stats: PivotStats::default(),
max_diag_a: Real::default(),
row_inf_a: Vec::new(),
row_gersh_a: Vec::new(),
running_max_u: Real::default(),
setup_time: 0.0,
solve_ctrs: Counters::new(),
history: None,
monitor: None,
})
}
fn validate_config(config: &IluConfig) -> Result<(), KError> {
if config.drop_tolerance < 0.0 {
return Err(KError::InvalidInput(
"drop_tolerance must be >= 0".to_string(),
));
}
if config.enable_parallel_triangular_solve && config.parallel_chunk_size == 0 {
return Err(KError::InvalidInput(
"parallel_chunk_size must be > 0 when parallel triangular solve is enabled"
.to_string(),
));
}
if config.tolerance <= 0.0 {
return Err(KError::InvalidInput("tolerance must be > 0".to_string()));
}
if config.parilu_omega <= 0.0 {
return Err(KError::InvalidInput(
"parilu_omega must be > 0 (relaxation factor)".to_string(),
));
}
if config.parilu_min_iters > config.parilu_max_iters {
return Err(KError::InvalidInput(
"parilu_min_iters cannot exceed parilu_max_iters".to_string(),
));
}
Ok(())
}
fn allow_parallel_factorization(&self, n: usize) -> bool {
if !self.config.enable_parallel_factorization {
return false;
}
#[cfg(feature = "rayon")]
{
if crate::algebra::parallel_cfg::force_serial() {
return false;
}
if crate::parallel::threads::current_rayon_threads() <= 1 {
return false;
}
let chunk_size = self.config.parallel_chunk_size.max(1);
if chunk_size == 1 {
return true;
}
let tune = crate::algebra::parallel_cfg::parallel_tune();
return n >= tune.min_rows_ilu_factorization;
}
#[cfg(not(feature = "rayon"))]
{
true
}
}
fn allow_parallel_triangular_solve(&self, n: usize) -> bool {
if !self.config.enable_parallel_triangular_solve {
return false;
}
#[cfg(feature = "rayon")]
{
if crate::algebra::parallel_cfg::force_serial() {
return false;
}
if crate::parallel::threads::current_rayon_threads() <= 1 {
return false;
}
let tune = crate::algebra::parallel_cfg::parallel_tune();
return n >= tune.min_rows_ilu_triangular;
}
#[cfg(not(feature = "rayon"))]
{
false
}
}
fn check_ieee_values(matrix: &Mat<f64>) -> Result<(), KError> {
for i in 0..matrix.nrows() {
for j in 0..matrix.ncols() {
let val = matrix[(i, j)];
if val.is_nan() {
return Err(KError::InvalidInput(format!(
"NaN detected in matrix at position ({i}, {j})"
)));
}
if val.is_infinite() {
return Err(KError::InvalidInput(format!(
"Infinity detected in matrix at position ({i}, {j})"
)));
}
}
}
Ok(())
}
fn validate_matrix(matrix: &Mat<f64>) -> Result<(), KError> {
if matrix.nrows() == 0 || matrix.ncols() == 0 {
return Err(KError::InvalidInput("Matrix cannot be empty".to_string()));
}
if matrix.nrows() != matrix.ncols() {
return Err(KError::InvalidInput(
"ILU requires square matrices".to_string(),
));
}
Ok(())
}
fn calculate_complexity(&self, original_nnz: usize) -> f64 {
let total_nnz = self.nnz_l + self.nnz_u;
if original_nnz > 0 {
total_nnz as f64 / original_nnz as f64
} else {
0.0
}
}
fn stabilize_pivot_value(
pivot: &mut f64,
row: usize,
matrix: &Mat<f64>,
policy: &PivotPolicy,
max_diag_a: f64,
row_inf_a: &[f64],
row_gersh_a: &[f64],
stats: &mut PivotStats,
running_max: &mut f64,
zero_pivots: &mut usize,
) -> Result<(), KError> {
let s_i = match policy.scale {
PivotScale::MaxDiagA => max_diag_a,
PivotScale::LocalDiagA => matrix[(row, row)].abs(),
PivotScale::RowInfA => row_inf_a[row],
PivotScale::RowGershgorin => row_gersh_a[row],
PivotScale::RunningMaxU => *running_max,
}
.max(max_diag_a);
if let Err(e) =
stabilize_pivot_in_place(pivot, s_i, policy.tau, policy.sign, policy.mode, stats, row)
{
*zero_pivots += 1;
return Err(e);
}
let abs = pivot.abs();
if abs > *running_max {
*running_max = abs;
}
Ok(())
}
fn handle_pivot(
&mut self,
pivot: &mut f64,
row: usize,
matrix: &Mat<f64>,
) -> Result<(), KError> {
let policy = &self.config.pivot_policy;
Self::stabilize_pivot_value(
pivot,
row,
matrix,
policy,
self.max_diag_a,
&self.row_inf_a,
&self.row_gersh_a,
&mut self.pivot_stats,
&mut self.running_max_u,
&mut self.num_zero_pivots,
)?;
Ok(())
}
fn partition_rows(n: usize, block_size: usize) -> Vec<std::ops::Range<usize>> {
let block_size = block_size.max(1);
let mut blocks = Vec::new();
let mut start = 0;
while start < n {
let end = (start + block_size).min(n);
blocks.push(start..end);
start = end;
}
blocks
}
fn extract_block_dense(matrix: &Mat<f64>, rows: std::ops::Range<usize>) -> Mat<f64> {
let m = rows.len();
let mut block = Mat::zeros(m, m);
for local_i in 0..m {
let global_i = rows.start + local_i;
for local_j in 0..m {
let global_j = rows.start + local_j;
block[(local_i, local_j)] = matrix[(global_i, global_j)];
}
}
block
}
#[cfg(feature = "rayon")]
fn factor_block(
matrix: &Mat<f64>,
rows: std::ops::Range<usize>,
policy: &PivotPolicy,
max_diag: f64,
row_inf: &[f64],
row_gersh: &[f64],
) -> Result<BlockFactorResult, KError> {
let block = Self::extract_block_dense(matrix, rows.clone());
let drop_tol: f64 = 1e-15;
let mut l = CsrMatrix::from_dense(&block, drop_tol)?;
let mut u = CsrMatrix::from_dense(&block, drop_tol)?;
let size = block.nrows();
for i in 0..size {
for j in 0..size {
if i > j {
Self::sparse_set(&mut u, i, j, 0.0);
} else if i < j {
Self::sparse_set(&mut l, i, j, 0.0);
} else {
Self::sparse_set(&mut l, i, i, 1.0);
}
}
}
let mut stats = PivotStats::default();
let mut running_max: f64 = 0.0;
let mut zero_pivots = 0usize;
for k in 0..size {
let mut pivot = Self::sparse_get(&u, k, k);
Self::stabilize_pivot_value(
&mut pivot,
rows.start + k,
matrix,
policy,
max_diag,
row_inf,
row_gersh,
&mut stats,
&mut running_max,
&mut zero_pivots,
)?;
Self::sparse_set(&mut u, k, k, pivot);
for i in (k + 1)..size {
let l_ik = Self::sparse_get(&l, i, k);
if l_ik != 0.0 {
let multiplier = l_ik / pivot;
Self::sparse_set(&mut l, i, k, multiplier);
for j in (k + 1)..size {
let u_kj = Self::sparse_get(&u, k, j);
if u_kj != 0.0 {
let global_i = rows.start + i;
let global_j = rows.start + j;
if matrix[(global_i, global_j)] != 0.0 {
let u_ij = Self::sparse_get(&u, i, j);
let new_val = u_ij - multiplier * u_kj;
Self::sparse_set(&mut u, i, j, new_val);
}
}
}
}
}
}
Ok(BlockFactorResult {
l,
u,
pivot_stats: stats,
running_max,
zero_pivots,
})
}
fn sparse_get(matrix: &CsrMatrix<f64>, i: usize, j: usize) -> f64 {
let (cols, vals) = matrix.row(i);
match cols.binary_search(&j) {
Ok(pos) => vals[pos],
Err(_) => 0.0,
}
}
fn sparse_set(matrix: &mut CsrMatrix<f64>, i: usize, j: usize, value: f64) {
let start = matrix.row_ptr()[i];
let end = matrix.row_ptr()[i + 1];
let mut pos_in_row = None;
{
let cols = &matrix.col_idx()[start..end];
if let Ok(off) = cols.binary_search(&j) {
pos_in_row = Some(start + off);
}
}
if let Some(p) = pos_in_row {
let values = matrix.values_mut();
values[p] = value;
}
}
fn compute_ilu0(&mut self, matrix: &Mat<f64>) -> Result<(), KError> {
let n = matrix.nrows();
let drop_tol: f64 = 1e-15;
let mut l = CsrMatrix::from_dense(matrix, drop_tol)?;
let mut u = CsrMatrix::from_dense(matrix, drop_tol)?;
for i in 0..n {
for j in 0..n {
if i > j {
Self::sparse_set(&mut u, i, j, 0.0);
} else if i < j {
Self::sparse_set(&mut l, i, j, 0.0);
} else {
Self::sparse_set(&mut l, i, i, 1.0);
}
}
}
for k in 0..n {
let mut pivot = Self::sparse_get(&u, k, k);
self.handle_pivot(&mut pivot, k, matrix)?;
Self::sparse_set(&mut u, k, k, pivot);
for i in (k + 1)..n {
let l_ik = Self::sparse_get(&l, i, k);
if l_ik != 0.0 {
let multiplier = l_ik / pivot;
Self::sparse_set(&mut l, i, k, multiplier);
for j in (k + 1)..n {
let u_kj = Self::sparse_get(&u, k, j);
if u_kj != 0.0 && matrix[(i, j)] != 0.0 {
let u_ij = Self::sparse_get(&u, i, j);
let new_val = u_ij - multiplier * u_kj;
Self::sparse_set(&mut u, i, j, new_val);
}
}
}
}
}
self.nnz_l = l.nnz();
self.nnz_u = u.nnz();
self.inv_diag_u = u.diagonal().into_iter().map(|v| 1.0 / v).collect();
self.l = l;
self.u = u;
Ok(())
}
#[cfg(feature = "rayon")]
fn compute_ilu0_block_parallel(&mut self, matrix: &Mat<f64>) -> Result<(), KError> {
let n = matrix.nrows();
let chunk_size = self.config.parallel_chunk_size.max(1);
let blocks = Self::partition_rows(n, chunk_size);
let row_inf = Arc::new(self.row_inf_a.clone());
let row_gersh = Arc::new(self.row_gersh_a.clone());
let pivot_policy = Arc::new(self.config.pivot_policy.clone());
let max_diag = self.max_diag_a;
let results = Arc::new(Mutex::new(Vec::with_capacity(blocks.len())));
let first_error = Arc::new(Mutex::new(None::<KError>));
rayon::scope(|scope| {
for rows in blocks.iter().cloned() {
let results = Arc::clone(&results);
let first_error = Arc::clone(&first_error);
let row_inf = Arc::clone(&row_inf);
let row_gersh = Arc::clone(&row_gersh);
let policy = Arc::clone(&pivot_policy);
let matrix = matrix;
scope.spawn(move |_| {
if first_error.lock().unwrap().is_some() {
return;
}
match Self::factor_block(
matrix,
rows.clone(),
policy.as_ref(),
max_diag,
row_inf.as_ref(),
row_gersh.as_ref(),
) {
Ok(res) => {
results.lock().unwrap().push((rows, res));
}
Err(e) => {
let mut guard = first_error.lock().unwrap();
if guard.is_none() {
*guard = Some(e);
}
}
}
});
}
});
if let Some(err) = first_error.lock().unwrap().take() {
return Err(err);
}
let mut block_results = {
let mut guard = results.lock().unwrap();
std::mem::take(&mut *guard)
};
block_results.sort_by_key(|(range, _)| range.start);
let mut merged_stats = PivotStats::default();
let mut running_max: f64 = 0.0;
let mut zero_pivots = 0usize;
for (_, result) in block_results.iter() {
merged_stats.num_floors += result.pivot_stats.num_floors;
merged_stats.sum_abs_shift += result.pivot_stats.sum_abs_shift;
merged_stats.num_strict_fail += result.pivot_stats.num_strict_fail;
merged_stats.max_abs_shift = merged_stats
.max_abs_shift
.max(result.pivot_stats.max_abs_shift);
merged_stats.last_floor_value = merged_stats
.last_floor_value
.max(result.pivot_stats.last_floor_value);
running_max = running_max.max(result.running_max);
zero_pivots += result.zero_pivots;
}
self.pivot_stats = merged_stats;
self.running_max_u = running_max;
self.num_zero_pivots = zero_pivots;
self.assemble_block_diagonal(n, block_results)?;
Ok(())
}
#[cfg(not(feature = "rayon"))]
fn compute_ilu0_block_parallel(&mut self, matrix: &Mat<f64>) -> Result<(), KError> {
#[cfg(feature = "logging")]
if self.config.logging_level > 0 && self.config.enable_parallel_factorization {
warn!(
"ILU parallel factorization requested but 'rayon' feature disabled; falling back to serial ILU0"
);
}
self.compute_ilu0(matrix)
}
#[cfg(feature = "rayon")]
fn assemble_block_diagonal(
&mut self,
n: usize,
block_results: Vec<(std::ops::Range<usize>, BlockFactorResult)>,
) -> Result<(), KError> {
let mut l_row_ptr = Vec::with_capacity(n + 1);
let mut u_row_ptr = Vec::with_capacity(n + 1);
l_row_ptr.push(0);
u_row_ptr.push(0);
let mut l_cols = Vec::new();
let mut l_vals = Vec::new();
let mut u_cols = Vec::new();
let mut u_vals = Vec::new();
let mut inv_diag_u = vec![0.0; n];
let mut nnz_l = 0;
let mut nnz_u = 0;
for (rows, block) in block_results {
let size = rows.len();
for local_i in 0..size {
let global_i = rows.start + local_i;
let (cols_l, vals_l) = block.l.row(local_i);
for (&c, &v) in cols_l.iter().zip(vals_l.iter()) {
l_cols.push(rows.start + c);
l_vals.push(v);
nnz_l += 1;
}
l_row_ptr.push(nnz_l);
let (cols_u, vals_u) = block.u.row(local_i);
let mut diag_found = false;
for (&c, &v) in cols_u.iter().zip(vals_u.iter()) {
u_cols.push(rows.start + c);
u_vals.push(v);
if c == local_i {
if v == 0.0 {
return Err(KError::InvalidInput(format!(
"zero diagonal detected in block row {global_i}"
)));
}
inv_diag_u[global_i] = 1.0 / v;
diag_found = true;
}
nnz_u += 1;
}
if !diag_found {
return Err(KError::InvalidInput(format!(
"block ILU lost diagonal entry at row {global_i}"
)));
}
u_row_ptr.push(nnz_u);
}
}
self.l = CsrMatrix::from_csr(n, n, l_row_ptr, l_cols, l_vals);
self.u = CsrMatrix::from_csr(n, n, u_row_ptr, u_cols, u_vals);
self.inv_diag_u = inv_diag_u;
self.nnz_l = nnz_l;
self.nnz_u = nnz_u;
Ok(())
}
fn compute_milu0(&mut self, matrix: &Mat<f64>) -> Result<(), KError> {
let n = matrix.nrows();
let drop_tol: f64 = 1e-15;
let mut l = CsrMatrix::from_dense(matrix, drop_tol)?;
let mut u = CsrMatrix::from_dense(matrix, drop_tol)?;
let mut original_row_sums = vec![0.0; n];
for i in 0..n {
for j in 0..n {
original_row_sums[i] = original_row_sums[i] + matrix[(i, j)];
}
}
for i in 0..n {
for j in 0..n {
if i > j {
Self::sparse_set(&mut u, i, j, 0.0);
} else if i < j {
Self::sparse_set(&mut l, i, j, 0.0);
} else {
Self::sparse_set(&mut l, i, i, 1.0);
}
}
}
for k in 0..n {
let mut pivot = Self::sparse_get(&u, k, k);
self.handle_pivot(&mut pivot, k, matrix)?;
Self::sparse_set(&mut u, k, k, pivot);
for i in (k + 1)..n {
let l_ik = Self::sparse_get(&l, i, k);
if l_ik != 0.0 {
let multiplier = l_ik / pivot;
Self::sparse_set(&mut l, i, k, multiplier);
let mut dropped_sum = 0.0;
for j in (k + 1)..n {
let u_kj = Self::sparse_get(&u, k, j);
if u_kj != 0.0 {
let update = multiplier * u_kj;
if matrix[(i, j)] != 0.0 {
let u_ij = Self::sparse_get(&u, i, j);
Self::sparse_set(&mut u, i, j, u_ij - update);
} else {
dropped_sum = dropped_sum + update;
}
}
}
let u_ii = Self::sparse_get(&u, i, i);
Self::sparse_set(&mut u, i, i, u_ii + dropped_sum);
}
}
}
self.nnz_l = l.nnz();
self.nnz_u = u.nnz();
self.inv_diag_u = u.diagonal().into_iter().map(|v| 1.0 / v).collect();
self.l = l;
self.u = u;
Ok(())
}
fn compute_iluk(&mut self, matrix: &Mat<f64>) -> Result<(), KError> {
let n = matrix.nrows();
let mut l = Mat::zeros(n, n);
let mut u = Mat::zeros(n, n);
let mut level = vec![vec![usize::MAX; n]; n];
for i in 0..n {
for j in 0..n {
if matrix[(i, j)] != 0.0 {
level[i][j] = 0;
if i <= j {
u[(i, j)] = matrix[(i, j)];
} else {
l[(i, j)] = matrix[(i, j)];
}
}
}
l[(i, i)] = 1.0; }
for k in 0..n {
let mut pivot = u[(k, k)];
self.handle_pivot(&mut pivot, k, matrix)?;
u[(k, k)] = pivot;
for i in (k + 1)..n {
if level[i][k] <= self.config.level_of_fill {
l[(i, k)] = l[(i, k)] / pivot;
for j in (k + 1)..n {
if level[k][j] <= self.config.level_of_fill {
let new_level =
level[i][k].saturating_add(level[k][j]).saturating_add(1);
if new_level <= self.config.level_of_fill {
let update = l[(i, k)] * u[(k, j)];
u[(i, j)] = u[(i, j)] - update;
level[i][j] = level[i][j].min(new_level);
}
}
}
} else {
l[(i, k)] = 0.0; }
}
}
let drop_tol: f64 = 1e-15;
self.l = CsrMatrix::from_dense(&l, drop_tol)?;
self.u = CsrMatrix::from_dense(&u, drop_tol)?;
self.inv_diag_u = (0..n).map(|i| 1.0 / u[(i, i)]).collect();
self.nnz_l = self.l.nnz();
self.nnz_u = self.u.nnz();
Ok(())
}
fn compute_ilut(&mut self, matrix: &Mat<f64>) -> Result<(), KError> {
let n = matrix.nrows();
let mut l = Mat::zeros(n, n);
let mut u = Mat::zeros(n, n);
let drop_tol: f64 = self.config.drop_tolerance;
for i in 0..n {
for j in 0..n {
let val = matrix[(i, j)];
if val.abs() >= drop_tol {
if i <= j {
u[(i, j)] = val;
} else {
l[(i, j)] = val;
}
}
}
l[(i, i)] = 1.0; }
for k in 0..n {
let mut pivot = u[(k, k)];
self.handle_pivot(&mut pivot, k, matrix)?;
u[(k, k)] = pivot;
let mut updates = Vec::new();
for i in (k + 1)..n {
if l[(i, k)].abs() >= drop_tol {
l[(i, k)] = l[(i, k)] / pivot;
for j in (k + 1)..n {
if u[(k, j)].abs() >= drop_tol {
let update = l[(i, k)] * u[(k, j)];
updates.push((i, j, update));
}
}
}
}
for (i, j, update) in updates {
let new_val = u[(i, j)] - update;
if new_val.abs() >= drop_tol {
u[(i, j)] = new_val;
} else {
u[(i, j)] = 0.0; }
}
if self.config.max_fill_per_row > 0 {
for i in (k + 1)..n {
self.apply_fill_control_to_row(&mut u, i, k + 1);
}
}
}
let drop_tol: f64 = 1e-15;
self.l = CsrMatrix::from_dense(&l, drop_tol)?;
self.u = CsrMatrix::from_dense(&u, drop_tol)?;
self.inv_diag_u = (0..n).map(|i| 1.0 / u[(i, i)]).collect();
self.nnz_l = self.l.nnz();
self.nnz_u = self.u.nnz();
Ok(())
}
fn apply_fill_control_to_row(&self, matrix: &mut Mat<f64>, row: usize, start_col: usize) {
if self.config.max_fill_per_row == 0 {
return;
}
let mut entries: Vec<(f64, usize, f64)> = Vec::new();
for j in start_col..matrix.ncols() {
let val = matrix[(row, j)];
if val != 0.0 {
entries.push((val.abs(), j, val));
}
}
if entries.len() > self.config.max_fill_per_row {
entries.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
for j in start_col..matrix.ncols() {
matrix[(row, j)] = 0.0;
}
for i in 0..self.config.max_fill_per_row.min(entries.len()) {
let (_, j, val) = entries[i];
matrix[(row, j)] = val;
}
}
}
fn lu_row_product(
&self,
i: usize,
j: usize,
l_row_ptr: &[usize],
l_col_idx: &[usize],
l_vals: &[f64],
u_row_ptr: &[usize],
u_col_idx: &[usize],
u_vals: &[f64],
) -> f64 {
let limit = i.min(j);
let start_l = l_row_ptr[i];
let end_l = l_row_ptr[i + 1];
let mut sum = 0.0;
for idx in start_l..end_l {
let k = l_col_idx[idx];
if k >= limit {
break;
}
let lik = l_vals[idx];
let start_u = u_row_ptr[k];
let end_u = u_row_ptr[k + 1];
if let Ok(off) = u_col_idx[start_u..end_u].binary_search(&j) {
let ukj = u_vals[start_u + off];
sum = f64::mul_add(lik, ukj, sum);
}
}
sum
}
#[allow(clippy::too_many_arguments)]
fn parilu_sweep_serial(
&self,
a: &CsrMatrix<f64>,
l_row_ptr: &[usize],
l_col_idx: &[usize],
l_old: &[f64],
u_row_ptr: &[usize],
u_col_idx: &[usize],
u_old: &[f64],
l_new: &mut [f64],
u_new: &mut [f64],
omega: f64,
) -> Result<f64, KError> {
let n = a.nrows();
let mut res_sq = 0.0;
let get_u_diag = |j: usize| -> f64 {
let start = u_row_ptr[j];
let end = u_row_ptr[j + 1];
for idx in start..end {
if u_col_idx[idx] == j {
return u_old[idx];
}
}
0.0
};
for i in 0..n {
let (a_cols, a_vals) = a.row(i);
let l_start = l_row_ptr[i];
let l_end = l_row_ptr[i + 1];
for idx in l_start..l_end {
let j = l_col_idx[idx];
if j >= i {
continue;
}
let a_ij = match a_cols.binary_search(&j) {
Ok(pos) => a_vals[pos],
Err(_) => 0.0,
};
let s_ij = self.lu_row_product(
i, j, l_row_ptr, l_col_idx, l_old, u_row_ptr, u_col_idx, u_old,
);
let r_ij = a_ij - s_ij;
let u_jj = get_u_diag(j);
if u_jj == 0.0 {
return Err(KError::ZeroPivot(j));
}
let lij_old = l_old[idx];
let lij_new = (1.0 - omega) * lij_old + omega * (r_ij / u_jj);
l_new[idx] = lij_new;
res_sq += r_ij * r_ij;
}
let u_start = u_row_ptr[i];
let u_end = u_row_ptr[i + 1];
for idx in u_start..u_end {
let j = u_col_idx[idx];
if j < i {
continue;
}
let a_ij = match a_cols.binary_search(&j) {
Ok(pos) => a_vals[pos],
Err(_) => 0.0,
};
let s_ij = self.lu_row_product(
i, j, l_row_ptr, l_col_idx, l_old, u_row_ptr, u_col_idx, u_old,
);
let r_ij = a_ij - s_ij;
let uij_old = u_old[idx];
let uij_new = (1.0 - omega) * uij_old + omega * r_ij;
u_new[idx] = uij_new;
res_sq += r_ij * r_ij;
}
}
Ok(res_sq.sqrt())
}
#[allow(clippy::too_many_arguments)]
fn parilu_sweep(
&self,
a: &CsrMatrix<f64>,
l_row_ptr: &[usize],
l_col_idx: &[usize],
l_old: &[f64],
u_row_ptr: &[usize],
u_col_idx: &[usize],
u_old: &[f64],
l_new: &mut [f64],
u_new: &mut [f64],
omega: f64,
) -> Result<f64, KError> {
#[cfg(feature = "rayon")]
{
if self.config.enable_parallel_factorization {
return self.parilu_sweep_parallel(
a, l_row_ptr, l_col_idx, l_old, u_row_ptr, u_col_idx, u_old, l_new, u_new,
omega,
);
}
}
self.parilu_sweep_serial(
a, l_row_ptr, l_col_idx, l_old, u_row_ptr, u_col_idx, u_old, l_new, u_new, omega,
)
}
#[cfg(feature = "rayon")]
#[allow(clippy::too_many_arguments)]
fn parilu_sweep_parallel(
&self,
a: &CsrMatrix<f64>,
l_row_ptr: &[usize],
l_col_idx: &[usize],
l_old: &[f64],
u_row_ptr: &[usize],
u_col_idx: &[usize],
u_old: &[f64],
l_new: &mut [f64],
u_new: &mut [f64],
omega: f64,
) -> Result<f64, KError> {
let n = a.nrows();
let l_ptr = l_new.as_mut_ptr() as usize;
let u_ptr = u_new.as_mut_ptr() as usize;
let res_sq: Result<f64, KError> = (0..n)
.into_par_iter()
.map(|i| {
let (a_cols, a_vals) = a.row(i);
let mut res_sq_i = 0.0;
let l_start = l_row_ptr[i];
let l_end = l_row_ptr[i + 1];
let u_start = u_row_ptr[i];
let u_end = u_row_ptr[i + 1];
let get_u_diag = |j: usize| -> f64 {
let start = u_row_ptr[j];
let end = u_row_ptr[j + 1];
for idx in start..end {
if u_col_idx[idx] == j {
return u_old[idx];
}
}
0.0
};
for idx in l_start..l_end {
let j = l_col_idx[idx];
if j >= i {
continue;
}
let a_ij = match a_cols.binary_search(&j) {
Ok(pos) => a_vals[pos],
Err(_) => 0.0,
};
let s_ij = self.lu_row_product(
i, j, l_row_ptr, l_col_idx, l_old, u_row_ptr, u_col_idx, u_old,
);
let r_ij = a_ij - s_ij;
let u_jj = get_u_diag(j);
if u_jj == 0.0 {
return Err(KError::ZeroPivot(j));
}
let lij_old = l_old[idx];
let lij_new = (1.0 - omega) * lij_old + omega * (r_ij / u_jj);
unsafe { *(l_ptr as *mut f64).add(idx) = lij_new };
res_sq_i += r_ij * r_ij;
}
for idx in u_start..u_end {
let j = u_col_idx[idx];
if j < i {
continue;
}
let a_ij = match a_cols.binary_search(&j) {
Ok(pos) => a_vals[pos],
Err(_) => 0.0,
};
let s_ij = self.lu_row_product(
i, j, l_row_ptr, l_col_idx, l_old, u_row_ptr, u_col_idx, u_old,
);
let r_ij = a_ij - s_ij;
let uij_old = u_old[idx];
let uij_new = (1.0 - omega) * uij_old + omega * r_ij;
unsafe { *(u_ptr as *mut f64).add(idx) = uij_new };
res_sq_i += r_ij * r_ij;
}
Ok(res_sq_i)
})
.try_reduce(|| 0.0, |a, b| Ok(a + b));
Ok(res_sq?.sqrt())
}
fn parilu_refine(&mut self, a: &CsrMatrix<f64>) -> Result<(), KError> {
if !self.config.parilu_enabled || self.config.parilu_max_iters == 0 {
self.history = None;
return Ok(());
}
let n = a.nrows();
if n == 0 {
self.history = None;
return Ok(());
}
if a.ncols() != n || self.l.nrows() != n || self.u.nrows() != n {
return Err(KError::InvalidInput(
"ParILU requires square matrices with matching dimensions".to_string(),
));
}
let max_iters = self.config.parilu_max_iters;
let min_iters = self.config.parilu_min_iters.min(max_iters);
let omega = self.config.parilu_omega;
let tol = self.config.parilu_tol;
let l_row_ptr = self.l.row_ptr().to_vec();
let l_col_idx = self.l.col_idx().to_vec();
let mut l_vals = self.l.values().to_vec();
let u_row_ptr = self.u.row_ptr().to_vec();
let u_col_idx = self.u.col_idx().to_vec();
let mut u_vals = self.u.values().to_vec();
let mut l_old = l_vals.clone();
let mut u_old = u_vals.clone();
let mut history = ParIluHistory::with_capacity(max_iters);
for iter in 0..max_iters {
let iter_start = std::time::Instant::now();
let res = self.parilu_sweep(
a,
&l_row_ptr,
&l_col_idx,
&l_old,
&u_row_ptr,
&u_col_idx,
&u_old,
&mut l_vals,
&mut u_vals,
omega,
)?;
let iter_time = iter_start.elapsed().as_secs_f64();
history.push(ParIluIterSample {
iter: iter as u32,
residual: res,
time_s: iter_time,
});
if let Some(m) = &self.monitor {
if let Some(sample) = history.as_slice().last() {
m.on_event(Event::IluSetupIter { sample });
}
}
if iter + 1 >= min_iters && res < tol {
break;
}
l_old.copy_from_slice(&l_vals);
u_old.copy_from_slice(&u_vals);
}
self.l = CsrMatrix::from_csr(n, n, l_row_ptr, l_col_idx, l_vals);
self.u = CsrMatrix::from_csr(n, n, u_row_ptr, u_col_idx, u_vals);
self.inv_diag_u = self.u.diagonal().into_iter().map(|v| 1.0 / v).collect();
self.nnz_l = self.l.nnz();
self.nnz_u = self.u.nnz();
self.history = Some(history);
#[cfg(feature = "logging")]
if self.config.logging_level > 0 {
if let Some(last) = self.history.as_ref().and_then(|h| h.as_slice().last()) {
let converged = last.residual < tol;
info!(
"ParILU refinement: iterations={}, final residual {:.3e} (tol {:.3e}, converged={})",
self.history
.as_ref()
.map(|h| h.as_slice().len())
.unwrap_or(0),
last.residual,
tol,
converged
);
}
}
Ok(())
}
fn setup_workspace(&mut self, n: usize) {
debug_assert_eq!(
self.l.nrows(),
n,
"L dimension mismatch during workspace sizing"
);
debug_assert_eq!(
self.u.nrows(),
n,
"U dimension mismatch during workspace sizing"
);
if self.config.optimize_workspace {
self.workspace.ensure_size(n);
#[cfg(feature = "logging")]
if self.config.logging_level > 1 {
debug!("ILU: Workspace configured for {n} x {n} matrix");
}
} else {
self.workspace.ensure_size(n);
}
}
fn solve_triangular_exact(&self, lower: bool, x: &mut [S]) {
#[cfg(feature = "rayon")]
if self.allow_parallel_triangular_solve(x.len()) {
if lower {
self.solve_triangular_parallel_forward(x);
} else {
self.solve_triangular_parallel_backward(x);
}
return;
}
self.solve_triangular_exact_seq(lower, x);
}
#[inline]
fn solve_triangular_exact_seq(&self, lower: bool, x: &mut [S]) {
let n = x.len();
if lower {
for i in 0..n {
let mut sum = x[i];
let (cols, vals) = self.l.row(i);
for (&j, &val) in cols.iter().zip(vals.iter()) {
if j < i {
sum -= val * x[j];
}
}
x[i] = sum;
}
} else {
for i in (0..n).rev() {
let mut sum = x[i];
let (cols, vals) = self.u.row(i);
for (&j, &val) in cols.iter().zip(vals.iter()) {
if j <= i {
continue;
}
sum -= val * x[j];
}
x[i] = sum * self.inv_diag_u[i];
}
}
}
#[cfg(feature = "rayon")]
fn solve_triangular_parallel_forward(&self, x: &mut [S]) {
if x.is_empty() {
return;
}
let chunk_size = self.config.parallel_chunk_size.max(1);
let levels = &self.levels_l;
if levels.buckets.is_empty() {
return;
}
for rows in &levels.buckets {
if rows.is_empty() {
continue;
}
let updates: Vec<(usize, S)> = {
let x_ref: &[S] = &*x;
rows.par_iter()
.with_min_len(chunk_size)
.map(|&i| {
let mut sum = x_ref[i];
let (cols, vals) = self.l.row(i);
for (&j, &val) in cols.iter().zip(vals.iter()) {
if j < i {
sum -= val * x_ref[j];
}
}
(i, sum)
})
.collect()
};
for (i, val) in updates {
x[i] = val;
}
}
}
#[cfg(feature = "rayon")]
fn solve_triangular_parallel_backward(&self, x: &mut [S]) {
if x.is_empty() {
return;
}
let chunk_size = self.config.parallel_chunk_size.max(1);
let levels = &self.levels_u;
if levels.buckets.is_empty() {
return;
}
for ell in 0..=levels.max_level {
let rows = &levels.buckets[ell as usize];
if rows.is_empty() {
continue;
}
let updates: Vec<(usize, S)> = {
let x_ref: &[S] = &*x;
rows.par_iter()
.with_min_len(chunk_size)
.map(|&i| {
let mut sum = x_ref[i];
let (cols, vals) = self.u.row(i);
for (&j, &val) in cols.iter().zip(vals.iter()) {
if j <= i {
continue;
}
sum -= val * x_ref[j];
}
(i, sum * self.inv_diag_u[i])
})
.collect()
};
for (i, val) in updates {
x[i] = val;
}
}
}
fn solve_triangular_jacobi(&self, lower: bool, b: &[S], x: &mut [S]) {
let n = b.len();
let num_iters = if lower {
self.config.lower_jacobi_iters
} else {
self.config.upper_jacobi_iters
};
x.copy_from_slice(b);
for _iter in 0..num_iters {
if lower {
for i in 0..n {
let mut sum = S::zero();
let (cols, vals) = self.l.row(i);
for (&j, &val) in cols.iter().zip(vals.iter()) {
if j < i {
sum = sum + val * x[j];
}
}
x[i] = b[i] - sum; }
} else {
for i in (0..n).rev() {
let mut sum = S::zero();
let (cols, vals) = self.u.row(i);
for (&j, &val) in cols.iter().zip(vals.iter()) {
if j > i {
sum = sum + val * x[j];
}
}
x[i] = (b[i] - sum) * self.inv_diag_u[i];
}
}
}
}
fn solve_triangular_gauss_seidel(&self, lower: bool, b: &[S], x: &mut [S]) {
let n = b.len();
let num_iters = if lower {
self.config.lower_jacobi_iters
} else {
self.config.upper_jacobi_iters
};
x.copy_from_slice(b);
for _iter in 0..num_iters {
if lower {
for i in 0..n {
let mut sum = S::zero();
let (cols, vals) = self.l.row(i);
for (&j, &val) in cols.iter().zip(vals.iter()) {
if j < i {
sum = sum + val * x[j];
}
}
x[i] = b[i] - sum; }
} else {
for i in (0..n).rev() {
let mut sum = S::zero();
let (cols, vals) = self.u.row(i);
for (&j, &val) in cols.iter().zip(vals.iter()) {
if j > i {
sum = sum + val * x[j];
}
}
x[i] = (b[i] - sum) * self.inv_diag_u[i];
}
}
}
}
pub fn get_stats(&self) -> IluStats {
let (total_ns, count, _) = self.solve_ctrs.snapshot();
let avg = if count == 0 {
0.0
} else {
(total_ns as f64) / (count as f64) / 1e9
};
IluStats {
setup_complexity: self.setup_complexity,
nnz_l: self.nnz_l,
nnz_u: self.nnz_u,
num_zero_pivots: self.num_zero_pivots,
setup_time: self.setup_time,
solve_time: avg,
solve_count: count as usize,
}
}
pub fn pivot_stats(&self) -> &PivotStats {
&self.pivot_stats
}
}
#[cfg(not(feature = "complex"))]
impl Ilu {
pub fn create_specialized(
config: IluConfig,
) -> Result<Box<dyn Preconditioner<Mat<S>, Vec<S>>>, KError> {
match config.ilu_type {
IluType::ILUK => {
let ilup = crate::preconditioner::ilup::Ilup::new(config.level_of_fill);
Ok(Box::new(ilup))
}
IluType::ILUT | _ => {
let ilu = Ilu::new_with_config(config)?;
Ok(Box::new(ilu))
}
}
}
pub fn create_quick(ilu_type: IluType, fill_or_drop: Real) -> Result<Self, KError> {
let mut config = IluConfig::default();
config.ilu_type = ilu_type;
match ilu_type {
IluType::ILUK => {
config.level_of_fill = fill_or_drop as usize;
}
IluType::ILUT => {
config.drop_tolerance = fill_or_drop;
config.max_fill_per_row = 20; }
_ => {}
}
Self::new_with_config(config)
}
}
#[derive(Debug, Clone)]
pub struct IluStats {
pub setup_complexity: f64,
pub nnz_l: usize,
pub nnz_u: usize,
pub num_zero_pivots: usize,
pub setup_time: f64,
pub solve_time: f64,
pub solve_count: usize,
}
impl Default for Ilu {
fn default() -> Self {
Self::new()
}
}
impl Preconditioner<Mat<f64>, Vec<f64>> for Ilu {
fn setup(&mut self, matrix: &Mat<f64>) -> Result<(), KError> {
let setup_start = std::time::Instant::now();
if let Some(m) = &self.monitor {
m.on_event(Event::IluSetupBegin { opts_hash: 0 });
}
Self::validate_matrix(matrix)?;
if self.config.ieee_checks {
Self::check_ieee_values(matrix)?;
#[cfg(feature = "logging")]
if self.config.logging_level > 0 {
info!("ILU: IEEE safety checks passed");
}
}
let mut conditioned = None;
let matrix = if self.config.conditioning.is_active() {
let mut local = matrix.clone();
apply_dense_transforms("ILU", &mut local, &self.config.conditioning)?;
conditioned = Some(local);
conditioned.as_ref().unwrap()
} else {
matrix
};
let n = matrix.nrows();
let a_csr = CsrMatrix::from_dense(matrix, 1e-15)?;
let original_nnz = a_csr.nnz();
#[cfg(feature = "logging")]
print_ilu_banner(&self.config);
let mut max_diag: Real = Real::default();
self.row_inf_a.resize(n, Real::default());
self.row_gersh_a.resize(n, Real::default());
for i in 0..n {
let mut row_inf: Real = Real::default();
let mut row_gersh = matrix[(i, i)].abs();
for j in 0..n {
let val_abs = matrix[(i, j)].abs();
if j != i {
row_gersh = row_gersh + val_abs;
}
if val_abs > row_inf {
row_inf = val_abs;
}
}
self.row_inf_a[i] = row_inf;
self.row_gersh_a[i] = row_gersh;
max_diag = max_diag.max(matrix[(i, i)].abs());
}
self.max_diag_a = max_diag;
self.running_max_u = Real::default();
self.pivot_stats = PivotStats::default();
self.history = None;
#[cfg(feature = "logging")]
if self.config.logging_level > 0 {
info!("ILU Setup: {n} x {n} matrix with {original_nnz} nonzeros");
debug!("ILU: Using {:?} factorization type", self.config.ilu_type);
}
let mut parilu_iters = 0u32;
let mut parilu_converged = true;
let par_mode = if self.allow_parallel_factorization(n) {
self.config.par_factor_mode()
} else {
ParFactorizationMode::Serial
};
match (self.config.ilu_type, par_mode) {
(IluType::ILU0, ParFactorizationMode::Serial) => {
self.compute_ilu0(matrix)?;
}
(IluType::ILU0, ParFactorizationMode::Block)
| (IluType::ILU0, ParFactorizationMode::ParIlu) => {
self.compute_ilu0_block_parallel(matrix)?;
}
(IluType::MILU0, _) => {
self.compute_milu0(matrix)?;
}
(IluType::ILUK, _) => {
self.compute_iluk(matrix)?;
}
(IluType::ILUT, _) => {
self.compute_ilut(matrix)?;
}
_ => {
return Err(KError::NotImplemented(format!(
"ILU type {:?} not yet implemented",
self.config.ilu_type
)));
}
}
if self.config.parilu_enabled && self.config.parilu_max_iters > 0 {
self.parilu_refine(&a_csr)?;
if let Some(hist) = &self.history {
parilu_iters = hist.as_slice().len() as u32;
parilu_converged = hist
.as_slice()
.last()
.map(|s| s.residual < self.config.parilu_tol)
.unwrap_or(false);
}
} else {
self.history = None;
}
self.setup_workspace(n);
self.setup_complexity = self.calculate_complexity(original_nnz);
self.setup_time = setup_start.elapsed().as_secs_f64();
#[cfg(feature = "rayon")]
if self.allow_parallel_triangular_solve(n) {
self.levels_l = build_levels_lower(&self.l);
self.levels_u = build_levels_upper(&self.u);
}
#[cfg(feature = "logging")]
if self.config.logging_level > 0 {
info!(
"ILU Setup Complete: complexity={:.2}, L_nnz={}, U_nnz={}, setup_time={:.3}s",
self.setup_complexity, self.nnz_l, self.nnz_u, self.setup_time
);
if let Some(hist) = &self.history {
if let Some(last) = hist.as_slice().last() {
info!(
"ParILU refinement: sweeps={}, final residual {:.2e} (tol {:.2e})",
hist.as_slice().len(),
last.residual,
self.config.parilu_tol
);
}
}
debug!(
"Pivot floors: {} (max shift {:.3e})",
self.pivot_stats.num_floors, self.pivot_stats.max_abs_shift
);
if self.num_zero_pivots > 0 {
warn!(
"ILU: {} zero pivots encountered during factorization",
self.num_zero_pivots
);
}
if self.config.print_level > 0 {
println!(
"ILU Setup: {} -> {} nonzeros (complexity: {:.2})",
original_nnz,
self.nnz_l + self.nnz_u,
self.setup_complexity
);
}
}
if let Some(m) = &self.monitor {
m.on_event(Event::IluSetupEnd {
iters: parilu_iters,
converged: parilu_converged,
setup_time_s: self.setup_time,
});
}
Ok(())
}
fn apply(&self, side: PcSide, x: &Vec<f64>, y: &mut Vec<f64>) -> Result<(), KError> {
self.apply_slice(side, x.as_slice(), y.as_mut_slice())
}
}
impl Ilu {
fn apply_slice(&self, _side: PcSide, x: &[f64], y: &mut [f64]) -> Result<(), KError> {
let n = self.l.nrows();
if x.len() != n || y.len() != n {
return Err(KError::InvalidInput(format!(
"Vector length mismatch: expected {}, got x={} y={}",
n,
x.len(),
y.len(),
)));
}
let _timer = SolveTimer::start(&self.solve_ctrs);
match self.config.triangular_solve {
TriSolveType::Exact => {
y.copy_from_slice(x);
self.solve_triangular_exact(true, y);
self.solve_triangular_exact(false, y);
}
TriSolveType::Jacobi => {
let mut buf = self.workspace.borrow_solve_buf(n);
self.solve_triangular_jacobi(true, x, &mut buf[..n]);
self.solve_triangular_jacobi(false, &buf[..n], y);
}
TriSolveType::GaussSeidel => {
let mut buf = self.workspace.borrow_solve_buf(n);
self.solve_triangular_gauss_seidel(true, x, &mut buf[..n]);
self.solve_triangular_gauss_seidel(false, &buf[..n], y);
}
}
#[cfg(feature = "logging")]
if self.config.logging_level > 2 {
let _solve_time = _timer.elapsed().as_secs_f64();
trace!(
"ILU Apply: solve_time={:.6}s, workspace_size={}",
_solve_time, self.workspace.size
);
}
Ok(())
}
}
impl TriangularSolve<f64> for Ilu {
fn solve_lower_in_place(&self, x: &mut [f64]) -> Result<(), KError> {
self.solve_triangular_exact(true, x);
Ok(())
}
fn solve_upper_in_place(&self, x: &mut [f64]) -> Result<(), KError> {
self.solve_triangular_exact(false, x);
Ok(())
}
}
impl Ilu {
pub fn parilu_history(&self) -> Option<&[ParIluIterSample]> {
self.history.as_ref().map(|h| h.as_slice())
}
pub fn set_monitor(&mut self, m: Option<Box<dyn Monitor>>) {
self.monitor = m;
}
}
impl LocalPreconditioner<f64> for Ilu {
fn dims(&self) -> (usize, usize) {
(self.l.nrows(), self.l.ncols())
}
fn apply_local(&self, x: &[S], y: &mut [S]) -> Result<(), KError> {
let (n, _) = LocalPreconditioner::<f64>::dims(self);
debug_assert_eq!(x.len(), n);
debug_assert_eq!(y.len(), n);
self.apply_slice(PcSide::Left, x, y)
}
}
#[cfg(feature = "complex")]
impl KPreconditioner for Ilu {
type Scalar = GlobalScalar;
#[inline]
fn dims(&self) -> (usize, usize) {
LocalPreconditioner::<f64>::dims(self)
}
fn apply_s(
&self,
side: PcSide,
x: &[GlobalScalar],
y: &mut [GlobalScalar],
scratch: &mut BridgeScratch,
) -> Result<(), KError> {
let (rows, cols) = LocalPreconditioner::<f64>::dims(self);
let n = x.len();
if x.len() != y.len() || rows != n || cols != n {
return Err(KError::InvalidInput(format!(
"Ilu::apply_s dimension mismatch: expected {}x{}, got x.len()={} y.len()={}",
rows,
cols,
x.len(),
y.len()
)));
}
scratch.with_pair(n, |xr, yr| {
copy_scalar_to_real_in(x, xr);
self.apply_slice(side, xr, yr)?;
copy_real_into_scalar(yr, y);
Ok(())
})
}
fn apply_mut_s(
&mut self,
side: PcSide,
x: &[GlobalScalar],
y: &mut [GlobalScalar],
scratch: &mut BridgeScratch,
) -> Result<(), KError> {
KPreconditioner::apply_s(self, side, x, y, scratch)
}
fn on_restart_s(
&mut self,
_outer_iter: usize,
_residual_norm: <GlobalScalar as KrystScalar>::Real,
) -> Result<(), KError> {
Ok(())
}
}
pub type Ilu0 = Ilu;
#[cfg(test)]
mod tests {
use super::{Ilu, IluBuilder, IluConfig, IluType, TriSolveType};
use crate::algebra::parallel::par_sum_abs2_local;
use crate::algebra::prelude::*;
use crate::error::KError;
use crate::preconditioner::PcSide;
use crate::preconditioner::legacy::Preconditioner;
#[cfg(not(feature = "complex"))]
use rand::{Rng, SeedableRng, rngs::StdRng};
#[cfg(feature = "rayon")]
use rayon::prelude::*;
#[cfg(feature = "rayon")]
use std::sync::Arc;
fn make_spd_3x3() -> faer::Mat<S> {
faer::Mat::from_fn(3, 3, |i, j| match (i, j) {
(0, 0) => S::from_real(4.0),
(0, 1) | (1, 0) => S::from_real(1.0),
(1, 1) => S::from_real(3.0),
(1, 2) | (2, 1) => S::from_real(1.0),
(2, 2) => S::from_real(2.0),
_ => S::zero(),
})
}
fn mat_vec_mul(a: &faer::Mat<S>, x: &[S]) -> Vec<S> {
let mut out = vec![S::zero(); a.nrows()];
for i in 0..a.nrows() {
let mut acc = S::zero();
for j in 0..a.ncols() {
acc = acc + a[(i, j)] * x[j];
}
out[i] = acc;
}
out
}
#[cfg(not(feature = "complex"))]
fn mat_vec_mul_inplace(a: &faer::Mat<S>, x: &[S], y: &mut [S]) {
for i in 0..a.nrows() {
let mut acc = S::zero();
for j in 0..a.ncols() {
acc = acc + a[(i, j)] * x[j];
}
y[i] = acc;
}
}
#[cfg(not(feature = "complex"))]
fn random_spd(n: usize, seed: u64) -> faer::Mat<S> {
let mut rng = StdRng::seed_from_u64(seed);
let mut a = faer::Mat::zeros(n, n);
for i in 0..n {
for j in 0..=i {
let v = rng.gen_range(-1.0..1.0);
a[(i, j)] = S::from_real(v);
a[(j, i)] = S::from_real(v);
}
}
for i in 0..n {
a[(i, i)] = a[(i, i)] + S::from_real(n as f64 + 1.0);
}
a
}
#[cfg(not(feature = "complex"))]
fn cg_unpreconditioned(a: &faer::Mat<S>, b: &[S], x0: &[S], max_iter: usize) -> (usize, f64) {
let n = b.len();
let mut x = x0.to_vec();
let mut r = vec![S::zero(); n];
let mut p = vec![S::zero(); n];
let mut ap = vec![S::zero(); n];
mat_vec_mul_inplace(a, &x, &mut r);
for i in 0..n {
r[i] = b[i] - r[i];
}
p.copy_from_slice(&r);
let mut rr = dot(&r, &r);
let mut iters = 0;
while iters < max_iter && rr > 1e-20 {
mat_vec_mul_inplace(a, &p, &mut ap);
let denom = dot(&p, &ap);
if denom.abs() < 1e-30 {
break;
}
let alpha = rr / denom;
for i in 0..n {
x[i] = x[i] + alpha * p[i];
r[i] = r[i] - alpha * ap[i];
}
let rr_new = dot(&r, &r);
if rr_new.sqrt() < 1e-10 {
rr = rr_new;
iters += 1;
break;
}
let beta = rr_new / rr;
for i in 0..n {
p[i] = r[i] + beta * p[i];
}
rr = rr_new;
iters += 1;
}
(iters, rr.sqrt())
}
#[cfg(not(feature = "complex"))]
fn cg_left_preconditioned(
a: &faer::Mat<S>,
pc: &Ilu,
b: &[S],
x0: &[S],
max_iter: usize,
) -> (usize, f64) {
let n = b.len();
let mut x = x0.to_vec();
let mut r = vec![S::zero(); n];
let mut z = vec![S::zero(); n];
let mut p = vec![S::zero(); n];
let mut ap = vec![S::zero(); n];
mat_vec_mul_inplace(a, &x, &mut r);
for i in 0..n {
r[i] = b[i] - r[i];
}
pc.apply(PcSide::Left, &r, &mut z).expect("pc apply");
p.copy_from_slice(&z);
let mut rz = dot(&r, &z);
let mut iters = 0;
while iters < max_iter && rz.abs() > 1e-20 {
mat_vec_mul_inplace(a, &p, &mut ap);
let denom = dot(&p, &ap);
if denom.abs() < 1e-30 {
break;
}
let alpha = rz / denom;
for i in 0..n {
x[i] = x[i] + alpha * p[i];
r[i] = r[i] - alpha * ap[i];
}
let r_norm = dot(&r, &r).sqrt();
if r_norm < 1e-10 {
rz = r_norm;
iters += 1;
break;
}
pc.apply(PcSide::Left, &r, &mut z).expect("pc apply");
let rz_new = dot(&r, &z);
let beta = rz_new / rz;
for i in 0..n {
p[i] = z[i] + beta * p[i];
}
rz = rz_new;
iters += 1;
}
(iters, dot(&r, &r).sqrt())
}
#[cfg(not(feature = "complex"))]
fn dot(x: &[S], y: &[S]) -> f64 {
x.iter().zip(y.iter()).map(|(&a, &b)| a * b).sum()
}
#[cfg(feature = "rayon")]
fn make_tridiag_matrix(n: usize) -> faer::Mat<f64> {
faer::Mat::from_fn(n, n, |i, j| {
if i == j {
4.0
} else if (i as isize - j as isize).abs() == 1 {
-1.0
} else {
0.0
}
})
}
#[test]
fn test_ilu_default_creation() {
let ilu = Ilu::new();
assert_eq!(ilu.config.ilu_type, IluType::ILU0);
}
#[test]
fn test_ilu_builder() {
let ilu = IluBuilder::new()
.ilu_type(IluType::ILUT)
.drop_tolerance(1e-6)
.enable_logging()
.build()
.unwrap();
assert_eq!(ilu.config.ilu_type, IluType::ILUT);
assert_eq!(ilu.config.drop_tolerance, 1e-6);
assert_eq!(ilu.config.logging_level, 1);
}
#[test]
fn test_ilu_config_validation() {
let mut config = IluConfig::default();
config.drop_tolerance = -1.0;
let result = Ilu::new_with_config(config);
assert!(result.is_err());
}
#[test]
fn test_ilu0_simple_matrix() {
let matrix = faer::Mat::from_fn(3, 3, |i, j| {
if i == j {
4.0
} else if (i as i32 - j as i32).abs() == 1 {
-1.0
} else {
0.0
}
});
let mut ilu = Ilu::new();
use crate::preconditioner::legacy::Preconditioner;
let result = ilu.setup(&matrix);
assert!(result.is_ok());
let stats = ilu.get_stats();
assert!(stats.setup_complexity > 0.0);
assert_eq!(stats.num_zero_pivots, 0);
}
#[test]
fn test_enhanced_pivot_handling() {
let matrix = faer::Mat::from_fn(3, 3, |i, j| {
if i == j && i == 1 {
1e-15 } else if i == j {
1.0
} else {
0.0
}
});
let config = IluConfig::default();
let mut ilu = Ilu::new_with_config(config).unwrap();
use crate::preconditioner::legacy::Preconditioner;
let result = ilu.setup(&matrix);
assert!(result.is_ok());
assert!(ilu.pivot_stats().num_floors > 0);
}
#[cfg(not(feature = "complex"))]
#[test]
fn test_ilu_variants() {
let _matrix = faer::Mat::from_fn(3, 3, |i, j| {
if i == j {
4.0
} else if (i as i32 - j as i32).abs() == 1 {
-1.0
} else {
0.0
}
});
let ilu_k = Ilu::create_quick(IluType::ILUK, 1.0).unwrap();
assert_eq!(ilu_k.config.ilu_type, IluType::ILUK);
assert_eq!(ilu_k.config.level_of_fill, 1);
let ilu_t = Ilu::create_quick(IluType::ILUT, 1e-6).unwrap();
assert_eq!(ilu_t.config.ilu_type, IluType::ILUT);
assert_eq!(ilu_t.config.drop_tolerance, 1e-6);
}
#[test]
fn test_triangular_solve_options() {
let matrix = faer::Mat::from_fn(2, 2, |i, j| if i == j { 2.0 } else { 0.0 });
let mut config = IluConfig::default();
config.triangular_solve = TriSolveType::GaussSeidel;
config.lower_jacobi_iters = 2;
config.upper_jacobi_iters = 2;
let mut ilu = Ilu::new_with_config(config).unwrap();
use crate::preconditioner::legacy::Preconditioner;
let result = ilu.setup(&matrix);
assert!(result.is_ok());
}
#[cfg(not(feature = "complex"))]
#[test]
fn test_specialized_factory() {
let config = IluConfig {
ilu_type: IluType::ILUK,
level_of_fill: 2,
..Default::default()
};
let ilu_box = Ilu::create_specialized(config);
assert!(ilu_box.is_ok());
}
#[test]
fn test_parallel_configuration() {
let ilu = IluBuilder::new()
.enable_parallel()
.parallel_chunk_size(128)
.build()
.unwrap();
assert!(ilu.config.enable_parallel_factorization);
assert!(ilu.config.enable_parallel_triangular_solve);
assert_eq!(ilu.config.parallel_chunk_size, 128);
}
#[test]
fn test_workspace_optimization() {
let matrix = faer::Mat::from_fn(3, 3, |i, j| {
if i == j {
4.0
} else if (i as i32 - j as i32).abs() == 1 {
-1.0
} else {
0.0
}
});
let mut ilu = IluBuilder::new().ilu_type(IluType::ILU0).build().unwrap();
use crate::preconditioner::legacy::Preconditioner;
let result = ilu.setup(&matrix);
assert!(result.is_ok());
assert!(ilu.workspace.size > 0);
let x = vec![1.0, 2.0, 3.0];
let mut y = vec![0.0; 3];
use crate::preconditioner::PcSide;
let apply_result = ilu.apply(PcSide::Left, &x, &mut y);
assert!(apply_result.is_ok());
}
#[cfg(feature = "rayon")]
#[test]
fn test_parallel_factorization() {
let matrix = faer::Mat::from_fn(10, 10, |i, j| {
if i == j {
4.0
} else if (i as i32 - j as i32).abs() == 1 {
-1.0
} else {
0.0
}
});
let mut ilu_serial = IluBuilder::new().ilu_type(IluType::ILU0).build().unwrap();
let mut ilu_parallel = IluBuilder::new()
.ilu_type(IluType::ILU0)
.enable_parallel_factorization()
.parallel_chunk_size(2) .build()
.unwrap();
use crate::preconditioner::legacy::Preconditioner;
let serial_result = ilu_serial.setup(&matrix);
assert!(serial_result.is_ok());
let parallel_result = ilu_parallel.setup(&matrix);
assert!(parallel_result.is_ok());
let serial_stats = ilu_serial.get_stats();
let parallel_stats = ilu_parallel.get_stats();
assert!(
parallel_stats.nnz_l <= serial_stats.nnz_l,
"block ILU should not increase L nz count: serial={} parallel={}",
serial_stats.nnz_l,
parallel_stats.nnz_l
);
assert!(
parallel_stats.nnz_u <= serial_stats.nnz_u,
"block ILU should not increase U nz count: serial={} parallel={}",
serial_stats.nnz_u,
parallel_stats.nnz_u
);
}
#[cfg(feature = "rayon")]
#[test]
fn block_ilu_single_block_matches_serial() {
let n = 6;
let matrix = make_tridiag_matrix(n);
let mut ilu_serial = IluBuilder::new().ilu_type(IluType::ILU0).build().unwrap();
ilu_serial.setup(&matrix).unwrap();
let mut config = IluConfig::default();
config.enable_parallel_factorization = true;
config.parallel_chunk_size = n;
let mut ilu_block = Ilu::new_with_config(config).unwrap();
ilu_block.setup(&matrix).unwrap();
let serial_stats = ilu_serial.get_stats();
let block_stats = ilu_block.get_stats();
assert_eq!(
block_stats.nnz_l, serial_stats.nnz_l,
"single-block parallel should match serial L pattern"
);
assert_eq!(
block_stats.nnz_u, serial_stats.nnz_u,
"single-block parallel should match serial U pattern"
);
let rhs: Vec<f64> = (0..n).map(|i| (i + 1) as f64).collect();
let mut y_serial = vec![0.0; n];
let mut y_block = vec![0.0; n];
ilu_serial.apply(PcSide::Left, &rhs, &mut y_serial).unwrap();
ilu_block.apply(PcSide::Left, &rhs, &mut y_block).unwrap();
for (&a, &b) in y_serial.iter().zip(y_block.iter()) {
assert!((a - b).abs() < 1e-12, "serial {} vs block {}", a, b);
}
}
#[cfg(feature = "rayon")]
#[test]
fn block_ilu_chunk_size_one_is_diag() {
let n = 5;
let matrix = make_tridiag_matrix(n);
let mut config = IluConfig::default();
config.enable_parallel_factorization = true;
config.parallel_chunk_size = 1;
let mut ilu_block = Ilu::new_with_config(config).unwrap();
ilu_block.setup(&matrix).unwrap();
let stats = ilu_block.get_stats();
assert_eq!(stats.nnz_l, n);
assert_eq!(stats.nnz_u, n);
let rhs = vec![1.0; n];
let mut sol = vec![0.0; n];
ilu_block.apply(PcSide::Left, &rhs, &mut sol).unwrap();
for &val in sol.iter() {
assert!(!val.is_nan());
}
}
#[cfg(feature = "rayon")]
#[test]
fn triangular_parallel_matches_sequential() {
let matrix = make_tridiag_matrix(5);
let mut cfg_seq = IluConfig::default();
cfg_seq.triangular_solve = TriSolveType::Exact;
cfg_seq.enable_parallel_triangular_solve = false;
let mut cfg_par = IluConfig::default();
cfg_par.triangular_solve = TriSolveType::Exact;
cfg_par.enable_parallel_triangular_solve = true;
cfg_par.parallel_chunk_size = 1;
let mut ilu_seq = Ilu::new_with_config(cfg_seq).unwrap();
ilu_seq.setup(&matrix).unwrap();
let mut ilu_par = Ilu::new_with_config(cfg_par).unwrap();
ilu_par.setup(&matrix).unwrap();
let n = matrix.nrows();
let x: Vec<f64> = (0..n).map(|i| (i + 1) as f64).collect();
let mut y_seq = vec![0.0; n];
let mut y_par = vec![0.0; n];
ilu_seq.apply(PcSide::Left, &x, &mut y_seq).unwrap();
ilu_par.apply(PcSide::Left, &x, &mut y_par).unwrap();
for (&seq_val, &par_val) in y_seq.iter().zip(y_par.iter()) {
assert!(
(seq_val - par_val).abs() < 1e-12,
"seq={seq_val}, par={par_val} for n={n}"
);
}
}
#[cfg(feature = "rayon")]
#[test]
fn triangular_parallel_matches_sequential_large() {
let n = 64;
let matrix = make_tridiag_matrix(n);
let mut cfg_seq = IluConfig::default();
cfg_seq.triangular_solve = TriSolveType::Exact;
cfg_seq.enable_parallel_triangular_solve = false;
let mut cfg_par = IluConfig::default();
cfg_par.triangular_solve = TriSolveType::Exact;
cfg_par.enable_parallel_triangular_solve = true;
cfg_par.parallel_chunk_size = 16;
let mut ilu_seq = Ilu::new_with_config(cfg_seq).unwrap();
ilu_seq.setup(&matrix).unwrap();
let mut ilu_par = Ilu::new_with_config(cfg_par).unwrap();
ilu_par.setup(&matrix).unwrap();
let x: Vec<f64> = (0..n).map(|i| (i + 1) as f64).collect();
let mut y_seq = vec![0.0; n];
let mut y_par = vec![0.0; n];
ilu_seq.apply(PcSide::Left, &x, &mut y_seq).unwrap();
ilu_par.apply(PcSide::Left, &x, &mut y_par).unwrap();
for (&seq_val, &par_val) in y_seq.iter().zip(y_par.iter()) {
assert!(
(seq_val - par_val).abs() < 1e-10,
"seq={seq_val}, par={par_val} for n={n}"
);
}
}
#[test]
fn test_distributed_configuration() {
let ilu = IluBuilder::new().enable_distributed().build().unwrap();
assert!(ilu.config.enable_distributed);
}
#[test]
#[cfg(not(feature = "complex"))]
fn ilu0_real_factorization_solves_spd() {
let matrix = make_spd_3x3();
let x_true = vec![S::from_real(1.0), S::from_real(2.0), S::from_real(-1.0)];
let b = mat_vec_mul(&matrix, &x_true);
let mut ilu = Ilu::new();
ilu.setup(&matrix).expect("ILU(0) setup");
let mut x = vec![S::zero(); b.len()];
ilu.apply(PcSide::Left, &b, &mut x).expect("ILU(0) apply");
let r: Vec<S> = mat_vec_mul(&matrix, &x)
.into_iter()
.zip(b.iter())
.map(|(ax, &bi)| ax - bi)
.collect();
let res_norm = par_sum_abs2_local(&r).sqrt();
assert!(
res_norm.real() < 1e-10,
"residual too large: {:?}",
res_norm
);
}
#[cfg(not(feature = "complex"))]
#[test]
fn ilu0_improves_cg_for_random_spd() {
let n = 20;
let a = random_spd(n, 12345);
let b = vec![S::from_real(1.0); n];
let x0 = vec![S::zero(); n];
let (iters_unpre, res_unpre) = cg_unpreconditioned(&a, &b, &x0, 200);
let mut ilu = Ilu::new();
ilu.setup(&a).expect("ilu0 setup");
let (iters_pc, res_pc) = cg_left_preconditioned(&a, &ilu, &b, &x0, 200);
assert!(
res_pc < res_unpre * 0.5 || iters_pc < iters_unpre,
"preconditioned res={res_pc} iters={iters_pc}, baseline res={res_unpre} iters={iters_unpre}"
);
}
#[test]
fn parilu_refines_residual() {
let matrix = faer::Mat::<f64>::from_fn(3, 3, |i, j| match (i, j) {
(0, 0) => 4.0,
(0, 1) | (1, 0) => 1.0,
(1, 1) => 3.0,
(1, 2) | (2, 1) => 1.0,
(2, 2) => 2.0,
_ => 0.0,
});
let rhs = vec![1.0f64; 3];
let mut baseline = Ilu::new();
baseline.setup(&matrix).expect("baseline ILU");
let mut y_base = vec![0.0; 3];
baseline
.apply(PcSide::Left, &rhs, &mut y_base)
.expect("apply baseline");
let res_base = {
let mut sum = 0.0;
for i in 0..matrix.nrows() {
let mut ax = 0.0;
for j in 0..matrix.ncols() {
ax += matrix[(i, j)] * y_base[j];
}
let r = ax - rhs[i];
sum += r * r;
}
sum.sqrt()
};
let mut cfg = IluConfig::default();
cfg.parilu_enabled = true;
cfg.parilu_max_iters = 5;
cfg.parilu_min_iters = 1;
cfg.parilu_tol = 1e-8;
let mut parilu = Ilu::new_with_config(cfg).unwrap();
parilu.setup(&matrix).expect("parilu ILU");
let mut y_parilu = vec![0.0; 3];
parilu
.apply(PcSide::Left, &rhs, &mut y_parilu)
.expect("apply parilu");
let res_parilu = {
let mut sum = 0.0;
for i in 0..matrix.nrows() {
let mut ax = 0.0;
for j in 0..matrix.ncols() {
ax += matrix[(i, j)] * y_parilu[j];
}
let r = ax - rhs[i];
sum += r * r;
}
sum.sqrt()
};
assert!(
res_parilu <= res_base + 1e-10,
"ParILU should not worsen residual (baseline {res_base}, parilu {res_parilu})"
);
let hist = parilu.parilu_history().unwrap();
assert!(!hist.is_empty());
}
#[test]
fn ilu_rejects_nan_inf_when_ieee_checks_enabled() {
let mut a = faer::Mat::zeros(3, 3);
a[(0, 0)] = 1.0;
a[(1, 1)] = f64::NAN;
a[(2, 2)] = f64::INFINITY;
let mut cfg = IluConfig::default();
cfg.ieee_checks = true;
let mut ilu = Ilu::new_with_config(cfg).unwrap();
let err = ilu.setup(&a).unwrap_err();
match err {
KError::InvalidInput(msg) => {
assert!(
msg.contains("NaN") || msg.contains("Infinity"),
"unexpected message: {msg}"
)
}
other => panic!("expected InvalidInput, got {other:?}"),
}
}
#[cfg(feature = "rayon")]
#[test]
fn parilu_parallel_matches_serial_small() {
let n = 8;
let matrix = make_tridiag_matrix(n);
let mut cfg_serial = IluConfig::default();
cfg_serial.parilu_enabled = true;
cfg_serial.parilu_max_iters = 3;
cfg_serial.parilu_tol = 0.0;
let mut cfg_par = cfg_serial.clone();
cfg_par.enable_parallel_factorization = true;
let mut ilu_serial = Ilu::new_with_config(cfg_serial).unwrap();
ilu_serial.setup(&matrix).unwrap();
let mut ilu_par = Ilu::new_with_config(cfg_par).unwrap();
ilu_par.setup(&matrix).unwrap();
let h_serial = ilu_serial.parilu_history().unwrap();
let h_par = ilu_par.parilu_history().unwrap();
assert_eq!(h_serial.len(), h_par.len());
let last_serial = h_serial.last().unwrap().residual;
let last_par = h_par.last().unwrap().residual;
assert!(
(last_serial - last_par).abs() < 1e-6,
"serial {last_serial} vs parallel {last_par}"
);
}
#[cfg(feature = "rayon")]
#[test]
fn parallel_triangular_stress() {
let n = 1000;
let matrix = make_tridiag_matrix(n);
let mut ilu = IluBuilder::new()
.ilu_type(IluType::ILU0)
.enable_parallel_triangular_solve()
.build()
.unwrap();
ilu.setup(&matrix).unwrap();
let rhs = Arc::new(vec![1.0; n]);
let ilu = Arc::new(ilu);
(0..100).into_par_iter().for_each(|_| {
let mut y = vec![0.0; n];
ilu.apply(PcSide::Left, &*rhs, &mut y).unwrap();
assert!(y.iter().all(|v| v.is_finite()));
});
}
}
#[cfg(all(test, feature = "complex"))]
mod tests_complex_bridge {
use super::Ilu;
use crate::algebra::bridge::BridgeScratch;
use crate::algebra::scalar::KrystScalar;
use crate::algebra::scalar::S as GlobalScalar;
use crate::ops::kpc::KPreconditioner;
use crate::preconditioner::PcSide;
use crate::preconditioner::legacy::Preconditioner as LegacyPc;
use faer::Mat;
#[test]
fn apply_s_matches_real_path_for_ilu() {
let matrix = Mat::from_fn(2, 2, |i, j| match (i, j) {
(0, 0) => 4.0,
(0, 1) | (1, 0) => 1.0,
(1, 1) => 3.0,
_ => 0.0,
});
let mut ilu = Ilu::new();
LegacyPc::setup(&mut ilu, &matrix).expect("ilu setup");
let rhs_real = vec![1.0f64, 2.0];
let mut out_real = vec![0.0; rhs_real.len()];
LegacyPc::apply(&ilu, PcSide::Left, &rhs_real, &mut out_real).expect("ilu real apply");
let rhs_s: Vec<GlobalScalar> = rhs_real
.iter()
.copied()
.map(GlobalScalar::from_real)
.collect();
let mut out_s = vec![GlobalScalar::zero(); rhs_s.len()];
let mut scratch = BridgeScratch::default();
ilu.apply_s(PcSide::Left, &rhs_s, &mut out_s, &mut scratch)
.expect("ilu apply_s");
for (ys, &yr) in out_s.iter().zip(out_real.iter()) {
assert!((ys.real() - yr).abs() < 1e-12);
}
}
}
#[cfg(test)]
pub mod benchmarks {
use super::*;
use std::time::Instant;
#[derive(Debug, Default)]
pub struct AllocationStats {
pub total_allocations: usize,
pub total_bytes: usize,
pub peak_memory: usize,
pub solve_allocations: usize,
}
pub fn benchmark_ilu_factorization(
matrix_size: usize,
nnz_per_row: usize,
) -> (f64, AllocationStats) {
let matrix = create_sparse_test_matrix(matrix_size, nnz_per_row);
let start = Instant::now();
let mut ilu = IluBuilder::new()
.ilu_type(IluType::ILU0)
.enable_parallel_factorization()
.build()
.unwrap();
let setup_result = ilu.setup(&matrix);
let factorization_time = start.elapsed().as_secs_f64();
assert!(setup_result.is_ok());
let stats = AllocationStats {
total_allocations: 1, total_bytes: matrix_size * matrix_size * 8, peak_memory: matrix_size * matrix_size * 8,
solve_allocations: 0,
};
(factorization_time, stats)
}
pub fn benchmark_ilu_solve_phase(
matrix_size: usize,
num_solves: usize,
) -> (f64, AllocationStats) {
let matrix = create_sparse_test_matrix(matrix_size, 3);
let mut ilu = IluBuilder::new().ilu_type(IluType::ILU0).build().unwrap();
ilu.setup(&matrix).unwrap();
let rhs = vec![1.0; matrix_size];
let mut solution = vec![0.0; matrix_size];
ilu.apply(PcSide::Left, &rhs, &mut solution).unwrap();
let start = Instant::now();
for _ in 0..num_solves {
ilu.apply(PcSide::Left, &rhs, &mut solution).unwrap();
}
let solve_time = start.elapsed().as_secs_f64();
let stats = AllocationStats {
total_allocations: 0, total_bytes: 0,
peak_memory: matrix_size * 16, solve_allocations: 0, };
(solve_time, stats)
}
fn create_sparse_test_matrix(size: usize, nnz_per_row: usize) -> faer::Mat<f64> {
let mut matrix = faer::Mat::zeros(size, size);
for i in 0..size {
matrix[(i, i)] = 4.0;
let mut count = 1; for offset in 1..=(nnz_per_row / 2) {
if i >= offset && count < nnz_per_row {
matrix[(i, i - offset)] = -1.0;
count += 1;
}
if i + offset < size && count < nnz_per_row {
matrix[(i, i + offset)] = -1.0;
count += 1;
}
}
}
matrix
}
pub fn benchmark_storage_comparison(matrix_size: usize) -> (f64, f64, usize, usize) {
let matrix = create_sparse_test_matrix(matrix_size, 5);
let start = Instant::now();
let mut ilu_dense = IluBuilder::new().ilu_type(IluType::ILU0).build().unwrap();
ilu_dense.setup(&matrix).unwrap();
let dense_time = start.elapsed().as_secs_f64();
let dense_memory = matrix_size * matrix_size * 8 * 2; let sparse_memory = ilu_dense.nnz_l * 8 + ilu_dense.nnz_u * 8;
let sparse_time = dense_time;
(dense_time, sparse_time, dense_memory, sparse_memory)
}
#[test]
fn test_benchmark_small_matrix() {
let (factorization_time, stats) = benchmark_ilu_factorization(100, 5);
println!("Factorization time: {:.6}s", factorization_time);
println!("Memory stats: {:?}", stats);
assert!(factorization_time > 0.0);
}
#[test]
fn test_benchmark_solve_phase() {
let (solve_time, stats) = benchmark_ilu_solve_phase(50, 100);
println!("Solve time for 100 solves: {:.6}s", solve_time);
println!("Solve allocation stats: {:?}", stats);
assert!(solve_time > 0.0);
assert_eq!(stats.solve_allocations, 0); }
#[test]
fn test_storage_comparison() {
let (dense_time, sparse_time, dense_mem, sparse_mem) = benchmark_storage_comparison(50);
println!("Dense: {:.6}s, {}KB", dense_time, dense_mem / 1024);
println!("Sparse: {:.6}s, {}KB", sparse_time, sparse_mem / 1024);
assert!(sparse_mem < dense_mem); }
}