faer/linalg/gevd/mod.rs

//! low level implementation of the generalized eigenvalue decomposition of a square matrix pair.
//!
//! the generalized eigenvalue decomposition of a matrix pair $(A, B)$, where both matrices have
//! shape $(n, n)$, is a decomposition into components $S$, $U$ such that:
//!
//! - $U$ has shape $(n, n)$ and is invertible
//! - $S$ has shape $(n, n)$ and is a diagonal matrix
//! - and finally:
//!
//! $$A = B U S U^{-1}$$
//!
//! if $A$ is self-adjoint and $B$ is positive definite, then $U$ can be chosen to be
//! $B$-orthonormal ($U^H B U = I$, so that $U^{-1} = U^H B$), and $S$ is real valued.
//!
//! the left and/or right generalized eigenvectors can be computed independently, and each
//! generalized eigenvalue (diagonal entry of $S$) is represented as a ratio $\alpha / \beta$ to
//! improve numerical stability, in particular when $B$ is singular or nearly singular.
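//!
//! a minimal usage sketch (mirroring the tests at the bottom of this module, not compiled as a
//! doctest), computing the eigenvalues and right eigenvectors of a complex pair; `A`, `B` and `n`
//! are assumed to be provided by the caller:
//!
//! ```ignore
//! use dyn_stack::{MemBuffer, MemStack};
//!
//! let mut s = A.to_owned();
//! let mut t = B.to_owned();
//! let mut alpha = Diag::<c64>::zeros(n);
//! let mut beta = Diag::<c64>::zeros(n);
//! let mut u_right = Mat::<c64>::identity(n, n);
//!
//! gevd_cplx(
//!     s.as_mut(),
//!     t.as_mut(),
//!     alpha.rb_mut(),
//!     beta.rb_mut(),
//!     None,
//!     Some(u_right.as_mut()),
//!     Par::Seq,
//!     MemStack::new(&mut MemBuffer::new(gevd_scratch::<c64>(
//!         n,
//!         ComputeEigenvectors::No,
//!         ComputeEigenvectors::Yes,
//!         Par::Seq,
//!         default(),
//!     ))),
//!     default(),
//! )
//! .unwrap();
//!
//! // for the i-th column u of u_right, beta[i] * A * u ≈ alpha[i] * B * u,
//! // i.e. the i-th generalized eigenvalue is alpha[i] / beta[i]
//! ```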
18
19use crate::internal_prelude::*;
20use complex::Complex;
21use equator::assert;
22
23pub use linalg::evd::ComputeEigenvectors;
24
25/// eigendecomposition error
26#[derive(Copy, Clone, Debug, PartialEq, Eq)]
27pub enum GevdError {
28	/// reached max iterations
29	NoConvergence,
30}
31
/// self-adjoint eigendecomposition error
33#[derive(Copy, Clone, Debug, PartialEq, Eq)]
34pub enum SelfAdjointGevdError {
35	/// reached max iterations
36	NoConvergence,
37	/// B matrix not positive definite
38	NonPositiveDefinite,
39}
40
41/// generalized schur decomposition parameters
42#[derive(Clone, Copy, Debug)]
43pub struct GeneralizedSchurParams {
44	/// An estimate of the relative cost of flops within the near-the-diagonal shift chase compared
45	/// to flops within the matmul calls of a QZ sweep.
46	pub relative_cost_estimate_of_shift_chase_to_matmul: fn(matrix_dimension: usize, active_block_dimension: usize) -> usize,
47	/// Function that returns the number of shifts to use for a given matrix size
48	pub recommended_shift_count: fn(matrix_dimension: usize, active_block_dimension: usize) -> usize,
49	/// Function that returns the deflation window to use for a given matrix size
50	pub recommended_deflation_window: fn(matrix_dimension: usize, active_block_dimension: usize) -> usize,
51	/// Threshold to switch between blocked and unblocked code
52	pub blocking_threshold: usize,
	/// Percentage threshold of the aggressive-early-deflation window that must deflate in order
	/// to skip the following QZ sweep
55	pub nibble_threshold: usize,
56
57	#[doc(hidden)]
58	pub non_exhaustive: NonExhaustive,
59}
60
61/// schur to eigendecomposition conversion parameters
62#[derive(Clone, Copy, Debug)]
63pub struct GevdFromSchurParams {
64	#[doc(hidden)]
65	pub non_exhaustive: NonExhaustive,
66}
67
68/// eigendecomposition tuning parameters
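///
/// a sketch of overriding a single field while keeping the remaining defaults (this relies on
/// rust's struct-update syntax together with the `Auto` implementations below; the value shown is
/// purely illustrative):
///
/// ```ignore
/// let params = GevdParams {
///     schur: GeneralizedSchurParams {
///         nibble_threshold: 20,
///         ..auto!(f64)
///     },
///     ..auto!(f64)
/// };
/// ```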
69#[derive(Clone, Copy, Debug)]
70pub struct GevdParams {
71	/// hessenberg parameters
72	pub hessenberg: gen_hessenberg::GeneralizedHessenbergParams,
73	/// schur from hessenberg conversion parameters
74	pub schur: GeneralizedSchurParams,
75	/// eigendecomposition from schur conversion parameters
76	pub evd_from_schur: GevdFromSchurParams,
77
78	#[doc(hidden)]
79	pub non_exhaustive: NonExhaustive,
80}
81
82fn default_relative_cost_estimate_of_shift_chase_to_matmul(n: usize, nh: usize) -> usize {
83	_ = (n, nh);
84	10
85}
86
87impl<T: ComplexField> Auto<T> for GeneralizedSchurParams {
88	fn auto() -> Self {
89		let schur: linalg::evd::SchurParams = auto!(T);
90
91		Self {
92			relative_cost_estimate_of_shift_chase_to_matmul: default_relative_cost_estimate_of_shift_chase_to_matmul,
93			recommended_shift_count: schur.recommended_shift_count,
94			recommended_deflation_window: schur.recommended_deflation_window,
95			blocking_threshold: schur.blocking_threshold,
96			nibble_threshold: schur.nibble_threshold,
97			non_exhaustive: NonExhaustive(()),
98		}
99	}
100}
101
102impl<T: ComplexField> Auto<T> for GevdParams {
103	fn auto() -> Self {
104		Self {
105			hessenberg: auto!(T),
106			schur: auto!(T),
107			evd_from_schur: auto!(T),
108			non_exhaustive: NonExhaustive(()),
109		}
110	}
111}
112
113impl<T: ComplexField> Auto<T> for GevdFromSchurParams {
114	fn auto() -> Self {
115		Self {
116			non_exhaustive: NonExhaustive(()),
117		}
118	}
119}
120
/// generalized hessenberg decomposition
122pub mod gen_hessenberg;
123
124/// $QZ$ decomposition for complex matrices
125pub mod qz_cplx;
126/// $QZ$ decomposition for real matrices
127pub mod qz_real;
128
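// shared driver for the real and complex generalized eigendecompositions. it validates the inputs,
// reduces B to triangular form with a QR decomposition, reduces (A, B) to generalized hessenberg
// form, runs the QZ iteration provided through `hessenberg_to_qz`, and finally recovers the
// eigenvectors through `qz_to_gevd`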
129#[track_caller]
130#[math]
131fn compute_gevd_generic<T: ComplexField>(
132	A: MatMut<'_, T>,
133	B: MatMut<'_, T>,
134	alpha_re: ColMut<'_, T>,
135	alpha_im: ColMut<'_, T>,
136	beta: ColMut<'_, T>,
137	u_left: Option<MatMut<'_, T>>,
138	u_right: Option<MatMut<'_, T>>,
139	par: Par,
140	stack: &mut MemStack,
141	params: GevdParams,
142
143	hessenberg_to_qz: fn(
144		A: MatMut<'_, T>,
145		B: MatMut<'_, T>,
146		Q: Option<MatMut<'_, T>>,
147		Z: Option<MatMut<'_, T>>,
148		alphar: ColMut<'_, T>,
149		alphai: ColMut<'_, T>,
150		beta: ColMut<'_, T>,
151		eigenvectors: ComputeEigenvectors,
152		par: Par,
153		params: GeneralizedSchurParams,
154		stack: &mut MemStack,
155	),
156	qz_to_gevd: fn(A: MatRef<'_, T>, B: MatRef<'_, T>, Q: Option<MatMut<'_, T>>, Z: Option<MatMut<'_, T>>, par: Par, stack: &mut MemStack),
157) -> Result<(), GevdError> {
158	let n = A.nrows();
159	assert!(all(
160		A.nrows() == n,
161		A.ncols() == n,
162		B.nrows() == n,
163		B.ncols() == n,
164		alpha_re.nrows() == n,
165		if const { T::IS_REAL } { alpha_im.nrows() } else { n } == n,
166		beta.nrows() == n,
167	));
168	if let Some(u_left) = u_left.rb() {
169		assert!(all(u_left.nrows() == n, u_left.ncols() == n));
170	}
171	if let Some(u_right) = u_right.rb() {
172		assert!(all(u_right.nrows() == n, u_right.ncols() == n));
173	}
174
175	if n == 0 {
176		return Ok(());
177	}
178
179	#[cfg(feature = "perf-warn")]
180	{
181		for u in [u_left.rb(), u_right.rb()] {
182			if let Some(matrix) = u.rb() {
183				if matrix.row_stride().unsigned_abs() != 1 && crate::__perf_warn!(GEVD_WARN) {
184					if matrix.col_stride().unsigned_abs() == 1 {
185						log::warn!(target: "faer_perf", "GEVD prefers column-major eigenvector matrix. Found row-major matrix.");
186					} else {
187						log::warn!(target: "faer_perf", "GEVD prefers column-major eigenvector matrix. Found matrix with generic strides.");
188					}
189				}
190			}
191		}
192
193		for M in [A.rb(), B.rb()] {
194			if M.row_stride().unsigned_abs() != 1 && crate::__perf_warn!(GEVD_WARN) {
195				if M.col_stride().unsigned_abs() == 1 {
196					log::warn!(target: "faer_perf", "GEVD prefers column-major input matrix. Found row-major matrix.");
197				} else {
198					log::warn!(target: "faer_perf", "GEVD prefers column-major input matrix. Found matrix with generic strides.");
199				}
200			}
201		}
202	}
203
204	if !(A.is_all_finite() && B.is_all_finite()) {
205		{ alpha_re }.fill(nan());
206		{ alpha_im }.fill(nan());
207		{ beta }.fill(nan());
208		for u in [u_left, u_right] {
209			if let Some(mut u) = u {
210				u.fill(nan());
211			}
212		}
213		return Err(GevdError::NoConvergence);
214	}
215	let need_qz = u_left.is_some() || u_right.is_some();
216
217	let mut A = A;
218	let mut B = B;
219	let mut u_left = u_left;
220	let mut u_right = u_right;
221	let mut alpha_re = alpha_re;
222	let mut alpha_im = alpha_im;
223	let mut beta = beta;
224
225	for u in [u_left.rb_mut(), u_right.rb_mut()] {
226		if let Some(u) = u {
227			u.diagonal_mut().column_vector_mut().fill(one());
228		}
229	}
230
231	{
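		// reduce B to upper triangular form with a QR decomposition: B <- R, A <- Q^H A,
		// and accumulate Q into the left eigenvector basis when it is requested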
232		let block_size = linalg::qr::no_pivoting::factor::recommended_block_size::<T>(n, n);
233		let (mut householder, stack) = unsafe { linalg::temp_mat_uninit::<T, _, _>(block_size, n, stack) };
234		let mut householder = householder.as_mat_mut();
235		linalg::qr::no_pivoting::factor::qr_in_place(B.rb_mut(), householder.rb_mut(), par, stack, default());
236		linalg::householder::apply_block_householder_sequence_transpose_on_the_left_in_place_with_conj(
237			B.rb(),
238			householder.rb(),
239			Conj::Yes,
240			A.rb_mut(),
241			par,
242			stack,
243		);
244
245		if let Some(u_left) = u_left.rb_mut() {
246			linalg::householder::apply_block_householder_sequence_on_the_left_in_place_with_conj(
247				B.rb(),
248				householder.rb(),
249				Conj::No,
250				u_left,
251				par,
252				stack,
253			);
254		}
255
256		zip!(B.rb_mut()).for_each_triangular_lower(linalg::zip::Diag::Skip, |unzip!(x)| *x = zero());
257	}
258
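	// reduce (A, B) to generalized hessenberg form: A becomes upper hessenberg while B stays
	// upper triangular, with the transformations accumulated into u_left/u_right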
259	gen_hessenberg::generalized_hessenberg(A.rb_mut(), B.rb_mut(), u_left.rb_mut(), u_right.rb_mut(), par, stack, params.hessenberg);
260
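	// QZ iteration: reduce the hessenberg/triangular pair to generalized schur form and
	// extract the (alpha, beta) eigenvalue pairs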
261	hessenberg_to_qz(
262		A.rb_mut(),
263		B.rb_mut(),
264		u_left.rb_mut(),
265		u_right.rb_mut(),
266		alpha_re.rb_mut(),
267		alpha_im.rb_mut(),
268		beta.rb_mut(),
269		if need_qz { ComputeEigenvectors::Yes } else { ComputeEigenvectors::No },
270		par,
271		params.schur,
272		stack,
273	);
274
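	// back-substitute for the eigenvectors of the triangular pencil and transform them back
	// with the accumulated schur bases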
275	qz_to_gevd(A.rb(), B.rb(), u_left.rb_mut(), u_right.rb_mut(), par, stack);
276	Ok(())
277}
278
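// solves the scalar equation (ca * A - w * d0) x = b in place (b is overwritten with x), scaling b
// by 1/smin beforehand when the shifted coefficient is smaller than smin in absolute value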
279#[math]
280fn solve_shifted_1x1<T: ComplexField>(smin: T::Real, ca: T::Real, A: T, d0: T, B: &mut T, w: T) {
281	let safmin = min_positive::<T::Real>();
282	let smlnum = safmin + safmin;
283	let smin = max(smin, smlnum);
284
285	let CR = mul_real(A, ca) - w * d0;
286
287	let cmax = abs(CR);
288	if cmax < smin {
289		// use smin * I
290		let smin_inv = recip(smin);
291		*B = mul_real(*B, smin_inv);
292	}
293
	// w may be complex here; divide by the scalar coefficient
295	let C = recip(CR);
296	*B = *B * C;
297}
298
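// real-arithmetic version of the 1x1 solve: computes x = b / (ca * A[(0, 0)] - (wr + i*wi) * d0)
// in place, with b (and x) stored as a real column and, when nw == 2, an imaginary column; b is
// pre-scaled by 1/smin when the shifted coefficient is too small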
299#[math]
300fn solve_complex_shifted_1x1<T: RealField>(smin: T, ca: T, A: MatRef<'_, T>, d0: T, mut B: MatMut<'_, T>, wr: T, wi: T) {
301	let nw = B.ncols();
302	let safmin = min_positive::<T>();
303	let smlnum = safmin + safmin;
304	let smin = max(smin, smlnum);
305
	// Compute  C = ca A - w D  (real part CR and imaginary part CI)
307	let CR = ca * A[(0, 0)] - wr * d0;
308	let CI = -wi * d0;
309
310	let cmax = max(abs(CR), abs(CI));
311	if cmax < smin {
312		// use smin * I
313		let smin_inv = recip(smin);
314		zip!(B.rb_mut()).for_each(|unzip!(x)| *x = *x * smin_inv);
315	}
316
317	if nw == 1 {
318		// w is real
319		let C = recip(CR);
320		zip!(B.rb_mut()).for_each(|unzip!(x)| *x = *x * C);
321	} else {
322		// w is complex
323		let (Br, Bi) = B.two_cols_mut(0, 1);
324		let C = recip(Complex { re: CR, im: CI });
325		zip!(Br, Bi).for_each(|unzip!(re, im)| {
326			(*re, *im) = (*re * C.re - *im * C.im, *re * C.im + *im * C.re);
327		});
328	}
329}
330
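// 2x2 analogue of the solve above: forms C = ca * A - (wr + i*wi) * diag(d0, d1), factors it with
// a fully pivoted LU decomposition, and solves C x = b in place, with complex right-hand sides
// stored as [real | imaginary] column pairs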
331#[math]
332fn solve_complex_shifted_2x2<T: RealField>(smin: T, ca: T, A: MatRef<'_, T>, d0: T, d1: T, mut B: MatMut<'_, T>, wr: T, wi: T, stack: &mut MemStack) {
333	let nw = B.ncols();
334	let zero = zero::<T>;
335	let safmin = min_positive::<T>();
336	let smlnum = safmin + safmin;
337	let smin = max(smin, smlnum);
338
	// Compute  C = ca A - w D  (real, or complex when the shift has a nonzero imaginary part)
340
341	if nw == 1 {
342		// w is real
343
344		let (mut C, stack) = unsafe { linalg::temp_mat_uninit::<T, _, _>(2, 2, stack) };
345		let mut C = C.as_mat_mut();
346		let mut row_perm_fwd = [0usize; 2];
347		let mut row_perm_inv = [0usize; 2];
348		let mut col_perm_fwd = [0usize; 2];
349		let mut col_perm_inv = [0usize; 2];
350
351		C[(0, 0)] = ca * A[(0, 0)] - wr * d0;
352		C[(1, 0)] = ca * A[(1, 0)];
353		C[(0, 1)] = ca * A[(0, 1)];
354		C[(1, 1)] = ca * A[(1, 1)] - wr * d1;
355
356		let cmax = C.norm_max();
357		if cmax < smin {
358			// use smin * I
359			let smin_inv = recip(smin);
360			zip!(B.rb_mut()).for_each(|unzip!(x)| *x = *x * smin_inv);
361		}
362		let (_, row_perm, col_perm) = linalg::lu::full_pivoting::factor::lu_in_place(
363			C.rb_mut(),
364			&mut row_perm_fwd,
365			&mut row_perm_inv,
366			&mut col_perm_fwd,
367			&mut col_perm_inv,
368			Par::Seq,
369			stack,
370			Default::default(),
371		);
372		linalg::lu::full_pivoting::solve::solve_in_place(C.rb(), C.rb(), row_perm, col_perm, B.rb_mut(), Par::Seq, stack);
373	} else {
374		// w is complex
375
376		let (mut C, stack) = unsafe { linalg::temp_mat_uninit::<Complex<T>, _, _>(2, 2, stack) };
377		let mut C = C.as_mat_mut();
378		let mut row_perm_fwd = [0usize; 2];
379		let mut row_perm_inv = [0usize; 2];
380		let mut col_perm_fwd = [0usize; 2];
381		let mut col_perm_inv = [0usize; 2];
382
383		C[(0, 0)] = Complex {
384			re: ca * A[(0, 0)] - wr * d0,
385			im: -wi * d0,
386		};
387
388		C[(1, 0)] = Complex {
389			re: ca * A[(1, 0)],
390			im: zero(),
391		};
392
393		C[(0, 1)] = Complex {
394			re: ca * A[(0, 1)],
395			im: zero(),
396		};
397
398		C[(1, 1)] = Complex {
399			re: ca * A[(1, 1)] - wr * d1,
400			im: -wi * d1,
401		};
402
403		let cmax = C.norm_max();
404		if cmax < smin {
405			// use smin * I
406			let smin_inv = recip(smin);
407			zip!(B.rb_mut()).for_each(|unzip!(x)| *x = *x * smin_inv);
408		}
409
410		let (_, row_perm, col_perm) = linalg::lu::full_pivoting::factor::lu_in_place(
411			C.rb_mut(),
412			&mut row_perm_fwd,
413			&mut row_perm_inv,
414			&mut col_perm_fwd,
415			&mut col_perm_inv,
416			Par::Seq,
417			stack,
418			Default::default(),
419		);
420
421		let n = B.nrows();
422		let (Br, Bi) = B.two_cols_mut(0, 1);
423		let (mut B, stack) = unsafe { linalg::temp_mat_uninit::<Complex<T>, _, _>(n, 1, stack) };
424		let mut B = B.as_mat_mut().col_mut(0);
425		zip!(&mut B, &Br, &Bi).for_each(|unzip!(z, re, im)| {
426			*z = Complex {
427				re: copy(*re),
428				im: copy(*im),
429			}
430		});
431		linalg::lu::full_pivoting::solve::solve_in_place(C.rb(), C.rb(), row_perm, col_perm, B.rb_mut().as_mat_mut(), Par::Seq, stack);
432
433		zip!(Br, Bi, &B).for_each(|unzip!(re, im, z)| (*re, *im) = (copy(z.re), copy(z.im)));
434	}
435}
436
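// computes the left (Q) and/or right (Z) generalized eigenvectors of the real quasi-triangular
// pencil produced by the QZ iteration, by forward/backward substitution over the 1x1 and 2x2
// diagonal blocks (closely following the approach of LAPACK's dtgevc), then maps them back through
// the accumulated Q/Z factors and normalizes each eigenvector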
437#[math]
438fn qz_to_gevd_real<T: RealField>(
439	A: MatRef<'_, T>,
440	B: MatRef<'_, T>,
441	Q: Option<MatMut<'_, T>>,
442	Z: Option<MatMut<'_, T>>,
443	par: Par,
444	stack: &mut MemStack,
445) {
446	let n = A.nrows();
447	if n == 0 {
448		return;
449	}
450
451	let one = one::<T>;
452	let zero = zero::<T>;
453
454	let ulp = eps::<T>();
455	let safmin = min_positive::<T>();
456	let smallnum = safmin * from_f64(n as f64);
457	let small = smallnum * recip(ulp);
458	let bignum = recip(smallnum);
459	let big = recip(small);
460
461	let (mut acolnorm, stack) = linalg::temp_mat_zeroed::<T, _, _>(n, 1, stack);
462	let acolnorm = acolnorm.as_mat_mut();
463	let (mut bcolnorm, stack) = linalg::temp_mat_zeroed::<T, _, _>(n, 1, stack);
464	let bcolnorm = bcolnorm.as_mat_mut();
465
466	let mut acolnorm = acolnorm.col_mut(0);
467	let mut bcolnorm = bcolnorm.col_mut(0);
468	let mut anorm = zero();
469	let mut bnorm = zero();
470
471	let mut j = 0;
472	while j < n {
473		let cplx = j + 1 < n && A[(j + 1, j)] != zero();
474		if !cplx {
475			let a = A.rb().col(j).get(..j).norm_l1();
476			acolnorm[j] = copy(a);
477			anorm = max(anorm, a + abs(A[(j, j)]));
478
479			j += 1;
480		} else {
481			for jc in j..j + 2 {
482				let a = A.rb().col(jc).get(..j).norm_l1();
483				acolnorm[jc] = copy(a);
484				anorm = max(anorm, a + abs(A[(j, jc)]) + abs(A[(j + 1, jc)]));
485			}
486			j += 2
487		}
488	}
489	for j in 0..n {
490		let b = B.rb().col(j).get(..j).norm_l1();
491		bcolnorm[j] = copy(b);
492		bnorm = max(bnorm, b + abs(B[(j, j)]));
493	}
494	let ascale = recip(max(anorm, safmin));
495	let bscale = recip(max(bnorm, safmin));
496
497	// left eigenvectors
498	if let Some(mut u) = Q {
499		let mut je = 0usize;
500		while je < n {
501			let cplx_eigval = je + 1 < n && A[(je + 1, je)] != zero();
502			let nw = if cplx_eigval { 2 } else { 1 };
503
504			if !cplx_eigval && max(abs(A[(je, je)]), abs(B[(je, je)])) < safmin {
505				u.rb_mut().col_mut(je).fill(zero());
506				u[(je, je)] = one();
507
508				je += 1;
509				continue;
510			}
511
512			let mut acoef;
513			let mut acoefa;
514			let mut bcoefa;
515			let mut bcoefr;
516			let mut bcoefi;
517			let mut xmax;
518
519			let (mut rhs, stack) = linalg::temp_mat_zeroed::<T, _, _>(n, nw, stack);
520			let mut rhs = rhs.as_mat_mut();
521
522			if !cplx_eigval {
523				// real eigenvalue
524				let temp = max(max(abs(A[(je, je)]) * ascale, abs(B[(je, je)]) * bscale), safmin);
525				let salfar = (temp * A[(je, je)]) * ascale;
526				let sbeta = (temp * B[(je, je)]) * bscale;
527
528				acoef = sbeta * ascale;
529				bcoefr = salfar * bscale;
530				bcoefi = zero();
531
532				// scale to avoid underflow
533				let mut scale = one();
534				let lsa = abs(sbeta) >= safmin && abs(acoef) < small;
535				let lsb = abs(salfar) >= safmin && abs(bcoefr) < small;
536
537				if lsa {
538					scale = (small / abs(sbeta)) * min(anorm, big);
539				}
540				if lsb {
541					scale = max(scale, (small / abs(salfar)) * min(bnorm, big));
542				}
543				if lsa || lsb {
544					scale = min(scale, one() / (safmin * max(one(), max(abs(acoef), abs(bcoefr)))));
545					if lsa {
546						acoef = ascale * (scale * sbeta)
547					} else {
548						acoef = scale * acoef
549					}
550					if lsb {
551						bcoefr = bscale * (scale * salfar)
552					} else {
553						bcoefr = scale * bcoefr
554					}
555				}
556				acoefa = abs(acoef);
557				bcoefa = abs(bcoefr);
558				rhs[(je, 0)] = one();
559				xmax = one();
560			} else {
561				// complex eigenvalue
562				let (scale, _, wr, _, wi) = qz_real::generalized_eigval_2x2(
563					(copy(A[(je, je)]), copy(A[(je, je + 1)]), copy(A[(je + 1, je)]), copy(A[(je + 1, je + 1)])),
564					(copy(B[(je, je)]), copy(B[(je, je + 1)]), copy(B[(je + 1, je)]), copy(B[(je + 1, je + 1)])),
565				);
566
567				acoef = scale;
568				bcoefr = wr;
569				bcoefi = wi;
570
571				// scale to avoid over/underflow
572				acoefa = abs(acoef);
573				bcoefa = abs(bcoefr) + abs(bcoefi);
574				let mut scale = one();
575				if acoefa * ulp < safmin && acoefa >= safmin {
576					scale = (safmin / ulp) / acoefa
577				}
578				if bcoefa * ulp < safmin && bcoefa >= safmin {
579					scale = max(scale, (safmin / ulp) / bcoefa)
580				}
581				if safmin * acoefa > ascale {
582					scale = ascale / (safmin * acoefa)
583				}
584				if safmin * bcoefa > bscale {
585					scale = min(scale, bscale / (safmin * bcoefa))
586				}
587				if scale != one() {
588					acoef = scale * acoef;
589					acoefa = abs(acoef);
590					bcoefr = scale * bcoefr;
591					bcoefi = scale * bcoefi;
592					bcoefa = abs(bcoefr) + abs(bcoefi);
593				}
594
595				// compute first two components of eigenvector
596
597				let temp = acoef * A[(je + 1, je)];
598				let temp2r = acoef * A[(je, je)] - bcoefr * B[(je, je)];
599				let temp2i = -bcoefi * B[(je, je)];
600				if abs(temp) > abs(temp2r) + abs(temp2i) {
601					rhs[(je, 0)] = one();
602					rhs[(je, 1)] = zero();
603					rhs[(je + 1, 0)] = -temp2r / temp;
604					rhs[(je + 1, 1)] = -temp2i / temp;
605				} else {
606					rhs[(je + 1, 0)] = one();
607					rhs[(je + 1, 1)] = zero();
608					let temp = acoef * A[(je, je + 1)];
609					rhs[(je, 0)] = (bcoefr * B[(je + 1, je + 1)] - acoef * A[(je + 1, je + 1)]) / temp;
610					rhs[(je, 1)] = bcoefi * B[(je + 1, je + 1)] / temp;
611				}
612				xmax = max(abs(rhs[(je, 0)]) + abs(rhs[(je, 1)]), abs(rhs[(je + 1, 0)]) + abs(rhs[(je + 1, 1)]))
613			}
614
615			let dmin = max(max(ulp * acoefa * anorm, ulp * bcoefa * bnorm), safmin);
616			let mut j = je + nw;
617			while j < n {
618				let cplx = j + 1 < n && A[(j + 1, j)] != zero();
619				let na = if cplx { 2 } else { 1 };
620
621				let xscale = recip(xmax);
622
623				let mut temp = max(max(acolnorm[j], bcolnorm[j]), acoefa * acolnorm[j] + bcoefa * bcolnorm[j]);
624
625				let b0 = copy(B[(j, j)]);
626				let b1;
627
628				if cplx {
629					temp = max(
630						max(temp, acoefa * acolnorm[j + 1] + bcoefa * bcolnorm[j + 1]),
631						max(acolnorm[j + 1], bcolnorm[j + 1]),
632					);
633					b1 = copy(B[(j + 1, j + 1)]);
634				} else {
635					b1 = zero();
636				}
637				if temp > bignum * xscale {
638					for jw in 0..nw {
639						for jr in je..j {
640							rhs[(jr, jw)] = xscale * rhs[(jr, jw)];
641						}
642					}
643					xmax = xmax * xscale;
644				}
645
646				// Compute dot products
647				//
648				//       j-1
649				// SUM = sum  conjg( a*S(k,j) - b*P(k,j) )*x(k)
650				//       k=je
651				//
652				// To reduce the op count, this is done as
653				//
654				// _        j-1                  _        j-1
655				// a*conjg( sum  S(k,j)*x(k) ) - b*conjg( sum  P(k,j)*x(k) )
656				//          k=je                          k=je
657				//
658				// which may cause underflow problems if A or B are close
				// to underflow.  (e.g., less than SMALL.)
660
661				let (mut sums, stack) = unsafe { linalg::temp_mat_uninit::<T, _, _>(na, nw, stack) };
662				let mut sums = sums.as_mat_mut();
663				let (mut sump, stack) = unsafe { linalg::temp_mat_uninit::<T, _, _>(na, nw, stack) };
664				let mut sump = sump.as_mat_mut();
665				for jw in 0..nw {
666					for ja in 0..na {
667						sums[(ja, jw)] = zero();
668						sump[(ja, jw)] = zero();
669
670						for jr in je..j {
671							sums[(ja, jw)] = sums[(ja, jw)] + A[(jr, j + ja)] * rhs[(jr, jw)];
672							sump[(ja, jw)] = sump[(ja, jw)] + B[(jr, j + ja)] * rhs[(jr, jw)];
673						}
674					}
675				}
676
677				for ja in 0..na {
678					if cplx_eigval {
679						rhs[(j + ja, 0)] = -acoef * sums[(ja, 0)] + bcoefr * sump[(ja, 0)] - bcoefi * sump[(ja, 1)];
680						rhs[(j + ja, 1)] = -acoef * sums[(ja, 1)] + bcoefr * sump[(ja, 1)] + bcoefi * sump[(ja, 0)];
681					} else {
682						rhs[(j + ja, 0)] = -acoef * sums[(ja, 0)] + bcoefr * sump[(ja, 0)];
683					}
684				}
				// Solve  ( a A - b B ).T  y = SUM
686				// with scaling and perturbation of the denominator
687				if cplx {
688					solve_complex_shifted_2x2(
689						copy(dmin),
690						copy(acoef),
691						A.submatrix(j, j, na, na).transpose(),
692						b0,
693						b1,
694						rhs.rb_mut().subrows_mut(j, na),
695						copy(bcoefr),
696						copy(bcoefi),
697						stack,
698					);
699				} else {
700					solve_complex_shifted_1x1(
701						copy(dmin),
702						copy(acoef),
703						A.submatrix(j, j, na, na).transpose(),
704						b0,
705						rhs.rb_mut().subrows_mut(j, na),
706						copy(bcoefr),
707						copy(bcoefi),
708					);
709				}
710
711				j += na;
712			}
713
714			let (mut tmp, _) = linalg::temp_mat_zeroed::<T, _, _>(n, nw, stack);
715			let mut tmp = tmp.as_mat_mut();
716			linalg::matmul::matmul(tmp.rb_mut(), Accum::Replace, u.rb().get(.., je..), rhs.rb().get(je.., ..), one(), par);
717
718			let mut u = u.rb_mut().subcols_mut(je, nw);
719			u.copy_from(&tmp);
720			let scale = recip(u.norm_l2());
721			zip!(u).for_each(|unzip!(u)| {
722				*u = *u * scale;
723			});
724
725			je += nw;
726		}
727	}
728	// right eigenvectors
729	if let Some(mut u) = Z {
730		let mut je = n;
731		while je > 0 {
732			je -= 1;
733			let cplx_eigval = je > 0 && A[(je, je - 1)] != zero();
734			let nw = if cplx_eigval { 2 } else { 1 };
735			je -= nw - 1;
736
737			if !cplx_eigval && max(abs(A[(je, je)]), abs(B[(je, je)])) < safmin {
738				u.rb_mut().col_mut(je).fill(zero());
739				u[(je, je)] = one();
740
741				continue;
742			}
743
744			let mut acoef;
745			let mut acoefa;
746			let mut bcoefa;
747			let mut bcoefr;
748			let mut bcoefi;
749
750			let (mut rhs, stack) = linalg::temp_mat_zeroed::<T, _, _>(n, nw, stack);
751			let mut rhs = rhs.as_mat_mut();
752
753			if !cplx_eigval {
754				// real eigenvalue
755				let temp = max(max(abs(A[(je, je)]) * ascale, abs(B[(je, je)]) * bscale), safmin);
756				let salfar = (temp * A[(je, je)]) * ascale;
757				let sbeta = (temp * B[(je, je)]) * bscale;
758
759				acoef = sbeta * ascale;
760				bcoefr = salfar * bscale;
761				bcoefi = zero();
762
763				// scale to avoid underflow
764				let mut scale = one();
765				let lsa = abs(sbeta) >= safmin && abs(acoef) < small;
766				let lsb = abs(salfar) >= safmin && abs(bcoefr) < small;
767
768				if lsa {
769					scale = (small / abs(sbeta)) * min(anorm, big);
770				}
771				if lsb {
772					scale = max(scale, (small / abs(salfar)) * min(bnorm, big));
773				}
774				if lsa || lsb {
775					scale = min(scale, one() / (safmin * max(one(), max(abs(acoef), abs(bcoefr)))));
776					if lsa {
777						acoef = ascale * (scale * sbeta)
778					} else {
779						acoef = scale * acoef
780					}
781					if lsb {
782						bcoefr = bscale * (scale * salfar)
783					} else {
784						bcoefr = scale * bcoefr
785					}
786				}
787				acoefa = abs(acoef);
788				bcoefa = abs(bcoefr);
789				rhs[(je, 0)] = one();
790
791				for jr in 0..je {
792					rhs[(jr, 0)] = bcoefr * B[(jr, je)] - acoef * A[(jr, je)];
793				}
794			} else {
795				// complex eigenvalue
796				let (scale, _, wr, _, wi) = qz_real::generalized_eigval_2x2(
797					(copy(A[(je, je)]), copy(A[(je, je + 1)]), copy(A[(je + 1, je)]), copy(A[(je + 1, je + 1)])),
798					(copy(B[(je, je)]), copy(B[(je, je + 1)]), copy(B[(je + 1, je)]), copy(B[(je + 1, je + 1)])),
799				);
800
801				acoef = scale;
802				bcoefr = wr;
803				bcoefi = wi;
804
805				// scale to avoid over/underflow
806				acoefa = abs(acoef);
807				bcoefa = abs(bcoefr) + abs(bcoefi);
808				let mut scale = one();
809				if acoefa * ulp < safmin && acoefa >= safmin {
810					scale = (safmin / ulp) / acoefa
811				}
812				if bcoefa * ulp < safmin && bcoefa >= safmin {
813					scale = max(scale, (safmin / ulp) / bcoefa)
814				}
815				if safmin * acoefa > ascale {
816					scale = ascale / (safmin * acoefa)
817				}
818				if safmin * bcoefa > bscale {
819					scale = min(scale, bscale / (safmin * bcoefa))
820				}
821				if scale != one() {
822					acoef = scale * acoef;
823					acoefa = abs(acoef);
824					bcoefr = scale * bcoefr;
825					bcoefi = scale * bcoefi;
826					bcoefa = abs(bcoefr) + abs(bcoefi);
827				}
828
829				// compute first two components of eigenvector
830
831				let temp = acoef * A[(je + 1, je)];
832				let temp2r = acoef * A[(je + 1, je + 1)] - bcoefr * B[(je + 1, je + 1)];
833				let temp2i = -bcoefi * B[(je + 1, je + 1)];
834				if abs(temp) > abs(temp2r) + abs(temp2i) {
835					rhs[(je + 1, 0)] = one();
836					rhs[(je + 1, 1)] = zero();
837					rhs[(je, 0)] = -temp2r / temp;
838					rhs[(je, 1)] = -temp2i / temp;
839				} else {
840					rhs[(je, 0)] = one();
841					rhs[(je, 1)] = zero();
842					let temp = acoef * A[(je, je + 1)];
843					rhs[(je + 1, 0)] = (bcoefr * B[(je, je)] - acoef * A[(je, je)]) / temp;
844					rhs[(je + 1, 1)] = bcoefi * B[(je, je)] / temp;
845				}
846
847				let creala = acoef * rhs[(je, 0)];
848				let cimaga = acoef * rhs[(je, 1)];
849				let crealb = bcoefr * rhs[(je, 0)] - bcoefi * rhs[(je, 1)];
850				let cimagb = bcoefi * rhs[(je, 0)] + bcoefr * rhs[(je, 1)];
851				let cre2a = acoef * rhs[(je + 1, 0)];
852				let cim2a = acoef * rhs[(je + 1, 1)];
853				let cre2b = bcoefr * rhs[(je + 1, 0)] - bcoefi * rhs[(je + 1, 1)];
854				let cim2b = bcoefi * rhs[(je + 1, 0)] + bcoefr * rhs[(je + 1, 1)];
855				for jr in 0..je {
856					rhs[(jr, 0)] = -creala * A[(jr, je)] + crealb * B[(jr, je)] - cre2a * A[(jr, je + 1)] + cre2b * B[(jr, je + 1)];
857					rhs[(jr, 1)] = -cimaga * A[(jr, je)] + cimagb * B[(jr, je)] - cim2a * A[(jr, je + 1)] + cim2b * B[(jr, je + 1)];
858				}
859			}
860			let dmin = max(max(ulp * acoefa * anorm, ulp * bcoefa * bnorm), safmin);
861			let mut j = je;
862			while j > 0 {
863				j -= 1;
864				let cplx = j > 0 && A[(j, j - 1)] != zero();
865				let na = if cplx { 2 } else { 1 };
866				j -= na - 1;
867
868				let b0 = copy(B[(j, j)]);
869				let b1;
870
871				if cplx {
872					b1 = copy(B[(j + 1, j + 1)]);
873				} else {
874					b1 = zero();
875				}
876
877				if cplx {
878					solve_complex_shifted_2x2(
879						copy(dmin),
880						copy(acoef),
881						A.submatrix(j, j, na, na),
882						b0,
883						b1,
884						rhs.rb_mut().subrows_mut(j, na),
885						copy(bcoefr),
886						copy(bcoefi),
887						stack,
888					);
889				} else {
890					solve_complex_shifted_1x1(
891						copy(dmin),
892						copy(acoef),
893						A.submatrix(j, j, na, na),
894						b0,
895						rhs.rb_mut().subrows_mut(j, na),
896						copy(bcoefr),
897						copy(bcoefi),
898					);
899				}
900
901				// Compute the contributions of the off-diagonals of
902				// column j (and j+1, if 2-by-2 block) of A and B to the
903				// sums.
904
905				for ja in 0..na {
906					if cplx_eigval {
907						let creala = acoef * rhs[(j + ja, 0)];
908						let cimaga = acoef * rhs[(j + ja, 1)];
909						let crealb = bcoefr * rhs[(j + ja, 0)] - bcoefi * rhs[(j + ja, 1)];
910						let cimagb = bcoefi * rhs[(j + ja, 0)] + bcoefr * rhs[(j + ja, 1)];
911						for jr in 0..j {
912							rhs[(jr, 0)] = rhs[(jr, 0)] - creala * A[(jr, j + ja)] + crealb * B[(jr, j + ja)];
913							rhs[(jr, 1)] = rhs[(jr, 1)] - cimaga * A[(jr, j + ja)] + cimagb * B[(jr, j + ja)];
914						}
915					} else {
916						let creala = acoef * rhs[(j + ja, 0)];
917						let crealb = bcoefr * rhs[(j + ja, 0)];
918
919						for jr in 0..j {
920							rhs[(jr, 0)] = rhs[(jr, 0)] - creala * A[(jr, j + ja)] + crealb * B[(jr, j + ja)];
921						}
922					}
923				}
924			}
925			let (mut tmp, _) = linalg::temp_mat_zeroed::<T, _, _>(n, nw, stack);
926			let mut tmp = tmp.as_mat_mut();
927			linalg::matmul::matmul(
928				tmp.rb_mut(),
929				Accum::Replace,
930				u.rb().get(.., ..je + nw),
931				rhs.rb().get(..je + nw, ..),
932				one(),
933				par,
934			);
935
936			let mut u = u.rb_mut().subcols_mut(je, nw);
937			u.copy_from(&tmp);
938			let scale = recip(u.norm_l2());
939			zip!(u).for_each(|unzip!(u)| {
940				*u = *u * scale;
941			});
942		}
943	}
944}
945
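// complex counterpart of `qz_to_gevd_real`: recovers the generalized eigenvectors of the
// triangular pencil produced by the complex QZ iteration by substitution (only 1x1 diagonal
// blocks occur here), then maps them back through the accumulated Q/Z factors and normalizes them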
946#[math]
947fn qz_to_gevd_cplx<T: ComplexField>(
948	A: MatRef<'_, T>,
949	B: MatRef<'_, T>,
950	Q: Option<MatMut<'_, T>>,
951	Z: Option<MatMut<'_, T>>,
952	par: Par,
953	stack: &mut MemStack,
954) {
955	let n = A.nrows();
956	if n == 0 {
957		return;
958	}
959
960	let one = one::<T::Real>;
961
962	let ulp = eps::<T::Real>();
963	let safmin = min_positive::<T::Real>();
964	let smallnum = safmin * from_f64(n as f64);
965	let small = smallnum * recip(ulp);
966	let bignum = recip(smallnum);
967	let big = recip(small);
968
969	let (mut acolnorm, stack) = linalg::temp_mat_zeroed::<T::Real, _, _>(n, 1, stack);
970	let acolnorm = acolnorm.as_mat_mut();
971	let (mut bcolnorm, stack) = linalg::temp_mat_zeroed::<T::Real, _, _>(n, 1, stack);
972	let bcolnorm = bcolnorm.as_mat_mut();
973
974	let mut acolnorm = acolnorm.col_mut(0);
975	let mut bcolnorm = bcolnorm.col_mut(0);
976	let mut anorm = zero::<T::Real>();
977	let mut bnorm = zero::<T::Real>();
978
979	let mut j = 0;
980	while j < n {
981		let a = A.rb().col(j).get(..j).norm_l1();
982		acolnorm[j] = copy(a);
983		anorm = max(anorm, a + abs(A[(j, j)]));
984
985		j += 1;
986	}
987	for j in 0..n {
988		let b = B.rb().col(j).get(..j).norm_l1();
989		bcolnorm[j] = copy(b);
990		bnorm = max(bnorm, b + abs(B[(j, j)]));
991	}
992	let ascale = recip(max(anorm, safmin));
993	let bscale = recip(max(bnorm, safmin));
994
995	// left eigenvectors
996	if let Some(mut u) = Q {
997		let mut je = 0usize;
998		while je < n {
999			if max(abs(A[(je, je)]), abs(B[(je, je)])) < safmin {
1000				u.rb_mut().col_mut(je).fill(zero());
1001				u[(je, je)] = from_real(one());
1002
1003				je += 1;
1004				continue;
1005			}
1006
1007			let mut acoef;
1008			let acoefa;
1009			let bcoefa;
1010			let mut bcoef;
1011			let mut xmax;
1012
1013			let (mut rhs, stack) = linalg::temp_mat_zeroed::<T, _, _>(n, 1, stack);
1014			let mut rhs = rhs.as_mat_mut().col_mut(0);
1015
1016			{
				// compute and scale the eigenvalue pair (alpha, beta)
1018				let temp = max(max(abs(A[(je, je)]) * ascale, abs(B[(je, je)]) * bscale), safmin);
1019				let salfar = mul_real(mul_real(A[(je, je)], temp), ascale);
1020				let sbeta = real(B[(je, je)]) * temp * bscale;
1021
1022				acoef = sbeta * ascale;
1023				bcoef = mul_real(salfar, bscale);
1024
1025				// scale to avoid underflow
1026				let mut scale = one();
1027				let lsa = abs(sbeta) >= safmin && abs(acoef) < small;
1028				let lsb = abs(salfar) >= safmin && abs(bcoef) < small;
1029
1030				if lsa {
1031					scale = (small / abs(sbeta)) * min(anorm, big);
1032				}
1033				if lsb {
1034					scale = max(scale, (small / abs(salfar)) * min(bnorm, big));
1035				}
1036				if lsa || lsb {
1037					scale = min(scale, one() / (safmin * max(one(), max(abs(acoef), abs(bcoef)))));
1038					if lsa {
1039						acoef = sbeta * scale * ascale
1040					} else {
1041						acoef = acoef * scale
1042					}
1043					if lsb {
1044						bcoef = mul_real(mul_real(salfar, scale), bscale)
1045					} else {
1046						bcoef = mul_real(bcoef, scale)
1047					}
1048				}
1049				acoefa = abs(acoef);
1050				bcoefa = abs(bcoef);
1051				rhs[je] = from_real(one());
1052				xmax = one();
1053			}
1054
1055			let dmin = max(max(ulp * acoefa * anorm, ulp * bcoefa * bnorm), safmin);
1056			let mut j = je + 1;
1057			while j < n {
1058				let xscale = recip(xmax);
1059
1060				let temp = max(max(acolnorm[j], bcolnorm[j]), acoefa * acolnorm[j] + bcoefa * bcolnorm[j]);
1061
1062				let b0 = copy(B[(j, j)]);
1063
1064				if temp > bignum * xscale {
1065					for jr in je..j {
1066						rhs[jr] = mul_real(rhs[jr], xscale);
1067					}
1068					xmax = xmax * xscale;
1069				}
1070
1071				// Compute dot products
1072				//
1073				//       j-1
1074				// SUM = sum  conjg( a*S(k,j) - b*P(k,j) )*x(k)
1075				//       k=je
1076				//
1077				// To reduce the op count, this is done as
1078				//
1079				// _        j-1                  _        j-1
1080				// a*conjg( sum  S(k,j)*x(k) ) - b*conjg( sum  P(k,j)*x(k) )
1081				//          k=je                          k=je
1082				//
1083				// which may cause underflow problems if A or B are close
				// to underflow.  (e.g., less than SMALL.)
1085
1086				let mut sums = zero::<T>();
1087				let mut sump = zero::<T>();
1088
1089				for jr in je..j {
1090					sums = sums + conj(A[(jr, j)]) * rhs[jr];
1091					sump = sump + conj(B[(jr, j)]) * rhs[jr];
1092				}
1093
1094				rhs[j] = conj(bcoef) * sump - mul_real(sums, acoef);
				// Solve  ( a A - b B ).T  y = SUM
1096				// with scaling and perturbation of the denominator
1097
1098				solve_shifted_1x1(copy(dmin), copy(acoef), conj(A[(j, j)]), conj(b0), &mut rhs[j], conj(bcoef));
1099
1100				j += 1;
1101			}
1102
1103			let (mut tmp, _) = linalg::temp_mat_zeroed::<T, _, _>(n, 1, stack);
1104			let mut tmp = tmp.as_mat_mut().col_mut(0);
1105			linalg::matmul::matmul(
1106				tmp.rb_mut(),
1107				Accum::Replace,
1108				u.rb().get(.., je..),
1109				rhs.rb().get(je..),
1110				from_real(one()),
1111				par,
1112			);
1113
1114			let mut u = u.rb_mut().col_mut(je);
1115			u.copy_from(&tmp);
1116			let scale = recip(u.norm_l2());
1117			zip!(u).for_each(|unzip!(u)| {
1118				*u = mul_real(*u, scale);
1119			});
1120
1121			je += 1;
1122		}
1123	}
1124	// right eigenvectors
1125	if let Some(mut u) = Z {
1126		let mut je = n;
1127		while je > 0 {
1128			je -= 1;
1129
1130			if max(abs(A[(je, je)]), abs(B[(je, je)])) < safmin {
1131				u.rb_mut().col_mut(je).fill(zero());
1132				u[(je, je)] = from_real(one());
1133
1134				continue;
1135			}
1136
1137			let mut acoef;
1138			let acoefa;
1139			let bcoefa;
1140			let mut bcoefr;
1141
1142			let (mut rhs, stack) = linalg::temp_mat_zeroed::<T, _, _>(n, 1, stack);
1143			let mut rhs = rhs.as_mat_mut().col_mut(0);
1144
1145			{
				// compute and scale the eigenvalue pair (alpha, beta)
1147				let temp = max(max(abs(A[(je, je)]) * ascale, abs(B[(je, je)]) * bscale), safmin);
1148				let salfar = mul_real(mul_real(A[(je, je)], temp), ascale);
1149				let sbeta = real(B[(je, je)]) * temp * bscale;
1150
1151				acoef = sbeta * ascale;
1152				bcoefr = mul_real(salfar, bscale);
1153
1154				// scale to avoid underflow
1155				let mut scale = one();
1156				let lsa = abs(sbeta) >= safmin && abs(acoef) < small;
1157				let lsb = abs(salfar) >= safmin && abs(bcoefr) < small;
1158
1159				if lsa {
1160					scale = (small / abs(sbeta)) * min(anorm, big);
1161				}
1162				if lsb {
1163					scale = max(scale, (small / abs(salfar)) * min(bnorm, big));
1164				}
1165				if lsa || lsb {
1166					scale = min(scale, one() / (safmin * max(one(), max(abs(acoef), abs(bcoefr)))));
1167					if lsa {
1168						acoef = sbeta * scale * ascale
1169					} else {
1170						acoef = acoef * scale
1171					}
1172					if lsb {
1173						bcoefr = mul_real(mul_real(salfar, scale), bscale)
1174					} else {
1175						bcoefr = mul_real(bcoefr, scale)
1176					}
1177				}
1178				acoefa = abs(acoef);
1179				bcoefa = abs(bcoefr);
1180				rhs[je] = from_real(one());
1181
1182				for jr in 0..je {
1183					rhs[jr] = bcoefr * B[(jr, je)] - mul_real(A[(jr, je)], acoef);
1184				}
1185			}
1186			let dmin = max(max(ulp * acoefa * anorm, ulp * bcoefa * bnorm), safmin);
1187			let mut j = je;
1188			while j > 0 {
1189				j -= 1;
1190
1191				let b0 = copy(B[(j, j)]);
1192
1193				solve_shifted_1x1(copy(dmin), copy(acoef), copy(A[(j, j)]), b0, &mut rhs[j], copy(bcoefr));
1194
				// Compute the contributions of the off-diagonal entries of
				// column j of A and B to the sums.
1198
1199				let creala = mul_real(rhs[j], acoef);
1200				let crealb = bcoefr * rhs[j];
1201
1202				for jr in 0..j {
1203					rhs[jr] = rhs[jr] - creala * A[(jr, j)] + crealb * B[(jr, j)];
1204				}
1205			}
1206			let (mut tmp, _) = linalg::temp_mat_zeroed::<T, _, _>(n, 1, stack);
1207			let mut tmp = tmp.as_mat_mut().col_mut(0);
1208			linalg::matmul::matmul(
1209				tmp.rb_mut(),
1210				Accum::Replace,
1211				u.rb().get(.., ..je + 1),
1212				rhs.rb().get(..je + 1),
1213				from_real(one()),
1214				par,
1215			);
1216
1217			let mut u = u.rb_mut().col_mut(je);
1218			u.copy_from(&tmp);
1219			let scale = recip(u.norm_l2());
1220			zip!(u).for_each(|unzip!(u)| {
1221				*u = mul_real(*u, scale);
1222			});
1223		}
1224	}
1225}
1226
1227/// computes the layout of the workspace required to compute a matrix pair's
1228/// generalized eigendecomposition
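///
/// a typical allocation sketch (mirroring the tests at the bottom of this module, not compiled as
/// a doctest):
///
/// ```ignore
/// use dyn_stack::{MemBuffer, MemStack};
///
/// let mut buffer = MemBuffer::new(gevd_scratch::<f64>(
///     n,
///     ComputeEigenvectors::Yes,
///     ComputeEigenvectors::Yes,
///     Par::Seq,
///     default(),
/// ));
/// let stack = MemStack::new(&mut buffer);
/// ```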
1229pub fn gevd_scratch<T: ComplexField>(
1230	dim: usize,
1231	left: ComputeEigenvectors,
1232	right: ComputeEigenvectors,
1233	par: Par,
1234	params: Spec<GevdParams, T>,
1235) -> StackReq {
1236	let _ = (left, right);
1237
1238	let n = dim;
1239	let block_size = linalg::qr::no_pivoting::factor::recommended_block_size::<T>(n, n);
1240
1241	StackReq::any_of(&[
1242		linalg::temp_mat_scratch::<T>(block_size, n).and(
1243			linalg::qr::no_pivoting::factor::qr_in_place_scratch::<T>(n, n, block_size, par, default())
1244				.or(linalg::householder::apply_block_householder_sequence_transpose_on_the_left_in_place_scratch::<T>(n, block_size, n)),
1245		),
1246		gen_hessenberg::generalized_hessenberg_scratch::<T>(n, auto!(T)),
1247		if const { T::IS_REAL } {
1248			qz_real::hessenberg_to_qz_scratch::<T::Real>(n, par, params.schur)
1249		} else {
1250			qz_cplx::hessenberg_to_qz_scratch::<T>(n, par, params.schur)
1251		},
1252	])
1253}
1254
/// computes the generalized eigendecomposition of the real matrix pair $(A, B)$
///
/// the generalized eigenvalues are returned as ratios: the $i$-th eigenvalue is $(S_{re, i} + i
/// S_{im, i}) / \beta_i$. complex eigenvalues occur in conjugate pairs, and for such a pair the
/// real and imaginary parts of the corresponding eigenvector are stored in two consecutive columns
/// of $U_L$ / $U_R$. the left eigenvectors are stored in $U_L$, and the right eigenvectors in $U_R$
///
/// the values in $A$ and $B$ after this function is called are unspecified.
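///
/// a minimal call sketch (mirroring the tests at the bottom of this module, not compiled as a
/// doctest); `A`, `B`, `n` and `stack` are assumed to be provided by the caller:
///
/// ```ignore
/// let mut s = A.to_owned();
/// let mut t = B.to_owned();
/// let mut alpha_re = Diag::<f64>::zeros(n);
/// let mut alpha_im = Diag::<f64>::zeros(n);
/// let mut beta = Diag::<f64>::zeros(n);
/// let mut u_right = Mat::<f64>::identity(n, n);
///
/// gevd_real(
///     s.as_mut(),
///     t.as_mut(),
///     alpha_re.rb_mut(),
///     alpha_im.rb_mut(),
///     beta.rb_mut(),
///     None,
///     Some(u_right.as_mut()),
///     Par::Seq,
///     stack,
///     default(),
/// )
/// .unwrap();
/// ```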
1261#[track_caller]
1262pub fn gevd_real<T: RealField>(
1263	A: MatMut<'_, T>,
1264	B: MatMut<'_, T>,
1265	S_re: DiagMut<'_, T>,
1266	S_im: DiagMut<'_, T>,
1267	beta: DiagMut<'_, T>,
1268	U_left: Option<MatMut<'_, T>>,
1269	U_right: Option<MatMut<'_, T>>,
1270	par: Par,
1271	stack: &mut MemStack,
1272	params: Spec<GevdParams, T>,
1273) -> Result<(), GevdError> {
1274	compute_gevd_generic(
1275		A,
1276		B,
1277		S_re.column_vector_mut(),
1278		S_im.column_vector_mut(),
1279		beta.column_vector_mut(),
1280		U_left,
1281		U_right,
1282		par,
1283		stack,
1284		params.config,
1285		qz_real::hessenberg_to_qz,
1286		qz_to_gevd_real,
1287	)
1288}
1289
/// computes the generalized eigendecomposition of the complex matrix pair $(A, B)$
///
/// the generalized eigenvalues are returned as ratios: the $i$-th eigenvalue is $S_i / \beta_i$.
/// the left eigenvectors are stored in $U_L$, and the right eigenvectors in $U_R$
///
/// the values in $A$ and $B$ after this function is called are unspecified.
1296#[track_caller]
1297pub fn gevd_cplx<T: ComplexField>(
1298	A: MatMut<'_, T>,
1299	B: MatMut<'_, T>,
1300	S: DiagMut<'_, T>,
1301	beta: DiagMut<'_, T>,
1302	U_left: Option<MatMut<'_, T>>,
1303	U_right: Option<MatMut<'_, T>>,
1304	par: Par,
1305	stack: &mut MemStack,
1306	params: Spec<GevdParams, T>,
1307) -> Result<(), GevdError> {
1308	compute_gevd_generic(
1309		A,
1310		B,
1311		S.column_vector_mut(),
1312		ColMut::from_slice_mut(&mut []),
1313		beta.column_vector_mut(),
1314		U_left,
1315		U_right,
1316		par,
1317		stack,
1318		params.config,
1319		|A: MatMut<'_, T>,
1320		 B: MatMut<'_, T>,
1321		 Q: Option<MatMut<'_, T>>,
1322		 Z: Option<MatMut<'_, T>>,
1323		 alphar: ColMut<'_, T>,
1324		 _: ColMut<'_, T>,
1325		 beta: ColMut<'_, T>,
1326		 eigenvectors: ComputeEigenvectors,
1327		 par: Par,
1328		 params: GeneralizedSchurParams,
1329		 stack: &mut MemStack| qz_cplx::hessenberg_to_qz(A, B, Q, Z, alphar, beta, eigenvectors, par, params, stack),
1330		qz_to_gevd_cplx,
1331	)
1332}
1333
1334#[cfg(test)]
1335mod tests {
1336	use super::*;
1337	use crate::stats::prelude::*;
1338	use crate::utils;
1339	use dyn_stack::MemBuffer;
1340	use equator::assert;
1341
1342	#[test]
1343	fn test_lishen_() {
1344		let approx_eq = utils::approx::CwiseMat(utils::approx::ApproxEq::<f64>::eps() * 128.0);
1345
1346		let A = &[
1347			[
1348				0.0000000067116330731078674,
1349				-0.0000000024189842394753674,
1350				0.00000000032505193899740187,
1351				0.0,
1352				0.0,
1353				0.0,
1354			],
1355			[
1356				-0.0000000024189842394753674,
1357				0.000000011866543705551487,
1358				-0.00000001582083742901503,
1359				0.0,
1360				0.0,
1361				0.0,
1362			],
1363			[
1364				0.00000000032505193899740187,
1365				-0.00000001582083742901503,
1366				0.00000002523087227632826,
1367				0.0,
1368				0.0,
1369				0.0,
1370			],
1371			[0.0, 0.0, 0.0, 1.0, 0.0, 0.0],
1372			[0.0, 0.0, 0.0, 0.0, 1.0, 0.0],
1373			[0.0, 0.0, 0.0, 0.0, 0.0, 1.0f64],
1374		];
1375
1376		let B = &[
1377			[
1378				0.00016038376001067668,
1379				-0.00003674235041182116,
1380				-0.0000029397921270058582,
1381				-1.0,
1382				-0.0,
1383				-0.0,
1384			],
1385			[-0.00003674235041182116, 0.0002038454538304322, -0.00021213416901900504, -0.0, -1.0, -0.0],
1386			[
1387				-0.0000029397921270058582,
1388				-0.00021213416901900504,
1389				0.0003954706173886255,
1390				-0.0,
1391				-0.0,
1392				-1.0,
1393			],
1394			[1.0, 0.0, 0.0, 0.0, 0.0, 0.0],
1395			[0.0, 1.0, 0.0, 0.0, 0.0, 0.0],
1396			[0.0, 0.0, 1.0, 0.0, 0.0, 0.0f64],
1397		];
1398
1399		let A = MatRef::from_row_major_array(A);
1400		let B = MatRef::from_row_major_array(B);
1401
1402		let n = A.nrows();
1403
1404		{
1405			let mut H = A.to_owned();
1406			let mut T = B.to_owned();
1407			let mut alpha_re = Diag::<f64>::zeros(n);
1408			let mut alpha_im = Diag::<f64>::zeros(n);
1409			let mut beta = Diag::<f64>::zeros(n);
1410
1411			let mut UL = Mat::<f64>::identity(n, n);
1412			let mut UR = Mat::<f64>::identity(n, n);
1413
1414			gevd_real(
1415				H.as_mut(),
1416				T.as_mut(),
1417				alpha_re.rb_mut(),
1418				alpha_im.rb_mut(),
1419				beta.rb_mut(),
1420				Some(UL.as_mut()),
1421				Some(UR.as_mut()),
1422				Par::Seq,
1423				MemStack::new(&mut MemBuffer::new(gevd_scratch::<f64>(
1424					n,
1425					ComputeEigenvectors::Yes,
1426					ComputeEigenvectors::Yes,
1427					Par::Seq,
1428					default(),
1429				))),
1430				default(),
1431			)
1432			.unwrap();
1433
1434			{
1435				let mut i = 0;
1436				while i < n {
1437					if alpha_im[i] == 0.0 {
1438						let u = UR.col(i);
1439						let a = alpha_re[i];
1440						let b = beta[i];
1441
1442						assert!((b / a) * &A * u ~ B * u);
1443
1444						i += 1;
1445					} else {
1446						let ur = UR.col(i);
1447						let ui = UR.col(i + 1);
1448						let ar = alpha_re[i];
1449						let ai = alpha_im[i];
1450						let b = beta[i];
1451
1452						assert!(b * &A * ur ~ B * (ar * ur - ai * ui) );
1453						assert!(b * &A * ui ~ B * (ar * ui + ai * ur) );
1454
1455						i += 2;
1456					}
1457				}
1458			}
1459
1460			{
1461				let mut i = 0;
1462				while i < n {
1463					if alpha_im[i] == 0.0 {
1464						let u = UL.col(i);
1465						let a = alpha_re[i];
1466						let b = beta[i];
1467
1468						assert!(b / a * u.adjoint() * &A ~ u.adjoint() * B);
1469
1470						i += 1;
1471					} else {
1472						let ur = UL.col(i);
1473						let ui = UL.col(i + 1);
1474						let ar = alpha_re[i];
1475						let ai = alpha_im[i];
1476						let b = beta[i];
1477
1478						assert!(b * ur.adjoint() * &A ~ (ar * ur - ai * ui).adjoint() * B);
1479						assert!(b * ui.adjoint() * &A ~ (ar * ui + ai * ur).adjoint() * B);
1480
1481						i += 2;
1482					}
1483				}
1484			}
1485		}
1486	}
1487
1488	#[test]
1489	fn test_cplx() {
1490		let rng = &mut StdRng::seed_from_u64(1);
1491		let approx_eq = utils::approx::CwiseMat(utils::approx::ApproxEq::<f64>::eps() * 128.0);
1492		let n = 20;
1493
1494		let A = &CwiseMatDistribution {
1495			nrows: n,
1496			ncols: n,
1497			dist: ComplexDistribution::new(StandardNormal, StandardNormal),
1498		}
1499		.rand::<Mat<c64>>(rng);
1500
1501		let B = &CwiseMatDistribution {
1502			nrows: n,
1503			ncols: n,
1504			dist: ComplexDistribution::new(StandardNormal, StandardNormal),
1505		}
1506		.rand::<Mat<c64>>(rng);
1507
1508		{
1509			let mut H = A.to_owned();
1510			let mut T = B.to_owned();
1511			let mut alpha_re = Diag::<c64>::zeros(n);
1512			let mut beta = Diag::<c64>::zeros(n);
1513
1514			let mut UL = Mat::<c64>::identity(n, n);
1515			let mut UR = Mat::<c64>::identity(n, n);
1516
1517			gevd_cplx(
1518				H.as_mut(),
1519				T.as_mut(),
1520				alpha_re.rb_mut(),
1521				beta.rb_mut(),
1522				Some(UL.as_mut()),
1523				Some(UR.as_mut()),
1524				Par::Seq,
1525				MemStack::new(&mut MemBuffer::new(gevd_scratch::<c64>(
1526					n,
1527					ComputeEigenvectors::Yes,
1528					ComputeEigenvectors::Yes,
1529					Par::Seq,
1530					default(),
1531				))),
1532				default(),
1533			)
1534			.unwrap();
1535
1536			{
1537				let mut i = 0;
1538				while i < n {
1539					let u = UR.col(i);
1540					let a = alpha_re[i];
1541					let b = beta[i];
1542
1543					assert!(Scale(b / a) * A * u ~ B * u);
1544
1545					i += 1;
1546				}
1547			}
1548
1549			{
1550				let mut i = 0;
1551				while i < n {
1552					let u = UL.col(i);
1553					let a = alpha_re[i];
1554					let b = beta[i];
1555
1556					assert!(Scale(b / a) * u.adjoint() * A ~ u.adjoint() * B);
1557
1558					i += 1;
1559				}
1560			}
1561		}
1562	}
1563}