1use core::iter::zip;
2use faer_core::{
3 assert, group_helpers::SliceGroup, permutation::Index, sparse::SparseColMatRef, Conj, MatMut,
4 Parallelism,
5};
6use faer_entity::ComplexField;
7
8pub fn solve_lower_triangular_in_place<I: Index, E: ComplexField>(
9 l: SparseColMatRef<'_, I, E>,
10 conj: Conj,
11 rhs: MatMut<'_, E>,
12 parallelism: Parallelism,
13) {
14 let _ = parallelism;
15 assert!(l.nrows() == l.ncols());
16 assert!(rhs.nrows() == l.nrows());
17
18 let slice_group = SliceGroup::<'_, E>::new;
19
20 faer_core::constrained::Size::with2(
21 rhs.nrows(),
22 rhs.ncols(),
23 #[inline(always)]
24 |N, K| {
25 let mut x = faer_core::constrained::MatMut::new(rhs, N, K);
26 let l = faer_core::constrained::sparse::SparseColMatRef::new(l, N, N);
27
28 let mut k = 0usize;
29 while k < *K {
30 let bs = Ord::min(*K - k, 4);
31 match bs {
32 1 => {
33 let k0 = K.check(k);
34
35 for j in N.indices() {
36 let d = slice_group(l.values_of_col(j)).read(0).faer_inv();
37 let d = if conj == Conj::Yes { d.faer_conj() } else { d };
38
39 let xj0 = x.read(j, k0).faer_mul(d);
40 x.write(j, k0, xj0);
41
42 for (i, lij) in zip(
43 l.row_indices_of_col(j),
44 slice_group(l.values_of_col(j)).into_ref_iter(),
45 )
46 .skip(1)
47 {
48 let lij = lij.read();
49 let lij = if conj == Conj::Yes {
50 lij.faer_conj()
51 } else {
52 lij
53 };
54 x.write(i, k0, x.read(i, k0).faer_sub(lij.faer_mul(xj0)));
55 }
56 }
57 }
58 2 => {
59 let k0 = K.check(k);
60 let k1 = K.check(k + 1);
61 for j in N.indices() {
62 let d = slice_group(l.values_of_col(j)).read(0).faer_inv();
63 let d = if conj == Conj::Yes { d.faer_conj() } else { d };
64
65 let xj0 = x.read(j, k0).faer_mul(d);
66 x.write(j, k0, xj0);
67 let xj1 = x.read(j, k1).faer_mul(d);
68 x.write(j, k1, xj1);
69
70 for (i, lij) in zip(
71 l.row_indices_of_col(j),
72 slice_group(l.values_of_col(j)).into_ref_iter(),
73 )
74 .skip(1)
75 {
76 let lij = lij.read();
77 let lij = if conj == Conj::Yes {
78 lij.faer_conj()
79 } else {
80 lij
81 };
82 x.write(i, k0, x.read(i, k0).faer_sub(lij.faer_mul(xj0)));
83 x.write(i, k1, x.read(i, k1).faer_sub(lij.faer_mul(xj1)));
84 }
85 }
86 }
87 3 => {
88 let k0 = K.check(k);
89 let k1 = K.check(k + 1);
90 let k2 = K.check(k + 2);
91 for j in N.indices() {
92 let d = slice_group(l.values_of_col(j)).read(0).faer_inv();
93 let d = if conj == Conj::Yes { d.faer_conj() } else { d };
94
95 let xj0 = x.read(j, k0).faer_mul(d);
96 x.write(j, k0, xj0);
97 let xj1 = x.read(j, k1).faer_mul(d);
98 x.write(j, k1, xj1);
99 let xj2 = x.read(j, k2).faer_mul(d);
100 x.write(j, k2, xj2);
101
102 for (i, lij) in zip(
103 l.row_indices_of_col(j),
104 slice_group(l.values_of_col(j)).into_ref_iter(),
105 )
106 .skip(1)
107 {
108 let lij = lij.read();
109 let lij = if conj == Conj::Yes {
110 lij.faer_conj()
111 } else {
112 lij
113 };
114 x.write(i, k0, x.read(i, k0).faer_sub(lij.faer_mul(xj0)));
115 x.write(i, k1, x.read(i, k1).faer_sub(lij.faer_mul(xj1)));
116 x.write(i, k2, x.read(i, k2).faer_sub(lij.faer_mul(xj2)));
117 }
118 }
119 }
120 4 => {
121 let k0 = K.check(k);
122 let k1 = K.check(k + 1);
123 let k2 = K.check(k + 2);
124 let k3 = K.check(k + 3);
125 for j in N.indices() {
126 let d = slice_group(l.values_of_col(j)).read(0).faer_inv();
127 let d = if conj == Conj::Yes { d.faer_conj() } else { d };
128
129 let xj0 = x.read(j, k0).faer_mul(d);
130 x.write(j, k0, xj0);
131 let xj1 = x.read(j, k1).faer_mul(d);
132 x.write(j, k1, xj1);
133 let xj2 = x.read(j, k2).faer_mul(d);
134 x.write(j, k2, xj2);
135 let xj3 = x.read(j, k3).faer_mul(d);
136 x.write(j, k3, xj3);
137
138 for (i, lij) in zip(
139 l.row_indices_of_col(j),
140 slice_group(l.values_of_col(j)).into_ref_iter(),
141 )
142 .skip(1)
143 {
144 let lij = lij.read();
145 let lij = if conj == Conj::Yes {
146 lij.faer_conj()
147 } else {
148 lij
149 };
150 x.write(i, k0, x.read(i, k0).faer_sub(lij.faer_mul(xj0)));
151 x.write(i, k1, x.read(i, k1).faer_sub(lij.faer_mul(xj1)));
152 x.write(i, k2, x.read(i, k2).faer_sub(lij.faer_mul(xj2)));
153 x.write(i, k3, x.read(i, k3).faer_sub(lij.faer_mul(xj3)));
154 }
155 }
156 }
157 _ => unreachable!(),
158 }
159 k += bs;
160 }
161 },
162 );
163}
164
165pub fn solve_lower_triangular_transpose_in_place<I: Index, E: ComplexField>(
166 l: SparseColMatRef<'_, I, E>,
167 conj: Conj,
168 rhs: MatMut<'_, E>,
169 parallelism: Parallelism,
170) {
171 let _ = parallelism;
172 assert!(l.nrows() == l.ncols());
173 assert!(rhs.nrows() == l.nrows());
174
175 let slice_group = SliceGroup::<'_, E>::new;
176
177 faer_core::constrained::Size::with2(
178 rhs.nrows(),
179 rhs.ncols(),
180 #[inline(always)]
181 |N, K| {
182 let mut x = faer_core::constrained::MatMut::new(rhs, N, K);
183 let l = faer_core::constrained::sparse::SparseColMatRef::new(l, N, N);
184
185 let mut k = 0usize;
186 while k < *K {
187 let bs = Ord::min(*K - k, 4);
188 match bs {
189 1 => {
190 let k0 = K.check(k);
191
192 for j in N.indices().rev() {
193 let mut acc0a = E::faer_zero();
194 let mut acc0b = E::faer_zero();
195 let mut acc0c = E::faer_zero();
196 let mut acc0d = E::faer_zero();
197
198 let a = 0;
199 let b = 1;
200 let c = 2;
201 let d = 3;
202
203 let nrows = l.row_indices_of_col_raw(j).len();
204 let rows_head = l.row_indices_of_col_raw(j)[1..].chunks_exact(4);
205 let rows_tail = rows_head.remainder();
206 let (values_head, values_tail) = slice_group(l.values_of_col(j))
207 .subslice(1..nrows)
208 .into_chunks_exact(4);
209
210 for (i, lij) in zip(rows_head, values_head) {
211 let lija = lij.read(a);
212 let lijb = lij.read(b);
213 let lijc = lij.read(c);
214 let lijd = lij.read(d);
215 let lija = if conj == Conj::Yes {
216 lija.faer_conj()
217 } else {
218 lija
219 };
220 let lijb = if conj == Conj::Yes {
221 lijb.faer_conj()
222 } else {
223 lijb
224 };
225 let lijc = if conj == Conj::Yes {
226 lijc.faer_conj()
227 } else {
228 lijc
229 };
230 let lijd = if conj == Conj::Yes {
231 lijd.faer_conj()
232 } else {
233 lijd
234 };
235 acc0a = acc0a.faer_add(lija.faer_mul(x.read(i[a].zx(), k0)));
236 acc0b = acc0b.faer_add(lijb.faer_mul(x.read(i[b].zx(), k0)));
237 acc0c = acc0c.faer_add(lijc.faer_mul(x.read(i[c].zx(), k0)));
238 acc0d = acc0d.faer_add(lijd.faer_mul(x.read(i[d].zx(), k0)));
239 }
240
241 for (i, lij) in zip(rows_tail, values_tail.into_ref_iter()) {
242 let lija = lij.read();
243 let lija = if conj == Conj::Yes {
244 lija.faer_conj()
245 } else {
246 lija
247 };
248 acc0a = acc0a.faer_add(lija.faer_mul(x.read(i.zx(), k0)));
249 }
250
251 let d = slice_group(l.values_of_col(j)).read(0).faer_inv();
252 let d = if conj == Conj::Yes { d.faer_conj() } else { d };
253 x.write(
254 j,
255 k0,
256 x.read(j, k0)
257 .faer_sub(acc0a.faer_add(acc0b).faer_add(acc0c.faer_add(acc0d)))
258 .faer_mul(d),
259 );
260 }
261 }
262 2 => {
263 let k0 = K.check(k);
264 let k1 = K.check(k + 1);
265
266 for j in N.indices().rev() {
267 let mut acc0a = E::faer_zero();
268 let mut acc0b = E::faer_zero();
269 let mut acc1a = E::faer_zero();
270 let mut acc1b = E::faer_zero();
271
272 let a = 0;
273 let b = 1;
274
275 let nrows = l.row_indices_of_col_raw(j).len();
276 let rows_head = l.row_indices_of_col_raw(j)[1..].chunks_exact(2);
277 let rows_tail = rows_head.remainder();
278 let (values_head, values_tail) = slice_group(l.values_of_col(j))
279 .subslice(1..nrows)
280 .into_chunks_exact(2);
281
282 for (i, lij) in zip(rows_head, values_head) {
283 let lija = lij.read(a);
284 let lijb = lij.read(b);
285 let lija = if conj == Conj::Yes {
286 lija.faer_conj()
287 } else {
288 lija
289 };
290 let lijb = if conj == Conj::Yes {
291 lijb.faer_conj()
292 } else {
293 lijb
294 };
295 acc0a = acc0a.faer_add(lija.faer_mul(x.read(i[a].zx(), k0)));
296 acc0b = acc0b.faer_add(lijb.faer_mul(x.read(i[b].zx(), k0)));
297 acc1a = acc1a.faer_add(lija.faer_mul(x.read(i[a].zx(), k1)));
298 acc1b = acc1b.faer_add(lijb.faer_mul(x.read(i[b].zx(), k1)));
299 }
300
301 for (i, lij) in zip(rows_tail, values_tail.into_ref_iter()) {
302 let lija = lij.read();
303 let lija = if conj == Conj::Yes {
304 lija.faer_conj()
305 } else {
306 lija
307 };
308 acc0a = acc0a.faer_add(lija.faer_mul(x.read(i.zx(), k0)));
309 acc1a = acc1a.faer_add(lija.faer_mul(x.read(i.zx(), k1)));
310 }
311
312 let d = slice_group(l.values_of_col(j)).read(0).faer_inv();
313 let d = if conj == Conj::Yes { d.faer_conj() } else { d };
314 x.write(
315 j,
316 k0,
317 x.read(j, k0).faer_sub(acc0a.faer_add(acc0b)).faer_mul(d),
318 );
319 x.write(
320 j,
321 k1,
322 x.read(j, k1).faer_sub(acc1a.faer_add(acc1b)).faer_mul(d),
323 );
324 }
325 }
326 3 => {
327 let k0 = K.check(k);
328 let k1 = K.check(k + 1);
329 let k2 = K.check(k + 2);
330
331 for j in N.indices().rev() {
332 let mut acc0a = E::faer_zero();
333 let mut acc1a = E::faer_zero();
334 let mut acc2a = E::faer_zero();
335
336 for (i, lij) in zip(
337 l.row_indices_of_col(j),
338 slice_group(l.values_of_col(j)).into_ref_iter(),
339 )
340 .skip(1)
341 {
342 let lij = lij.read();
343 let lij = if conj == Conj::Yes {
344 lij.faer_conj()
345 } else {
346 lij
347 };
348 acc0a = acc0a.faer_add(lij.faer_mul(x.read(i, k0)));
349 acc1a = acc1a.faer_add(lij.faer_mul(x.read(i, k1)));
350 acc2a = acc2a.faer_add(lij.faer_mul(x.read(i, k2)));
351 }
352
353 let d = slice_group(l.values_of_col(j)).read(0).faer_inv();
354 let d = if conj == Conj::Yes { d.faer_conj() } else { d };
355 x.write(j, k0, x.read(j, k0).faer_sub(acc0a).faer_mul(d));
356 x.write(j, k1, x.read(j, k1).faer_sub(acc1a).faer_mul(d));
357 x.write(j, k2, x.read(j, k2).faer_sub(acc2a).faer_mul(d));
358 }
359 }
360 4 => {
361 let k0 = K.check(k);
362 let k1 = K.check(k + 1);
363 let k2 = K.check(k + 2);
364 let k3 = K.check(k + 3);
365
366 for j in N.indices().rev() {
367 let mut acc0a = E::faer_zero();
368 let mut acc1a = E::faer_zero();
369 let mut acc2a = E::faer_zero();
370 let mut acc3a = E::faer_zero();
371
372 for (i, lij) in zip(
373 l.row_indices_of_col(j),
374 slice_group(l.values_of_col(j)).into_ref_iter(),
375 )
376 .skip(1)
377 {
378 let lij = lij.read();
379 let lij = if conj == Conj::Yes {
380 lij.faer_conj()
381 } else {
382 lij
383 };
384 acc0a = acc0a.faer_add(lij.faer_mul(x.read(i, k0)));
385 acc1a = acc1a.faer_add(lij.faer_mul(x.read(i, k1)));
386 acc2a = acc2a.faer_add(lij.faer_mul(x.read(i, k2)));
387 acc3a = acc3a.faer_add(lij.faer_mul(x.read(i, k3)));
388 }
389
390 let d = slice_group(l.values_of_col(j)).read(0).faer_inv();
391 let d = if conj == Conj::Yes { d.faer_conj() } else { d };
392 x.write(j, k0, x.read(j, k0).faer_sub(acc0a).faer_mul(d));
393 x.write(j, k1, x.read(j, k1).faer_sub(acc1a).faer_mul(d));
394 x.write(j, k2, x.read(j, k2).faer_sub(acc2a).faer_mul(d));
395 x.write(j, k3, x.read(j, k3).faer_sub(acc3a).faer_mul(d));
396 }
397 }
398 _ => unreachable!(),
399 }
400 k += bs;
401 }
402 },
403 );
404}
405
406pub fn solve_unit_lower_triangular_in_place<I: Index, E: ComplexField>(
407 l: SparseColMatRef<'_, I, E>,
408 conj: Conj,
409 rhs: MatMut<'_, E>,
410 parallelism: Parallelism,
411) {
412 let _ = parallelism;
413 assert!(l.nrows() == l.ncols());
414 assert!(rhs.nrows() == l.nrows());
415
416 let slice_group = SliceGroup::<'_, E>::new;
417
418 faer_core::constrained::Size::with2(
419 rhs.nrows(),
420 rhs.ncols(),
421 #[inline(always)]
422 |N, K| {
423 let mut x = faer_core::constrained::MatMut::new(rhs, N, K);
424 let l = faer_core::constrained::sparse::SparseColMatRef::new(l, N, N);
425
426 let mut k = 0usize;
427 while k < *K {
428 let bs = Ord::min(*K - k, 4);
429 match bs {
430 1 => {
431 let k0 = K.check(k);
432
433 for j in N.indices() {
434 let xj0 = x.read(j, k0);
435
436 for (i, lij) in zip(
437 l.row_indices_of_col(j),
438 slice_group(l.values_of_col(j)).into_ref_iter(),
439 )
440 .skip(1)
441 {
442 let lij = lij.read();
443 let lij = if conj == Conj::Yes {
444 lij.faer_conj()
445 } else {
446 lij
447 };
448 x.write(i, k0, x.read(i, k0).faer_sub(lij.faer_mul(xj0)));
449 }
450 }
451 }
452 2 => {
453 let k0 = K.check(k);
454 let k1 = K.check(k + 1);
455 for j in N.indices() {
456 let xj0 = x.read(j, k0);
457 let xj1 = x.read(j, k1);
458
459 for (i, lij) in zip(
460 l.row_indices_of_col(j),
461 slice_group(l.values_of_col(j)).into_ref_iter(),
462 )
463 .skip(1)
464 {
465 let lij = lij.read();
466 let lij = if conj == Conj::Yes {
467 lij.faer_conj()
468 } else {
469 lij
470 };
471 x.write(i, k0, x.read(i, k0).faer_sub(lij.faer_mul(xj0)));
472 x.write(i, k1, x.read(i, k1).faer_sub(lij.faer_mul(xj1)));
473 }
474 }
475 }
476 3 => {
477 let k0 = K.check(k);
478 let k1 = K.check(k + 1);
479 let k2 = K.check(k + 2);
480 for j in N.indices() {
481 let xj0 = x.read(j, k0);
482 let xj1 = x.read(j, k1);
483 let xj2 = x.read(j, k2);
484
485 for (i, lij) in zip(
486 l.row_indices_of_col(j),
487 slice_group(l.values_of_col(j)).into_ref_iter(),
488 )
489 .skip(1)
490 {
491 let lij = lij.read();
492 let lij = if conj == Conj::Yes {
493 lij.faer_conj()
494 } else {
495 lij
496 };
497 x.write(i, k0, x.read(i, k0).faer_sub(lij.faer_mul(xj0)));
498 x.write(i, k1, x.read(i, k1).faer_sub(lij.faer_mul(xj1)));
499 x.write(i, k2, x.read(i, k2).faer_sub(lij.faer_mul(xj2)));
500 }
501 }
502 }
503 4 => {
504 let k0 = K.check(k);
505 let k1 = K.check(k + 1);
506 let k2 = K.check(k + 2);
507 let k3 = K.check(k + 3);
508 for j in N.indices() {
509 let xj0 = x.read(j, k0);
510 let xj1 = x.read(j, k1);
511 let xj2 = x.read(j, k2);
512 let xj3 = x.read(j, k3);
513
514 for (i, lij) in zip(
515 l.row_indices_of_col(j),
516 slice_group(l.values_of_col(j)).into_ref_iter(),
517 )
518 .skip(1)
519 {
520 let lij = lij.read();
521 let lij = if conj == Conj::Yes {
522 lij.faer_conj()
523 } else {
524 lij
525 };
526 x.write(i, k0, x.read(i, k0).faer_sub(lij.faer_mul(xj0)));
527 x.write(i, k1, x.read(i, k1).faer_sub(lij.faer_mul(xj1)));
528 x.write(i, k2, x.read(i, k2).faer_sub(lij.faer_mul(xj2)));
529 x.write(i, k3, x.read(i, k3).faer_sub(lij.faer_mul(xj3)));
530 }
531 }
532 }
533 _ => unreachable!(),
534 }
535
536 k += bs;
537 }
538 },
539 );
540}
541
542pub fn solve_unit_lower_triangular_transpose_in_place<I: Index, E: ComplexField>(
543 l: SparseColMatRef<'_, I, E>,
544 conj: Conj,
545 rhs: MatMut<'_, E>,
546 parallelism: Parallelism,
547) {
548 let _ = parallelism;
549 assert!(l.nrows() == l.ncols());
550 assert!(rhs.nrows() == l.nrows());
551
552 let slice_group = SliceGroup::<'_, E>::new;
553
554 faer_core::constrained::Size::with2(
555 rhs.nrows(),
556 rhs.ncols(),
557 #[inline(always)]
558 |N, K| {
559 let mut x = faer_core::constrained::MatMut::new(rhs, N, K);
560 let l = faer_core::constrained::sparse::SparseColMatRef::new(l, N, N);
561
562 let mut k = 0usize;
563 while k < *K {
564 let bs = Ord::min(*K - k, 4);
565 match bs {
566 1 => {
567 let k0 = K.check(k);
568
569 for j in N.indices().rev() {
570 let mut acc0a = E::faer_zero();
571 let mut acc0b = E::faer_zero();
572 let mut acc0c = E::faer_zero();
573 let mut acc0d = E::faer_zero();
574
575 let a = 0;
576 let b = 1;
577 let c = 2;
578 let d = 3;
579
580 let nrows = l.row_indices_of_col_raw(j).len();
581 let rows_head = l.row_indices_of_col_raw(j)[1..].chunks_exact(4);
582 let rows_tail = rows_head.remainder();
583 let (values_head, values_tail) = slice_group(l.values_of_col(j))
584 .subslice(1..nrows)
585 .into_chunks_exact(4);
586
587 for (i, lij) in zip(rows_head, values_head) {
588 let lija = lij.read(a);
589 let lijb = lij.read(b);
590 let lijc = lij.read(c);
591 let lijd = lij.read(d);
592 let lija = if conj == Conj::Yes {
593 lija.faer_conj()
594 } else {
595 lija
596 };
597 let lijb = if conj == Conj::Yes {
598 lijb.faer_conj()
599 } else {
600 lijb
601 };
602 let lijc = if conj == Conj::Yes {
603 lijc.faer_conj()
604 } else {
605 lijc
606 };
607 let lijd = if conj == Conj::Yes {
608 lijd.faer_conj()
609 } else {
610 lijd
611 };
612 acc0a = acc0a.faer_add(lija.faer_mul(x.read(i[a].zx(), k0)));
613 acc0b = acc0b.faer_add(lijb.faer_mul(x.read(i[b].zx(), k0)));
614 acc0c = acc0c.faer_add(lijc.faer_mul(x.read(i[c].zx(), k0)));
615 acc0d = acc0d.faer_add(lijd.faer_mul(x.read(i[d].zx(), k0)));
616 }
617
618 for (i, lij) in zip(rows_tail, values_tail.into_ref_iter()) {
619 let lija = lij.read();
620 let lija = if conj == Conj::Yes {
621 lija.faer_conj()
622 } else {
623 lija
624 };
625 acc0a = acc0a.faer_add(lija.faer_mul(x.read(i.zx(), k0)));
626 }
627
628 x.write(
629 j,
630 k0,
631 x.read(j, k0).faer_sub(
632 acc0a.faer_add(acc0b).faer_add(acc0c.faer_add(acc0d)),
633 ),
634 );
635 }
636 }
637 2 => {
638 let k0 = K.check(k);
639 let k1 = K.check(k + 1);
640
641 for j in N.indices().rev() {
642 let mut acc0a = E::faer_zero();
643 let mut acc0b = E::faer_zero();
644 let mut acc1a = E::faer_zero();
645 let mut acc1b = E::faer_zero();
646
647 let a = 0;
648 let b = 1;
649
650 let nrows = l.row_indices_of_col_raw(j).len();
651 let rows_head = l.row_indices_of_col_raw(j)[1..].chunks_exact(2);
652 let rows_tail = rows_head.remainder();
653 let (values_head, values_tail) = slice_group(l.values_of_col(j))
654 .subslice(1..nrows)
655 .into_chunks_exact(2);
656
657 for (i, lij) in zip(rows_head, values_head) {
658 let lija = lij.read(a);
659 let lijb = lij.read(b);
660 let lija = if conj == Conj::Yes {
661 lija.faer_conj()
662 } else {
663 lija
664 };
665 let lijb = if conj == Conj::Yes {
666 lijb.faer_conj()
667 } else {
668 lijb
669 };
670 acc0a = acc0a.faer_add(lija.faer_mul(x.read(i[a].zx(), k0)));
671 acc0b = acc0b.faer_add(lijb.faer_mul(x.read(i[b].zx(), k0)));
672 acc1a = acc1a.faer_add(lija.faer_mul(x.read(i[a].zx(), k1)));
673 acc1b = acc1b.faer_add(lijb.faer_mul(x.read(i[b].zx(), k1)));
674 }
675
676 for (i, lij) in zip(rows_tail, values_tail.into_ref_iter()) {
677 let lija = lij.read();
678 let lija = if conj == Conj::Yes {
679 lija.faer_conj()
680 } else {
681 lija
682 };
683 acc0a = acc0a.faer_add(lija.faer_mul(x.read(i.zx(), k0)));
684 acc1a = acc1a.faer_add(lija.faer_mul(x.read(i.zx(), k1)));
685 }
686
687 x.write(j, k0, x.read(j, k0).faer_sub(acc0a.faer_add(acc0b)));
688 x.write(j, k1, x.read(j, k1).faer_sub(acc1a.faer_add(acc1b)));
689 }
690 }
691 3 => {
692 let k0 = K.check(k);
693 let k1 = K.check(k + 1);
694 let k2 = K.check(k + 2);
695
696 for j in N.indices().rev() {
697 let mut acc0a = E::faer_zero();
698 let mut acc1a = E::faer_zero();
699 let mut acc2a = E::faer_zero();
700
701 for (i, lij) in zip(
702 l.row_indices_of_col(j),
703 slice_group(l.values_of_col(j)).into_ref_iter(),
704 )
705 .skip(1)
706 {
707 let lij = lij.read();
708 let lij = if conj == Conj::Yes {
709 lij.faer_conj()
710 } else {
711 lij
712 };
713 acc0a = acc0a.faer_add(lij.faer_mul(x.read(i, k0)));
714 acc1a = acc1a.faer_add(lij.faer_mul(x.read(i, k1)));
715 acc2a = acc2a.faer_add(lij.faer_mul(x.read(i, k2)));
716 }
717
718 x.write(j, k0, x.read(j, k0).faer_sub(acc0a));
719 x.write(j, k1, x.read(j, k1).faer_sub(acc1a));
720 x.write(j, k2, x.read(j, k2).faer_sub(acc2a));
721 }
722 }
723 4 => {
724 let k0 = K.check(k);
725 let k1 = K.check(k + 1);
726 let k2 = K.check(k + 2);
727 let k3 = K.check(k + 3);
728
729 for j in N.indices().rev() {
730 let mut acc0a = E::faer_zero();
731 let mut acc1a = E::faer_zero();
732 let mut acc2a = E::faer_zero();
733 let mut acc3a = E::faer_zero();
734
735 for (i, lij) in zip(
736 l.row_indices_of_col(j),
737 slice_group(l.values_of_col(j)).into_ref_iter(),
738 )
739 .skip(1)
740 {
741 let lij = lij.read();
742 let lij = if conj == Conj::Yes {
743 lij.faer_conj()
744 } else {
745 lij
746 };
747 acc0a = acc0a.faer_add(lij.faer_mul(x.read(i, k0)));
748 acc1a = acc1a.faer_add(lij.faer_mul(x.read(i, k1)));
749 acc2a = acc2a.faer_add(lij.faer_mul(x.read(i, k2)));
750 acc3a = acc3a.faer_add(lij.faer_mul(x.read(i, k3)));
751 }
752
753 x.write(j, k0, x.read(j, k0).faer_sub(acc0a));
754 x.write(j, k1, x.read(j, k1).faer_sub(acc1a));
755 x.write(j, k2, x.read(j, k2).faer_sub(acc2a));
756 x.write(j, k3, x.read(j, k3).faer_sub(acc3a));
757 }
758 }
759 _ => unreachable!(),
760 }
761 k += bs;
762 }
763 },
764 );
765}
766
767pub fn solve_upper_triangular_in_place<I: Index, E: ComplexField>(
768 u: SparseColMatRef<'_, I, E>,
769 conj: Conj,
770 rhs: MatMut<'_, E>,
771 parallelism: Parallelism,
772) {
773 let _ = parallelism;
774 assert!(u.nrows() == u.ncols());
775 assert!(rhs.nrows() == u.nrows());
776
777 faer_core::constrained::Size::with2(
778 rhs.nrows(),
779 rhs.ncols(),
780 #[inline(always)]
781 |N, K| {
782 let mut x = faer_core::constrained::MatMut::new(rhs, N, K);
783 let u = faer_core::constrained::sparse::SparseColMatRef::new(u, N, N);
784
785 let mut k = 0usize;
786 while k < *K {
787 let bs = Ord::min(*K - k, 4);
788 match bs {
789 1 => {
790 let k0 = K.check(k);
791
792 for j in N.indices().rev() {
793 let ui = u.row_indices_of_col_raw(j);
794 let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
795
796 let u_inv = ux.read(ui.len() - 1).faer_inv();
797 let u_inv = if conj == Conj::Yes {
798 u_inv.faer_conj()
799 } else {
800 u_inv
801 };
802 let xj = x.read(j, k0).faer_mul(u_inv);
803 x.write(j, k0, xj);
804
805 for (i, u) in zip(
806 &ui[..ui.len() - 1],
807 ux.subslice(0..ui.len() - 1).into_ref_iter(),
808 ) {
809 let i = i.zx();
810 let u = if conj == Conj::Yes {
811 u.read().faer_conj()
812 } else {
813 u.read()
814 };
815
816 x.write(i, k0, x.read(i, k0).faer_sub(E::faer_mul(u, xj)));
817 }
818 }
819 }
820 2 => {
821 let k0 = K.check(k);
822 let k1 = K.check(k + 1);
823
824 for j in N.indices().rev() {
825 let ui = u.row_indices_of_col_raw(j);
826 let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
827
828 let u_inv = ux.read(ui.len() - 1).faer_inv();
829 let u_inv = if conj == Conj::Yes {
830 u_inv.faer_conj()
831 } else {
832 u_inv
833 };
834 let xj0 = x.read(j, k0).faer_mul(u_inv);
835 let xj1 = x.read(j, k1).faer_mul(u_inv);
836 x.write(j, k0, xj0);
837 x.write(j, k1, xj1);
838
839 for (i, u) in zip(
840 &ui[..ui.len() - 1],
841 ux.subslice(0..ui.len() - 1).into_ref_iter(),
842 ) {
843 let i = i.zx();
844 let u = if conj == Conj::Yes {
845 u.read().faer_conj()
846 } else {
847 u.read()
848 };
849
850 x.write(i, k0, x.read(i, k0).faer_sub(E::faer_mul(u, xj0)));
851 x.write(i, k1, x.read(i, k1).faer_sub(E::faer_mul(u, xj1)));
852 }
853 }
854 }
855 3 => {
856 let k0 = K.check(k);
857 let k1 = K.check(k + 1);
858 let k2 = K.check(k + 2);
859
860 for j in N.indices().rev() {
861 let ui = u.row_indices_of_col_raw(j);
862 let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
863
864 let u_inv = ux.read(ui.len() - 1).faer_inv();
865 let u_inv = if conj == Conj::Yes {
866 u_inv.faer_conj()
867 } else {
868 u_inv
869 };
870 let xj0 = x.read(j, k0).faer_mul(u_inv);
871 let xj1 = x.read(j, k1).faer_mul(u_inv);
872 let xj2 = x.read(j, k2).faer_mul(u_inv);
873 x.write(j, k0, xj0);
874 x.write(j, k1, xj1);
875 x.write(j, k2, xj2);
876
877 for (i, u) in zip(
878 &ui[..ui.len() - 1],
879 ux.subslice(0..ui.len() - 1).into_ref_iter(),
880 ) {
881 let i = i.zx();
882 let u = if conj == Conj::Yes {
883 u.read().faer_conj()
884 } else {
885 u.read()
886 };
887
888 x.write(i, k0, x.read(i, k0).faer_sub(E::faer_mul(u, xj0)));
889 x.write(i, k1, x.read(i, k1).faer_sub(E::faer_mul(u, xj1)));
890 x.write(i, k2, x.read(i, k2).faer_sub(E::faer_mul(u, xj2)));
891 }
892 }
893 }
894 4 => {
895 let k0 = K.check(k);
896 let k1 = K.check(k + 1);
897 let k2 = K.check(k + 2);
898 let k3 = K.check(k + 3);
899
900 for j in N.indices().rev() {
901 let ui = u.row_indices_of_col_raw(j);
902 let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
903
904 let u_inv = ux.read(ui.len() - 1).faer_inv();
905 let u_inv = if conj == Conj::Yes {
906 u_inv.faer_conj()
907 } else {
908 u_inv
909 };
910 let xj0 = x.read(j, k0).faer_mul(u_inv);
911 let xj1 = x.read(j, k1).faer_mul(u_inv);
912 let xj2 = x.read(j, k2).faer_mul(u_inv);
913 let xj3 = x.read(j, k3).faer_mul(u_inv);
914 x.write(j, k0, xj0);
915 x.write(j, k1, xj1);
916 x.write(j, k2, xj2);
917 x.write(j, k3, xj3);
918
919 for (i, u) in zip(
920 &ui[..ui.len() - 1],
921 ux.subslice(0..ui.len() - 1).into_ref_iter(),
922 ) {
923 let i = i.zx();
924 let u = if conj == Conj::Yes {
925 u.read().faer_conj()
926 } else {
927 u.read()
928 };
929
930 x.write(i, k0, x.read(i, k0).faer_sub(E::faer_mul(u, xj0)));
931 x.write(i, k1, x.read(i, k1).faer_sub(E::faer_mul(u, xj1)));
932 x.write(i, k2, x.read(i, k2).faer_sub(E::faer_mul(u, xj2)));
933 x.write(i, k3, x.read(i, k3).faer_sub(E::faer_mul(u, xj3)));
934 }
935 }
936 }
937 _ => unreachable!(),
938 }
939 k += bs;
940 }
941 },
942 );
943}
944
945pub fn solve_upper_triangular_transpose_in_place<I: Index, E: ComplexField>(
946 u: SparseColMatRef<'_, I, E>,
947 conj: Conj,
948 rhs: MatMut<'_, E>,
949 parallelism: Parallelism,
950) {
951 let _ = parallelism;
952 assert!(u.nrows() == u.ncols());
953 assert!(rhs.nrows() == u.nrows());
954
955 faer_core::constrained::Size::with2(
956 rhs.nrows(),
957 rhs.ncols(),
958 #[inline(always)]
959 |N, K| {
960 let mut x = faer_core::constrained::MatMut::new(rhs, N, K);
961 let u = faer_core::constrained::sparse::SparseColMatRef::new(u, N, N);
962
963 let mut k = 0usize;
964 while k < *K {
965 let bs = Ord::min(*K - k, 4);
966 match bs {
967 1 => {
968 let k0 = K.check(k);
969 for j in N.indices() {
970 let ui = u.row_indices_of_col_raw(j);
971 let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
972
973 let u_inv = ux.read(ui.len() - 1).faer_inv();
974 let u_inv = if conj == Conj::Yes {
975 u_inv.faer_conj()
976 } else {
977 u_inv
978 };
979
980 let mut acc0a = E::faer_zero();
981 let mut acc0b = E::faer_zero();
982 let mut acc0c = E::faer_zero();
983 let mut acc0d = E::faer_zero();
984
985 let a = 0;
986 let b = 1;
987 let c = 2;
988 let d = 3;
989
990 let rows_head = ui[..ui.len() - 1].chunks_exact(4);
991 let rows_tail = rows_head.remainder();
992 let (values_head, values_tail) =
993 ux.subslice(0..ui.len() - 1).into_chunks_exact(4);
994
995 for (i, uij) in zip(rows_head, values_head) {
996 let uija = uij.read(a);
997 let uijb = uij.read(b);
998 let uijc = uij.read(c);
999 let uijd = uij.read(d);
1000 let uija = if conj == Conj::Yes {
1001 uija.faer_conj()
1002 } else {
1003 uija
1004 };
1005 let uijb = if conj == Conj::Yes {
1006 uijb.faer_conj()
1007 } else {
1008 uijb
1009 };
1010 let uijc = if conj == Conj::Yes {
1011 uijc.faer_conj()
1012 } else {
1013 uijc
1014 };
1015 let uijd = if conj == Conj::Yes {
1016 uijd.faer_conj()
1017 } else {
1018 uijd
1019 };
1020 acc0a = acc0a.faer_add(uija.faer_mul(x.read(i[a].zx(), k0)));
1021 acc0b = acc0b.faer_add(uijb.faer_mul(x.read(i[b].zx(), k0)));
1022 acc0c = acc0c.faer_add(uijc.faer_mul(x.read(i[c].zx(), k0)));
1023 acc0d = acc0d.faer_add(uijd.faer_mul(x.read(i[d].zx(), k0)));
1024 }
1025
1026 for (i, uij) in zip(rows_tail, values_tail.into_ref_iter()) {
1027 let uija = uij.read();
1028 let uija = if conj == Conj::Yes {
1029 uija.faer_conj()
1030 } else {
1031 uija
1032 };
1033 acc0a = acc0a.faer_add(uija.faer_mul(x.read(i.zx(), k0)));
1034 }
1035
1036 x.write(
1037 j,
1038 k0,
1039 x.read(j, k0)
1040 .faer_sub(acc0a.faer_add(acc0b).faer_add(acc0c.faer_add(acc0d)))
1041 .faer_mul(u_inv),
1042 );
1043 }
1044 }
1045 2 => {
1046 let k0 = K.check(k);
1047 let k1 = K.check(k + 1);
1048
1049 for j in N.indices() {
1050 let ui = u.row_indices_of_col_raw(j);
1051 let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
1052
1053 let u_inv = ux.read(ui.len() - 1).faer_inv();
1054 let u_inv = if conj == Conj::Yes {
1055 u_inv.faer_conj()
1056 } else {
1057 u_inv
1058 };
1059
1060 let mut acc0a = E::faer_zero();
1061 let mut acc0b = E::faer_zero();
1062 let mut acc1a = E::faer_zero();
1063 let mut acc1b = E::faer_zero();
1064
1065 let a = 0;
1066 let b = 1;
1067
1068 let rows_head = ui[..ui.len() - 1].chunks_exact(2);
1069 let rows_tail = rows_head.remainder();
1070 let (values_head, values_tail) =
1071 ux.subslice(0..ui.len() - 1).into_chunks_exact(2);
1072
1073 for (i, uij) in zip(rows_head, values_head) {
1074 let uija = uij.read(a);
1075 let uijb = uij.read(b);
1076 let uija = if conj == Conj::Yes {
1077 uija.faer_conj()
1078 } else {
1079 uija
1080 };
1081 let uijb = if conj == Conj::Yes {
1082 uijb.faer_conj()
1083 } else {
1084 uijb
1085 };
1086 acc0a = acc0a.faer_add(uija.faer_mul(x.read(i[a].zx(), k0)));
1087 acc0b = acc0b.faer_add(uijb.faer_mul(x.read(i[b].zx(), k0)));
1088 acc1a = acc1a.faer_add(uija.faer_mul(x.read(i[a].zx(), k1)));
1089 acc1b = acc1b.faer_add(uijb.faer_mul(x.read(i[b].zx(), k1)));
1090 }
1091
1092 for (i, uij) in zip(rows_tail, values_tail.into_ref_iter()) {
1093 let uija = uij.read();
1094 let uija = if conj == Conj::Yes {
1095 uija.faer_conj()
1096 } else {
1097 uija
1098 };
1099 acc0a = acc0a.faer_add(uija.faer_mul(x.read(i.zx(), k0)));
1100 acc1a = acc1a.faer_add(uija.faer_mul(x.read(i.zx(), k1)));
1101 }
1102
1103 x.write(
1104 j,
1105 k0,
1106 x.read(j, k0)
1107 .faer_sub(acc0a.faer_add(acc0b))
1108 .faer_mul(u_inv),
1109 );
1110 x.write(
1111 j,
1112 k1,
1113 x.read(j, k1)
1114 .faer_sub(acc1a.faer_add(acc1b))
1115 .faer_mul(u_inv),
1116 );
1117 }
1118 }
1119 3 => {
1120 let k0 = K.check(k);
1121 let k1 = K.check(k + 1);
1122 let k2 = K.check(k + 2);
1123
1124 for j in N.indices() {
1125 let ui = u.row_indices_of_col_raw(j);
1126 let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
1127
1128 let u_inv = ux.read(ui.len() - 1).faer_inv();
1129 let u_inv = if conj == Conj::Yes {
1130 u_inv.faer_conj()
1131 } else {
1132 u_inv
1133 };
1134
1135 let mut acc0a = E::faer_zero();
1136 let mut acc1a = E::faer_zero();
1137 let mut acc2a = E::faer_zero();
1138
1139 let rows = &ui[..ui.len() - 1];
1140 let values = ux.subslice(0..ui.len() - 1);
1141
1142 for (i, uij) in zip(rows, values.into_ref_iter()) {
1143 let uija = uij.read();
1144 let uija = if conj == Conj::Yes {
1145 uija.faer_conj()
1146 } else {
1147 uija
1148 };
1149 acc0a = acc0a.faer_add(uija.faer_mul(x.read(i.zx(), k0)));
1150 acc1a = acc1a.faer_add(uija.faer_mul(x.read(i.zx(), k1)));
1151 acc2a = acc2a.faer_add(uija.faer_mul(x.read(i.zx(), k2)));
1152 }
1153
1154 x.write(j, k0, x.read(j, k0).faer_sub(acc0a).faer_mul(u_inv));
1155 x.write(j, k1, x.read(j, k1).faer_sub(acc1a).faer_mul(u_inv));
1156 x.write(j, k2, x.read(j, k2).faer_sub(acc2a).faer_mul(u_inv));
1157 }
1158 }
1159 4 => {
1160 let k0 = K.check(k);
1161 let k1 = K.check(k + 1);
1162 let k2 = K.check(k + 2);
1163 let k3 = K.check(k + 3);
1164
1165 for j in N.indices() {
1166 let ui = u.row_indices_of_col_raw(j);
1167 let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
1168
1169 let u_inv = ux.read(ui.len() - 1).faer_inv();
1170 let u_inv = if conj == Conj::Yes {
1171 u_inv.faer_conj()
1172 } else {
1173 u_inv
1174 };
1175
1176 let mut acc0a = E::faer_zero();
1177 let mut acc1a = E::faer_zero();
1178 let mut acc2a = E::faer_zero();
1179 let mut acc3a = E::faer_zero();
1180
1181 let rows = &ui[..ui.len() - 1];
1182 let values = ux.subslice(0..ui.len() - 1);
1183
1184 for (i, uij) in zip(rows, values.into_ref_iter()) {
1185 let uija = uij.read();
1186 let uija = if conj == Conj::Yes {
1187 uija.faer_conj()
1188 } else {
1189 uija
1190 };
1191 acc0a = acc0a.faer_add(uija.faer_mul(x.read(i.zx(), k0)));
1192 acc1a = acc1a.faer_add(uija.faer_mul(x.read(i.zx(), k1)));
1193 acc2a = acc2a.faer_add(uija.faer_mul(x.read(i.zx(), k2)));
1194 acc3a = acc3a.faer_add(uija.faer_mul(x.read(i.zx(), k3)));
1195 }
1196
1197 x.write(j, k0, x.read(j, k0).faer_sub(acc0a).faer_mul(u_inv));
1198 x.write(j, k1, x.read(j, k1).faer_sub(acc1a).faer_mul(u_inv));
1199 x.write(j, k2, x.read(j, k2).faer_sub(acc2a).faer_mul(u_inv));
1200 x.write(j, k3, x.read(j, k3).faer_sub(acc3a).faer_mul(u_inv));
1201 }
1202 }
1203 _ => unreachable!(),
1204 }
1205 k += bs;
1206 }
1207 },
1208 );
1209}
1210
1211pub fn solve_unit_upper_triangular_in_place<I: Index, E: ComplexField>(
1212 u: SparseColMatRef<'_, I, E>,
1213 conj: Conj,
1214 rhs: MatMut<'_, E>,
1215 parallelism: Parallelism,
1216) {
1217 let _ = parallelism;
1218 assert!(u.nrows() == u.ncols());
1219 assert!(rhs.nrows() == u.nrows());
1220
1221 faer_core::constrained::Size::with2(
1222 rhs.nrows(),
1223 rhs.ncols(),
1224 #[inline(always)]
1225 |N, K| {
1226 let mut x = faer_core::constrained::MatMut::new(rhs, N, K);
1227 let u = faer_core::constrained::sparse::SparseColMatRef::new(u, N, N);
1228
1229 let mut k = 0usize;
1230 while k < *K {
1231 let bs = Ord::min(*K - k, 4);
1232 match bs {
1233 1 => {
1234 let k0 = K.check(k);
1235
1236 for j in N.indices().rev() {
1237 let ui = u.row_indices_of_col_raw(j);
1238 let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
1239
1240 let xj = x.read(j, k0);
1241
1242 for (i, u) in zip(
1243 &ui[..ui.len() - 1],
1244 ux.subslice(0..ui.len() - 1).into_ref_iter(),
1245 ) {
1246 let i = i.zx();
1247 let u = if conj == Conj::Yes {
1248 u.read().faer_conj()
1249 } else {
1250 u.read()
1251 };
1252
1253 x.write(i, k0, x.read(i, k0).faer_sub(E::faer_mul(u, xj)));
1254 }
1255 }
1256 }
1257 2 => {
1258 let k0 = K.check(k);
1259 let k1 = K.check(k + 1);
1260
1261 for j in N.indices().rev() {
1262 let ui = u.row_indices_of_col_raw(j);
1263 let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
1264
1265 let xj0 = x.read(j, k0);
1266 let xj1 = x.read(j, k1);
1267
1268 for (i, u) in zip(
1269 &ui[..ui.len() - 1],
1270 ux.subslice(0..ui.len() - 1).into_ref_iter(),
1271 ) {
1272 let i = i.zx();
1273 let u = if conj == Conj::Yes {
1274 u.read().faer_conj()
1275 } else {
1276 u.read()
1277 };
1278
1279 x.write(i, k0, x.read(i, k0).faer_sub(E::faer_mul(u, xj0)));
1280 x.write(i, k1, x.read(i, k1).faer_sub(E::faer_mul(u, xj1)));
1281 }
1282 }
1283 }
1284 3 => {
1285 let k0 = K.check(k);
1286 let k1 = K.check(k + 1);
1287 let k2 = K.check(k + 2);
1288
1289 for j in N.indices().rev() {
1290 let ui = u.row_indices_of_col_raw(j);
1291 let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
1292
1293 let xj0 = x.read(j, k0);
1294 let xj1 = x.read(j, k1);
1295 let xj2 = x.read(j, k2);
1296
1297 for (i, u) in zip(
1298 &ui[..ui.len() - 1],
1299 ux.subslice(0..ui.len() - 1).into_ref_iter(),
1300 ) {
1301 let i = i.zx();
1302 let u = if conj == Conj::Yes {
1303 u.read().faer_conj()
1304 } else {
1305 u.read()
1306 };
1307
1308 x.write(i, k0, x.read(i, k0).faer_sub(E::faer_mul(u, xj0)));
1309 x.write(i, k1, x.read(i, k1).faer_sub(E::faer_mul(u, xj1)));
1310 x.write(i, k2, x.read(i, k2).faer_sub(E::faer_mul(u, xj2)));
1311 }
1312 }
1313 }
1314 4 => {
1315 let k0 = K.check(k);
1316 let k1 = K.check(k + 1);
1317 let k2 = K.check(k + 2);
1318 let k3 = K.check(k + 3);
1319
1320 for j in N.indices().rev() {
1321 let ui = u.row_indices_of_col_raw(j);
1322 let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
1323
1324 let xj0 = x.read(j, k0);
1325 let xj1 = x.read(j, k1);
1326 let xj2 = x.read(j, k2);
1327 let xj3 = x.read(j, k3);
1328
1329 for (i, u) in zip(
1330 &ui[..ui.len() - 1],
1331 ux.subslice(0..ui.len() - 1).into_ref_iter(),
1332 ) {
1333 let i = i.zx();
1334 let u = if conj == Conj::Yes {
1335 u.read().faer_conj()
1336 } else {
1337 u.read()
1338 };
1339
1340 x.write(i, k0, x.read(i, k0).faer_sub(E::faer_mul(u, xj0)));
1341 x.write(i, k1, x.read(i, k1).faer_sub(E::faer_mul(u, xj1)));
1342 x.write(i, k2, x.read(i, k2).faer_sub(E::faer_mul(u, xj2)));
1343 x.write(i, k3, x.read(i, k3).faer_sub(E::faer_mul(u, xj3)));
1344 }
1345 }
1346 }
1347 _ => unreachable!(),
1348 }
1349 k += bs;
1350 }
1351 },
1352 );
1353}
1354
1355pub fn solve_unit_upper_triangular_transpose_in_place<I: Index, E: ComplexField>(
1356 u: SparseColMatRef<'_, I, E>,
1357 conj: Conj,
1358 rhs: MatMut<'_, E>,
1359 parallelism: Parallelism,
1360) {
1361 let _ = parallelism;
1362 assert!(u.nrows() == u.ncols());
1363 assert!(rhs.nrows() == u.nrows());
1364
1365 faer_core::constrained::Size::with2(
1366 rhs.nrows(),
1367 rhs.ncols(),
1368 #[inline(always)]
1369 |N, K| {
1370 let mut x = faer_core::constrained::MatMut::new(rhs, N, K);
1371 let u = faer_core::constrained::sparse::SparseColMatRef::new(u, N, N);
1372
1373 let mut k = 0usize;
1374 while k < *K {
1375 let bs = Ord::min(*K - k, 4);
1376 match bs {
1377 1 => {
1378 let k0 = K.check(k);
1379 for j in N.indices() {
1380 let ui = u.row_indices_of_col_raw(j);
1381 let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
1382
1383 let mut acc0a = E::faer_zero();
1384 let mut acc0b = E::faer_zero();
1385 let mut acc0c = E::faer_zero();
1386 let mut acc0d = E::faer_zero();
1387
1388 let a = 0;
1389 let b = 1;
1390 let c = 2;
1391 let d = 3;
1392
1393 let rows_head = ui[..ui.len() - 1].chunks_exact(4);
1394 let rows_tail = rows_head.remainder();
1395 let (values_head, values_tail) =
1396 ux.subslice(0..ui.len() - 1).into_chunks_exact(4);
1397
1398 for (i, uij) in zip(rows_head, values_head) {
1399 let uija = uij.read(a);
1400 let uijb = uij.read(b);
1401 let uijc = uij.read(c);
1402 let uijd = uij.read(d);
1403 let uija = if conj == Conj::Yes {
1404 uija.faer_conj()
1405 } else {
1406 uija
1407 };
1408 let uijb = if conj == Conj::Yes {
1409 uijb.faer_conj()
1410 } else {
1411 uijb
1412 };
1413 let uijc = if conj == Conj::Yes {
1414 uijc.faer_conj()
1415 } else {
1416 uijc
1417 };
1418 let uijd = if conj == Conj::Yes {
1419 uijd.faer_conj()
1420 } else {
1421 uijd
1422 };
1423 acc0a = acc0a.faer_add(uija.faer_mul(x.read(i[a].zx(), k0)));
1424 acc0b = acc0b.faer_add(uijb.faer_mul(x.read(i[b].zx(), k0)));
1425 acc0c = acc0c.faer_add(uijc.faer_mul(x.read(i[c].zx(), k0)));
1426 acc0d = acc0d.faer_add(uijd.faer_mul(x.read(i[d].zx(), k0)));
1427 }
1428
1429 for (i, uij) in zip(rows_tail, values_tail.into_ref_iter()) {
1430 let uija = uij.read();
1431 let uija = if conj == Conj::Yes {
1432 uija.faer_conj()
1433 } else {
1434 uija
1435 };
1436 acc0a = acc0a.faer_add(uija.faer_mul(x.read(i.zx(), k0)));
1437 }
1438
1439 x.write(
1440 j,
1441 k0,
1442 x.read(j, k0).faer_sub(
1443 acc0a.faer_add(acc0b).faer_add(acc0c.faer_add(acc0d)),
1444 ),
1445 );
1446 }
1447 }
1448 2 => {
1449 let k0 = K.check(k);
1450 let k1 = K.check(k + 1);
1451
1452 for j in N.indices() {
1453 let ui = u.row_indices_of_col_raw(j);
1454 let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
1455
1456 let mut acc0a = E::faer_zero();
1457 let mut acc0b = E::faer_zero();
1458 let mut acc1a = E::faer_zero();
1459 let mut acc1b = E::faer_zero();
1460
1461 let a = 0;
1462 let b = 1;
1463
1464 let rows_head = ui[..ui.len() - 1].chunks_exact(4);
1465 let rows_tail = rows_head.remainder();
1466 let (values_head, values_tail) =
1467 ux.subslice(0..ui.len() - 1).into_chunks_exact(4);
1468
1469 for (i, uij) in zip(rows_head, values_head) {
1470 let uija = uij.read(a);
1471 let uijb = uij.read(b);
1472 let uija = if conj == Conj::Yes {
1473 uija.faer_conj()
1474 } else {
1475 uija
1476 };
1477 let uijb = if conj == Conj::Yes {
1478 uijb.faer_conj()
1479 } else {
1480 uijb
1481 };
1482 acc0a = acc0a.faer_add(uija.faer_mul(x.read(i[a].zx(), k0)));
1483 acc0b = acc0b.faer_add(uijb.faer_mul(x.read(i[b].zx(), k0)));
1484 acc1a = acc1a.faer_add(uija.faer_mul(x.read(i[a].zx(), k1)));
1485 acc1b = acc1b.faer_add(uijb.faer_mul(x.read(i[b].zx(), k1)));
1486 }
1487
1488 for (i, uij) in zip(rows_tail, values_tail.into_ref_iter()) {
1489 let uija = uij.read();
1490 let uija = if conj == Conj::Yes {
1491 uija.faer_conj()
1492 } else {
1493 uija
1494 };
1495 acc0a = acc0a.faer_add(uija.faer_mul(x.read(i.zx(), k0)));
1496 acc1a = acc1a.faer_add(uija.faer_mul(x.read(i.zx(), k1)));
1497 }
1498
1499 x.write(j, k0, x.read(j, k0).faer_sub(acc0a.faer_add(acc0b)));
1500 x.write(j, k1, x.read(j, k1).faer_sub(acc1a.faer_add(acc1b)));
1501 }
1502 }
1503 3 => {
1504 let k0 = K.check(k);
1505 let k1 = K.check(k + 1);
1506 let k2 = K.check(k + 2);
1507
1508 for j in N.indices() {
1509 let ui = u.row_indices_of_col_raw(j);
1510 let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
1511
1512 let mut acc0a = E::faer_zero();
1513 let mut acc1a = E::faer_zero();
1514 let mut acc2a = E::faer_zero();
1515
1516 let a = 0;
1517
1518 let rows_head = ui[..ui.len() - 1].chunks_exact(4);
1519 let rows_tail = rows_head.remainder();
1520 let (values_head, values_tail) =
1521 ux.subslice(0..ui.len() - 1).into_chunks_exact(4);
1522
1523 for (i, uij) in zip(rows_head, values_head) {
1524 let uija = uij.read(a);
1525 let uija = if conj == Conj::Yes {
1526 uija.faer_conj()
1527 } else {
1528 uija
1529 };
1530 acc0a = acc0a.faer_add(uija.faer_mul(x.read(i[a].zx(), k0)));
1531 acc1a = acc1a.faer_add(uija.faer_mul(x.read(i[a].zx(), k1)));
1532 acc2a = acc2a.faer_add(uija.faer_mul(x.read(i[a].zx(), k2)));
1533 }
1534
1535 for (i, uij) in zip(rows_tail, values_tail.into_ref_iter()) {
1536 let uija = uij.read();
1537 let uija = if conj == Conj::Yes {
1538 uija.faer_conj()
1539 } else {
1540 uija
1541 };
1542 acc0a = acc0a.faer_add(uija.faer_mul(x.read(i.zx(), k0)));
1543 acc1a = acc1a.faer_add(uija.faer_mul(x.read(i.zx(), k1)));
1544 acc2a = acc2a.faer_add(uija.faer_mul(x.read(i.zx(), k2)));
1545 }
1546
1547 x.write(j, k0, x.read(j, k0).faer_sub(acc0a));
1548 x.write(j, k1, x.read(j, k1).faer_sub(acc1a));
1549 x.write(j, k2, x.read(j, k2).faer_sub(acc2a));
1550 }
1551 }
1552 4 => {
1553 let k0 = K.check(k);
1554 let k1 = K.check(k + 1);
1555 let k2 = K.check(k + 2);
1556 let k3 = K.check(k + 3);
1557
1558 for j in N.indices() {
1559 let ui = u.row_indices_of_col_raw(j);
1560 let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
1561
1562 let mut acc0a = E::faer_zero();
1563 let mut acc1a = E::faer_zero();
1564 let mut acc2a = E::faer_zero();
1565 let mut acc3a = E::faer_zero();
1566
1567 let a = 0;
1568
1569 let rows_head = ui[..ui.len() - 1].chunks_exact(4);
1570 let rows_tail = rows_head.remainder();
1571 let (values_head, values_tail) =
1572 ux.subslice(0..ui.len() - 1).into_chunks_exact(4);
1573
1574 for (i, uij) in zip(rows_head, values_head) {
1575 let uija = uij.read(a);
1576 let uija = if conj == Conj::Yes {
1577 uija.faer_conj()
1578 } else {
1579 uija
1580 };
1581 acc0a = acc0a.faer_add(uija.faer_mul(x.read(i[a].zx(), k0)));
1582 acc1a = acc1a.faer_add(uija.faer_mul(x.read(i[a].zx(), k1)));
1583 acc2a = acc2a.faer_add(uija.faer_mul(x.read(i[a].zx(), k2)));
1584 acc3a = acc3a.faer_add(uija.faer_mul(x.read(i[a].zx(), k3)));
1585 }
1586
1587 for (i, uij) in zip(rows_tail, values_tail.into_ref_iter()) {
1588 let uija = uij.read();
1589 let uija = if conj == Conj::Yes {
1590 uija.faer_conj()
1591 } else {
1592 uija
1593 };
1594 acc0a = acc0a.faer_add(uija.faer_mul(x.read(i.zx(), k0)));
1595 acc1a = acc1a.faer_add(uija.faer_mul(x.read(i.zx(), k1)));
1596 acc2a = acc2a.faer_add(uija.faer_mul(x.read(i.zx(), k2)));
1597 acc3a = acc3a.faer_add(uija.faer_mul(x.read(i.zx(), k3)));
1598 }
1599
1600 x.write(j, k0, x.read(j, k0).faer_sub(acc0a));
1601 x.write(j, k1, x.read(j, k1).faer_sub(acc1a));
1602 x.write(j, k2, x.read(j, k2).faer_sub(acc2a));
1603 x.write(j, k3, x.read(j, k3).faer_sub(acc3a));
1604 }
1605 }
1606 _ => unreachable!(),
1607 }
1608 k += bs;
1609 }
1610 },
1611 );
1612}