faer_sparse/
triangular_solve.rs

1use core::iter::zip;
2use faer_core::{
3    assert, group_helpers::SliceGroup, permutation::Index, sparse::SparseColMatRef, Conj, MatMut,
4    Parallelism,
5};
6use faer_entity::ComplexField;
7
8pub fn solve_lower_triangular_in_place<I: Index, E: ComplexField>(
9    l: SparseColMatRef<'_, I, E>,
10    conj: Conj,
11    rhs: MatMut<'_, E>,
12    parallelism: Parallelism,
13) {
14    let _ = parallelism;
15    assert!(l.nrows() == l.ncols());
16    assert!(rhs.nrows() == l.nrows());
17
18    let slice_group = SliceGroup::<'_, E>::new;
19
20    faer_core::constrained::Size::with2(
21        rhs.nrows(),
22        rhs.ncols(),
23        #[inline(always)]
24        |N, K| {
25            let mut x = faer_core::constrained::MatMut::new(rhs, N, K);
26            let l = faer_core::constrained::sparse::SparseColMatRef::new(l, N, N);
27
28            let mut k = 0usize;
29            while k < *K {
30                let bs = Ord::min(*K - k, 4);
31                match bs {
32                    1 => {
33                        let k0 = K.check(k);
34
35                        for j in N.indices() {
36                            let d = slice_group(l.values_of_col(j)).read(0).faer_inv();
37                            let d = if conj == Conj::Yes { d.faer_conj() } else { d };
38
39                            let xj0 = x.read(j, k0).faer_mul(d);
40                            x.write(j, k0, xj0);
41
42                            for (i, lij) in zip(
43                                l.row_indices_of_col(j),
44                                slice_group(l.values_of_col(j)).into_ref_iter(),
45                            )
46                            .skip(1)
47                            {
48                                let lij = lij.read();
49                                let lij = if conj == Conj::Yes {
50                                    lij.faer_conj()
51                                } else {
52                                    lij
53                                };
54                                x.write(i, k0, x.read(i, k0).faer_sub(lij.faer_mul(xj0)));
55                            }
56                        }
57                    }
58                    2 => {
59                        let k0 = K.check(k);
60                        let k1 = K.check(k + 1);
61                        for j in N.indices() {
62                            let d = slice_group(l.values_of_col(j)).read(0).faer_inv();
63                            let d = if conj == Conj::Yes { d.faer_conj() } else { d };
64
65                            let xj0 = x.read(j, k0).faer_mul(d);
66                            x.write(j, k0, xj0);
67                            let xj1 = x.read(j, k1).faer_mul(d);
68                            x.write(j, k1, xj1);
69
70                            for (i, lij) in zip(
71                                l.row_indices_of_col(j),
72                                slice_group(l.values_of_col(j)).into_ref_iter(),
73                            )
74                            .skip(1)
75                            {
76                                let lij = lij.read();
77                                let lij = if conj == Conj::Yes {
78                                    lij.faer_conj()
79                                } else {
80                                    lij
81                                };
82                                x.write(i, k0, x.read(i, k0).faer_sub(lij.faer_mul(xj0)));
83                                x.write(i, k1, x.read(i, k1).faer_sub(lij.faer_mul(xj1)));
84                            }
85                        }
86                    }
87                    3 => {
88                        let k0 = K.check(k);
89                        let k1 = K.check(k + 1);
90                        let k2 = K.check(k + 2);
91                        for j in N.indices() {
92                            let d = slice_group(l.values_of_col(j)).read(0).faer_inv();
93                            let d = if conj == Conj::Yes { d.faer_conj() } else { d };
94
95                            let xj0 = x.read(j, k0).faer_mul(d);
96                            x.write(j, k0, xj0);
97                            let xj1 = x.read(j, k1).faer_mul(d);
98                            x.write(j, k1, xj1);
99                            let xj2 = x.read(j, k2).faer_mul(d);
100                            x.write(j, k2, xj2);
101
102                            for (i, lij) in zip(
103                                l.row_indices_of_col(j),
104                                slice_group(l.values_of_col(j)).into_ref_iter(),
105                            )
106                            .skip(1)
107                            {
108                                let lij = lij.read();
109                                let lij = if conj == Conj::Yes {
110                                    lij.faer_conj()
111                                } else {
112                                    lij
113                                };
114                                x.write(i, k0, x.read(i, k0).faer_sub(lij.faer_mul(xj0)));
115                                x.write(i, k1, x.read(i, k1).faer_sub(lij.faer_mul(xj1)));
116                                x.write(i, k2, x.read(i, k2).faer_sub(lij.faer_mul(xj2)));
117                            }
118                        }
119                    }
120                    4 => {
121                        let k0 = K.check(k);
122                        let k1 = K.check(k + 1);
123                        let k2 = K.check(k + 2);
124                        let k3 = K.check(k + 3);
125                        for j in N.indices() {
126                            let d = slice_group(l.values_of_col(j)).read(0).faer_inv();
127                            let d = if conj == Conj::Yes { d.faer_conj() } else { d };
128
129                            let xj0 = x.read(j, k0).faer_mul(d);
130                            x.write(j, k0, xj0);
131                            let xj1 = x.read(j, k1).faer_mul(d);
132                            x.write(j, k1, xj1);
133                            let xj2 = x.read(j, k2).faer_mul(d);
134                            x.write(j, k2, xj2);
135                            let xj3 = x.read(j, k3).faer_mul(d);
136                            x.write(j, k3, xj3);
137
138                            for (i, lij) in zip(
139                                l.row_indices_of_col(j),
140                                slice_group(l.values_of_col(j)).into_ref_iter(),
141                            )
142                            .skip(1)
143                            {
144                                let lij = lij.read();
145                                let lij = if conj == Conj::Yes {
146                                    lij.faer_conj()
147                                } else {
148                                    lij
149                                };
150                                x.write(i, k0, x.read(i, k0).faer_sub(lij.faer_mul(xj0)));
151                                x.write(i, k1, x.read(i, k1).faer_sub(lij.faer_mul(xj1)));
152                                x.write(i, k2, x.read(i, k2).faer_sub(lij.faer_mul(xj2)));
153                                x.write(i, k3, x.read(i, k3).faer_sub(lij.faer_mul(xj3)));
154                            }
155                        }
156                    }
157                    _ => unreachable!(),
158                }
159                k += bs;
160            }
161        },
162    );
163}
164
165pub fn solve_lower_triangular_transpose_in_place<I: Index, E: ComplexField>(
166    l: SparseColMatRef<'_, I, E>,
167    conj: Conj,
168    rhs: MatMut<'_, E>,
169    parallelism: Parallelism,
170) {
171    let _ = parallelism;
172    assert!(l.nrows() == l.ncols());
173    assert!(rhs.nrows() == l.nrows());
174
175    let slice_group = SliceGroup::<'_, E>::new;
176
177    faer_core::constrained::Size::with2(
178        rhs.nrows(),
179        rhs.ncols(),
180        #[inline(always)]
181        |N, K| {
182            let mut x = faer_core::constrained::MatMut::new(rhs, N, K);
183            let l = faer_core::constrained::sparse::SparseColMatRef::new(l, N, N);
184
185            let mut k = 0usize;
186            while k < *K {
187                let bs = Ord::min(*K - k, 4);
188                match bs {
189                    1 => {
190                        let k0 = K.check(k);
191
192                        for j in N.indices().rev() {
193                            let mut acc0a = E::faer_zero();
194                            let mut acc0b = E::faer_zero();
195                            let mut acc0c = E::faer_zero();
196                            let mut acc0d = E::faer_zero();
197
198                            let a = 0;
199                            let b = 1;
200                            let c = 2;
201                            let d = 3;
202
203                            let nrows = l.row_indices_of_col_raw(j).len();
204                            let rows_head = l.row_indices_of_col_raw(j)[1..].chunks_exact(4);
205                            let rows_tail = rows_head.remainder();
206                            let (values_head, values_tail) = slice_group(l.values_of_col(j))
207                                .subslice(1..nrows)
208                                .into_chunks_exact(4);
209
210                            for (i, lij) in zip(rows_head, values_head) {
211                                let lija = lij.read(a);
212                                let lijb = lij.read(b);
213                                let lijc = lij.read(c);
214                                let lijd = lij.read(d);
215                                let lija = if conj == Conj::Yes {
216                                    lija.faer_conj()
217                                } else {
218                                    lija
219                                };
220                                let lijb = if conj == Conj::Yes {
221                                    lijb.faer_conj()
222                                } else {
223                                    lijb
224                                };
225                                let lijc = if conj == Conj::Yes {
226                                    lijc.faer_conj()
227                                } else {
228                                    lijc
229                                };
230                                let lijd = if conj == Conj::Yes {
231                                    lijd.faer_conj()
232                                } else {
233                                    lijd
234                                };
235                                acc0a = acc0a.faer_add(lija.faer_mul(x.read(i[a].zx(), k0)));
236                                acc0b = acc0b.faer_add(lijb.faer_mul(x.read(i[b].zx(), k0)));
237                                acc0c = acc0c.faer_add(lijc.faer_mul(x.read(i[c].zx(), k0)));
238                                acc0d = acc0d.faer_add(lijd.faer_mul(x.read(i[d].zx(), k0)));
239                            }
240
241                            for (i, lij) in zip(rows_tail, values_tail.into_ref_iter()) {
242                                let lija = lij.read();
243                                let lija = if conj == Conj::Yes {
244                                    lija.faer_conj()
245                                } else {
246                                    lija
247                                };
248                                acc0a = acc0a.faer_add(lija.faer_mul(x.read(i.zx(), k0)));
249                            }
250
251                            let d = slice_group(l.values_of_col(j)).read(0).faer_inv();
252                            let d = if conj == Conj::Yes { d.faer_conj() } else { d };
253                            x.write(
254                                j,
255                                k0,
256                                x.read(j, k0)
257                                    .faer_sub(acc0a.faer_add(acc0b).faer_add(acc0c.faer_add(acc0d)))
258                                    .faer_mul(d),
259                            );
260                        }
261                    }
262                    2 => {
263                        let k0 = K.check(k);
264                        let k1 = K.check(k + 1);
265
266                        for j in N.indices().rev() {
267                            let mut acc0a = E::faer_zero();
268                            let mut acc0b = E::faer_zero();
269                            let mut acc1a = E::faer_zero();
270                            let mut acc1b = E::faer_zero();
271
272                            let a = 0;
273                            let b = 1;
274
275                            let nrows = l.row_indices_of_col_raw(j).len();
276                            let rows_head = l.row_indices_of_col_raw(j)[1..].chunks_exact(2);
277                            let rows_tail = rows_head.remainder();
278                            let (values_head, values_tail) = slice_group(l.values_of_col(j))
279                                .subslice(1..nrows)
280                                .into_chunks_exact(2);
281
282                            for (i, lij) in zip(rows_head, values_head) {
283                                let lija = lij.read(a);
284                                let lijb = lij.read(b);
285                                let lija = if conj == Conj::Yes {
286                                    lija.faer_conj()
287                                } else {
288                                    lija
289                                };
290                                let lijb = if conj == Conj::Yes {
291                                    lijb.faer_conj()
292                                } else {
293                                    lijb
294                                };
295                                acc0a = acc0a.faer_add(lija.faer_mul(x.read(i[a].zx(), k0)));
296                                acc0b = acc0b.faer_add(lijb.faer_mul(x.read(i[b].zx(), k0)));
297                                acc1a = acc1a.faer_add(lija.faer_mul(x.read(i[a].zx(), k1)));
298                                acc1b = acc1b.faer_add(lijb.faer_mul(x.read(i[b].zx(), k1)));
299                            }
300
301                            for (i, lij) in zip(rows_tail, values_tail.into_ref_iter()) {
302                                let lija = lij.read();
303                                let lija = if conj == Conj::Yes {
304                                    lija.faer_conj()
305                                } else {
306                                    lija
307                                };
308                                acc0a = acc0a.faer_add(lija.faer_mul(x.read(i.zx(), k0)));
309                                acc1a = acc1a.faer_add(lija.faer_mul(x.read(i.zx(), k1)));
310                            }
311
312                            let d = slice_group(l.values_of_col(j)).read(0).faer_inv();
313                            let d = if conj == Conj::Yes { d.faer_conj() } else { d };
314                            x.write(
315                                j,
316                                k0,
317                                x.read(j, k0).faer_sub(acc0a.faer_add(acc0b)).faer_mul(d),
318                            );
319                            x.write(
320                                j,
321                                k1,
322                                x.read(j, k1).faer_sub(acc1a.faer_add(acc1b)).faer_mul(d),
323                            );
324                        }
325                    }
326                    3 => {
327                        let k0 = K.check(k);
328                        let k1 = K.check(k + 1);
329                        let k2 = K.check(k + 2);
330
331                        for j in N.indices().rev() {
332                            let mut acc0a = E::faer_zero();
333                            let mut acc1a = E::faer_zero();
334                            let mut acc2a = E::faer_zero();
335
336                            for (i, lij) in zip(
337                                l.row_indices_of_col(j),
338                                slice_group(l.values_of_col(j)).into_ref_iter(),
339                            )
340                            .skip(1)
341                            {
342                                let lij = lij.read();
343                                let lij = if conj == Conj::Yes {
344                                    lij.faer_conj()
345                                } else {
346                                    lij
347                                };
348                                acc0a = acc0a.faer_add(lij.faer_mul(x.read(i, k0)));
349                                acc1a = acc1a.faer_add(lij.faer_mul(x.read(i, k1)));
350                                acc2a = acc2a.faer_add(lij.faer_mul(x.read(i, k2)));
351                            }
352
353                            let d = slice_group(l.values_of_col(j)).read(0).faer_inv();
354                            let d = if conj == Conj::Yes { d.faer_conj() } else { d };
355                            x.write(j, k0, x.read(j, k0).faer_sub(acc0a).faer_mul(d));
356                            x.write(j, k1, x.read(j, k1).faer_sub(acc1a).faer_mul(d));
357                            x.write(j, k2, x.read(j, k2).faer_sub(acc2a).faer_mul(d));
358                        }
359                    }
360                    4 => {
361                        let k0 = K.check(k);
362                        let k1 = K.check(k + 1);
363                        let k2 = K.check(k + 2);
364                        let k3 = K.check(k + 3);
365
366                        for j in N.indices().rev() {
367                            let mut acc0a = E::faer_zero();
368                            let mut acc1a = E::faer_zero();
369                            let mut acc2a = E::faer_zero();
370                            let mut acc3a = E::faer_zero();
371
372                            for (i, lij) in zip(
373                                l.row_indices_of_col(j),
374                                slice_group(l.values_of_col(j)).into_ref_iter(),
375                            )
376                            .skip(1)
377                            {
378                                let lij = lij.read();
379                                let lij = if conj == Conj::Yes {
380                                    lij.faer_conj()
381                                } else {
382                                    lij
383                                };
384                                acc0a = acc0a.faer_add(lij.faer_mul(x.read(i, k0)));
385                                acc1a = acc1a.faer_add(lij.faer_mul(x.read(i, k1)));
386                                acc2a = acc2a.faer_add(lij.faer_mul(x.read(i, k2)));
387                                acc3a = acc3a.faer_add(lij.faer_mul(x.read(i, k3)));
388                            }
389
390                            let d = slice_group(l.values_of_col(j)).read(0).faer_inv();
391                            let d = if conj == Conj::Yes { d.faer_conj() } else { d };
392                            x.write(j, k0, x.read(j, k0).faer_sub(acc0a).faer_mul(d));
393                            x.write(j, k1, x.read(j, k1).faer_sub(acc1a).faer_mul(d));
394                            x.write(j, k2, x.read(j, k2).faer_sub(acc2a).faer_mul(d));
395                            x.write(j, k3, x.read(j, k3).faer_sub(acc3a).faer_mul(d));
396                        }
397                    }
398                    _ => unreachable!(),
399                }
400                k += bs;
401            }
402        },
403    );
404}
405
406pub fn solve_unit_lower_triangular_in_place<I: Index, E: ComplexField>(
407    l: SparseColMatRef<'_, I, E>,
408    conj: Conj,
409    rhs: MatMut<'_, E>,
410    parallelism: Parallelism,
411) {
412    let _ = parallelism;
413    assert!(l.nrows() == l.ncols());
414    assert!(rhs.nrows() == l.nrows());
415
416    let slice_group = SliceGroup::<'_, E>::new;
417
418    faer_core::constrained::Size::with2(
419        rhs.nrows(),
420        rhs.ncols(),
421        #[inline(always)]
422        |N, K| {
423            let mut x = faer_core::constrained::MatMut::new(rhs, N, K);
424            let l = faer_core::constrained::sparse::SparseColMatRef::new(l, N, N);
425
426            let mut k = 0usize;
427            while k < *K {
428                let bs = Ord::min(*K - k, 4);
429                match bs {
430                    1 => {
431                        let k0 = K.check(k);
432
433                        for j in N.indices() {
434                            let xj0 = x.read(j, k0);
435
436                            for (i, lij) in zip(
437                                l.row_indices_of_col(j),
438                                slice_group(l.values_of_col(j)).into_ref_iter(),
439                            )
440                            .skip(1)
441                            {
442                                let lij = lij.read();
443                                let lij = if conj == Conj::Yes {
444                                    lij.faer_conj()
445                                } else {
446                                    lij
447                                };
448                                x.write(i, k0, x.read(i, k0).faer_sub(lij.faer_mul(xj0)));
449                            }
450                        }
451                    }
452                    2 => {
453                        let k0 = K.check(k);
454                        let k1 = K.check(k + 1);
455                        for j in N.indices() {
456                            let xj0 = x.read(j, k0);
457                            let xj1 = x.read(j, k1);
458
459                            for (i, lij) in zip(
460                                l.row_indices_of_col(j),
461                                slice_group(l.values_of_col(j)).into_ref_iter(),
462                            )
463                            .skip(1)
464                            {
465                                let lij = lij.read();
466                                let lij = if conj == Conj::Yes {
467                                    lij.faer_conj()
468                                } else {
469                                    lij
470                                };
471                                x.write(i, k0, x.read(i, k0).faer_sub(lij.faer_mul(xj0)));
472                                x.write(i, k1, x.read(i, k1).faer_sub(lij.faer_mul(xj1)));
473                            }
474                        }
475                    }
476                    3 => {
477                        let k0 = K.check(k);
478                        let k1 = K.check(k + 1);
479                        let k2 = K.check(k + 2);
480                        for j in N.indices() {
481                            let xj0 = x.read(j, k0);
482                            let xj1 = x.read(j, k1);
483                            let xj2 = x.read(j, k2);
484
485                            for (i, lij) in zip(
486                                l.row_indices_of_col(j),
487                                slice_group(l.values_of_col(j)).into_ref_iter(),
488                            )
489                            .skip(1)
490                            {
491                                let lij = lij.read();
492                                let lij = if conj == Conj::Yes {
493                                    lij.faer_conj()
494                                } else {
495                                    lij
496                                };
497                                x.write(i, k0, x.read(i, k0).faer_sub(lij.faer_mul(xj0)));
498                                x.write(i, k1, x.read(i, k1).faer_sub(lij.faer_mul(xj1)));
499                                x.write(i, k2, x.read(i, k2).faer_sub(lij.faer_mul(xj2)));
500                            }
501                        }
502                    }
503                    4 => {
504                        let k0 = K.check(k);
505                        let k1 = K.check(k + 1);
506                        let k2 = K.check(k + 2);
507                        let k3 = K.check(k + 3);
508                        for j in N.indices() {
509                            let xj0 = x.read(j, k0);
510                            let xj1 = x.read(j, k1);
511                            let xj2 = x.read(j, k2);
512                            let xj3 = x.read(j, k3);
513
514                            for (i, lij) in zip(
515                                l.row_indices_of_col(j),
516                                slice_group(l.values_of_col(j)).into_ref_iter(),
517                            )
518                            .skip(1)
519                            {
520                                let lij = lij.read();
521                                let lij = if conj == Conj::Yes {
522                                    lij.faer_conj()
523                                } else {
524                                    lij
525                                };
526                                x.write(i, k0, x.read(i, k0).faer_sub(lij.faer_mul(xj0)));
527                                x.write(i, k1, x.read(i, k1).faer_sub(lij.faer_mul(xj1)));
528                                x.write(i, k2, x.read(i, k2).faer_sub(lij.faer_mul(xj2)));
529                                x.write(i, k3, x.read(i, k3).faer_sub(lij.faer_mul(xj3)));
530                            }
531                        }
532                    }
533                    _ => unreachable!(),
534                }
535
536                k += bs;
537            }
538        },
539    );
540}
541
542pub fn solve_unit_lower_triangular_transpose_in_place<I: Index, E: ComplexField>(
543    l: SparseColMatRef<'_, I, E>,
544    conj: Conj,
545    rhs: MatMut<'_, E>,
546    parallelism: Parallelism,
547) {
548    let _ = parallelism;
549    assert!(l.nrows() == l.ncols());
550    assert!(rhs.nrows() == l.nrows());
551
552    let slice_group = SliceGroup::<'_, E>::new;
553
554    faer_core::constrained::Size::with2(
555        rhs.nrows(),
556        rhs.ncols(),
557        #[inline(always)]
558        |N, K| {
559            let mut x = faer_core::constrained::MatMut::new(rhs, N, K);
560            let l = faer_core::constrained::sparse::SparseColMatRef::new(l, N, N);
561
562            let mut k = 0usize;
563            while k < *K {
564                let bs = Ord::min(*K - k, 4);
565                match bs {
566                    1 => {
567                        let k0 = K.check(k);
568
569                        for j in N.indices().rev() {
570                            let mut acc0a = E::faer_zero();
571                            let mut acc0b = E::faer_zero();
572                            let mut acc0c = E::faer_zero();
573                            let mut acc0d = E::faer_zero();
574
575                            let a = 0;
576                            let b = 1;
577                            let c = 2;
578                            let d = 3;
579
580                            let nrows = l.row_indices_of_col_raw(j).len();
581                            let rows_head = l.row_indices_of_col_raw(j)[1..].chunks_exact(4);
582                            let rows_tail = rows_head.remainder();
583                            let (values_head, values_tail) = slice_group(l.values_of_col(j))
584                                .subslice(1..nrows)
585                                .into_chunks_exact(4);
586
587                            for (i, lij) in zip(rows_head, values_head) {
588                                let lija = lij.read(a);
589                                let lijb = lij.read(b);
590                                let lijc = lij.read(c);
591                                let lijd = lij.read(d);
592                                let lija = if conj == Conj::Yes {
593                                    lija.faer_conj()
594                                } else {
595                                    lija
596                                };
597                                let lijb = if conj == Conj::Yes {
598                                    lijb.faer_conj()
599                                } else {
600                                    lijb
601                                };
602                                let lijc = if conj == Conj::Yes {
603                                    lijc.faer_conj()
604                                } else {
605                                    lijc
606                                };
607                                let lijd = if conj == Conj::Yes {
608                                    lijd.faer_conj()
609                                } else {
610                                    lijd
611                                };
612                                acc0a = acc0a.faer_add(lija.faer_mul(x.read(i[a].zx(), k0)));
613                                acc0b = acc0b.faer_add(lijb.faer_mul(x.read(i[b].zx(), k0)));
614                                acc0c = acc0c.faer_add(lijc.faer_mul(x.read(i[c].zx(), k0)));
615                                acc0d = acc0d.faer_add(lijd.faer_mul(x.read(i[d].zx(), k0)));
616                            }
617
618                            for (i, lij) in zip(rows_tail, values_tail.into_ref_iter()) {
619                                let lija = lij.read();
620                                let lija = if conj == Conj::Yes {
621                                    lija.faer_conj()
622                                } else {
623                                    lija
624                                };
625                                acc0a = acc0a.faer_add(lija.faer_mul(x.read(i.zx(), k0)));
626                            }
627
628                            x.write(
629                                j,
630                                k0,
631                                x.read(j, k0).faer_sub(
632                                    acc0a.faer_add(acc0b).faer_add(acc0c.faer_add(acc0d)),
633                                ),
634                            );
635                        }
636                    }
637                    2 => {
638                        let k0 = K.check(k);
639                        let k1 = K.check(k + 1);
640
641                        for j in N.indices().rev() {
642                            let mut acc0a = E::faer_zero();
643                            let mut acc0b = E::faer_zero();
644                            let mut acc1a = E::faer_zero();
645                            let mut acc1b = E::faer_zero();
646
647                            let a = 0;
648                            let b = 1;
649
650                            let nrows = l.row_indices_of_col_raw(j).len();
651                            let rows_head = l.row_indices_of_col_raw(j)[1..].chunks_exact(2);
652                            let rows_tail = rows_head.remainder();
653                            let (values_head, values_tail) = slice_group(l.values_of_col(j))
654                                .subslice(1..nrows)
655                                .into_chunks_exact(2);
656
657                            for (i, lij) in zip(rows_head, values_head) {
658                                let lija = lij.read(a);
659                                let lijb = lij.read(b);
660                                let lija = if conj == Conj::Yes {
661                                    lija.faer_conj()
662                                } else {
663                                    lija
664                                };
665                                let lijb = if conj == Conj::Yes {
666                                    lijb.faer_conj()
667                                } else {
668                                    lijb
669                                };
670                                acc0a = acc0a.faer_add(lija.faer_mul(x.read(i[a].zx(), k0)));
671                                acc0b = acc0b.faer_add(lijb.faer_mul(x.read(i[b].zx(), k0)));
672                                acc1a = acc1a.faer_add(lija.faer_mul(x.read(i[a].zx(), k1)));
673                                acc1b = acc1b.faer_add(lijb.faer_mul(x.read(i[b].zx(), k1)));
674                            }
675
676                            for (i, lij) in zip(rows_tail, values_tail.into_ref_iter()) {
677                                let lija = lij.read();
678                                let lija = if conj == Conj::Yes {
679                                    lija.faer_conj()
680                                } else {
681                                    lija
682                                };
683                                acc0a = acc0a.faer_add(lija.faer_mul(x.read(i.zx(), k0)));
684                                acc1a = acc1a.faer_add(lija.faer_mul(x.read(i.zx(), k1)));
685                            }
686
687                            x.write(j, k0, x.read(j, k0).faer_sub(acc0a.faer_add(acc0b)));
688                            x.write(j, k1, x.read(j, k1).faer_sub(acc1a.faer_add(acc1b)));
689                        }
690                    }
691                    3 => {
692                        let k0 = K.check(k);
693                        let k1 = K.check(k + 1);
694                        let k2 = K.check(k + 2);
695
696                        for j in N.indices().rev() {
697                            let mut acc0a = E::faer_zero();
698                            let mut acc1a = E::faer_zero();
699                            let mut acc2a = E::faer_zero();
700
701                            for (i, lij) in zip(
702                                l.row_indices_of_col(j),
703                                slice_group(l.values_of_col(j)).into_ref_iter(),
704                            )
705                            .skip(1)
706                            {
707                                let lij = lij.read();
708                                let lij = if conj == Conj::Yes {
709                                    lij.faer_conj()
710                                } else {
711                                    lij
712                                };
713                                acc0a = acc0a.faer_add(lij.faer_mul(x.read(i, k0)));
714                                acc1a = acc1a.faer_add(lij.faer_mul(x.read(i, k1)));
715                                acc2a = acc2a.faer_add(lij.faer_mul(x.read(i, k2)));
716                            }
717
718                            x.write(j, k0, x.read(j, k0).faer_sub(acc0a));
719                            x.write(j, k1, x.read(j, k1).faer_sub(acc1a));
720                            x.write(j, k2, x.read(j, k2).faer_sub(acc2a));
721                        }
722                    }
723                    4 => {
724                        let k0 = K.check(k);
725                        let k1 = K.check(k + 1);
726                        let k2 = K.check(k + 2);
727                        let k3 = K.check(k + 3);
728
729                        for j in N.indices().rev() {
730                            let mut acc0a = E::faer_zero();
731                            let mut acc1a = E::faer_zero();
732                            let mut acc2a = E::faer_zero();
733                            let mut acc3a = E::faer_zero();
734
735                            for (i, lij) in zip(
736                                l.row_indices_of_col(j),
737                                slice_group(l.values_of_col(j)).into_ref_iter(),
738                            )
739                            .skip(1)
740                            {
741                                let lij = lij.read();
742                                let lij = if conj == Conj::Yes {
743                                    lij.faer_conj()
744                                } else {
745                                    lij
746                                };
747                                acc0a = acc0a.faer_add(lij.faer_mul(x.read(i, k0)));
748                                acc1a = acc1a.faer_add(lij.faer_mul(x.read(i, k1)));
749                                acc2a = acc2a.faer_add(lij.faer_mul(x.read(i, k2)));
750                                acc3a = acc3a.faer_add(lij.faer_mul(x.read(i, k3)));
751                            }
752
753                            x.write(j, k0, x.read(j, k0).faer_sub(acc0a));
754                            x.write(j, k1, x.read(j, k1).faer_sub(acc1a));
755                            x.write(j, k2, x.read(j, k2).faer_sub(acc2a));
756                            x.write(j, k3, x.read(j, k3).faer_sub(acc3a));
757                        }
758                    }
759                    _ => unreachable!(),
760                }
761                k += bs;
762            }
763        },
764    );
765}
766
767pub fn solve_upper_triangular_in_place<I: Index, E: ComplexField>(
768    u: SparseColMatRef<'_, I, E>,
769    conj: Conj,
770    rhs: MatMut<'_, E>,
771    parallelism: Parallelism,
772) {
773    let _ = parallelism;
774    assert!(u.nrows() == u.ncols());
775    assert!(rhs.nrows() == u.nrows());
776
777    faer_core::constrained::Size::with2(
778        rhs.nrows(),
779        rhs.ncols(),
780        #[inline(always)]
781        |N, K| {
782            let mut x = faer_core::constrained::MatMut::new(rhs, N, K);
783            let u = faer_core::constrained::sparse::SparseColMatRef::new(u, N, N);
784
785            let mut k = 0usize;
786            while k < *K {
787                let bs = Ord::min(*K - k, 4);
788                match bs {
789                    1 => {
790                        let k0 = K.check(k);
791
792                        for j in N.indices().rev() {
793                            let ui = u.row_indices_of_col_raw(j);
794                            let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
795
796                            let u_inv = ux.read(ui.len() - 1).faer_inv();
797                            let u_inv = if conj == Conj::Yes {
798                                u_inv.faer_conj()
799                            } else {
800                                u_inv
801                            };
802                            let xj = x.read(j, k0).faer_mul(u_inv);
803                            x.write(j, k0, xj);
804
805                            for (i, u) in zip(
806                                &ui[..ui.len() - 1],
807                                ux.subslice(0..ui.len() - 1).into_ref_iter(),
808                            ) {
809                                let i = i.zx();
810                                let u = if conj == Conj::Yes {
811                                    u.read().faer_conj()
812                                } else {
813                                    u.read()
814                                };
815
816                                x.write(i, k0, x.read(i, k0).faer_sub(E::faer_mul(u, xj)));
817                            }
818                        }
819                    }
820                    2 => {
821                        let k0 = K.check(k);
822                        let k1 = K.check(k + 1);
823
824                        for j in N.indices().rev() {
825                            let ui = u.row_indices_of_col_raw(j);
826                            let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
827
828                            let u_inv = ux.read(ui.len() - 1).faer_inv();
829                            let u_inv = if conj == Conj::Yes {
830                                u_inv.faer_conj()
831                            } else {
832                                u_inv
833                            };
834                            let xj0 = x.read(j, k0).faer_mul(u_inv);
835                            let xj1 = x.read(j, k1).faer_mul(u_inv);
836                            x.write(j, k0, xj0);
837                            x.write(j, k1, xj1);
838
839                            for (i, u) in zip(
840                                &ui[..ui.len() - 1],
841                                ux.subslice(0..ui.len() - 1).into_ref_iter(),
842                            ) {
843                                let i = i.zx();
844                                let u = if conj == Conj::Yes {
845                                    u.read().faer_conj()
846                                } else {
847                                    u.read()
848                                };
849
850                                x.write(i, k0, x.read(i, k0).faer_sub(E::faer_mul(u, xj0)));
851                                x.write(i, k1, x.read(i, k1).faer_sub(E::faer_mul(u, xj1)));
852                            }
853                        }
854                    }
855                    3 => {
856                        let k0 = K.check(k);
857                        let k1 = K.check(k + 1);
858                        let k2 = K.check(k + 2);
859
860                        for j in N.indices().rev() {
861                            let ui = u.row_indices_of_col_raw(j);
862                            let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
863
864                            let u_inv = ux.read(ui.len() - 1).faer_inv();
865                            let u_inv = if conj == Conj::Yes {
866                                u_inv.faer_conj()
867                            } else {
868                                u_inv
869                            };
870                            let xj0 = x.read(j, k0).faer_mul(u_inv);
871                            let xj1 = x.read(j, k1).faer_mul(u_inv);
872                            let xj2 = x.read(j, k2).faer_mul(u_inv);
873                            x.write(j, k0, xj0);
874                            x.write(j, k1, xj1);
875                            x.write(j, k2, xj2);
876
877                            for (i, u) in zip(
878                                &ui[..ui.len() - 1],
879                                ux.subslice(0..ui.len() - 1).into_ref_iter(),
880                            ) {
881                                let i = i.zx();
882                                let u = if conj == Conj::Yes {
883                                    u.read().faer_conj()
884                                } else {
885                                    u.read()
886                                };
887
888                                x.write(i, k0, x.read(i, k0).faer_sub(E::faer_mul(u, xj0)));
889                                x.write(i, k1, x.read(i, k1).faer_sub(E::faer_mul(u, xj1)));
890                                x.write(i, k2, x.read(i, k2).faer_sub(E::faer_mul(u, xj2)));
891                            }
892                        }
893                    }
894                    4 => {
895                        let k0 = K.check(k);
896                        let k1 = K.check(k + 1);
897                        let k2 = K.check(k + 2);
898                        let k3 = K.check(k + 3);
899
900                        for j in N.indices().rev() {
901                            let ui = u.row_indices_of_col_raw(j);
902                            let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
903
904                            let u_inv = ux.read(ui.len() - 1).faer_inv();
905                            let u_inv = if conj == Conj::Yes {
906                                u_inv.faer_conj()
907                            } else {
908                                u_inv
909                            };
910                            let xj0 = x.read(j, k0).faer_mul(u_inv);
911                            let xj1 = x.read(j, k1).faer_mul(u_inv);
912                            let xj2 = x.read(j, k2).faer_mul(u_inv);
913                            let xj3 = x.read(j, k3).faer_mul(u_inv);
914                            x.write(j, k0, xj0);
915                            x.write(j, k1, xj1);
916                            x.write(j, k2, xj2);
917                            x.write(j, k3, xj3);
918
919                            for (i, u) in zip(
920                                &ui[..ui.len() - 1],
921                                ux.subslice(0..ui.len() - 1).into_ref_iter(),
922                            ) {
923                                let i = i.zx();
924                                let u = if conj == Conj::Yes {
925                                    u.read().faer_conj()
926                                } else {
927                                    u.read()
928                                };
929
930                                x.write(i, k0, x.read(i, k0).faer_sub(E::faer_mul(u, xj0)));
931                                x.write(i, k1, x.read(i, k1).faer_sub(E::faer_mul(u, xj1)));
932                                x.write(i, k2, x.read(i, k2).faer_sub(E::faer_mul(u, xj2)));
933                                x.write(i, k3, x.read(i, k3).faer_sub(E::faer_mul(u, xj3)));
934                            }
935                        }
936                    }
937                    _ => unreachable!(),
938                }
939                k += bs;
940            }
941        },
942    );
943}
944
945pub fn solve_upper_triangular_transpose_in_place<I: Index, E: ComplexField>(
946    u: SparseColMatRef<'_, I, E>,
947    conj: Conj,
948    rhs: MatMut<'_, E>,
949    parallelism: Parallelism,
950) {
951    let _ = parallelism;
952    assert!(u.nrows() == u.ncols());
953    assert!(rhs.nrows() == u.nrows());
954
955    faer_core::constrained::Size::with2(
956        rhs.nrows(),
957        rhs.ncols(),
958        #[inline(always)]
959        |N, K| {
960            let mut x = faer_core::constrained::MatMut::new(rhs, N, K);
961            let u = faer_core::constrained::sparse::SparseColMatRef::new(u, N, N);
962
963            let mut k = 0usize;
964            while k < *K {
965                let bs = Ord::min(*K - k, 4);
966                match bs {
967                    1 => {
968                        let k0 = K.check(k);
969                        for j in N.indices() {
970                            let ui = u.row_indices_of_col_raw(j);
971                            let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
972
973                            let u_inv = ux.read(ui.len() - 1).faer_inv();
974                            let u_inv = if conj == Conj::Yes {
975                                u_inv.faer_conj()
976                            } else {
977                                u_inv
978                            };
979
980                            let mut acc0a = E::faer_zero();
981                            let mut acc0b = E::faer_zero();
982                            let mut acc0c = E::faer_zero();
983                            let mut acc0d = E::faer_zero();
984
985                            let a = 0;
986                            let b = 1;
987                            let c = 2;
988                            let d = 3;
989
990                            let rows_head = ui[..ui.len() - 1].chunks_exact(4);
991                            let rows_tail = rows_head.remainder();
992                            let (values_head, values_tail) =
993                                ux.subslice(0..ui.len() - 1).into_chunks_exact(4);
994
995                            for (i, uij) in zip(rows_head, values_head) {
996                                let uija = uij.read(a);
997                                let uijb = uij.read(b);
998                                let uijc = uij.read(c);
999                                let uijd = uij.read(d);
1000                                let uija = if conj == Conj::Yes {
1001                                    uija.faer_conj()
1002                                } else {
1003                                    uija
1004                                };
1005                                let uijb = if conj == Conj::Yes {
1006                                    uijb.faer_conj()
1007                                } else {
1008                                    uijb
1009                                };
1010                                let uijc = if conj == Conj::Yes {
1011                                    uijc.faer_conj()
1012                                } else {
1013                                    uijc
1014                                };
1015                                let uijd = if conj == Conj::Yes {
1016                                    uijd.faer_conj()
1017                                } else {
1018                                    uijd
1019                                };
1020                                acc0a = acc0a.faer_add(uija.faer_mul(x.read(i[a].zx(), k0)));
1021                                acc0b = acc0b.faer_add(uijb.faer_mul(x.read(i[b].zx(), k0)));
1022                                acc0c = acc0c.faer_add(uijc.faer_mul(x.read(i[c].zx(), k0)));
1023                                acc0d = acc0d.faer_add(uijd.faer_mul(x.read(i[d].zx(), k0)));
1024                            }
1025
1026                            for (i, uij) in zip(rows_tail, values_tail.into_ref_iter()) {
1027                                let uija = uij.read();
1028                                let uija = if conj == Conj::Yes {
1029                                    uija.faer_conj()
1030                                } else {
1031                                    uija
1032                                };
1033                                acc0a = acc0a.faer_add(uija.faer_mul(x.read(i.zx(), k0)));
1034                            }
1035
1036                            x.write(
1037                                j,
1038                                k0,
1039                                x.read(j, k0)
1040                                    .faer_sub(acc0a.faer_add(acc0b).faer_add(acc0c.faer_add(acc0d)))
1041                                    .faer_mul(u_inv),
1042                            );
1043                        }
1044                    }
1045                    2 => {
1046                        let k0 = K.check(k);
1047                        let k1 = K.check(k + 1);
1048
1049                        for j in N.indices() {
1050                            let ui = u.row_indices_of_col_raw(j);
1051                            let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
1052
1053                            let u_inv = ux.read(ui.len() - 1).faer_inv();
1054                            let u_inv = if conj == Conj::Yes {
1055                                u_inv.faer_conj()
1056                            } else {
1057                                u_inv
1058                            };
1059
1060                            let mut acc0a = E::faer_zero();
1061                            let mut acc0b = E::faer_zero();
1062                            let mut acc1a = E::faer_zero();
1063                            let mut acc1b = E::faer_zero();
1064
1065                            let a = 0;
1066                            let b = 1;
1067
1068                            let rows_head = ui[..ui.len() - 1].chunks_exact(2);
1069                            let rows_tail = rows_head.remainder();
1070                            let (values_head, values_tail) =
1071                                ux.subslice(0..ui.len() - 1).into_chunks_exact(2);
1072
1073                            for (i, uij) in zip(rows_head, values_head) {
1074                                let uija = uij.read(a);
1075                                let uijb = uij.read(b);
1076                                let uija = if conj == Conj::Yes {
1077                                    uija.faer_conj()
1078                                } else {
1079                                    uija
1080                                };
1081                                let uijb = if conj == Conj::Yes {
1082                                    uijb.faer_conj()
1083                                } else {
1084                                    uijb
1085                                };
1086                                acc0a = acc0a.faer_add(uija.faer_mul(x.read(i[a].zx(), k0)));
1087                                acc0b = acc0b.faer_add(uijb.faer_mul(x.read(i[b].zx(), k0)));
1088                                acc1a = acc1a.faer_add(uija.faer_mul(x.read(i[a].zx(), k1)));
1089                                acc1b = acc1b.faer_add(uijb.faer_mul(x.read(i[b].zx(), k1)));
1090                            }
1091
1092                            for (i, uij) in zip(rows_tail, values_tail.into_ref_iter()) {
1093                                let uija = uij.read();
1094                                let uija = if conj == Conj::Yes {
1095                                    uija.faer_conj()
1096                                } else {
1097                                    uija
1098                                };
1099                                acc0a = acc0a.faer_add(uija.faer_mul(x.read(i.zx(), k0)));
1100                                acc1a = acc1a.faer_add(uija.faer_mul(x.read(i.zx(), k1)));
1101                            }
1102
1103                            x.write(
1104                                j,
1105                                k0,
1106                                x.read(j, k0)
1107                                    .faer_sub(acc0a.faer_add(acc0b))
1108                                    .faer_mul(u_inv),
1109                            );
1110                            x.write(
1111                                j,
1112                                k1,
1113                                x.read(j, k1)
1114                                    .faer_sub(acc1a.faer_add(acc1b))
1115                                    .faer_mul(u_inv),
1116                            );
1117                        }
1118                    }
1119                    3 => {
1120                        let k0 = K.check(k);
1121                        let k1 = K.check(k + 1);
1122                        let k2 = K.check(k + 2);
1123
1124                        for j in N.indices() {
1125                            let ui = u.row_indices_of_col_raw(j);
1126                            let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
1127
1128                            let u_inv = ux.read(ui.len() - 1).faer_inv();
1129                            let u_inv = if conj == Conj::Yes {
1130                                u_inv.faer_conj()
1131                            } else {
1132                                u_inv
1133                            };
1134
1135                            let mut acc0a = E::faer_zero();
1136                            let mut acc1a = E::faer_zero();
1137                            let mut acc2a = E::faer_zero();
1138
1139                            let rows = &ui[..ui.len() - 1];
1140                            let values = ux.subslice(0..ui.len() - 1);
1141
1142                            for (i, uij) in zip(rows, values.into_ref_iter()) {
1143                                let uija = uij.read();
1144                                let uija = if conj == Conj::Yes {
1145                                    uija.faer_conj()
1146                                } else {
1147                                    uija
1148                                };
1149                                acc0a = acc0a.faer_add(uija.faer_mul(x.read(i.zx(), k0)));
1150                                acc1a = acc1a.faer_add(uija.faer_mul(x.read(i.zx(), k1)));
1151                                acc2a = acc2a.faer_add(uija.faer_mul(x.read(i.zx(), k2)));
1152                            }
1153
1154                            x.write(j, k0, x.read(j, k0).faer_sub(acc0a).faer_mul(u_inv));
1155                            x.write(j, k1, x.read(j, k1).faer_sub(acc1a).faer_mul(u_inv));
1156                            x.write(j, k2, x.read(j, k2).faer_sub(acc2a).faer_mul(u_inv));
1157                        }
1158                    }
1159                    4 => {
1160                        let k0 = K.check(k);
1161                        let k1 = K.check(k + 1);
1162                        let k2 = K.check(k + 2);
1163                        let k3 = K.check(k + 3);
1164
1165                        for j in N.indices() {
1166                            let ui = u.row_indices_of_col_raw(j);
1167                            let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
1168
1169                            let u_inv = ux.read(ui.len() - 1).faer_inv();
1170                            let u_inv = if conj == Conj::Yes {
1171                                u_inv.faer_conj()
1172                            } else {
1173                                u_inv
1174                            };
1175
1176                            let mut acc0a = E::faer_zero();
1177                            let mut acc1a = E::faer_zero();
1178                            let mut acc2a = E::faer_zero();
1179                            let mut acc3a = E::faer_zero();
1180
1181                            let rows = &ui[..ui.len() - 1];
1182                            let values = ux.subslice(0..ui.len() - 1);
1183
1184                            for (i, uij) in zip(rows, values.into_ref_iter()) {
1185                                let uija = uij.read();
1186                                let uija = if conj == Conj::Yes {
1187                                    uija.faer_conj()
1188                                } else {
1189                                    uija
1190                                };
1191                                acc0a = acc0a.faer_add(uija.faer_mul(x.read(i.zx(), k0)));
1192                                acc1a = acc1a.faer_add(uija.faer_mul(x.read(i.zx(), k1)));
1193                                acc2a = acc2a.faer_add(uija.faer_mul(x.read(i.zx(), k2)));
1194                                acc3a = acc3a.faer_add(uija.faer_mul(x.read(i.zx(), k3)));
1195                            }
1196
1197                            x.write(j, k0, x.read(j, k0).faer_sub(acc0a).faer_mul(u_inv));
1198                            x.write(j, k1, x.read(j, k1).faer_sub(acc1a).faer_mul(u_inv));
1199                            x.write(j, k2, x.read(j, k2).faer_sub(acc2a).faer_mul(u_inv));
1200                            x.write(j, k3, x.read(j, k3).faer_sub(acc3a).faer_mul(u_inv));
1201                        }
1202                    }
1203                    _ => unreachable!(),
1204                }
1205                k += bs;
1206            }
1207        },
1208    );
1209}
1210
1211pub fn solve_unit_upper_triangular_in_place<I: Index, E: ComplexField>(
1212    u: SparseColMatRef<'_, I, E>,
1213    conj: Conj,
1214    rhs: MatMut<'_, E>,
1215    parallelism: Parallelism,
1216) {
1217    let _ = parallelism;
1218    assert!(u.nrows() == u.ncols());
1219    assert!(rhs.nrows() == u.nrows());
1220
1221    faer_core::constrained::Size::with2(
1222        rhs.nrows(),
1223        rhs.ncols(),
1224        #[inline(always)]
1225        |N, K| {
1226            let mut x = faer_core::constrained::MatMut::new(rhs, N, K);
1227            let u = faer_core::constrained::sparse::SparseColMatRef::new(u, N, N);
1228
1229            let mut k = 0usize;
1230            while k < *K {
1231                let bs = Ord::min(*K - k, 4);
1232                match bs {
1233                    1 => {
1234                        let k0 = K.check(k);
1235
1236                        for j in N.indices().rev() {
1237                            let ui = u.row_indices_of_col_raw(j);
1238                            let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
1239
1240                            let xj = x.read(j, k0);
1241
1242                            for (i, u) in zip(
1243                                &ui[..ui.len() - 1],
1244                                ux.subslice(0..ui.len() - 1).into_ref_iter(),
1245                            ) {
1246                                let i = i.zx();
1247                                let u = if conj == Conj::Yes {
1248                                    u.read().faer_conj()
1249                                } else {
1250                                    u.read()
1251                                };
1252
1253                                x.write(i, k0, x.read(i, k0).faer_sub(E::faer_mul(u, xj)));
1254                            }
1255                        }
1256                    }
1257                    2 => {
1258                        let k0 = K.check(k);
1259                        let k1 = K.check(k + 1);
1260
1261                        for j in N.indices().rev() {
1262                            let ui = u.row_indices_of_col_raw(j);
1263                            let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
1264
1265                            let xj0 = x.read(j, k0);
1266                            let xj1 = x.read(j, k1);
1267
1268                            for (i, u) in zip(
1269                                &ui[..ui.len() - 1],
1270                                ux.subslice(0..ui.len() - 1).into_ref_iter(),
1271                            ) {
1272                                let i = i.zx();
1273                                let u = if conj == Conj::Yes {
1274                                    u.read().faer_conj()
1275                                } else {
1276                                    u.read()
1277                                };
1278
1279                                x.write(i, k0, x.read(i, k0).faer_sub(E::faer_mul(u, xj0)));
1280                                x.write(i, k1, x.read(i, k1).faer_sub(E::faer_mul(u, xj1)));
1281                            }
1282                        }
1283                    }
1284                    3 => {
1285                        let k0 = K.check(k);
1286                        let k1 = K.check(k + 1);
1287                        let k2 = K.check(k + 2);
1288
1289                        for j in N.indices().rev() {
1290                            let ui = u.row_indices_of_col_raw(j);
1291                            let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
1292
1293                            let xj0 = x.read(j, k0);
1294                            let xj1 = x.read(j, k1);
1295                            let xj2 = x.read(j, k2);
1296
1297                            for (i, u) in zip(
1298                                &ui[..ui.len() - 1],
1299                                ux.subslice(0..ui.len() - 1).into_ref_iter(),
1300                            ) {
1301                                let i = i.zx();
1302                                let u = if conj == Conj::Yes {
1303                                    u.read().faer_conj()
1304                                } else {
1305                                    u.read()
1306                                };
1307
1308                                x.write(i, k0, x.read(i, k0).faer_sub(E::faer_mul(u, xj0)));
1309                                x.write(i, k1, x.read(i, k1).faer_sub(E::faer_mul(u, xj1)));
1310                                x.write(i, k2, x.read(i, k2).faer_sub(E::faer_mul(u, xj2)));
1311                            }
1312                        }
1313                    }
1314                    4 => {
1315                        let k0 = K.check(k);
1316                        let k1 = K.check(k + 1);
1317                        let k2 = K.check(k + 2);
1318                        let k3 = K.check(k + 3);
1319
1320                        for j in N.indices().rev() {
1321                            let ui = u.row_indices_of_col_raw(j);
1322                            let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
1323
1324                            let xj0 = x.read(j, k0);
1325                            let xj1 = x.read(j, k1);
1326                            let xj2 = x.read(j, k2);
1327                            let xj3 = x.read(j, k3);
1328
1329                            for (i, u) in zip(
1330                                &ui[..ui.len() - 1],
1331                                ux.subslice(0..ui.len() - 1).into_ref_iter(),
1332                            ) {
1333                                let i = i.zx();
1334                                let u = if conj == Conj::Yes {
1335                                    u.read().faer_conj()
1336                                } else {
1337                                    u.read()
1338                                };
1339
1340                                x.write(i, k0, x.read(i, k0).faer_sub(E::faer_mul(u, xj0)));
1341                                x.write(i, k1, x.read(i, k1).faer_sub(E::faer_mul(u, xj1)));
1342                                x.write(i, k2, x.read(i, k2).faer_sub(E::faer_mul(u, xj2)));
1343                                x.write(i, k3, x.read(i, k3).faer_sub(E::faer_mul(u, xj3)));
1344                            }
1345                        }
1346                    }
1347                    _ => unreachable!(),
1348                }
1349                k += bs;
1350            }
1351        },
1352    );
1353}
1354
1355pub fn solve_unit_upper_triangular_transpose_in_place<I: Index, E: ComplexField>(
1356    u: SparseColMatRef<'_, I, E>,
1357    conj: Conj,
1358    rhs: MatMut<'_, E>,
1359    parallelism: Parallelism,
1360) {
1361    let _ = parallelism;
1362    assert!(u.nrows() == u.ncols());
1363    assert!(rhs.nrows() == u.nrows());
1364
1365    faer_core::constrained::Size::with2(
1366        rhs.nrows(),
1367        rhs.ncols(),
1368        #[inline(always)]
1369        |N, K| {
1370            let mut x = faer_core::constrained::MatMut::new(rhs, N, K);
1371            let u = faer_core::constrained::sparse::SparseColMatRef::new(u, N, N);
1372
1373            let mut k = 0usize;
1374            while k < *K {
1375                let bs = Ord::min(*K - k, 4);
1376                match bs {
1377                    1 => {
1378                        let k0 = K.check(k);
1379                        for j in N.indices() {
1380                            let ui = u.row_indices_of_col_raw(j);
1381                            let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
1382
1383                            let mut acc0a = E::faer_zero();
1384                            let mut acc0b = E::faer_zero();
1385                            let mut acc0c = E::faer_zero();
1386                            let mut acc0d = E::faer_zero();
1387
1388                            let a = 0;
1389                            let b = 1;
1390                            let c = 2;
1391                            let d = 3;
1392
1393                            let rows_head = ui[..ui.len() - 1].chunks_exact(4);
1394                            let rows_tail = rows_head.remainder();
1395                            let (values_head, values_tail) =
1396                                ux.subslice(0..ui.len() - 1).into_chunks_exact(4);
1397
1398                            for (i, uij) in zip(rows_head, values_head) {
1399                                let uija = uij.read(a);
1400                                let uijb = uij.read(b);
1401                                let uijc = uij.read(c);
1402                                let uijd = uij.read(d);
1403                                let uija = if conj == Conj::Yes {
1404                                    uija.faer_conj()
1405                                } else {
1406                                    uija
1407                                };
1408                                let uijb = if conj == Conj::Yes {
1409                                    uijb.faer_conj()
1410                                } else {
1411                                    uijb
1412                                };
1413                                let uijc = if conj == Conj::Yes {
1414                                    uijc.faer_conj()
1415                                } else {
1416                                    uijc
1417                                };
1418                                let uijd = if conj == Conj::Yes {
1419                                    uijd.faer_conj()
1420                                } else {
1421                                    uijd
1422                                };
1423                                acc0a = acc0a.faer_add(uija.faer_mul(x.read(i[a].zx(), k0)));
1424                                acc0b = acc0b.faer_add(uijb.faer_mul(x.read(i[b].zx(), k0)));
1425                                acc0c = acc0c.faer_add(uijc.faer_mul(x.read(i[c].zx(), k0)));
1426                                acc0d = acc0d.faer_add(uijd.faer_mul(x.read(i[d].zx(), k0)));
1427                            }
1428
1429                            for (i, uij) in zip(rows_tail, values_tail.into_ref_iter()) {
1430                                let uija = uij.read();
1431                                let uija = if conj == Conj::Yes {
1432                                    uija.faer_conj()
1433                                } else {
1434                                    uija
1435                                };
1436                                acc0a = acc0a.faer_add(uija.faer_mul(x.read(i.zx(), k0)));
1437                            }
1438
1439                            x.write(
1440                                j,
1441                                k0,
1442                                x.read(j, k0).faer_sub(
1443                                    acc0a.faer_add(acc0b).faer_add(acc0c.faer_add(acc0d)),
1444                                ),
1445                            );
1446                        }
1447                    }
1448                    2 => {
1449                        let k0 = K.check(k);
1450                        let k1 = K.check(k + 1);
1451
1452                        for j in N.indices() {
1453                            let ui = u.row_indices_of_col_raw(j);
1454                            let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
1455
1456                            let mut acc0a = E::faer_zero();
1457                            let mut acc0b = E::faer_zero();
1458                            let mut acc1a = E::faer_zero();
1459                            let mut acc1b = E::faer_zero();
1460
1461                            let a = 0;
1462                            let b = 1;
1463
1464                            let rows_head = ui[..ui.len() - 1].chunks_exact(4);
1465                            let rows_tail = rows_head.remainder();
1466                            let (values_head, values_tail) =
1467                                ux.subslice(0..ui.len() - 1).into_chunks_exact(4);
1468
1469                            for (i, uij) in zip(rows_head, values_head) {
1470                                let uija = uij.read(a);
1471                                let uijb = uij.read(b);
1472                                let uija = if conj == Conj::Yes {
1473                                    uija.faer_conj()
1474                                } else {
1475                                    uija
1476                                };
1477                                let uijb = if conj == Conj::Yes {
1478                                    uijb.faer_conj()
1479                                } else {
1480                                    uijb
1481                                };
1482                                acc0a = acc0a.faer_add(uija.faer_mul(x.read(i[a].zx(), k0)));
1483                                acc0b = acc0b.faer_add(uijb.faer_mul(x.read(i[b].zx(), k0)));
1484                                acc1a = acc1a.faer_add(uija.faer_mul(x.read(i[a].zx(), k1)));
1485                                acc1b = acc1b.faer_add(uijb.faer_mul(x.read(i[b].zx(), k1)));
1486                            }
1487
1488                            for (i, uij) in zip(rows_tail, values_tail.into_ref_iter()) {
1489                                let uija = uij.read();
1490                                let uija = if conj == Conj::Yes {
1491                                    uija.faer_conj()
1492                                } else {
1493                                    uija
1494                                };
1495                                acc0a = acc0a.faer_add(uija.faer_mul(x.read(i.zx(), k0)));
1496                                acc1a = acc1a.faer_add(uija.faer_mul(x.read(i.zx(), k1)));
1497                            }
1498
1499                            x.write(j, k0, x.read(j, k0).faer_sub(acc0a.faer_add(acc0b)));
1500                            x.write(j, k1, x.read(j, k1).faer_sub(acc1a.faer_add(acc1b)));
1501                        }
1502                    }
1503                    3 => {
1504                        let k0 = K.check(k);
1505                        let k1 = K.check(k + 1);
1506                        let k2 = K.check(k + 2);
1507
1508                        for j in N.indices() {
1509                            let ui = u.row_indices_of_col_raw(j);
1510                            let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
1511
1512                            let mut acc0a = E::faer_zero();
1513                            let mut acc1a = E::faer_zero();
1514                            let mut acc2a = E::faer_zero();
1515
1516                            let a = 0;
1517
1518                            let rows_head = ui[..ui.len() - 1].chunks_exact(4);
1519                            let rows_tail = rows_head.remainder();
1520                            let (values_head, values_tail) =
1521                                ux.subslice(0..ui.len() - 1).into_chunks_exact(4);
1522
1523                            for (i, uij) in zip(rows_head, values_head) {
1524                                let uija = uij.read(a);
1525                                let uija = if conj == Conj::Yes {
1526                                    uija.faer_conj()
1527                                } else {
1528                                    uija
1529                                };
1530                                acc0a = acc0a.faer_add(uija.faer_mul(x.read(i[a].zx(), k0)));
1531                                acc1a = acc1a.faer_add(uija.faer_mul(x.read(i[a].zx(), k1)));
1532                                acc2a = acc2a.faer_add(uija.faer_mul(x.read(i[a].zx(), k2)));
1533                            }
1534
1535                            for (i, uij) in zip(rows_tail, values_tail.into_ref_iter()) {
1536                                let uija = uij.read();
1537                                let uija = if conj == Conj::Yes {
1538                                    uija.faer_conj()
1539                                } else {
1540                                    uija
1541                                };
1542                                acc0a = acc0a.faer_add(uija.faer_mul(x.read(i.zx(), k0)));
1543                                acc1a = acc1a.faer_add(uija.faer_mul(x.read(i.zx(), k1)));
1544                                acc2a = acc2a.faer_add(uija.faer_mul(x.read(i.zx(), k2)));
1545                            }
1546
1547                            x.write(j, k0, x.read(j, k0).faer_sub(acc0a));
1548                            x.write(j, k1, x.read(j, k1).faer_sub(acc1a));
1549                            x.write(j, k2, x.read(j, k2).faer_sub(acc2a));
1550                        }
1551                    }
1552                    4 => {
1553                        let k0 = K.check(k);
1554                        let k1 = K.check(k + 1);
1555                        let k2 = K.check(k + 2);
1556                        let k3 = K.check(k + 3);
1557
1558                        for j in N.indices() {
1559                            let ui = u.row_indices_of_col_raw(j);
1560                            let ux = SliceGroup::<'_, E>::new(u.values_of_col(j));
1561
1562                            let mut acc0a = E::faer_zero();
1563                            let mut acc1a = E::faer_zero();
1564                            let mut acc2a = E::faer_zero();
1565                            let mut acc3a = E::faer_zero();
1566
1567                            let a = 0;
1568
1569                            let rows_head = ui[..ui.len() - 1].chunks_exact(4);
1570                            let rows_tail = rows_head.remainder();
1571                            let (values_head, values_tail) =
1572                                ux.subslice(0..ui.len() - 1).into_chunks_exact(4);
1573
1574                            for (i, uij) in zip(rows_head, values_head) {
1575                                let uija = uij.read(a);
1576                                let uija = if conj == Conj::Yes {
1577                                    uija.faer_conj()
1578                                } else {
1579                                    uija
1580                                };
1581                                acc0a = acc0a.faer_add(uija.faer_mul(x.read(i[a].zx(), k0)));
1582                                acc1a = acc1a.faer_add(uija.faer_mul(x.read(i[a].zx(), k1)));
1583                                acc2a = acc2a.faer_add(uija.faer_mul(x.read(i[a].zx(), k2)));
1584                                acc3a = acc3a.faer_add(uija.faer_mul(x.read(i[a].zx(), k3)));
1585                            }
1586
1587                            for (i, uij) in zip(rows_tail, values_tail.into_ref_iter()) {
1588                                let uija = uij.read();
1589                                let uija = if conj == Conj::Yes {
1590                                    uija.faer_conj()
1591                                } else {
1592                                    uija
1593                                };
1594                                acc0a = acc0a.faer_add(uija.faer_mul(x.read(i.zx(), k0)));
1595                                acc1a = acc1a.faer_add(uija.faer_mul(x.read(i.zx(), k1)));
1596                                acc2a = acc2a.faer_add(uija.faer_mul(x.read(i.zx(), k2)));
1597                                acc3a = acc3a.faer_add(uija.faer_mul(x.read(i.zx(), k3)));
1598                            }
1599
1600                            x.write(j, k0, x.read(j, k0).faer_sub(acc0a));
1601                            x.write(j, k1, x.read(j, k1).faer_sub(acc1a));
1602                            x.write(j, k2, x.read(j, k2).faer_sub(acc2a));
1603                            x.write(j, k3, x.read(j, k3).faer_sub(acc3a));
1604                        }
1605                    }
1606                    _ => unreachable!(),
1607                }
1608                k += bs;
1609            }
1610        },
1611    );
1612}