Skip to main content

neco_stencil/
lib.rs

1//! Finite-difference stencil operators on uniform 2D grids.
2use core::fmt;
3
4#[cfg(feature = "rayon")]
5const RAYON_THRESHOLD: usize = 10_000;
6
7#[derive(Debug, Clone, Copy, PartialEq, Eq)]
8pub enum StencilError {
9    GridTooLarge {
10        nx: usize,
11        ny: usize,
12    },
13    InvalidLength {
14        name: &'static str,
15        expected: usize,
16        actual: usize,
17    },
18}
19
20impl fmt::Display for StencilError {
21    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
22        match self {
23            Self::GridTooLarge { nx, ny } => {
24                write!(f, "grid dimensions nx={nx}, ny={ny} are too large")
25            }
26            Self::InvalidLength {
27                name,
28                expected,
29                actual,
30            } => write!(f, "{name} must have length {expected}, got {actual}"),
31        }
32    }
33}
34
35impl std::error::Error for StencilError {}
36
37#[inline]
38fn idx(ny: usize, i: usize, j: usize) -> usize {
39    i * ny + j
40}
41
42#[inline]
43fn grid_len(nx: usize, ny: usize) -> Result<usize, StencilError> {
44    nx.checked_mul(ny)
45        .ok_or(StencilError::GridTooLarge { nx, ny })
46}
47
48fn assert_len(name: &'static str, data: &[f64], nx: usize, ny: usize) -> Result<(), StencilError> {
49    let expected = grid_len(nx, ny)?;
50    if data.len() != expected {
51        return Err(StencilError::InvalidLength {
52            name,
53            expected,
54            actual: data.len(),
55        });
56    }
57    Ok(())
58}
59
60fn assert_len_mut(
61    name: &'static str,
62    data: &mut [f64],
63    nx: usize,
64    ny: usize,
65) -> Result<(), StencilError> {
66    let expected = grid_len(nx, ny)?;
67    if data.len() != expected {
68        return Err(StencilError::InvalidLength {
69            name,
70            expected,
71            actual: data.len(),
72        });
73    }
74    Ok(())
75}
76
77#[inline]
78pub fn laplacian(
79    u: &[f64],
80    nx: usize,
81    ny: usize,
82    dx: f64,
83    out: &mut [f64],
84) -> Result<(), StencilError> {
85    assert_len("u", u, nx, ny)?;
86    assert_len_mut("out", out, nx, ny)?;
87    out.fill(0.0);
88    if nx < 3 || ny < 3 {
89        return Ok(());
90    }
91    let inv_dx2 = 1.0 / (dx * dx);
92    #[cfg(feature = "rayon")]
93    if nx * ny >= RAYON_THRESHOLD {
94        use rayon::prelude::*;
95        let u_base = u.as_ptr() as usize;
96        let out_base = out.as_mut_ptr() as usize;
97        (1..nx - 1).into_par_iter().for_each(|i| {
98            let u_ptr = u_base as *const f64;
99            let out_ptr = out_base as *mut f64;
100            for j in 1..ny - 1 {
101                unsafe {
102                    let center = idx(ny, i, j);
103                    *out_ptr.add(center) = (*u_ptr.add(idx(ny, i + 1, j))
104                        + *u_ptr.add(idx(ny, i - 1, j))
105                        + *u_ptr.add(idx(ny, i, j + 1))
106                        + *u_ptr.add(idx(ny, i, j - 1))
107                        - 4.0 * *u_ptr.add(center))
108                        * inv_dx2;
109                }
110            }
111        });
112        return Ok(());
113    }
114    for i in 1..nx - 1 {
115        for j in 1..ny - 1 {
116            let center = idx(ny, i, j);
117            out[center] = (u[idx(ny, i + 1, j)]
118                + u[idx(ny, i - 1, j)]
119                + u[idx(ny, i, j + 1)]
120                + u[idx(ny, i, j - 1)]
121                - 4.0 * u[center])
122                * inv_dx2;
123        }
124    }
125    Ok(())
126}
127
128#[inline]
129pub fn gradient_x(
130    u: &[f64],
131    nx: usize,
132    ny: usize,
133    dx: f64,
134    out: &mut [f64],
135) -> Result<(), StencilError> {
136    assert_len("u", u, nx, ny)?;
137    assert_len_mut("out", out, nx, ny)?;
138    out.fill(0.0);
139    if nx < 3 {
140        return Ok(());
141    }
142    let inv_2dx = 1.0 / (2.0 * dx);
143    for i in 1..nx - 1 {
144        for j in 0..ny {
145            out[idx(ny, i, j)] = (u[idx(ny, i + 1, j)] - u[idx(ny, i - 1, j)]) * inv_2dx;
146        }
147    }
148    Ok(())
149}
150
151#[inline]
152pub fn gradient_y(
153    u: &[f64],
154    nx: usize,
155    ny: usize,
156    dx: f64,
157    out: &mut [f64],
158) -> Result<(), StencilError> {
159    assert_len("u", u, nx, ny)?;
160    assert_len_mut("out", out, nx, ny)?;
161    out.fill(0.0);
162    if ny < 3 {
163        return Ok(());
164    }
165    let inv_2dx = 1.0 / (2.0 * dx);
166    for i in 0..nx {
167        for j in 1..ny - 1 {
168            out[idx(ny, i, j)] = (u[idx(ny, i, j + 1)] - u[idx(ny, i, j - 1)]) * inv_2dx;
169        }
170    }
171    Ok(())
172}
173
174#[inline]
175pub fn d2_dx2(
176    u: &[f64],
177    nx: usize,
178    ny: usize,
179    dx: f64,
180    out: &mut [f64],
181) -> Result<(), StencilError> {
182    assert_len("u", u, nx, ny)?;
183    assert_len_mut("out", out, nx, ny)?;
184    out.fill(0.0);
185    if nx < 3 {
186        return Ok(());
187    }
188    let inv_dx2 = 1.0 / (dx * dx);
189    for i in 1..nx - 1 {
190        for j in 0..ny {
191            out[idx(ny, i, j)] =
192                (u[idx(ny, i + 1, j)] - 2.0 * u[idx(ny, i, j)] + u[idx(ny, i - 1, j)]) * inv_dx2;
193        }
194    }
195    Ok(())
196}
197
198#[inline]
199pub fn d2_dy2(
200    u: &[f64],
201    nx: usize,
202    ny: usize,
203    dx: f64,
204    out: &mut [f64],
205) -> Result<(), StencilError> {
206    assert_len("u", u, nx, ny)?;
207    assert_len_mut("out", out, nx, ny)?;
208    out.fill(0.0);
209    if ny < 3 {
210        return Ok(());
211    }
212    let inv_dx2 = 1.0 / (dx * dx);
213    for i in 0..nx {
214        for j in 1..ny - 1 {
215            out[idx(ny, i, j)] =
216                (u[idx(ny, i, j + 1)] - 2.0 * u[idx(ny, i, j)] + u[idx(ny, i, j - 1)]) * inv_dx2;
217        }
218    }
219    Ok(())
220}
221
222#[inline]
223pub fn d2_dxdy(
224    u: &[f64],
225    nx: usize,
226    ny: usize,
227    dx: f64,
228    out: &mut [f64],
229) -> Result<(), StencilError> {
230    assert_len("u", u, nx, ny)?;
231    assert_len_mut("out", out, nx, ny)?;
232    out.fill(0.0);
233    if nx < 3 || ny < 3 {
234        return Ok(());
235    }
236    let inv_4dx2 = 1.0 / (4.0 * dx * dx);
237    for i in 1..nx - 1 {
238        for j in 1..ny - 1 {
239            out[idx(ny, i, j)] =
240                (u[idx(ny, i + 1, j + 1)] - u[idx(ny, i + 1, j - 1)] - u[idx(ny, i - 1, j + 1)]
241                    + u[idx(ny, i - 1, j - 1)])
242                    * inv_4dx2;
243        }
244    }
245    Ok(())
246}
247
248#[allow(clippy::too_many_arguments)]
249pub fn w_derivatives(
250    w: &[f64],
251    nx: usize,
252    ny: usize,
253    dx: f64,
254    wx: &mut [f64],
255    wy: &mut [f64],
256    wxx: &mut [f64],
257    wyy: &mut [f64],
258    wxy: &mut [f64],
259) -> Result<(), StencilError> {
260    assert_len("w", w, nx, ny)?;
261    assert_len_mut("wx", wx, nx, ny)?;
262    assert_len_mut("wy", wy, nx, ny)?;
263    assert_len_mut("wxx", wxx, nx, ny)?;
264    assert_len_mut("wyy", wyy, nx, ny)?;
265    assert_len_mut("wxy", wxy, nx, ny)?;
266    wx.fill(0.0);
267    wy.fill(0.0);
268    wxx.fill(0.0);
269    wyy.fill(0.0);
270    wxy.fill(0.0);
271    if nx < 3 || ny < 3 {
272        return Ok(());
273    }
274    let inv_2dx = 1.0 / (2.0 * dx);
275    let inv_dx2 = 1.0 / (dx * dx);
276    let inv_4dx2 = 1.0 / (4.0 * dx * dx);
277    #[cfg(feature = "rayon")]
278    if nx * ny >= RAYON_THRESHOLD {
279        use rayon::prelude::*;
280        let w_base = w.as_ptr() as usize;
281        let wx_base = wx.as_mut_ptr() as usize;
282        let wy_base = wy.as_mut_ptr() as usize;
283        let wxx_base = wxx.as_mut_ptr() as usize;
284        let wyy_base = wyy.as_mut_ptr() as usize;
285        let wxy_base = wxy.as_mut_ptr() as usize;
286        (1..nx - 1).into_par_iter().for_each(|i| {
287            let w_ptr = w_base as *const f64;
288            let wx_ptr = wx_base as *mut f64;
289            let wy_ptr = wy_base as *mut f64;
290            let wxx_ptr = wxx_base as *mut f64;
291            let wyy_ptr = wyy_base as *mut f64;
292            let wxy_ptr = wxy_base as *mut f64;
293            for j in 1..ny - 1 {
294                unsafe {
295                    let center = idx(ny, i, j);
296                    let wc = *w_ptr.add(center);
297                    let wp = *w_ptr.add(idx(ny, i + 1, j));
298                    let wm = *w_ptr.add(idx(ny, i - 1, j));
299                    let wn = *w_ptr.add(idx(ny, i, j + 1));
300                    let ws = *w_ptr.add(idx(ny, i, j - 1));
301                    *wx_ptr.add(center) = (wp - wm) * inv_2dx;
302                    *wy_ptr.add(center) = (wn - ws) * inv_2dx;
303                    *wxx_ptr.add(center) = (wp - 2.0 * wc + wm) * inv_dx2;
304                    *wyy_ptr.add(center) = (wn - 2.0 * wc + ws) * inv_dx2;
305                    *wxy_ptr.add(center) = (*w_ptr.add(idx(ny, i + 1, j + 1))
306                        - *w_ptr.add(idx(ny, i + 1, j - 1))
307                        - *w_ptr.add(idx(ny, i - 1, j + 1))
308                        + *w_ptr.add(idx(ny, i - 1, j - 1)))
309                        * inv_4dx2;
310                }
311            }
312        });
313        return Ok(());
314    }
315    for i in 1..nx - 1 {
316        for j in 1..ny - 1 {
317            let center = idx(ny, i, j);
318            let wc = w[center];
319            let wp = w[idx(ny, i + 1, j)];
320            let wm = w[idx(ny, i - 1, j)];
321            let wn = w[idx(ny, i, j + 1)];
322            let ws = w[idx(ny, i, j - 1)];
323            wx[center] = (wp - wm) * inv_2dx;
324            wy[center] = (wn - ws) * inv_2dx;
325            wxx[center] = (wp - 2.0 * wc + wm) * inv_dx2;
326            wyy[center] = (wn - 2.0 * wc + ws) * inv_dx2;
327            wxy[center] =
328                (w[idx(ny, i + 1, j + 1)] - w[idx(ny, i + 1, j - 1)] - w[idx(ny, i - 1, j + 1)]
329                    + w[idx(ny, i - 1, j - 1)])
330                    * inv_4dx2;
331        }
332    }
333    Ok(())
334}
335
336#[allow(clippy::too_many_arguments)]
337pub fn uv_gradients(
338    u: &[f64],
339    v: &[f64],
340    nx: usize,
341    ny: usize,
342    dx: f64,
343    ux: &mut [f64],
344    uy: &mut [f64],
345    vx: &mut [f64],
346    vy: &mut [f64],
347) -> Result<(), StencilError> {
348    assert_len("u", u, nx, ny)?;
349    assert_len("v", v, nx, ny)?;
350    assert_len_mut("ux", ux, nx, ny)?;
351    assert_len_mut("uy", uy, nx, ny)?;
352    assert_len_mut("vx", vx, nx, ny)?;
353    assert_len_mut("vy", vy, nx, ny)?;
354    ux.fill(0.0);
355    uy.fill(0.0);
356    vx.fill(0.0);
357    vy.fill(0.0);
358    if nx < 3 || ny < 3 {
359        return Ok(());
360    }
361    let inv_2dx = 1.0 / (2.0 * dx);
362    #[cfg(feature = "rayon")]
363    if nx * ny >= RAYON_THRESHOLD {
364        use rayon::prelude::*;
365        let u_base = u.as_ptr() as usize;
366        let v_base = v.as_ptr() as usize;
367        let ux_base = ux.as_mut_ptr() as usize;
368        let uy_base = uy.as_mut_ptr() as usize;
369        let vx_base = vx.as_mut_ptr() as usize;
370        let vy_base = vy.as_mut_ptr() as usize;
371        (1..nx - 1).into_par_iter().for_each(|i| {
372            let u_ptr = u_base as *const f64;
373            let v_ptr = v_base as *const f64;
374            let ux_ptr = ux_base as *mut f64;
375            let uy_ptr = uy_base as *mut f64;
376            let vx_ptr = vx_base as *mut f64;
377            let vy_ptr = vy_base as *mut f64;
378            for j in 1..ny - 1 {
379                unsafe {
380                    let center = idx(ny, i, j);
381                    *ux_ptr.add(center) =
382                        (*u_ptr.add(idx(ny, i + 1, j)) - *u_ptr.add(idx(ny, i - 1, j))) * inv_2dx;
383                    *uy_ptr.add(center) =
384                        (*u_ptr.add(idx(ny, i, j + 1)) - *u_ptr.add(idx(ny, i, j - 1))) * inv_2dx;
385                    *vx_ptr.add(center) =
386                        (*v_ptr.add(idx(ny, i + 1, j)) - *v_ptr.add(idx(ny, i - 1, j))) * inv_2dx;
387                    *vy_ptr.add(center) =
388                        (*v_ptr.add(idx(ny, i, j + 1)) - *v_ptr.add(idx(ny, i, j - 1))) * inv_2dx;
389                }
390            }
391        });
392        return Ok(());
393    }
394    for i in 1..nx - 1 {
395        for j in 1..ny - 1 {
396            let center = idx(ny, i, j);
397            ux[center] = (u[idx(ny, i + 1, j)] - u[idx(ny, i - 1, j)]) * inv_2dx;
398            uy[center] = (u[idx(ny, i, j + 1)] - u[idx(ny, i, j - 1)]) * inv_2dx;
399            vx[center] = (v[idx(ny, i + 1, j)] - v[idx(ny, i - 1, j)]) * inv_2dx;
400            vy[center] = (v[idx(ny, i, j + 1)] - v[idx(ny, i, j - 1)]) * inv_2dx;
401        }
402    }
403    Ok(())
404}
405
406#[allow(clippy::too_many_arguments)]
407pub fn biharmonic_pass1_fused(
408    w: &[f64],
409    d_grid: &[f64],
410    nx: usize,
411    ny: usize,
412    dx: f64,
413    d_lap: &mut [f64],
414    wx: &mut [f64],
415    wy: &mut [f64],
416    wxx: &mut [f64],
417    wyy: &mut [f64],
418    wxy: &mut [f64],
419) -> Result<(), StencilError> {
420    assert_len("w", w, nx, ny)?;
421    assert_len("d_grid", d_grid, nx, ny)?;
422    assert_len_mut("d_lap", d_lap, nx, ny)?;
423    assert_len_mut("wx", wx, nx, ny)?;
424    assert_len_mut("wy", wy, nx, ny)?;
425    assert_len_mut("wxx", wxx, nx, ny)?;
426    assert_len_mut("wyy", wyy, nx, ny)?;
427    assert_len_mut("wxy", wxy, nx, ny)?;
428    d_lap.fill(0.0);
429    wx.fill(0.0);
430    wy.fill(0.0);
431    wxx.fill(0.0);
432    wyy.fill(0.0);
433    wxy.fill(0.0);
434    if nx < 3 || ny < 3 {
435        return Ok(());
436    }
437    let inv_2dx = 1.0 / (2.0 * dx);
438    let inv_dx2 = 1.0 / (dx * dx);
439    let inv_4dx2 = 1.0 / (4.0 * dx * dx);
440    #[cfg(feature = "rayon")]
441    if nx * ny >= RAYON_THRESHOLD {
442        use rayon::prelude::*;
443        let w_base = w.as_ptr() as usize;
444        let d_base = d_grid.as_ptr() as usize;
445        let d_lap_base = d_lap.as_mut_ptr() as usize;
446        let wx_base = wx.as_mut_ptr() as usize;
447        let wy_base = wy.as_mut_ptr() as usize;
448        let wxx_base = wxx.as_mut_ptr() as usize;
449        let wyy_base = wyy.as_mut_ptr() as usize;
450        let wxy_base = wxy.as_mut_ptr() as usize;
451        (1..nx - 1).into_par_iter().for_each(|i| {
452            let w_ptr = w_base as *const f64;
453            let d_ptr = d_base as *const f64;
454            let d_lap_ptr = d_lap_base as *mut f64;
455            let wx_ptr = wx_base as *mut f64;
456            let wy_ptr = wy_base as *mut f64;
457            let wxx_ptr = wxx_base as *mut f64;
458            let wyy_ptr = wyy_base as *mut f64;
459            let wxy_ptr = wxy_base as *mut f64;
460            for j in 1..ny - 1 {
461                unsafe {
462                    let center = idx(ny, i, j);
463                    let wc = *w_ptr.add(center);
464                    let wp = *w_ptr.add(idx(ny, i + 1, j));
465                    let wm = *w_ptr.add(idx(ny, i - 1, j));
466                    let wn = *w_ptr.add(idx(ny, i, j + 1));
467                    let ws = *w_ptr.add(idx(ny, i, j - 1));
468                    let xx = (wp - 2.0 * wc + wm) * inv_dx2;
469                    let yy = (wn - 2.0 * wc + ws) * inv_dx2;
470                    *wxx_ptr.add(center) = xx;
471                    *wyy_ptr.add(center) = yy;
472                    *wx_ptr.add(center) = (wp - wm) * inv_2dx;
473                    *wy_ptr.add(center) = (wn - ws) * inv_2dx;
474                    *wxy_ptr.add(center) = (*w_ptr.add(idx(ny, i + 1, j + 1))
475                        - *w_ptr.add(idx(ny, i + 1, j - 1))
476                        - *w_ptr.add(idx(ny, i - 1, j + 1))
477                        + *w_ptr.add(idx(ny, i - 1, j - 1)))
478                        * inv_4dx2;
479                    *d_lap_ptr.add(center) = *d_ptr.add(center) * (xx + yy);
480                }
481            }
482        });
483        return Ok(());
484    }
485    for i in 1..nx - 1 {
486        for j in 1..ny - 1 {
487            let center = idx(ny, i, j);
488            let wc = w[center];
489            let wp = w[idx(ny, i + 1, j)];
490            let wm = w[idx(ny, i - 1, j)];
491            let wn = w[idx(ny, i, j + 1)];
492            let ws = w[idx(ny, i, j - 1)];
493            let xx = (wp - 2.0 * wc + wm) * inv_dx2;
494            let yy = (wn - 2.0 * wc + ws) * inv_dx2;
495            wxx[center] = xx;
496            wyy[center] = yy;
497            wx[center] = (wp - wm) * inv_2dx;
498            wy[center] = (wn - ws) * inv_2dx;
499            wxy[center] =
500                (w[idx(ny, i + 1, j + 1)] - w[idx(ny, i + 1, j - 1)] - w[idx(ny, i - 1, j + 1)]
501                    + w[idx(ny, i - 1, j - 1)])
502                    * inv_4dx2;
503            d_lap[center] = d_grid[center] * (xx + yy);
504        }
505    }
506    Ok(())
507}
508
509#[allow(clippy::too_many_arguments)]
510pub fn biharmonic(
511    w: &[f64],
512    d_grid: &[f64],
513    nx: usize,
514    ny: usize,
515    dx: f64,
516    lap: &mut [f64],
517    d_lap: &mut [f64],
518    bilap: &mut [f64],
519) -> Result<(), StencilError> {
520    assert_len("w", w, nx, ny)?;
521    assert_len("d_grid", d_grid, nx, ny)?;
522    assert_len_mut("lap", lap, nx, ny)?;
523    assert_len_mut("d_lap", d_lap, nx, ny)?;
524    assert_len_mut("bilap", bilap, nx, ny)?;
525    lap.fill(0.0);
526    d_lap.fill(0.0);
527    bilap.fill(0.0);
528    if nx < 3 || ny < 3 {
529        return Ok(());
530    }
531    let inv_dx2 = 1.0 / (dx * dx);
532    #[cfg(feature = "rayon")]
533    let use_par = nx * ny >= RAYON_THRESHOLD;
534    #[cfg(not(feature = "rayon"))]
535    let use_par = false;
536    if use_par {
537        #[cfg(feature = "rayon")]
538        {
539            use rayon::prelude::*;
540            let w_base = w.as_ptr() as usize;
541            let d_base = d_grid.as_ptr() as usize;
542            let lap_base = lap.as_mut_ptr() as usize;
543            let d_lap_base = d_lap.as_mut_ptr() as usize;
544            (1..nx - 1).into_par_iter().for_each(|i| {
545                let w_ptr = w_base as *const f64;
546                let d_ptr = d_base as *const f64;
547                let lap_ptr = lap_base as *mut f64;
548                let d_lap_ptr = d_lap_base as *mut f64;
549                for j in 1..ny - 1 {
550                    unsafe {
551                        let center = idx(ny, i, j);
552                        let l = (*w_ptr.add(idx(ny, i + 1, j))
553                            + *w_ptr.add(idx(ny, i - 1, j))
554                            + *w_ptr.add(idx(ny, i, j + 1))
555                            + *w_ptr.add(idx(ny, i, j - 1))
556                            - 4.0 * *w_ptr.add(center))
557                            * inv_dx2;
558                        *lap_ptr.add(center) = l;
559                        *d_lap_ptr.add(center) = *d_ptr.add(center) * l;
560                    }
561                }
562            });
563        }
564    } else {
565        for i in 1..nx - 1 {
566            for j in 1..ny - 1 {
567                let center = idx(ny, i, j);
568                let l = (w[idx(ny, i + 1, j)]
569                    + w[idx(ny, i - 1, j)]
570                    + w[idx(ny, i, j + 1)]
571                    + w[idx(ny, i, j - 1)]
572                    - 4.0 * w[center])
573                    * inv_dx2;
574                lap[center] = l;
575                d_lap[center] = d_grid[center] * l;
576            }
577        }
578    }
579
580    if use_par {
581        #[cfg(feature = "rayon")]
582        {
583            use rayon::prelude::*;
584            let d_lap_base = d_lap.as_ptr() as usize;
585            let bilap_base = bilap.as_mut_ptr() as usize;
586            (1..nx - 1).into_par_iter().for_each(|i| {
587                let d_lap_ptr = d_lap_base as *const f64;
588                let bilap_ptr = bilap_base as *mut f64;
589                for j in 1..ny - 1 {
590                    unsafe {
591                        let center = idx(ny, i, j);
592                        *bilap_ptr.add(center) = (*d_lap_ptr.add(idx(ny, i + 1, j))
593                            + *d_lap_ptr.add(idx(ny, i - 1, j))
594                            + *d_lap_ptr.add(idx(ny, i, j + 1))
595                            + *d_lap_ptr.add(idx(ny, i, j - 1))
596                            - 4.0 * *d_lap_ptr.add(center))
597                            * inv_dx2;
598                    }
599                }
600            });
601        }
602    } else {
603        for i in 1..nx - 1 {
604            for j in 1..ny - 1 {
605                let center = idx(ny, i, j);
606                bilap[center] = (d_lap[idx(ny, i + 1, j)]
607                    + d_lap[idx(ny, i - 1, j)]
608                    + d_lap[idx(ny, i, j + 1)]
609                    + d_lap[idx(ny, i, j - 1)]
610                    - 4.0 * d_lap[center])
611                    * inv_dx2;
612            }
613        }
614    }
615    Ok(())
616}
617
618#[inline]
619pub fn bilaplacian_uniform(
620    w: &[f64],
621    nx: usize,
622    ny: usize,
623    d: f64,
624    dx: f64,
625    bilap: &mut [f64],
626) -> Result<(), StencilError> {
627    assert_len("w", w, nx, ny)?;
628    assert_len_mut("bilap", bilap, nx, ny)?;
629    bilap.fill(0.0);
630    if nx < 5 || ny < 5 {
631        return Ok(());
632    }
633    let coeff = d / (dx * dx * dx * dx);
634    #[cfg(feature = "rayon")]
635    if nx * ny >= RAYON_THRESHOLD {
636        use rayon::prelude::*;
637        let w_base = w.as_ptr() as usize;
638        let bilap_base = bilap.as_mut_ptr() as usize;
639        (2..nx - 2).into_par_iter().for_each(|i| {
640            let w_ptr = w_base as *const f64;
641            let bilap_ptr = bilap_base as *mut f64;
642            for j in 2..ny - 2 {
643                unsafe {
644                    let center = idx(ny, i, j);
645                    let val = *w_ptr.add(idx(ny, i + 2, j))
646                        + *w_ptr.add(idx(ny, i - 2, j))
647                        + *w_ptr.add(idx(ny, i, j + 2))
648                        + *w_ptr.add(idx(ny, i, j - 2))
649                        + 2.0
650                            * (*w_ptr.add(idx(ny, i + 1, j + 1))
651                                + *w_ptr.add(idx(ny, i + 1, j - 1))
652                                + *w_ptr.add(idx(ny, i - 1, j + 1))
653                                + *w_ptr.add(idx(ny, i - 1, j - 1)))
654                        - 8.0
655                            * (*w_ptr.add(idx(ny, i + 1, j))
656                                + *w_ptr.add(idx(ny, i - 1, j))
657                                + *w_ptr.add(idx(ny, i, j + 1))
658                                + *w_ptr.add(idx(ny, i, j - 1)))
659                        + 20.0 * *w_ptr.add(center);
660                    *bilap_ptr.add(center) = val * coeff;
661                }
662            }
663        });
664        return Ok(());
665    }
666    for i in 2..nx - 2 {
667        for j in 2..ny - 2 {
668            let center = idx(ny, i, j);
669            let val = w[idx(ny, i + 2, j)]
670                + w[idx(ny, i - 2, j)]
671                + w[idx(ny, i, j + 2)]
672                + w[idx(ny, i, j - 2)]
673                + 2.0
674                    * (w[idx(ny, i + 1, j + 1)]
675                        + w[idx(ny, i + 1, j - 1)]
676                        + w[idx(ny, i - 1, j + 1)]
677                        + w[idx(ny, i - 1, j - 1)])
678                - 8.0
679                    * (w[idx(ny, i + 1, j)]
680                        + w[idx(ny, i - 1, j)]
681                        + w[idx(ny, i, j + 1)]
682                        + w[idx(ny, i, j - 1)])
683                + 20.0 * w[center];
684            bilap[center] = val * coeff;
685        }
686    }
687    Ok(())
688}
689
690#[allow(clippy::too_many_arguments)]
691pub fn bilaplacian_ortho_uniform(
692    w: &[f64],
693    nx: usize,
694    ny: usize,
695    d_x: f64,
696    d_y: f64,
697    h_p: f64,
698    dx: f64,
699    bilap: &mut [f64],
700    cells: &[(usize, usize)],
701) -> Result<(), StencilError> {
702    assert_len("w", w, nx, ny)?;
703    assert_len_mut("bilap", bilap, nx, ny)?;
704    let inv_dx4 = 1.0 / (dx * dx * dx * dx);
705    let c_pm2_i = d_x * inv_dx4;
706    let c_pm2_j = d_y * inv_dx4;
707    let c_diag = 2.0 * h_p * inv_dx4;
708    let c_pm1_i = (-4.0 * d_x - 4.0 * h_p) * inv_dx4;
709    let c_pm1_j = (-4.0 * d_y - 4.0 * h_p) * inv_dx4;
710    let c_center = (6.0 * d_x + 8.0 * h_p + 6.0 * d_y) * inv_dx4;
711    for &(i, j) in cells {
712        let center = idx(ny, i, j);
713        bilap[center] = c_pm2_i * (w[idx(ny, i + 2, j)] + w[idx(ny, i - 2, j)])
714            + c_pm2_j * (w[idx(ny, i, j + 2)] + w[idx(ny, i, j - 2)])
715            + c_diag
716                * (w[idx(ny, i + 1, j + 1)]
717                    + w[idx(ny, i + 1, j - 1)]
718                    + w[idx(ny, i - 1, j + 1)]
719                    + w[idx(ny, i - 1, j - 1)])
720            + c_pm1_i * (w[idx(ny, i + 1, j)] + w[idx(ny, i - 1, j)])
721            + c_pm1_j * (w[idx(ny, i, j + 1)] + w[idx(ny, i, j - 1)])
722            + c_center * w[center];
723    }
724    Ok(())
725}