1use core::fmt;
3
4#[cfg(feature = "rayon")]
5const RAYON_THRESHOLD: usize = 10_000;
6
7#[derive(Debug, Clone, Copy, PartialEq, Eq)]
8pub enum StencilError {
9 GridTooLarge {
10 nx: usize,
11 ny: usize,
12 },
13 InvalidLength {
14 name: &'static str,
15 expected: usize,
16 actual: usize,
17 },
18}
19
20impl fmt::Display for StencilError {
21 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
22 match self {
23 Self::GridTooLarge { nx, ny } => {
24 write!(f, "grid dimensions nx={nx}, ny={ny} are too large")
25 }
26 Self::InvalidLength {
27 name,
28 expected,
29 actual,
30 } => write!(f, "{name} must have length {expected}, got {actual}"),
31 }
32 }
33}
34
35impl std::error::Error for StencilError {}
36
37#[inline]
38fn idx(ny: usize, i: usize, j: usize) -> usize {
39 i * ny + j
40}
41
42#[inline]
43fn grid_len(nx: usize, ny: usize) -> Result<usize, StencilError> {
44 nx.checked_mul(ny)
45 .ok_or(StencilError::GridTooLarge { nx, ny })
46}
47
48fn assert_len(name: &'static str, data: &[f64], nx: usize, ny: usize) -> Result<(), StencilError> {
49 let expected = grid_len(nx, ny)?;
50 if data.len() != expected {
51 return Err(StencilError::InvalidLength {
52 name,
53 expected,
54 actual: data.len(),
55 });
56 }
57 Ok(())
58}
59
60fn assert_len_mut(
61 name: &'static str,
62 data: &mut [f64],
63 nx: usize,
64 ny: usize,
65) -> Result<(), StencilError> {
66 let expected = grid_len(nx, ny)?;
67 if data.len() != expected {
68 return Err(StencilError::InvalidLength {
69 name,
70 expected,
71 actual: data.len(),
72 });
73 }
74 Ok(())
75}
76
77#[inline]
78pub fn laplacian(
79 u: &[f64],
80 nx: usize,
81 ny: usize,
82 dx: f64,
83 out: &mut [f64],
84) -> Result<(), StencilError> {
85 assert_len("u", u, nx, ny)?;
86 assert_len_mut("out", out, nx, ny)?;
87 out.fill(0.0);
88 if nx < 3 || ny < 3 {
89 return Ok(());
90 }
91 let inv_dx2 = 1.0 / (dx * dx);
92 #[cfg(feature = "rayon")]
93 if nx * ny >= RAYON_THRESHOLD {
94 use rayon::prelude::*;
95 let u_base = u.as_ptr() as usize;
96 let out_base = out.as_mut_ptr() as usize;
97 (1..nx - 1).into_par_iter().for_each(|i| {
98 let u_ptr = u_base as *const f64;
99 let out_ptr = out_base as *mut f64;
100 for j in 1..ny - 1 {
101 unsafe {
102 let center = idx(ny, i, j);
103 *out_ptr.add(center) = (*u_ptr.add(idx(ny, i + 1, j))
104 + *u_ptr.add(idx(ny, i - 1, j))
105 + *u_ptr.add(idx(ny, i, j + 1))
106 + *u_ptr.add(idx(ny, i, j - 1))
107 - 4.0 * *u_ptr.add(center))
108 * inv_dx2;
109 }
110 }
111 });
112 return Ok(());
113 }
114 for i in 1..nx - 1 {
115 for j in 1..ny - 1 {
116 let center = idx(ny, i, j);
117 out[center] = (u[idx(ny, i + 1, j)]
118 + u[idx(ny, i - 1, j)]
119 + u[idx(ny, i, j + 1)]
120 + u[idx(ny, i, j - 1)]
121 - 4.0 * u[center])
122 * inv_dx2;
123 }
124 }
125 Ok(())
126}
127
128#[inline]
129pub fn gradient_x(
130 u: &[f64],
131 nx: usize,
132 ny: usize,
133 dx: f64,
134 out: &mut [f64],
135) -> Result<(), StencilError> {
136 assert_len("u", u, nx, ny)?;
137 assert_len_mut("out", out, nx, ny)?;
138 out.fill(0.0);
139 if nx < 3 {
140 return Ok(());
141 }
142 let inv_2dx = 1.0 / (2.0 * dx);
143 for i in 1..nx - 1 {
144 for j in 0..ny {
145 out[idx(ny, i, j)] = (u[idx(ny, i + 1, j)] - u[idx(ny, i - 1, j)]) * inv_2dx;
146 }
147 }
148 Ok(())
149}
150
151#[inline]
152pub fn gradient_y(
153 u: &[f64],
154 nx: usize,
155 ny: usize,
156 dx: f64,
157 out: &mut [f64],
158) -> Result<(), StencilError> {
159 assert_len("u", u, nx, ny)?;
160 assert_len_mut("out", out, nx, ny)?;
161 out.fill(0.0);
162 if ny < 3 {
163 return Ok(());
164 }
165 let inv_2dx = 1.0 / (2.0 * dx);
166 for i in 0..nx {
167 for j in 1..ny - 1 {
168 out[idx(ny, i, j)] = (u[idx(ny, i, j + 1)] - u[idx(ny, i, j - 1)]) * inv_2dx;
169 }
170 }
171 Ok(())
172}
173
174#[inline]
175pub fn d2_dx2(
176 u: &[f64],
177 nx: usize,
178 ny: usize,
179 dx: f64,
180 out: &mut [f64],
181) -> Result<(), StencilError> {
182 assert_len("u", u, nx, ny)?;
183 assert_len_mut("out", out, nx, ny)?;
184 out.fill(0.0);
185 if nx < 3 {
186 return Ok(());
187 }
188 let inv_dx2 = 1.0 / (dx * dx);
189 for i in 1..nx - 1 {
190 for j in 0..ny {
191 out[idx(ny, i, j)] =
192 (u[idx(ny, i + 1, j)] - 2.0 * u[idx(ny, i, j)] + u[idx(ny, i - 1, j)]) * inv_dx2;
193 }
194 }
195 Ok(())
196}
197
198#[inline]
199pub fn d2_dy2(
200 u: &[f64],
201 nx: usize,
202 ny: usize,
203 dx: f64,
204 out: &mut [f64],
205) -> Result<(), StencilError> {
206 assert_len("u", u, nx, ny)?;
207 assert_len_mut("out", out, nx, ny)?;
208 out.fill(0.0);
209 if ny < 3 {
210 return Ok(());
211 }
212 let inv_dx2 = 1.0 / (dx * dx);
213 for i in 0..nx {
214 for j in 1..ny - 1 {
215 out[idx(ny, i, j)] =
216 (u[idx(ny, i, j + 1)] - 2.0 * u[idx(ny, i, j)] + u[idx(ny, i, j - 1)]) * inv_dx2;
217 }
218 }
219 Ok(())
220}
221
222#[inline]
223pub fn d2_dxdy(
224 u: &[f64],
225 nx: usize,
226 ny: usize,
227 dx: f64,
228 out: &mut [f64],
229) -> Result<(), StencilError> {
230 assert_len("u", u, nx, ny)?;
231 assert_len_mut("out", out, nx, ny)?;
232 out.fill(0.0);
233 if nx < 3 || ny < 3 {
234 return Ok(());
235 }
236 let inv_4dx2 = 1.0 / (4.0 * dx * dx);
237 for i in 1..nx - 1 {
238 for j in 1..ny - 1 {
239 out[idx(ny, i, j)] =
240 (u[idx(ny, i + 1, j + 1)] - u[idx(ny, i + 1, j - 1)] - u[idx(ny, i - 1, j + 1)]
241 + u[idx(ny, i - 1, j - 1)])
242 * inv_4dx2;
243 }
244 }
245 Ok(())
246}
247
248#[allow(clippy::too_many_arguments)]
249pub fn w_derivatives(
250 w: &[f64],
251 nx: usize,
252 ny: usize,
253 dx: f64,
254 wx: &mut [f64],
255 wy: &mut [f64],
256 wxx: &mut [f64],
257 wyy: &mut [f64],
258 wxy: &mut [f64],
259) -> Result<(), StencilError> {
260 assert_len("w", w, nx, ny)?;
261 assert_len_mut("wx", wx, nx, ny)?;
262 assert_len_mut("wy", wy, nx, ny)?;
263 assert_len_mut("wxx", wxx, nx, ny)?;
264 assert_len_mut("wyy", wyy, nx, ny)?;
265 assert_len_mut("wxy", wxy, nx, ny)?;
266 wx.fill(0.0);
267 wy.fill(0.0);
268 wxx.fill(0.0);
269 wyy.fill(0.0);
270 wxy.fill(0.0);
271 if nx < 3 || ny < 3 {
272 return Ok(());
273 }
274 let inv_2dx = 1.0 / (2.0 * dx);
275 let inv_dx2 = 1.0 / (dx * dx);
276 let inv_4dx2 = 1.0 / (4.0 * dx * dx);
277 #[cfg(feature = "rayon")]
278 if nx * ny >= RAYON_THRESHOLD {
279 use rayon::prelude::*;
280 let w_base = w.as_ptr() as usize;
281 let wx_base = wx.as_mut_ptr() as usize;
282 let wy_base = wy.as_mut_ptr() as usize;
283 let wxx_base = wxx.as_mut_ptr() as usize;
284 let wyy_base = wyy.as_mut_ptr() as usize;
285 let wxy_base = wxy.as_mut_ptr() as usize;
286 (1..nx - 1).into_par_iter().for_each(|i| {
287 let w_ptr = w_base as *const f64;
288 let wx_ptr = wx_base as *mut f64;
289 let wy_ptr = wy_base as *mut f64;
290 let wxx_ptr = wxx_base as *mut f64;
291 let wyy_ptr = wyy_base as *mut f64;
292 let wxy_ptr = wxy_base as *mut f64;
293 for j in 1..ny - 1 {
294 unsafe {
295 let center = idx(ny, i, j);
296 let wc = *w_ptr.add(center);
297 let wp = *w_ptr.add(idx(ny, i + 1, j));
298 let wm = *w_ptr.add(idx(ny, i - 1, j));
299 let wn = *w_ptr.add(idx(ny, i, j + 1));
300 let ws = *w_ptr.add(idx(ny, i, j - 1));
301 *wx_ptr.add(center) = (wp - wm) * inv_2dx;
302 *wy_ptr.add(center) = (wn - ws) * inv_2dx;
303 *wxx_ptr.add(center) = (wp - 2.0 * wc + wm) * inv_dx2;
304 *wyy_ptr.add(center) = (wn - 2.0 * wc + ws) * inv_dx2;
305 *wxy_ptr.add(center) = (*w_ptr.add(idx(ny, i + 1, j + 1))
306 - *w_ptr.add(idx(ny, i + 1, j - 1))
307 - *w_ptr.add(idx(ny, i - 1, j + 1))
308 + *w_ptr.add(idx(ny, i - 1, j - 1)))
309 * inv_4dx2;
310 }
311 }
312 });
313 return Ok(());
314 }
315 for i in 1..nx - 1 {
316 for j in 1..ny - 1 {
317 let center = idx(ny, i, j);
318 let wc = w[center];
319 let wp = w[idx(ny, i + 1, j)];
320 let wm = w[idx(ny, i - 1, j)];
321 let wn = w[idx(ny, i, j + 1)];
322 let ws = w[idx(ny, i, j - 1)];
323 wx[center] = (wp - wm) * inv_2dx;
324 wy[center] = (wn - ws) * inv_2dx;
325 wxx[center] = (wp - 2.0 * wc + wm) * inv_dx2;
326 wyy[center] = (wn - 2.0 * wc + ws) * inv_dx2;
327 wxy[center] =
328 (w[idx(ny, i + 1, j + 1)] - w[idx(ny, i + 1, j - 1)] - w[idx(ny, i - 1, j + 1)]
329 + w[idx(ny, i - 1, j - 1)])
330 * inv_4dx2;
331 }
332 }
333 Ok(())
334}
335
336#[allow(clippy::too_many_arguments)]
337pub fn uv_gradients(
338 u: &[f64],
339 v: &[f64],
340 nx: usize,
341 ny: usize,
342 dx: f64,
343 ux: &mut [f64],
344 uy: &mut [f64],
345 vx: &mut [f64],
346 vy: &mut [f64],
347) -> Result<(), StencilError> {
348 assert_len("u", u, nx, ny)?;
349 assert_len("v", v, nx, ny)?;
350 assert_len_mut("ux", ux, nx, ny)?;
351 assert_len_mut("uy", uy, nx, ny)?;
352 assert_len_mut("vx", vx, nx, ny)?;
353 assert_len_mut("vy", vy, nx, ny)?;
354 ux.fill(0.0);
355 uy.fill(0.0);
356 vx.fill(0.0);
357 vy.fill(0.0);
358 if nx < 3 || ny < 3 {
359 return Ok(());
360 }
361 let inv_2dx = 1.0 / (2.0 * dx);
362 #[cfg(feature = "rayon")]
363 if nx * ny >= RAYON_THRESHOLD {
364 use rayon::prelude::*;
365 let u_base = u.as_ptr() as usize;
366 let v_base = v.as_ptr() as usize;
367 let ux_base = ux.as_mut_ptr() as usize;
368 let uy_base = uy.as_mut_ptr() as usize;
369 let vx_base = vx.as_mut_ptr() as usize;
370 let vy_base = vy.as_mut_ptr() as usize;
371 (1..nx - 1).into_par_iter().for_each(|i| {
372 let u_ptr = u_base as *const f64;
373 let v_ptr = v_base as *const f64;
374 let ux_ptr = ux_base as *mut f64;
375 let uy_ptr = uy_base as *mut f64;
376 let vx_ptr = vx_base as *mut f64;
377 let vy_ptr = vy_base as *mut f64;
378 for j in 1..ny - 1 {
379 unsafe {
380 let center = idx(ny, i, j);
381 *ux_ptr.add(center) =
382 (*u_ptr.add(idx(ny, i + 1, j)) - *u_ptr.add(idx(ny, i - 1, j))) * inv_2dx;
383 *uy_ptr.add(center) =
384 (*u_ptr.add(idx(ny, i, j + 1)) - *u_ptr.add(idx(ny, i, j - 1))) * inv_2dx;
385 *vx_ptr.add(center) =
386 (*v_ptr.add(idx(ny, i + 1, j)) - *v_ptr.add(idx(ny, i - 1, j))) * inv_2dx;
387 *vy_ptr.add(center) =
388 (*v_ptr.add(idx(ny, i, j + 1)) - *v_ptr.add(idx(ny, i, j - 1))) * inv_2dx;
389 }
390 }
391 });
392 return Ok(());
393 }
394 for i in 1..nx - 1 {
395 for j in 1..ny - 1 {
396 let center = idx(ny, i, j);
397 ux[center] = (u[idx(ny, i + 1, j)] - u[idx(ny, i - 1, j)]) * inv_2dx;
398 uy[center] = (u[idx(ny, i, j + 1)] - u[idx(ny, i, j - 1)]) * inv_2dx;
399 vx[center] = (v[idx(ny, i + 1, j)] - v[idx(ny, i - 1, j)]) * inv_2dx;
400 vy[center] = (v[idx(ny, i, j + 1)] - v[idx(ny, i, j - 1)]) * inv_2dx;
401 }
402 }
403 Ok(())
404}
405
406#[allow(clippy::too_many_arguments)]
407pub fn biharmonic_pass1_fused(
408 w: &[f64],
409 d_grid: &[f64],
410 nx: usize,
411 ny: usize,
412 dx: f64,
413 d_lap: &mut [f64],
414 wx: &mut [f64],
415 wy: &mut [f64],
416 wxx: &mut [f64],
417 wyy: &mut [f64],
418 wxy: &mut [f64],
419) -> Result<(), StencilError> {
420 assert_len("w", w, nx, ny)?;
421 assert_len("d_grid", d_grid, nx, ny)?;
422 assert_len_mut("d_lap", d_lap, nx, ny)?;
423 assert_len_mut("wx", wx, nx, ny)?;
424 assert_len_mut("wy", wy, nx, ny)?;
425 assert_len_mut("wxx", wxx, nx, ny)?;
426 assert_len_mut("wyy", wyy, nx, ny)?;
427 assert_len_mut("wxy", wxy, nx, ny)?;
428 d_lap.fill(0.0);
429 wx.fill(0.0);
430 wy.fill(0.0);
431 wxx.fill(0.0);
432 wyy.fill(0.0);
433 wxy.fill(0.0);
434 if nx < 3 || ny < 3 {
435 return Ok(());
436 }
437 let inv_2dx = 1.0 / (2.0 * dx);
438 let inv_dx2 = 1.0 / (dx * dx);
439 let inv_4dx2 = 1.0 / (4.0 * dx * dx);
440 #[cfg(feature = "rayon")]
441 if nx * ny >= RAYON_THRESHOLD {
442 use rayon::prelude::*;
443 let w_base = w.as_ptr() as usize;
444 let d_base = d_grid.as_ptr() as usize;
445 let d_lap_base = d_lap.as_mut_ptr() as usize;
446 let wx_base = wx.as_mut_ptr() as usize;
447 let wy_base = wy.as_mut_ptr() as usize;
448 let wxx_base = wxx.as_mut_ptr() as usize;
449 let wyy_base = wyy.as_mut_ptr() as usize;
450 let wxy_base = wxy.as_mut_ptr() as usize;
451 (1..nx - 1).into_par_iter().for_each(|i| {
452 let w_ptr = w_base as *const f64;
453 let d_ptr = d_base as *const f64;
454 let d_lap_ptr = d_lap_base as *mut f64;
455 let wx_ptr = wx_base as *mut f64;
456 let wy_ptr = wy_base as *mut f64;
457 let wxx_ptr = wxx_base as *mut f64;
458 let wyy_ptr = wyy_base as *mut f64;
459 let wxy_ptr = wxy_base as *mut f64;
460 for j in 1..ny - 1 {
461 unsafe {
462 let center = idx(ny, i, j);
463 let wc = *w_ptr.add(center);
464 let wp = *w_ptr.add(idx(ny, i + 1, j));
465 let wm = *w_ptr.add(idx(ny, i - 1, j));
466 let wn = *w_ptr.add(idx(ny, i, j + 1));
467 let ws = *w_ptr.add(idx(ny, i, j - 1));
468 let xx = (wp - 2.0 * wc + wm) * inv_dx2;
469 let yy = (wn - 2.0 * wc + ws) * inv_dx2;
470 *wxx_ptr.add(center) = xx;
471 *wyy_ptr.add(center) = yy;
472 *wx_ptr.add(center) = (wp - wm) * inv_2dx;
473 *wy_ptr.add(center) = (wn - ws) * inv_2dx;
474 *wxy_ptr.add(center) = (*w_ptr.add(idx(ny, i + 1, j + 1))
475 - *w_ptr.add(idx(ny, i + 1, j - 1))
476 - *w_ptr.add(idx(ny, i - 1, j + 1))
477 + *w_ptr.add(idx(ny, i - 1, j - 1)))
478 * inv_4dx2;
479 *d_lap_ptr.add(center) = *d_ptr.add(center) * (xx + yy);
480 }
481 }
482 });
483 return Ok(());
484 }
485 for i in 1..nx - 1 {
486 for j in 1..ny - 1 {
487 let center = idx(ny, i, j);
488 let wc = w[center];
489 let wp = w[idx(ny, i + 1, j)];
490 let wm = w[idx(ny, i - 1, j)];
491 let wn = w[idx(ny, i, j + 1)];
492 let ws = w[idx(ny, i, j - 1)];
493 let xx = (wp - 2.0 * wc + wm) * inv_dx2;
494 let yy = (wn - 2.0 * wc + ws) * inv_dx2;
495 wxx[center] = xx;
496 wyy[center] = yy;
497 wx[center] = (wp - wm) * inv_2dx;
498 wy[center] = (wn - ws) * inv_2dx;
499 wxy[center] =
500 (w[idx(ny, i + 1, j + 1)] - w[idx(ny, i + 1, j - 1)] - w[idx(ny, i - 1, j + 1)]
501 + w[idx(ny, i - 1, j - 1)])
502 * inv_4dx2;
503 d_lap[center] = d_grid[center] * (xx + yy);
504 }
505 }
506 Ok(())
507}
508
509#[allow(clippy::too_many_arguments)]
510pub fn biharmonic(
511 w: &[f64],
512 d_grid: &[f64],
513 nx: usize,
514 ny: usize,
515 dx: f64,
516 lap: &mut [f64],
517 d_lap: &mut [f64],
518 bilap: &mut [f64],
519) -> Result<(), StencilError> {
520 assert_len("w", w, nx, ny)?;
521 assert_len("d_grid", d_grid, nx, ny)?;
522 assert_len_mut("lap", lap, nx, ny)?;
523 assert_len_mut("d_lap", d_lap, nx, ny)?;
524 assert_len_mut("bilap", bilap, nx, ny)?;
525 lap.fill(0.0);
526 d_lap.fill(0.0);
527 bilap.fill(0.0);
528 if nx < 3 || ny < 3 {
529 return Ok(());
530 }
531 let inv_dx2 = 1.0 / (dx * dx);
532 #[cfg(feature = "rayon")]
533 let use_par = nx * ny >= RAYON_THRESHOLD;
534 #[cfg(not(feature = "rayon"))]
535 let use_par = false;
536 if use_par {
537 #[cfg(feature = "rayon")]
538 {
539 use rayon::prelude::*;
540 let w_base = w.as_ptr() as usize;
541 let d_base = d_grid.as_ptr() as usize;
542 let lap_base = lap.as_mut_ptr() as usize;
543 let d_lap_base = d_lap.as_mut_ptr() as usize;
544 (1..nx - 1).into_par_iter().for_each(|i| {
545 let w_ptr = w_base as *const f64;
546 let d_ptr = d_base as *const f64;
547 let lap_ptr = lap_base as *mut f64;
548 let d_lap_ptr = d_lap_base as *mut f64;
549 for j in 1..ny - 1 {
550 unsafe {
551 let center = idx(ny, i, j);
552 let l = (*w_ptr.add(idx(ny, i + 1, j))
553 + *w_ptr.add(idx(ny, i - 1, j))
554 + *w_ptr.add(idx(ny, i, j + 1))
555 + *w_ptr.add(idx(ny, i, j - 1))
556 - 4.0 * *w_ptr.add(center))
557 * inv_dx2;
558 *lap_ptr.add(center) = l;
559 *d_lap_ptr.add(center) = *d_ptr.add(center) * l;
560 }
561 }
562 });
563 }
564 } else {
565 for i in 1..nx - 1 {
566 for j in 1..ny - 1 {
567 let center = idx(ny, i, j);
568 let l = (w[idx(ny, i + 1, j)]
569 + w[idx(ny, i - 1, j)]
570 + w[idx(ny, i, j + 1)]
571 + w[idx(ny, i, j - 1)]
572 - 4.0 * w[center])
573 * inv_dx2;
574 lap[center] = l;
575 d_lap[center] = d_grid[center] * l;
576 }
577 }
578 }
579
580 if use_par {
581 #[cfg(feature = "rayon")]
582 {
583 use rayon::prelude::*;
584 let d_lap_base = d_lap.as_ptr() as usize;
585 let bilap_base = bilap.as_mut_ptr() as usize;
586 (1..nx - 1).into_par_iter().for_each(|i| {
587 let d_lap_ptr = d_lap_base as *const f64;
588 let bilap_ptr = bilap_base as *mut f64;
589 for j in 1..ny - 1 {
590 unsafe {
591 let center = idx(ny, i, j);
592 *bilap_ptr.add(center) = (*d_lap_ptr.add(idx(ny, i + 1, j))
593 + *d_lap_ptr.add(idx(ny, i - 1, j))
594 + *d_lap_ptr.add(idx(ny, i, j + 1))
595 + *d_lap_ptr.add(idx(ny, i, j - 1))
596 - 4.0 * *d_lap_ptr.add(center))
597 * inv_dx2;
598 }
599 }
600 });
601 }
602 } else {
603 for i in 1..nx - 1 {
604 for j in 1..ny - 1 {
605 let center = idx(ny, i, j);
606 bilap[center] = (d_lap[idx(ny, i + 1, j)]
607 + d_lap[idx(ny, i - 1, j)]
608 + d_lap[idx(ny, i, j + 1)]
609 + d_lap[idx(ny, i, j - 1)]
610 - 4.0 * d_lap[center])
611 * inv_dx2;
612 }
613 }
614 }
615 Ok(())
616}
617
618#[inline]
619pub fn bilaplacian_uniform(
620 w: &[f64],
621 nx: usize,
622 ny: usize,
623 d: f64,
624 dx: f64,
625 bilap: &mut [f64],
626) -> Result<(), StencilError> {
627 assert_len("w", w, nx, ny)?;
628 assert_len_mut("bilap", bilap, nx, ny)?;
629 bilap.fill(0.0);
630 if nx < 5 || ny < 5 {
631 return Ok(());
632 }
633 let coeff = d / (dx * dx * dx * dx);
634 #[cfg(feature = "rayon")]
635 if nx * ny >= RAYON_THRESHOLD {
636 use rayon::prelude::*;
637 let w_base = w.as_ptr() as usize;
638 let bilap_base = bilap.as_mut_ptr() as usize;
639 (2..nx - 2).into_par_iter().for_each(|i| {
640 let w_ptr = w_base as *const f64;
641 let bilap_ptr = bilap_base as *mut f64;
642 for j in 2..ny - 2 {
643 unsafe {
644 let center = idx(ny, i, j);
645 let val = *w_ptr.add(idx(ny, i + 2, j))
646 + *w_ptr.add(idx(ny, i - 2, j))
647 + *w_ptr.add(idx(ny, i, j + 2))
648 + *w_ptr.add(idx(ny, i, j - 2))
649 + 2.0
650 * (*w_ptr.add(idx(ny, i + 1, j + 1))
651 + *w_ptr.add(idx(ny, i + 1, j - 1))
652 + *w_ptr.add(idx(ny, i - 1, j + 1))
653 + *w_ptr.add(idx(ny, i - 1, j - 1)))
654 - 8.0
655 * (*w_ptr.add(idx(ny, i + 1, j))
656 + *w_ptr.add(idx(ny, i - 1, j))
657 + *w_ptr.add(idx(ny, i, j + 1))
658 + *w_ptr.add(idx(ny, i, j - 1)))
659 + 20.0 * *w_ptr.add(center);
660 *bilap_ptr.add(center) = val * coeff;
661 }
662 }
663 });
664 return Ok(());
665 }
666 for i in 2..nx - 2 {
667 for j in 2..ny - 2 {
668 let center = idx(ny, i, j);
669 let val = w[idx(ny, i + 2, j)]
670 + w[idx(ny, i - 2, j)]
671 + w[idx(ny, i, j + 2)]
672 + w[idx(ny, i, j - 2)]
673 + 2.0
674 * (w[idx(ny, i + 1, j + 1)]
675 + w[idx(ny, i + 1, j - 1)]
676 + w[idx(ny, i - 1, j + 1)]
677 + w[idx(ny, i - 1, j - 1)])
678 - 8.0
679 * (w[idx(ny, i + 1, j)]
680 + w[idx(ny, i - 1, j)]
681 + w[idx(ny, i, j + 1)]
682 + w[idx(ny, i, j - 1)])
683 + 20.0 * w[center];
684 bilap[center] = val * coeff;
685 }
686 }
687 Ok(())
688}
689
690#[allow(clippy::too_many_arguments)]
691pub fn bilaplacian_ortho_uniform(
692 w: &[f64],
693 nx: usize,
694 ny: usize,
695 d_x: f64,
696 d_y: f64,
697 h_p: f64,
698 dx: f64,
699 bilap: &mut [f64],
700 cells: &[(usize, usize)],
701) -> Result<(), StencilError> {
702 assert_len("w", w, nx, ny)?;
703 assert_len_mut("bilap", bilap, nx, ny)?;
704 let inv_dx4 = 1.0 / (dx * dx * dx * dx);
705 let c_pm2_i = d_x * inv_dx4;
706 let c_pm2_j = d_y * inv_dx4;
707 let c_diag = 2.0 * h_p * inv_dx4;
708 let c_pm1_i = (-4.0 * d_x - 4.0 * h_p) * inv_dx4;
709 let c_pm1_j = (-4.0 * d_y - 4.0 * h_p) * inv_dx4;
710 let c_center = (6.0 * d_x + 8.0 * h_p + 6.0 * d_y) * inv_dx4;
711 for &(i, j) in cells {
712 let center = idx(ny, i, j);
713 bilap[center] = c_pm2_i * (w[idx(ny, i + 2, j)] + w[idx(ny, i - 2, j)])
714 + c_pm2_j * (w[idx(ny, i, j + 2)] + w[idx(ny, i, j - 2)])
715 + c_diag
716 * (w[idx(ny, i + 1, j + 1)]
717 + w[idx(ny, i + 1, j - 1)]
718 + w[idx(ny, i - 1, j + 1)]
719 + w[idx(ny, i - 1, j - 1)])
720 + c_pm1_i * (w[idx(ny, i + 1, j)] + w[idx(ny, i - 1, j)])
721 + c_pm1_j * (w[idx(ny, i, j + 1)] + w[idx(ny, i, j - 1)])
722 + c_center * w[center];
723 }
724 Ok(())
725}